#!/usr/bin/perl
#
# Rotate the logs on a continuous experiment and pull them.
#
# usage: PullLogs.pl [options] <exp_file>
#

use strict;
use lib "$ENV{HOME}/Colyseus/run";
use Travertine;
use Getopt::Std;
use IO::File;
use vars qw($opt_r $opt_d $opt_p $opt_b $opt_l $opt_t $opt_O $opt_R $opt_H);

getopts("r:d:p:b:l:t:O:H:R");

our $MERCHOME = "$ENV{HOME}/Merc";

# remote logdir
our $LOGDIR  = $opt_r || "/tmp";
# local logdir
our $PULLDIR = $opt_d || "/tmp";
our $PORT   = $opt_b || 30000;
our $PASSWD = $opt_p;
our $LOGIN_REX = defined($opt_l) ? $opt_l : undef;
our $TITLE_REX = defined($opt_t) ? $opt_t : undef;
our $OLD = $opt_O;
our $SKIPROTATE = $opt_R;
our @HOSTS = defined $opt_H ? split(/,/, $opt_H) : ();

if (!$ARGV[0] && !@HOSTS) {
    print STDERR "usage: PullLogs.pl [options] <exp_file>\n\n";
    print STDERR "       -l rex   only hosts with login matching regexp\n";
    print STDERR "       -t rex   only hosts with title matching regexp\n";
    print STDERR "       -p pass  terminal password if required\n";
    print STDERR "       -b port  base port which vservers start from\n";
    print STDERR "       -r dir   remote log dir\n";
    print STDERR "       -d dir   local pull dir\n";
    print STDERR "       -R       skip rotate (just collect from previous failed attempts)\n";
    print STDERR "       -O time  organize an old partially failed pull\n";
    print STDERR "       -H list  pull from this list of user\@host pairs\n";
    exit 1;
}

my $EXP;

our %seen;
our @logins;

if (!@HOSTS) {
    $EXP = $ARGV[0];
    our $INPUT = new IO::File("<$EXP");
    tdie "can't open $EXP: $!" if !$INPUT;

    while (<$INPUT>) {
	chomp $_;
	my ($login, $title, $ser) = ($_ =~ /^([^\t]+)\t'([^\t]+)'\t([^\t]+)$/);
	tdie "bad line: $_" if !defined $login or !defined $title or !$ser;
	
	next if ($LOGIN_REX && $login !~ $LOGIN_REX);
	next if ($TITLE_REX && $title !~ $TITLE_REX);
	
	my ($user, $host, $iface) = ($login =~ /(.*)\@(.*):(.*)/);
	
	next if $title =~ /bootstrap/;
	
	if ($user && $host) {
	    if ($seen{$host}) {
		tdie "can't handle multiple vservers on a host!";
	    }
	    push @logins, [$user, $host];
	    $seen{$host} = 1;
	} else {
	    twarn "bad login: $login";
	}
    }
} else {
    @logins = map { [split(/\@/, $_)] } @HOSTS;
    open(T, ">/tmp/tmp.pulllogs.exp") or tdie "can't create /tmp/tmp.pulllogs.exp";
    foreach my $l (@logins) {
	tdie "bad login in @HOSTS" if @$l != 2;
	print T "$l->[0]\@$l->[1]:$l->[1]\t'$l->[1]:0'\t1\n";
    }
    close(T);
    $EXP = "/tmp/tmp.pulllogs.exp";
}

if (@logins == 0) {
    tdie "no matching logins/titles";
}

my $pullid = time();

if ($OLD) {
    $pullid = $OLD;
}

my $dest = "$PULLDIR/tmp/pull.$pullid";

if ($OLD) {
    goto organize;
}
if ($SKIPROTATE) {
    goto pull;
}

rotate:
# first rotate the logs
tinfo "** rotating logs...";

my $cmd = "$MERCHOME/../Colyseus/run/TermExec.pl -o -b $PORT " . 
    ($LOGIN_REX?" -l $LOGIN_REX ":"") .
    ($TITLE_REX?" -t $TITLE_REX ":"") .
    ($PASSWD?" -p $PASSWD ":"") . " $EXP ROTATE";
my $out = `$cmd`;
my @lines = split(/\n/, $out);

my %resps;

my $curr = undef;
foreach my $line (@lines) {
    if ($line =~ /^[\w\d_-]+\@([\w\d_\-\.]+):([\w\d_\-\.]+)/) {
	my ($host, $iface) = ($1, $2);
	$curr = $host;
    }
    if ($line =~ /^(OK:|ERROR:|NO RESPONSE)/) {
	tdie "bad format: $out" if !$curr;
	$resps{$curr} .= $line;
    }
}

sub isok($)
{
    tdie "no response from $_[0]?!" if !$resps{$_[0]};
    return $resps{$_[0]} =~ /^OK:/;
}

pull:
psystem("mkdir -p $dest") and tdie "can't create $dest";

tinfo "** pulling logs...";
ParallelExec3(10, sub {
    my ($user, $host) = @_;
    my $ret;

    if (!$SKIPROTATE) {
    # first gzip and move logs
    $ret = rsystem($user, $host, sub {
	my $dir = shift;
	my $id  = shift;
	my $manual_rotate = shift;

	# nodes appear to be dead, 
	# so need to manually rotate them
	if ($manual_rotate) {
	    tinfo "manually rotating";
	    foreach my $l (glob("$dir/*.{log,out}")) {
		psystem("mv $l $l.$id-manual");
	    }
	}

	my @logs = glob("$dir/*.{log,out}.[0-9]*");
	my $ret;

	if (!$manual_rotate && !@logs) {
	    twarn "no logs?";
	}

	tinfo ((scalar @logs) . " logs");

	foreach my $l (@logs) {
	    next if $l =~ /\.gz$/;
	    $ret = psystem("gzip -f $l");
	    if ($ret) {
		twarn "could not gzip $l";
		psystem("rm -f $l.gz");
	    }
	}
	$ret = psystem("mkdir -p $dir/pull.$id");
	if ($ret) {
	    twarn "could not mkdir $dir/pull.$id";
	} else {
	    if (@logs > 0) {
		$ret = psystem("mv -f $dir/*.gz $dir/pull.$id");
		twarn "could not move gziped logs to $dir/pull.$id" if $ret;
	    }
	    
	    # also get the most recent output log
	    foreach my $o (glob("$dir/*.out*")) {
		my $f = $o; $f =~ s|^.*/||g;
		# remove most recent out from old pulls, so we don't get dups
		psystem("rm -f $dir/pull.*/$f");
		my $r2 = psystem("ln -s $o $dir/pull.$id/$f");
		twarn "could not link $o to $dir/pull.$id/$f" if $r2;
		$ret = $ret || $r2;
	    }
	}

	return scalar(glob("$dir/pull.*/*"));
    }, $LOGDIR, $pullid, !isok($host));
    }

    # now rsync them -- must do it in this direction on planetlab
    # since nodes don't forward ssh-agent!
    #
    # try to collect logs from old pull.* dirs also for failed pulls
    if (1) { # xxx: ret doesn't seem to be working?
	$ret = psystem("rsync -e ssh -L -azb $user\@$host:'$LOGDIR/pull.*/*' $dest/");

	if ($ret) {
	    twarn "rsync from $host failed! NOT removing logs!";
	} else {
	    #tinfo "succeeded for $host, removing logs";
	    rsystem($user, $host, sub {
		my $dir = shift;
		
		psystem("rm -rf $dir/pull.*");
	    }, $LOGDIR);
	}
    }
}, @logins);

organize:
tinfo "** organizing logs...";

my @all = glob("$PULLDIR/tmp/pull.*/*");
my @logs = glob("$dest/*");

tinfo ((scalar @logs) . " logs collected (" . (scalar @all) . " total)");

my %names;

foreach my $l (@all) {
    my $f = $l; $f =~ s|^.*/||g;
    $f =~ /^([^\.]+)/ or twarn "bad logname: $l";
    $names{$1} = 1;
}

foreach my $n (keys %names) {
    psystem("mkdir -p $PULLDIR/$n") and twarn "could not create $PULLDIR/$n";
}

my $err = 0;
foreach my $l (@all) {
    my $f = $l; $f =~ s|^.*/||g;
    $f =~ /^([^\.]+)/;
    my $name = $1;
    my $ret;
    # most recent output log keeps same name; gets overwritten
    # xxx should not if output log was from an old pull
    if ($l !~ /\.out$/ && -f "$PULLDIR/$name/$l") {
	twarn "$PULLDIR/$name/$l already exists!";
	$ret = 1;
    } else {
	$ret = psystem("mv -f $l $PULLDIR/$name/");
    }
    if ($ret) {
	twarn "could not move $l to $PULLDIR/$name/";
    }
    $err = $err || $ret;
}

if (!$err) {
    tinfo "** removing temp pull dir(s)";
    psystem("rmdir $PULLDIR/tmp/pull.*");
}

tinfo "** done!";
