#!/usr/bin/perl
#
# First start the emulab experiment from emulab's web interface.
# Then run this script from Merc/run/
#
# This script assumes the default names are used for nodes
# (e.g., node0, node1, ...)

BEGIN {
    # Assigned inside BEGIN so both values are already in %ENV while the
    # `use` lines further down are being compiled (presumably Travertine
    # looks at them when it loads -- confirm).
    # CMU machines still speak ssh v1, so both protocol versions are allowed.
    @ENV{qw(TRAVERTINE_SSHVERSION TRAVERTINE_PRINTCMDS)} = ("1,2", 1);
}

use strict;
use Getopt::Std;
use File::Temp qw( :POSIX );
use vars qw($opt_v $opt_x $opt_g $opt_e $opt_o $opt_t $opt_l $opt_T $opt_I $opt_N $opt_c $opt_P $opt_E $opt_C $opt_d $opt_k $opt_s $opt_i $opt_B $opt_w);
use lib "./emulab";
use Travertine;
use EmulabConf;
require "./emulab/common.pl";

# Value handed to RunV2.pl through its -P switch.
our $MAX_PARALLEL = 10;
# xxx: make this a config option and define exactly what needs to be in here!
our $UTIL_DIR = "/proj/DNA/jeffpang";

# Letters followed by ':' take a value; the others are plain flags.
getopts("v:t:e:T:o:xgI:cPE:Cdklsi:Bw");

# Testbed domain: Utah's emulab.net unless -e mentions schooner.
our $EMULAB = (defined $opt_e && $opt_e =~ /schooner/)
    ? "schooner.wail.wisc.edu"
    : "emulab.net";

our $VSERVERS = $opt_v ? $opt_v : 1;
our $IMAGE    = $opt_i ? $opt_i : "LINUX-2431-HZ1024";

## where should this be, really?? we need it here for "sleeping"
## for the appropriate amount of time
our $TIMELIM  = $opt_t ? $opt_t : 30 * 60;     # seconds
our $TIMELIMM = 1000 * $TIMELIM;               # same limit in msec

# Longest time to wait for everybody to join: 10 mins unless -T is given.
our $WAITTIME = 600;
$WAITTIME = $opt_T if defined $opt_T;

# Raw comma-separated -I list; empty when the switch is absent.
our $IGNORES = "";
$IGNORES = $opt_I if defined $opt_I;

# Caching stays enabled unless -c is present.
our $DISABLE_CACHING = 0;
$DISABLE_CACHING = 1 if defined $opt_c;

our $PAUSE_FOREVER        = $opt_P;
our $DELETE_OLD_LOGS      = !defined($opt_d);  # -d keeps the old logs
our $CHECK_ONLY           = defined($opt_C);
our $KILL                 = defined($opt_k);
our $GET_LOGS             = defined($opt_l);
our $SKIP_BOOTSTRAP_CHECK = defined($opt_B);

# Where the logs are pushed to: -o, or the configured push host and dir.
our $SAVEDIR = $opt_o ? $opt_o : join(":", $PUSHLOGHOST, $PUSHLOGDIR);

# At least <app_config> <exp_name> <num_nodes> must follow the switches;
# otherwise print the option summary on STDERR and exit with status 1.
if (@ARGV < 3) {
    my @usage = (
        "usage: EmulabRun.pl [options] <app_config> <exp_name> <num_nodes> [app_options]",
        "",
        "       ([app_options] are transparently passed to RunV2.pl)",
        "       ",
        "       NOTE!!!! -t <time> and the timelimit for the actual app",
        "                have to be correlated!!!!!!!",
        "       -x       invoke xterms",
        "       -g       invoke gterm",
        "       -e       emulab (utah,schooner)",
        "       -i name  kernel image name (for loading modules)",
        "       -P       pause forever after execution",
        "       -t       time to run for (seconds)",
        "       -T       time to wait for everybody to join",
        "       -v num   number of virtual servers per node",
        # The savedir check further down only accepts user@host:dir, so the
        # user part is documented as mandatory rather than optional.
        "       -o login user\@host:/dir to push logs to (user\@ is required)",
        "       -I list  comma-separated list of to-be ignored nodes",
        "       -c       DISABLE caching (default enabled)",
        "       -E       mode {gdb, valgrind, valgrindmem}",
        "       -d       don't delete any old logs on remote mach before rsync",
        "       -C       only check the status of the experiments",
        "       -s       run the program with sudo",
        "       -k       kill experiment",
        "       -l       retrieve logs",
        "       -B       skip bootstrap check",
    );
    print STDERR map { "$_\n" } @usage;
    exit 1;
}

# Positional arguments.
our $app_config = shift @ARGV;
our $exp_name  = shift @ARGV;
our $num_nodes = shift @ARGV;

# Everything left over is forwarded to RunV2.pl inside a command string, so
# each word is wrapped in single quotes.  A single quote inside a word is
# written as '\'' so it cannot terminate the quoting early.
our $app_args = join " ", map {
    my $quoted = $_;
    $quoted =~ s/'/'\\''/g;
    "'$quoted'";
} @ARGV;

die "missing <app_config>, <exp_name> or <num_nodes>\n"
    if (!defined $app_config or !defined $exp_name or !defined $num_nodes);

# The log destination has to look like user@host:dir; all three parts are
# needed for the remote calls made later.
my ($login, $host, $dir) = ($SAVEDIR =~ /^(\w+)\@([^:]+):(.*)$/);
if (!$login || !$host || !$dir) {
    tdie "invalid savedir: $SAVEDIR";
}

# Set of node names to leave out (from -I).  Filled in *before* the -C
# shortcut below because CheckStatus subtracts its size from the number of
# logs it expects; jumping first would leave the set empty.
my %IGNORES;
$IGNORES{$_} = 1 for split(/,/, $IGNORES);

if ($CHECK_ONLY) {
    goto checkstatus;
}

our %NODE_MAP = GetExperimentNodeMapping ($EMULAB, $exp_name);

# Switches shared by every RunV2.pl invocation in this script.
# (Alternative waspnet location, kept for reference:
#    " --waspnet-dir $UTIL_DIR/waspnet/$IMAGE " )
our $args = " -t $TOPDIR -l $LOGDIR -L -U -v $VSERVERS -P $MAX_PARALLEL " .
    " --waspnet-dir $TOPDIR/Merc/run/emulab/util/$IMAGE " .
    " --waitjoin " . # need this for the chkjoin thingy
    " $app_args";
$args .= " -c "         if $DISABLE_CACHING;
$args .= " -x "         if $opt_x;
$args .= " -g "         if $opt_g;
$args .= " -E $opt_E "  if $opt_E;
$args .= " -s "         if $opt_s;

# @logins:       "user@host:nodeN" strings, passed to RunV2.pl.
# @logins_plain: [user, host] pairs, passed to ParallelExec2.
our @logins;
our @logins_plain;

for (my $i=0; $i<$num_nodes; $i++) {
    next if $IGNORES{"node$i"};

    my $hname = $NODE_MAP{"node$i"} . ".$EMULAB";
    push @logins, "$USERNAME\@$hname:node$i";
    push @logins_plain, ["$USERNAME", "$hname"];
}

###############################################################################

if ($KILL) {
    # -k: tear the experiment down, then either fetch the logs (-l) or stop.
    tinfo "* killing experiment";
    psystem ("./RunV2.pl $app_config $args -k " . join (" ", @logins));

    tinfo "* stopping auxiliary functions on emulab";
    ParallelExec2(sub {
        my ($user, $node) = @_;
        rsystem($user, $node, sub {
            psystem("sudo killall time-resync.sh >/dev/null 2>&1");
        });
    }, @logins_plain);

    exit 0 unless $GET_LOGS;
    goto getlogs;
}
else {
    # Normal run: make sure the log destination directory exists.
    rsystem($login, $host, sub {
        my ($remote_dir) = @_;
        psystem ("mkdir -p $remote_dir");
    }, $dir);
}

###############################################################################

# RunV2.pl's standard output is redirected into this file.
my $exp = "/tmp/emulab.exp";

## Do preconfiguration
tinfo "* starting auxiliary functions on emulab";
ParallelExec2(sub {
    my ($user, $node) = @_;
    rsystem($user, $node, sub {
        my ($util_dir) = @_;
        psystem("sudo $util_dir/kern_recv_bump.sh 15000000");
        # stop any resync script left running before starting a new one
        psystem("sudo killall time-resync.sh >/dev/null 2>&1");
        psystem("sudo $util_dir/time-resync.sh >/dev/null 2>&1 &");
    }, $UTIL_DIR);
}, @logins_plain);

## Start the experiment
tinfo "* starting experiment";
psystem(join(" ", "./RunV2.pl $app_config $args", @logins) . " > $exp");

tinfo "* experiment file is: $exp";

if (!$SKIP_BOOTSTRAP_CHECK) {

## Wait for all nodes to start
## schooner blocks all ports except ssh from outside. GAAAAAAAAAAAAH.
## so, we tunnel this through ssh...

#    psystem("$MERCDIR/build/chkjoin --bsaddr $NODE_MAP{node0}.$EMULAB:15000 --maxtime $WAITTIME");
#    my $ev = $? >> 8;

    my $joined  = 0;    # becomes 1 once chkjoin2 reports "all joined"
    my $giveups = 0;    # number of "giving up" answers seen so far
    my $period  = 10;   # seconds slept between two polls
    my $waited  = 0;    # seconds slept so far (ssh time is not counted)

    while ($waited < $WAITTIME) {
        my $out = `ssh -n $USERNAME\@$NODE_MAP{node0}.$EMULAB LD_LIBRARY_PATH=$TOPDIR/Merc/ $TOPDIR/Merc/build/chkjoin2 --bsaddr node0:15000 2>/dev/null`;
        chomp $out;

        if ($out =~ /all joined/) {
            $joined = 1;
            last;
        }

        if ($out =~ /giving up/) {
            twarn "looks like bootstrap is dead or stuck";
            # stop polling at the ninth "giving up"
            last if ++$giveups >= 9;
        }
        else {
            tinfo "waiting for joining";
        }

        sleep ($period);
        $waited += $period;
    }

    unless ($joined) {
        twarn "ERROR in joining -- all nodes do not seem to have joined!";
        goto cleanup;
    }
    tinfo "all nodes joined SUCCESSFULLY! starting timer...";

}

# Minutes to sleep while the experiment runs: the time limit, plus one
# minute per 60 virtual servers, plus two minutes of slack because the apps
# idle for some time at the end.
my $min = int(($TIMELIM) / 60) + int (($num_nodes * $VSERVERS) / 60) + 2;

tinfo "* should complete in ~$min min...";

for my $minute (1 .. $min) {
    sleep (60);
    tinfo " ** $minute mins elapsed...";
}

cleanup:

## Stop the experiment
# With -P we just sleep instead of killing; otherwise make sure the remote
# nodes are dead.
if (!$PAUSE_FOREVER) {
    tinfo "* killing experiment";
    psystem(join(" ", "./RunV2.pl $app_config $args -k", @logins));
}
else {
    sleep(1000000000);
}
unlink($exp);

## Shutdown auxiliary functions on emulab
tinfo "* stopping auxiliary functions on emulab";
ParallelExec2(sub {
    my ($user, $node) = @_;
    rsystem($user, $node, sub {
        psystem("sudo killall time-resync.sh >/dev/null 2>&1");
    });
}, @logins_plain);

## Checking time synchronization
# Switched off by the constant-false condition; left in place for reference.
if (0) {
    tinfo "* checking time synchronization";

    # local scratch file for the report, pushed to the save dir and removed
    my $report = tmpnam ();
    psystem("./emulab/EmulabTimeTest.pl -e $EMULAB -E $exp_name -n $num_nodes > $report");
    psystem("rsync -azb -e ssh -v $report $SAVEDIR/TimeSyncInfo.out 1>&2");

    unlink($report);
}

getlogs:
## Unless -d was given, empty the destination directory first.
if ($DELETE_OLD_LOGS) {
    tinfo " deleting old logs on the $login\@$host ";
    rsystem($login, $host, sub {
        my ($remote_dir) = @_;
        unlink glob("$remote_dir/*");
    }, $dir);
}
tinfo "* collecting log files to $SAVEDIR";

# don't do more than this many rsyncs at once
my $MAX_PARALLEL_RSYNCS = 7;

# Work through a copy of the login list in batches of that size.
my @pending = @logins;
while (my @batch = splice(@pending, 0, $MAX_PARALLEL_RSYNCS)) {

    ParallelExec2(sub {
        # entries look like "user@host:nodeN"; drop the ":nodeN" suffix
        my $pref = shift;
        $pref =~ s/:.*$//;
        my ($user, $node_host) = split(/@/, $pref);

        rsystem($user, $node_host, sub {
            my ($pref, $LOGDIR, $SAVEDIR) = @_;
            tinfo "zipping all logs";
            foreach my $log (glob ("$LOGDIR/*.log")) {
                # (an older variant deleted DiscoveryLatLog files here
                #  instead of compressing them)
                # List form: gzip gets the file name as a single argument
                # with no shell in between, so spaces or shell
                # metacharacters in a name cannot break the command.
                system ("gzip", $log);
            }

            my $stat = psystem("rsync -v -e ssh -azb $LOGDIR/* $SAVEDIR 1>&2");
            if ($stat) {
                twarn "rsync of $pref:$LOGDIR failed! not deleting logs!";
            } else {
                # pushed successfully, so the node-side copies can go
                psystem("rm -f $LOGDIR/*");
            }
        }, $pref, $LOGDIR, $SAVEDIR);

    }, @batch);

    sleep (2);     # too many ssh connections require some "rest time" for the machine :)
}

tinfo "* waiting 30 secs for rsync's to settle...";
sleep(30);

checkstatus:
## Checking exp status
tinfo "* checking exp status...";

# CheckStatus produces [status, log_count]: status 0 means every log
# finished, -1 means logs are missing, anything else is the number of
# servers that did not finish.
my $rv = Travertine::rsystem2 ($login, $host, \&CheckStatus, $dir);
if (ref($rv) ne 'ARRAY') {
    # Without this guard an undefined result compares equal to 0 and would
    # be reported as "ok".
    # NOTE(review): what rsystem2 returns on failure is not visible here.
    twarn "error: no status came back from $login\@$host";
}
elsif ($rv->[0] == 0) {
    tinfo "ok ($rv->[1] logs processed)";
}
elsif ($rv->[0] == -1) {
    tinfo "error: missing logs (only $rv->[1] logs present)\n";
}
else {
    tinfo "error: $rv->[0] (of $rv->[1]) servers did not finish";
}

tinfo "* zipping logs...";
rsystem ($login, $host, \&Ziplogs, $dir);

# Compress every *.log file directly inside a directory with gzip.
#
# Arguments:
#   $dir - directory to scan (sub-directories are not entered).
#
# gzip's exit status is not inspected; nothing meaningful is returned.
# The former empty prototype "()" is gone: it declared "no arguments" for a
# sub that takes one.
sub Ziplogs {
    my ($dir) = @_;

    foreach my $log (glob ("$dir/*.log")) {
        # List form: the file name reaches gzip as one argument without a
        # shell in between, so spaces or metacharacters in it are harmless.
        system ("gzip", $log);
    }
    return;
}
    
###############################################################################
# Decide whether every server of the experiment ran to completion, judging
# by the OutputLog.* files found in a directory.
#
# Arguments:
#   $dir - directory holding the collected OutputLog.* files.
#
# Reads the file-level $num_nodes, %IGNORES and $VSERVERS to work out how
# many logs there should be.
#
# Returns an array reference:
#   [-1, $found]       fewer logs than expected; $found is the number seen
#   [$failed, $total]  otherwise: $failed logs (out of $total examined)
#                      contain no 'hit timelimit' line
sub CheckStatus {
    my ($dir) = @_;

    # Logs with "bootstrap" in their name are left out of the count.
    my @logs = grep { $_ !~ /bootstrap/ } glob("$dir/OutputLog.*");

    my $expected = ($num_nodes - scalar(keys %IGNORES)) * $VSERVERS;
    if (scalar(@logs) < $expected) {
        tinfo "expected=", $expected;
        tinfo " found=", scalar(@logs);
        return [-1, scalar @logs];
    }

    my $failed = 0;
    my $index  = 0;
    foreach my $log (@logs) {
        $index++;
        if ($index % 20 == 0) {
            tinfo " ## $index logs processed ";
        }

        # Run pcregrep through a list-form pipe: the log name is passed as
        # one argument and never parsed by a shell.  If pcregrep cannot be
        # started the output stays empty and the log counts as failed, just
        # as an empty backtick result did.
        my $out = '';
        if (open(my $grep, '-|', 'pcregrep', 'hit timelimit', $log)) {
            local $/;                       # slurp everything at once
            my $text = <$grep>;
            $out = $text if defined $text;
            close($grep);
        }
        chomp $out;

        $failed++ if $out eq '';
    }

    return [$failed, $index];
}

###############################################################################
