#!/usr/bin/perl
#
# This script executes a Colyseus application on a remote machine.
#
# The first host will be used as the master host and as the bootstrap host.
#

use strict;
use Net::hostent;
use Socket;
use IO::File;
use POSIX qw(strftime);
use Getopt::Std;
use Travertine;
use Options;
use vars qw($opt_B $opt_o $opt_x $opt_g $opt_k $opt_t $opt_l $opt_L $opt_U
	    $opt_v $opt_P $opt_E $opt_V $opt_G $opt_A $opt_a $opt_c $opt_N
	    $opt_T $opt_J $opt_O $opt_s $opt_h $opt_I $opt_R $opt_Z $opt_Q 
	    $opt_y $opt_M $opt_outlogrot $DISABLE_PUBTRIGGERS 
	    $USE_WASPNET $WASPNET_DIR $WASPNET_IN $WASPNET_OUT $WASPNET_LAT
	    $TOPO2WASPNET 
	    $opt_bsaddr $opt_waitjoin $opt_noclean $opt_nobootstrap $opt_nomaster
	    $NOHACKYSLEEP $MOREHACKYSLEEP $BOOTSTRAP_IDENTMAP $SPBOOTSTRAP
	    $opt_logbufsize $opt_logrotsize $opt_logzip $opt_logflushint
	    $opt_logenableonly $opt_logsampleparams
	    $opt_mercports $opt_joinlocations
	    );

# Directory containing this script; used to locate sibling resources such as
# ../topologies.  Computed in-process (no `dirname` subshell) so that script
# paths containing spaces or shell metacharacters are handled correctly.
use File::Basename ();
our $BASEDIR = File::Basename::dirname($0);

###############################################################################

# Default Mercury ports; StartProgram/StartMultiple pick the entry at the
# virtual-server index.  Can be replaced wholesale with --mercports.
our @MERC_PORTS   = (20000 .. 20039);
# SIDPort         = MERC_PORT + 5000
# TerminalPort    = MERC_PORT + 10000

# Base port for load-info broadcast; virtual server i uses base + i.
# NOTE(review): 80000 is above the 16-bit port limit (65535) -- confirm how
# the application interprets this value before relying on --bcastload.
our $BASE_BROADCAST_LOAD_PORT = 80000;

###############################################################################

# Option table handed to ProcessOptions()/PrintUsage() further down.
# Entries appear to be Options::Boolean(short, long, help, \$var) and
# Options::String(short, long, help, \$var, default); several use "#" as
# the short name (presumably "no single-letter form" -- confirm in Options.pm).
# The application config may append its own entries via @APP_OPT_TABLE.
#
# We don't use default values here most of the time, since I don't
# want to disturb the getopt-assuming option processing code down below.

our @optable = (
### GENERAL STARTUP OPTIONS
	Options::Boolean("Z", "multiple", "start vinstances in one tunnel (can't attach)", \$opt_Z),
	Options::Boolean("B", "bg", "run as bg procs instead of screen (can't attach)", \$opt_B),
        Options::Boolean("#", "nosleep", "no hacky sleeps on startup", \$NOHACKYSLEEP),
	Options::Boolean("#", "moresleep", "more hacky sleeps on startup", \$MOREHACKYSLEEP),
	Options::Boolean("x", "xterm", "invoke xterm (one per daemon)", \$opt_x),
	Options::Boolean("g", "gterm", "invoke gterm (one tab per daemon)", \$opt_g),
	Options::Boolean("k", "kill", "just kill all relevant processes on servers", \$opt_k),

	Options::String("o", "output", "[file] output experiment info to (default stdout)", \$opt_o, undef),
	Options::String("t", "topdir", "[dir] remote pubsub top directory", \$opt_t, undef),
        Options::String("v", "vservers", "[num] number of virtual servers per machine", \$opt_v, undef),
        Options::String("#", "bootstrap", "[host:port] existing bootstrap node", \$opt_bsaddr, undef),
        Options::Boolean("#", "waitjoin", "wait for everyone to join before starting", \$opt_waitjoin),
        Options::Boolean("#", "nobootstrap", "don't start new bootstrap", \$opt_nobootstrap),
	Options::String("#", "joinlocations", "use these join locations (round robin) for bootstrapping merc nodes", \$opt_joinlocations, undef),
        Options::Boolean("#", "nomaster", "don't start new master node", \$opt_nomaster),
        Options::Boolean("#", "noclean", "don't kill and cleanup logs on hosts", \$opt_noclean),
        Options::Boolean("U", "permsudo", "run dir creation/perm change with sudo", \$opt_U),
        Options::Boolean("s", "runsudo", "run the program with sudo", \$opt_s),
	Options::String("P", "maxparallel", "[num] max parallel ssh connections on startup", \$opt_P, undef),
### LOGGING OPTIONS
        Options::String("#", "outlogrot", "[int] rotate output log every x lines (-B, not -Z)", \$opt_outlogrot, undef),
        Options::Boolean("N", "nolog", "DISABLE measurement logging", \$opt_N),
        Options::Boolean("L", "cleanremotelog", "clean + create remote logdir", \$opt_L),
	Options::String("l", "logdir", "[dir] directory to output logs to", \$opt_l, undef),
        Options::String("#", "logenableonly", "[list] comma separated list of logs to enable", \$opt_logenableonly, undef),
        Options::String("#", "logsampleparams", "[list] comma separated list of logname=rate/size", \$opt_logsampleparams, undef),
        Options::String("#", "logflushint", "[int] log flush interval (in msec)", \$opt_logflushint, "60000"),
        Options::String("#", "logbufsize", "[int] memory buffer size of each log (in entries)", \$opt_logbufsize, "1024"),
        Options::String("#", "logrotsize", "[int] size after which to rotate a log (in bytes)", \$opt_logrotsize, "536870912"), # 512MB
        Options::Boolean("#", "logzip", "gzip rotated logs", \$opt_logzip),
        Options::Boolean("#", "flushimmed", "flush logs almost immediately", \$opt_h),
### ARTIFICIAL LATENCY OPTIONS
	Options::Boolean("A", "nolat", "DISABLE artificial latency", \$opt_A),
        Options::String("a", "latency", "[file] latency file name (in Merc/topologies/)", \$opt_a, undef),
	Options::Boolean("#", "waspnet", "use waspnet vserver topology (implies -A)", \$USE_WASPNET),
	Options::String("#", "waspnet-dir", "[dir] remote dir containing waspnet.o, waspnetctl", \$WASPNET_DIR, undef),
        Options::String("#", "waspnet-in", "[int] override inbound node bwidth (kbps)", \$WASPNET_IN, undef),
        Options::String("#", "waspnet-out", "[int] override outbound node bwidth (kbps)", \$WASPNET_OUT, undef),
        Options::String("#", "waspnet-rtt", "[int] override inter-node rtt (msec)", \$WASPNET_LAT, undef),
	Options::String("#", "topo2waspnet", "[path] remote path to Topology2Waspnet.pl", \$TOPO2WASPNET, undef),
        Options::String("y", "slowdown", "[real] slowdown factor", \$opt_y, undef),
### DEBUGGING OPTIONS
        Options::String("E", "mode", "[mode] one of {normal, pprof, heapcheck, heapprof, gdb, valgrind, valgrindmem}", \$opt_E, ""),
        Options::String("V", "verbosity", "[num] debug verbosity level", \$opt_V, -1),
	Options::String("G", "debugfiles", "[files] debug files", \$opt_G, undef),
	Options::String("J", "debugfuncs", "[funcs] debug functions", \$opt_J, undef),
### BOOTSTRAP OPTIONS
        Options::Boolean("#", "spbootstrap", "do not run any apps on bootstrap node", \$SPBOOTSTRAP),
	Options::Boolean("O", "oneperhub", "one node per mercury hub", \$opt_O),
        Options::String("#", "identmap", "[file] ident map file name", \$BOOTSTRAP_IDENTMAP, undef),
### MERCURY OPTIONS
        Options::String("#", "mercports", "comma separated list of ports to use for merc vservers", \$opt_mercports, undef),
	Options::Boolean("c", "nocache", "DISABLE caching", \$opt_c),
        Options::Boolean("#", "nopubtriggers", "DISABLE pub triggers", \$DISABLE_PUBTRIGGERS),
### OM OPTIONS
	Options::Boolean("M", "nomanager", "DISABLE manager options", \$opt_M),
	Options::String("Q", "migpolicy", "[policy] migration policy {static,rand,load}", \$opt_Q, undef),

	Options::Boolean("I", "recload", "record load info", \$opt_I),
	Options::Boolean("R", "bcastload", "broadcast load info (debugging)", \$opt_R),

	Options::String("T", "termpass", "[passwd] terminal password", \$opt_T, undef),
### Let's try to keep this organized. Don't just add more opts at the end :)
	);

# The first command-line argument names the application config file.  An
# argument containing "help" prints the option summary instead.
our $APP_CONF = shift @ARGV;
Usage() if (!$APP_CONF);
if ($APP_CONF =~ /help/) {
    PrintUsage(\@optable);
    exit 1;
}

# application defined parameters
our $APP_DIR;         # directory containing $APP_EXE in relation to $TOPDIR
our $APP_EXE;         # application executable name in $TOPDIR/$APP_DIR
our $APP_HANDLE_ARGS; # function ref to process getopt parameters
our $APP_MASTER_ARGS; # string containing arguments to pass to master server
our $APP_SLAVE_ARGS;  # string containing arguments to pass to slave server
our $APP_INDIV_ARGS;  # function that generates individual args for each server
our $APP_BOOTSTRAP_ARGS; # additional arguments to bootstrap server
our $APP_TOPO2WASPNET;# path to Topology2Netem.pl rlt to $TOPDIR/$APP_DIR
our $APP_WASPNETDIR;  # path to waspnet module dir (waspnet.o, waspnetctl)
our $APP_LIBPATH;     # library path (to libcolyseus) rlt to $TOPDIR/$APP_DIR
our $APP_BOOTSTRAP;   # path to bootstrap exe rlt to $TOPDIR/$APP_DIR
our $APP_TOPODIR;     # path to artificial topologies (i.e. ../Merc/topologies)
our $APP_SCHEMA;      # default application mercury schema (may be changed)
our $APP_PARAMS_CONF; # default application mercury conf
our @APP_OPT_TABLE;   # application specific options

tdie "bad conf file: '$APP_CONF'" if ! -f $APP_CONF;
{
    # require() bypasses the @INC search only for paths beginning with "/",
    # "./" or "../".  A bare relative name such as "AppConf.pl" is looked up
    # in @INC, which no longer contains "." on Perl 5.26+, so the file that
    # just passed the -f test would not be found.  Anchor it explicitly so
    # the file we validated is the file we load.
    my $conf_path = ($APP_CONF =~ m{^(?:/|\.\.?/)}) ? $APP_CONF
                                                    : "./$APP_CONF";
    no strict;
    require $conf_path;
}

# Settings every application config must provide (checked in this order).
foreach my $required ([ 'APP_DIR',       $APP_DIR ],
                      [ 'APP_EXE',       $APP_EXE ],
                      [ 'APP_LIBPATH',   $APP_LIBPATH ],
                      [ 'APP_BOOTSTRAP', $APP_BOOTSTRAP ],
                      [ 'APP_SCHEMA',    $APP_SCHEMA ]) {
    my ($name, $value) = @$required;
    tdie "config variable \$$name not defined" if !defined $value;
}
# ... and the per-server argument generator must be a code reference.
if (!defined $APP_INDIV_ARGS || ref($APP_INDIV_ARGS) ne "CODE") {
    tdie "config variable \$APP_INDIV_ARGS not defined"
}

push @optable, @APP_OPT_TABLE;

# Whatever ProcessOptions returns (the non-option arguments) is the host list.
@ARGV = ProcessOptions(\@optable, \@ARGV, -complain => 0);
# PrintOptionValues(\@optable);

Usage() if (@ARGV == 0);

# Canonicalize every host argument to "name@host:iface".  The loop variable
# aliases the elements of @ARGV, so @ARGV itself is rewritten in place; later
# code passes @ARGV on to the application hooks and sees the canonical form.
foreach my $login (@ARGV) {
    # default the user to the local one
    $login = "$ENV{USER}\@$login" if ($login !~ /\@/);

    # default the internal interface to the host name itself
    if ($login !~ /:/) {
        my ($user, $host) = split(/@/, $login);
        $login = "$user\@$host:$host";
    }
}
our @logins = @ARGV;

# Write the command-line synopsis to STDERR, let PrintUsage() describe the
# option table, and terminate the script with exit status 1.  Never returns.
sub Usage() {
    print STDERR <<'END_USAGE';
usage: RunV2.pl AppConf.pl [options] [user1@]master[:iface] [user2@]host2[:iface] ...

  You can optionally specify an internal interface after the hostname that
  will be used as the hostname/IP in the application (needed on emulab).
  The experiment will start and run *detached* -- that is, the script
  does not wait until the experiment finishes. Instead it outputs
  info about the experiment to a filehandle (default stdout) so that
  you can 're-attach' to it later. The output format is:

    login<tab>'node name'<tab>serialized_remote_daemon_ref

END_USAGE

    PrintUsage(\@optable);
    exit 1;
}

# handle generic arguments
#
# Translate the raw option variables into the named settings used by the
# rest of the script.
# NOTE(review): several flags below test `defined` while others test truth.
# Whether an unset Options::Boolean leaves its variable undef is not visible
# from this file -- confirm in Options.pm before changing either form.
our $RUN_MULTIPLE    = defined $opt_Z;
our $RUN_AS_BG       = defined $opt_B;
our $KILL            = $opt_k;
our $INVOKE_XTERM    = $opt_x;
our $INVOKE_GTERM    = $opt_g && !$opt_x;   # -x wins when both are given

our $LOG_DIR         = $opt_l || "/tmp";
our $CREATE_LOGDIR   = defined $opt_L;
our $USE_SUDO        = defined $opt_U;

our $BOOTSTRAP_LOGIN;

# With --spbootstrap the first host is reserved for the bootstrap server: it
# is removed from both @logins and @ARGV so it is not counted as an app host.
# Otherwise the bootstrap shares the first host with the master.
if ($SPBOOTSTRAP) {
    $BOOTSTRAP_LOGIN = shift @logins;
    shift @ARGV;
} else {
    $BOOTSTRAP_LOGIN = $logins[0];
}

# Total number of application instances = hosts x virtual servers per host.
our $HOSTS = scalar(@ARGV);
our $VIRTUAL_SERVERS = $opt_v || 1;
our $NSERVERS        = $HOSTS * $VIRTUAL_SERVERS;
#if (defined $SPBOOTSTRAP) {
#    $NSERVERS = (scalar @ARGV - 1) * $VIRTUAL_SERVERS + 1;
#}

our @JOIN_LOCATIONS = ();   # filled from --joinlocations further down
our @SCHEMA_INFO = ();      # filled by ParseSchema()
if (defined $opt_mercports) {
    # --mercports replaces the default port list entirely
    @MERC_PORTS = split(/,/, $opt_mercports);
}
our $MAXPARALLEL     = $opt_P || 16;

our $MODE         = defined $opt_E ? $opt_E : "";
our $FASTLOGFLUSH = defined $opt_h;
our $VERBOSITY    = defined $opt_V ? $opt_V : -1;
our $DBG_FILES    = defined $opt_G ? $opt_G : "";
our $DBG_FUNCS    = defined $opt_J ? $opt_J : "";
our $ONE_NODE_PER_HUB = defined $opt_O;
our $ENABLE_MANAGER_OPTS = !$opt_M;
our $ENABLE_CACHE = !defined $opt_c;
# artificial latency is on unless -A or --waspnet was given
our $USE_LATENCY  = !defined $opt_A && !defined $USE_WASPNET;
our $LATENCY_FILE = defined $opt_a ? $opt_a : "n50.v20.lat";
our $DO_MEASUREMENT = !defined $opt_N;
our $TERM_PASSWD  = $opt_T || "";
our $MIGRATE_POLICY = defined $opt_Q ? $opt_Q : "static";
our $LOADINFO     = $opt_I;
our $BCAST_LI     = $opt_R;
our $SLOWDOWN     = defined $opt_y ? $opt_y : "0";
our $RUNWITHSUDO = defined $opt_s;

# Destination for the experiment description printed once everything has
# started: STDOUT unless -o names a file.  In kill mode (-k) the script exits
# before printing, so the file is deliberately not opened (and not truncated).
our $OUTPUT = *STDOUT;
if ($opt_o && !$opt_k) {
    # Explicit-mode constructor: the file name is taken literally and is
    # never scanned for open() mode characters, unlike the ">$file" form.
    $OUTPUT = IO::File->new($opt_o, "w");
    tdie "can't open $opt_o: $!" if !$OUTPUT;
}

# The vserver map is loaded this early because the application argument hook
# below is handed RemapVServerLogins(@ARGV) when waspnet is in use
# (presumably that remapping reads the map -- confirm in RemapVServerLogins).
if ($USE_WASPNET) {
    tinfo "** Loading vserver map";
    LoadVServerMap("$BASEDIR/../topologies/$LATENCY_FILE");
}

# handle application specific arguments
# An application that registers its own options must also supply the hook
# that consumes them.
tdie "config variable \$APP_HANDLE_ARGS not defined" 
    if @APP_OPT_TABLE > 0 && !defined $APP_HANDLE_ARGS;
if (defined $APP_HANDLE_ARGS) {
    if ($USE_WASPNET) {
	# under waspnet the hook is told there is 1 virtual server per
	# (remapped) login
	no strict;
	&{$APP_HANDLE_ARGS}(1, RemapVServerLogins(@ARGV));
	use strict;
    } else {
	no strict;
	&{$APP_HANDLE_ARGS}($VIRTUAL_SERVERS, @ARGV);
	use strict;
    }
}

# waspnet needs both the topology converter and the module directory, each
# from either the application config or the command line.
if ($USE_WASPNET && (!$APP_TOPO2WASPNET &&!$TOPO2WASPNET || 
		     !$APP_WASPNETDIR && !$WASPNET_DIR)) {
    tdie "Config must define \$APP_TOPO2WASPNET and \$APP_WASPNETDIR if using waspnet, or pass in --waspnet-dir and --topo2waspnet";
}

# Remote working directory: -t overrides the (local) $HOME as the parent of
# $APP_DIR.
our $TOPDIR       = defined $opt_t ? "$opt_t/$APP_DIR" : "$ENV{HOME}/$APP_DIR";
our $LIBRARY_PATH   = $APP_LIBPATH;
our $BOOTSTRAP_PROG = $APP_BOOTSTRAP;
our $PROGRAM        = $APP_EXE;
our $MERC_SCHEMA    = $APP_SCHEMA;
our $TOPODIR        = $APP_TOPODIR;
our $PARAMS_CONF    = $APP_PARAMS_CONF || "params.conf";

# override app defaults
# (command-line values win; otherwise the app paths are taken under $TOPDIR)
$TOPO2WASPNET = $TOPO2WASPNET ? $TOPO2WASPNET : "$TOPDIR/$APP_TOPO2WASPNET";
$WASPNET_DIR   = $WASPNET_DIR ? $WASPNET_DIR : "$TOPDIR/$APP_WASPNETDIR";

our $STARTED         = 0;
our $TOTAL_INSTANCES = 0;

$TOTAL_INSTANCES = scalar @logins * $VIRTUAL_SERVERS;

# map from "$iface:$vserv" => "$vaddr" if $USE_WASPNET is enabled
our %VSERVER_MAP;

# --joinlocations replaces the bootstrap server: nodes are given explicit
# join locations plus the schema, so the schema file is parsed here.
# NOTE(review): the schema path is built from the remote $TOPDIR but is read
# on the local machine -- confirm the two layouts match.
if (defined $opt_joinlocations) {    # does not imply opt_nomaster; 
    $opt_nobootstrap = 1;
    @JOIN_LOCATIONS = split(/,/, $opt_joinlocations);
    ParseSchema("$TOPDIR/$APP_SCHEMA");
}

# Parse a mercury schema file and append one [ attr, min, max ] triple per
# data line to @SCHEMA_INFO.
#
#   $file - path of the schema file to read
#
# Lines starting with '#' and blank lines are skipped; the remaining lines
# are split on whitespace and the first three fields are kept.  Dies if the
# file cannot be opened.  The caller's $_ is left untouched.
sub ParseSchema($) {
    my $file = shift;

    # Three-argument open with a lexical handle: the path is never parsed
    # for mode characters and the handle cannot collide with a global one.
    open(my $schema_fh, '<', $file) or die "cant open $file: $!";
    while (my $line = <$schema_fh>) {
        chomp $line;
        next if ($line =~ /^#/ or $line =~ /^\s*$/);
        my ($attr, $min, $max) = split(/\s+/, $line);
        push @SCHEMA_INFO, [ $attr, $min, $max ];
    }
    close($schema_fh);
}

###############################################################################

# create ssh tunnels
# One Travertine::SSHTunnel per entry of @logins, opened through
# ParallelExec2; a login that cannot be reached is fatal.
our @ssh = ParallelExec2(sub {
    my $login = shift;
    my ($user, $host, $iface) = SplitLogin($login);
    my $ssh = Travertine::SSHTunnel->new($user, $host);
    tdie "can't open connection to $login" if !$ssh;
    return $ssh;
}, @logins);

# The bootstrap server gets its own connection handle.  With --spbootstrap
# its login is no longer in @logins, so the handle is appended to @ssh to be
# covered by the kill/clean passes over @ssh below.
my ($bsuser, $bshost, undef) = SplitLogin($BOOTSTRAP_LOGIN); 
my $bsssh = ssh($bsuser, $bshost);
my $bootstrap_server = [ $BOOTSTRAP_LOGIN, $bsssh, 1 ];
if ($SPBOOTSTRAP) {
   push @ssh, $bsssh;
}

# ( [ login, ssh, num_to_start ] )
our @servers;
for (my $i=0; $i<@logins; $i++) {
    $servers[$i] = [ $logins[$i], $ssh[$i], $VIRTUAL_SERVERS ];
}

# The master is the first server; the shift here and the unshift below leave
# @servers unchanged and only capture a reference to its first element.
my $master = shift @servers;

my ($bsUser, $bsHost, $bsIface) = SplitLogin($bootstrap_server->[0]);
my ($mUser, $mHost, $mIface) = SplitLogin($master->[0]);

unshift @servers, $master;

# Address the nodes bootstrap from: an existing one given with --bootstrap,
# or port 15000 on the bootstrap host's internal interface.
our $BOOTSTRAP_ADDR;
if (!$opt_bsaddr) {
    $BOOTSTRAP_ADDR = "$bsIface:15000";
} else {
    $BOOTSTRAP_ADDR = $opt_bsaddr;
}

###############################################################################
# Arguments
#
# Build the command-line fragments shared by the master and all slaves.
# Each fragment is empty when its feature is switched off, so they can be
# concatenated unconditionally into $common_args.

# cache size grows with 2 * log2(number of servers)
# NOTE(review): log() is fatal for 0, which $NSERVERS becomes when
# --spbootstrap is given with a single host -- confirm that case is excluded.
my $cachesize    = int(2 * log($NSERVERS)/log(2));
# my $maxttl       = $NSERVERS + 10;
my $maxttl = 30;
my $latency_file = "$TOPODIR/$LATENCY_FILE";

my $cache_args = "";
if ($ENABLE_CACHE) {
    $cache_args = "--cache --cachesize $cachesize";
}

my $latency_args = "";
if ($USE_LATENCY) {
    $latency_args = " --latency --latency-file $latency_file ";
}

my $measurement_args = "";
if ($DO_MEASUREMENT) {
#    $measurement_args = " --measurement --log-binary --log-dir '$LOG_DIR' " .
#	"--record-obj-deltas --record-obj-interests ";
    $measurement_args = " --measurement --log-aggregate --log-dir '$LOG_DIR'" .
	" --log-rotate-size $opt_logrotsize " .
	($opt_logzip ? " --log-gzip " : "");
}

# --flushimmed overrides both the flush interval and the buffer size options
my $log_args = "";
if ($FASTLOGFLUSH) {
    $log_args = " --log-flush-interval 500 --log-buffer-size 128 ";
} else {
    $log_args = " --log-flush-interval $opt_logflushint " .
	" --log-buffer-size $opt_logbufsize ";
}
$log_args .= " --log-enable-only $opt_logenableonly " 
    if defined $opt_logenableonly;
$log_args .= " --log-sample-params $opt_logsampleparams "
    if defined $opt_logsampleparams;

# XXX HACK - how to make sure the load hub is the same as in schema?
my $load_args  = ($LOADINFO ? "--loadinfo --loadinfo-hub load " : "");

# With -R every node is told the broadcast address of every instance:
# one "iface:port" per (virtual server, host) pair, comma separated.
my $bcast_args = "";
if ($BCAST_LI) {
    my @hosts = map { my @trip = SplitLogin($_); $trip[2] } @logins;
    my $addrs;
    for (my $i=0; $i<$VIRTUAL_SERVERS; $i++) {
	my $port = $BASE_BROADCAST_LOAD_PORT + $i;
	foreach my $h (@hosts) {
	    $addrs .= "," if $addrs;
	    $addrs .= "$h:$port";
	}
    }

    $bcast_args = "--loadinfo-bcast --loadinfo-bcast-others '$addrs' ";
}

my $manager_opts = "";
if ($ENABLE_MANAGER_OPTS) {
    $manager_opts = 
	($opt_waitjoin ? "--waitjoin " : "") .
	"--proactive " .
	"--term-passwd '$TERM_PASSWD' " .
	"--migrate '$MIGRATE_POLICY' ";
}

# pub triggers are on by default
my $pubtrigger_opts = "--pubtriggers ";
if ($DISABLE_PUBTRIGGERS) {
    $pubtrigger_opts = "";
}

my $common_args =
    "--verbosity $VERBOSITY " .
    "--rconfig $PARAMS_CONF " .
    "--debugfiles '$DBG_FILES' " .
    "--debugfuncs '$DBG_FUNCS' " .
    "$pubtrigger_opts " .
    "--maxttl $maxttl " .
    "--fanoutpubs " .
    "$manager_opts " .
    "$measurement_args " .
    "$latency_args " .
    "$cache_args " .
    "$log_args " .
    "$load_args $bcast_args ";

# role-specific extras come from the application config
my $master_args = "$common_args $APP_MASTER_ARGS";
my $slave_args  = "$common_args $APP_SLAVE_ARGS";

###############################################################################

# -k: only stop the remote processes on every connection, then quit without
# starting anything.
if ($KILL) { 
    tinfo "** Cleaning processes; NOT cleaning the logs";
    ParallelExec3($MAXPARALLEL, \&StopServer, @ssh);
    exit (0);
}

# do complete cleanup only when starting an experiment...
if (!$opt_noclean) {
    tinfo "** Cleaning up processes and clearing old logs"; 
    ParallelExec3($MAXPARALLEL, \&CleanServer, @ssh);
}

# waspnet: (re)load the vserver map and run InitServer for every server;
# a false result from any of them aborts the run.
if ($USE_WASPNET) {
    tinfo "** Loading vserver map";
    LoadVServerMap("$BASEDIR/../topologies/$LATENCY_FILE");
    tinfo "** Configuring virtual topology on remote machines"; 
    my @ret = ParallelExec3($MAXPARALLEL, \&InitServer, @servers);
    foreach my $r (@ret) {
	tdie "failed to configure virtual topology!" if !$r;
    }
}

# @refs is indexed like @servers/@logins; each slot collects the
# [ daemon_ref, title ] pairs started on that host.
my @refs;
for (my $i=0; $i<@servers; $i++) {
    $refs[$i] = [];
}

if (!$opt_nobootstrap) {
    # start bootstrap server
    tinfo "** Starting bootstrap server...";
    # bootstrap will start on a non-virtualized address (no emulation)
    my $bootstrap = StartBootstrap($bootstrap_server->[0], $bootstrap_server->[1]);
    tdie "couldn't start bootstrap" if !$bootstrap;
    my $bsref = [ $bootstrap, "bootstrap (" . ToHost($bootstrap_server->[0]) . ")" ];
    # a dedicated bootstrap host gets an extra slot at the end of @refs;
    # otherwise the bootstrap is listed under the first host
    if ($SPBOOTSTRAP) {
	push @refs, [ $bsref ];
    } else {
	push @{$refs[0]}, $bsref;
    }

    # Poll the start of the bootstrap log until the schema has been read.
    # There is no local sleep (it is commented out) and no timeout; the loop
    # only ends on success or when the daemon is reported dead.
    while (1) {
	# wait for it to startup 
	#sleep 1;
	my $tail = $bootstrap->headLog(300);
	if ($tail =~ /read schema file successfully/) {
	    last;
	} else {
	    tinfo "bootstrap not ready yet... (log):\n" . thighlight($tail);
	    if (! $bootstrap->isAlive()) {
		tdie "bootstrap is dead!";
	    }
	} 
    }
}

if (!$opt_nomaster) {
    # start master server
    tinfo "** Starting master server...";
    
    # the master is virtual-server index 0 on the first host
    my $mserver = StartProgram($master->[0], $master->[1], 0, $master_args);
    
    tdie "couldn't start master" if !$mserver;
    push @{$refs[0]}, [ $mserver, "master (" . ToHost($master->[0]) . ")" ];
    
    # Same polling scheme as for the bootstrap, keyed on the option dump the
    # program prints at startup.
    while (1) {
	# wait for it to startup
	#sleep 1;
	my $head = $mserver->headLog(100);
	# XXX fixme: print something out so we know we're inited
	if ($head =~ /Current option values/) {
	    last;
	} else {
	    tinfo "master not ready yet... (log):\n" . thighlight($head);
	    if (! $mserver->isAlive()) {
		tdie "master is dead!";
	    }
	}
    }
}

# start the slave servers
tinfo "** Starting slave servers...";

# XXX: starting 25 at a time seems okay... more than that,
# the bug below appears

# Unless --nosleep is given, the -P value is replaced here by a limit of
# roughly 20 instances at a time; --moresleep forces one host at a time.
$MAXPARALLEL = $NOHACKYSLEEP ? $MAXPARALLEL : int (20 / $VIRTUAL_SERVERS);  
if ($MAXPARALLEL < 1 || $MOREHACKYSLEEP) { 
    $MAXPARALLEL = 1;
}

# this coz if nodes join VERY fast, things go 
# bad in the ring every once in a while. the reason 
# is that a large number of nodes can end up joining 
# between myself and my successor. so getting to the 
# correct successor in the current joining algorithm 
# takes a very long amount of time. 

# there are multiple possible fixes which need to be 
# evaluated when there's time 
#    - go back to the changing succ -> update succ model
#    - route a message to a value (mymax + 1) and start
#      from there.

# One call per server entry [ login, ssh, num_to_start ]; the callback returns
# a reference to the list of [ daemon_ref, title ] pairs it started.
# The callback's inner @refs is a separate lexical from the file-level @refs.
my @srefs = ParallelExec3($MAXPARALLEL, sub {
    my $server = shift;
    my $index  = 0;

    my @refs = ();

    if ($RUN_MULTIPLE) {

	# -Z: collect the indices first, then start them all through one
	# StartMultiple call for this host
	my @instances;
	while ($server->[2]-- > 0) {
	    # index 0 on the master's host is the master itself, which has
	    # already been started (unless --nomaster)
	    if (!$opt_nomaster &&
		$server->[0] eq $master->[0] && $index == 0) {
		$index++;
		next;
	    }

	    push @instances, [$index];
	    
	    $index++;
	}

	if (@instances) {
	    my $sserver = StartMultiple($server->[0], $server->[1], 
					$slave_args, @instances);
	    
	    tdie "couldn't start slaves on $server->[0]" if !$sserver;
	    push @refs, [ $sserver, "slaves " . ToHost($server->[0]) ];
	}
    } else {
	
	# default: one StartProgram call (and one daemon ref) per instance
	while ($server->[2]-- > 0) {
	    # already started the master, skip it
	    if (!$opt_nomaster &&
		$server->[0] eq $master->[0] && $index == 0) {
		$index++;
		next;
	    }
	    
	    my $sserver = StartProgram($server->[0], $server->[1],
				       $index, $slave_args);
	    tdie "couldn't start slave $server->[0] \#$index" if !$sserver;
	    push @refs, [ $sserver, ToHost($server->[0]) . ":$index" ];
	    
	    $index++;
	    #sleep (2);
	}
	
    }
   
    # Pause after each host: only for large runs (> 80 instances), tripled
    # under valgrind, and at least 10 seconds with --moresleep.
    my $slp = 0;
    if (!$NOHACKYSLEEP) {
	$slp = $VIRTUAL_SERVERS * 3 if ($NSERVERS > 80);
	$slp *= 3 if ($MODE =~ /valgrind/);
    } 

    if ($MOREHACKYSLEEP) {
	$slp = 10 if $slp < 10;
    }
    
    tinfo "* sleeping briefly for $slp seconds... ";
    sleep ($slp);
    return \@refs;
# NOTE(review): the map assigns to $_, so every element of @servers is
# replaced in place by a one-element wrapper [ server ]; any later reader of
# @servers sees the wrapped form -- confirm nothing depends on the old shape.
}, map { $_ = [ $_ ]; } @servers);  

# merge the per-host results into the matching slot of the file-level @refs
for (my $i=0; $i<@srefs; $i++) {
    foreach my $ref (@{$srefs[$i]}) {
	push @{$refs[$i]}, $ref;
    }
}

# invoke terminal if requested
# (-g: one gterm with a tab per daemon; not possible for background runs)
if ($INVOKE_GTERM) {

    if ($RUN_AS_BG) {
	twarn "can't attach to bg funcs";
    } else {
	# gather all daemon references
	my @allrefs;
	my @alltitles;
	for (my $i=0; $i<@refs; $i++) {
	    foreach my $ref (@{$refs[$i]}) {
		push @allrefs, $ref->[0];
		push @alltitles, $ref->[1];
	    }
	}

	Travertine::RemoteDaemon::AttachToGterm(\@allrefs, 
						-titles => \@alltitles);
    }
}


# everything started!
tinfo "** Everything started!";

# print out the experiment for later attachment (if required)
# One line per daemon: login<tab>'title'<tab>serialized daemon reference.
# @refs and @logins are index-aligned; with --spbootstrap the bootstrap login
# is appended so that it lines up with the extra slot added to @refs.
push @logins, $BOOTSTRAP_LOGIN if $SPBOOTSTRAP; # xxx man this is hacky
for (my $i=0; $i<@refs; $i++) {
    my $login = $logins[$i];
    foreach my $ref (@{$refs[$i]}) {
	my $title = $ref->[1];
	my $str   = $ref->[0]->serialize;

	print $OUTPUT "$login\t'$title'\t$str\n";
    }
}

###############################################################################
# Start $func on the far end of $ssh through ExecRemoteFunc.
#
#   $ssh   - connection handle for the target host
#   $func  - code reference to execute remotely
#   $args  - array reference with the arguments for $func
#   $log   - log file path, passed as -log
#   $title - title, passed as -title (daemon mode only)
#
# In background mode (-B) the function is started with -background and a
# 60 second -timeout, plus -rotate when --outlogrot is positive; otherwise it
# is started with -daemon.  With -x an xterm is attached to the new daemon,
# which is only possible outside background mode.
# Returns the reference handed back by ExecRemoteFunc, or undef on failure.
sub Run
{
    my ($ssh, $func, $args, $log, $title) = @_;

    my %opts;
    if ($RUN_AS_BG) {
        %opts = ( -background => 1,
                  -timeout => 60,
                  -log => $log );
        $opts{-rotate} = $opt_outlogrot
            if ($opt_outlogrot && $opt_outlogrot > 0);
    } else {
        %opts = ( -daemon => 1,
                  -log => $log,
                  -title => $title );
    }

    my $ref = ExecRemoteFunc($ssh, $func, $args, %opts);
    return undef if !$ref;

    if ($INVOKE_XTERM && $RUN_AS_BG) {
        twarn "can't attach to a bg func";
    } elsif ($INVOKE_XTERM) {
        $ref->attachToXterm();
    }

    return $ref;
}

# Run the bootstrap program and block until it exits (handed to Run() by
# StartBootstrap, which executes it through ExecRemoteFunc).
#
#   $topdir      - directory to chdir into before starting
#   $libpath     - appended to LD_LIBRARY_PATH
#   $mode        - 'gdb', 'valgrind' or 'valgrindmem' wrap the program in
#                  that tool; any other value runs it directly
#   $prog        - program to start
#   $args        - its argument string
#   $runwithsudo - when true, start the command through "sudo sh -c"
#
# Everything it needs arrives as parameters; the way the process ended is
# reported through twarn.
sub RunBootstrap
{
    my ($topdir, $libpath, $mode, $prog, $args, $runwithsudo)  = @_;

    chdir($topdir) or tdie "can't change to $topdir: $!";
    $ENV{LD_LIBRARY_PATH} .= ":$libpath";

    # plain invocation unless one of the debugging modes replaces it
    my $cmd = "$prog $args";
    if ($mode eq 'gdb') {
        # gdb is driven by a small command file that starts the program
        my $temp = "/tmp/RunBootstrap.$$";
        open(T, ">$temp") || die $!;
        print T "handle SIGUSR2 nostop\n",
                "exec-file $prog\n",
                "r $args\n";
        close(T);
        $cmd = "gdb -x $temp $prog";
    }
    elsif ($mode eq 'valgrind') {
#       $cmd = "valgrind --tool=memcheck --db-attach=yes --suppressions=realnet.supp --gen-suppressions=yes --num-callers=5 $prog $args";
        $cmd = "valgrind --tool=memcheck --num-callers=10 $prog $args";
    }
    elsif ($mode eq 'valgrindmem') {
        $cmd = "valgrind --tool=addrcheck --leak-check=yes --show-reachable=yes $prog $args";
    }

    my $stamp = strftime "%H:%M:%S", localtime;
    tinfo "starting at [$stamp]";

    # sudo does not pass LD_LIBRARY_PATH through, so it is set again inside
    # the privileged shell
    my $invocation = $runwithsudo
        ? "sudo sh -c \"LD_LIBRARY_PATH=$libpath $cmd\""
        : $cmd;
    psystem($invocation);

    # decode the wait status left in $?
    $stamp = strftime "%H:%M:%S", localtime;
    if ($? == -1) {
        my $pwd = `pwd`; chomp $pwd;
        twarn "[$stamp] failed to execute $prog ($pwd): $!";
    }
    elsif ($? & 127) {
        twarn sprintf "[$stamp] child died with signal %d, %s coredump",
            ($? & 127),  ($? & 128) ? "with ":  " without ";
    }
    else {
        twarn sprintf "[$stamp] child exited with value %d", $? >> 8;
    }
}

# Start several application instances from one launcher process and wait for
# all of them (handed to Run() by StartMultiple, which executes it through
# ExecRemoteFunc).
#
#   $topdir      - directory to chdir into before starting anything
#   $libpath     - library directory added to LD_LIBRARY_PATH
#   $prog        - program to start
#   $mode        - debugging mode: gdb, valgrind, valgrindmem, pprof,
#                  heapcheck, heapprof (the "...1" variants apply to virtual
#                  index 0 only); anything else runs the program directly
#   $log_dir     - directory for CPU profiles
#   $runwithsudo - when true, start each command through "sudo sh -c"
#   @instances   - one [ $if, $vindex, $port, $outlog, $args ] per instance
#
# Each instance runs in a forked child whose STDOUT/STDERR go to $outlog.
# After every 11 successful starts the launcher pauses for 10 seconds.  A
# failed fork is reported and the remaining instances are still started.
# Everything it needs arrives as parameters (no file-level state is read).
sub RunMultiple
{
    my ($topdir, $libpath, $prog, $mode, $log_dir, $runwithsudo,
        @instances) = @_;

    my $now_string;
    my $started = 0;
    my $START_BOUND = 10;

    # XXX - someway to avoid duplicating this code from Run
    # (todo this we have to export a shared function...
    chdir($topdir) or tdie "can't change to $topdir: $!";

    # Extend the library path exactly once, with a separator between an
    # inherited value and our additions (appending per instance, without the
    # ":", fused unrelated directories into one bogus path element).
    my @libdirs = ("$ENV{HOME}/lib", "$ENV{HOME}/local/lib", $libpath);
    unshift @libdirs, $ENV{LD_LIBRARY_PATH}
        if (defined($ENV{LD_LIBRARY_PATH}) && length($ENV{LD_LIBRARY_PATH}));
    $ENV{LD_LIBRARY_PATH} = join(":", @libdirs);

    foreach my $i (@instances) {
        my ($if, $vindex, $port, $outlog, $args) = @$i;

        my $cmd;
        if ($mode eq 'gdb') {
            # One command file per instance: all instances share this
            # process id, so the virtual index keeps the files apart.
            my $temp = "/tmp/RunLocalTest.master.$$.$vindex";
            open(my $gdb_fh, '>', $temp) || die $!;
            print $gdb_fh "handle SIGUSR2 nostop\n";
            print $gdb_fh "exec-file $prog\n";
            print $gdb_fh "r $args\n";
            close($gdb_fh);
            $cmd = "gdb -x $temp $prog";
        } elsif ($mode eq 'valgrind') {
            $cmd = "valgrind --tool=memcheck --num-callers=6 $prog $args";
        } elsif ($mode eq 'valgrindmem') {
            $cmd = "valgrind --tool=addrcheck --leak-check=yes --show-reachable=yes $prog $args";
        } elsif ($mode eq 'pprof' || $mode eq 'pprof1' && $vindex == 0) {
            $cmd = "CPUPROFILE=$log_dir/Profile.$if:$port.out $prog $args";
        } elsif ($mode eq 'heapcheck' || $mode eq 'heapcheck1' && $vindex == 0) {
            $cmd = "HEAPCHECK=normal $prog $args";
        } elsif ($mode eq 'heapprof' || $mode eq 'heapprof1' && $vindex == 0) {
            $cmd = "HEAPPROFILE=/tmp/HeapProfile.$if:$port $prog $args";
        } else {
            $cmd = "$prog $args";
        }

        my $pid = fork();
        if (!defined $pid) {
            # fork failed: report it and carry on with the other instances
            # instead of silently terminating the launcher
            twarn "failed to start up virtual instance $vindex!";
            next;
        }

        if ($pid > 0) {
            # parent
            tinfo "started vinstance $vindex at $if:$port (pid $pid)";
            $started++;

            if ($started > $START_BOUND) {
                $started = 0;
                tinfo "waiting for a bit; starting too fast is bad";
                sleep (10);
            }
            next;
        }

        # child
        $now_string = strftime "%H:%M:%S", localtime;
        #tinfo "starting at [$now_string]";
        #tinfo "cmd: $cmd";

        # send everything this instance prints to its own output log
        close(STDOUT);
        close(STDERR);
        open(STDOUT, '>', $outlog);
        open(STDERR, ">&STDOUT");

        tinfo "starting at [$now_string]";

        if ($runwithsudo) {
            psystem("sudo sh -c \"LD_LIBRARY_PATH=$libpath $cmd\"");
        }
        else {
            psystem ("$cmd");
        }

        $now_string = strftime "%H:%M:%S", localtime;

        if ($? == -1) {
            twarn "[$now_string] failed to execute: $!";
        }
        elsif ($? & 127) {
            twarn sprintf "[$now_string] child died with signal %d, %s coredump",
                ($? & 127),  ($? & 128) ? "with ":  " without ";
        }
        else {
            tinfo sprintf "[$now_string] child exited with value %d", $? >> 8;
        }

        # the child's job ends with its instance
        exit 0;
    }

    # reap every child before reporting completion
    while (wait >= 0) {}

    tinfo "all childs exited ! going home";
}

# Run one application instance and block until it exits (handed to Run() by
# StartProgram, which executes it through ExecRemoteFunc).
#
#   $topdir      - directory to chdir into before starting
#   $libpath     - library directory added to LD_LIBRARY_PATH
#   $prog        - program to start
#   $args        - its argument string
#   $mode        - debugging mode: gdb, valgrind, valgrindmem, pprof,
#                  heapcheck, heapprof (the "...1" variants apply to virtual
#                  index 0 only); anything else runs the program directly
#   $log_dir     - directory for CPU profiles
#   $if, $port   - interface and port, used to name profile output files
#   $vindex      - virtual server index of this instance
#   $runwithsudo - when true, start the command through "sudo sh -c"
#
# Everything it needs arrives as parameters (no file-level state is read).
# How the process ended is reported through twarn/tinfo.
sub RunProgram
{
    my ($topdir, $libpath, $prog, $args, $mode, $log_dir,
        $if, $port, $vindex, $runwithsudo) = @_;
    my $now_string;

    chdir($topdir) or tdie "can't change to $topdir: $!";

    # Extend the library path with a separator between an inherited value
    # and our additions (a plain append fused the last inherited directory
    # with "$HOME/lib" into one bogus path element).
    my @libdirs = ("$ENV{HOME}/lib", "$ENV{HOME}/local/lib", $libpath);
    unshift @libdirs, $ENV{LD_LIBRARY_PATH}
        if (defined($ENV{LD_LIBRARY_PATH}) && length($ENV{LD_LIBRARY_PATH}));
    $ENV{LD_LIBRARY_PATH} = join(":", @libdirs);

    my $cmd;
    if ($mode eq 'gdb') {
        # gdb is driven by a small command file that starts the program
        my $temp = "/tmp/RunLocalTest.master.$$";
        open(my $gdb_fh, '>', $temp) || die $!;
        print $gdb_fh "handle SIGUSR2 nostop\n";
        print $gdb_fh "exec-file $prog\n";
        print $gdb_fh "r $args\n";
        close($gdb_fh);
        $cmd = "gdb -x $temp $prog";
    } elsif ($mode eq 'valgrind') {
#       $cmd = "valgrind --tool=memcheck --db-attach=yes --suppressions=realnet.supp --gen-suppressions=yes --num-callers=5 $prog $args";
        $cmd = "valgrind --tool=memcheck --num-callers=6 $prog $args";
    } elsif ($mode eq 'valgrindmem') {
        $cmd = "valgrind --tool=addrcheck --leak-check=yes --show-reachable=yes $prog $args";
    } elsif ($mode eq 'pprof' || $mode eq 'pprof1' && $vindex == 0) {
        $cmd = "CPUPROFILE=$log_dir/Profile.$if:$port.out $prog $args";
    } elsif ($mode eq 'heapcheck' || $mode eq 'heapcheck1' && $vindex == 0) {
        $cmd = "HEAPCHECK=normal $prog $args";
    } elsif ($mode eq 'heapprof' || $mode eq 'heapprof1' && $vindex == 0) {
        $cmd = "HEAPPROFILE=/tmp/HeapProfile.$if:$port $prog $args";
    } else {
        $cmd = "$prog $args";
    }

    #psystem("netstat -u -s > $log_dir/Netstat.$if:$port.out");
    $now_string = strftime "%H:%M:%S", localtime;
    tinfo "starting at [$now_string]";

    if ($runwithsudo) {
        # sudo does not pass LD_LIBRARY_PATH through, so it is set again
        # inside the privileged shell
        psystem("sudo sh -c \"LD_LIBRARY_PATH=$libpath $cmd\"");
    }
    else {
        psystem ($cmd);
    }

    # decode the wait status left in $?
    $now_string = strftime "%H:%M:%S", localtime;
    if ($? == -1) {
        twarn "[$now_string] failed to execute: $!";
    }
    elsif ($? & 127) {
        twarn sprintf "[$now_string] child died with signal %d, %s coredump",
            ($? & 127),  ($? & 128) ? "with ":  " without ";
    }
    else {
        tinfo sprintf "[$now_string] child exited with value %d", $? >> 8;
    }
    #psystem("netstat -u -s >> $log_dir/Netstat.$if:$port.out");
}

# Launch the bootstrap server for $login over $ssh.
#
#   $login - "user@host:iface" of the bootstrap machine
#   $ssh   - connection handle for that machine
#
# Assembles the bootstrap command line from the script-wide settings and
# starts RunBootstrap through Run(), logging to OutputLog.bootstrap.out under
# $LOG_DIR.  Returns whatever Run() returns (false on failure).
sub StartBootstrap
{
    my ($login, $ssh) = @_;
    my (undef, $host, $iface) = SplitLogin($login);

    # if $USE_WASPNET still have bootstrap run on a non-virtualized address

    tinfo "** Starting Bootstrap on: $iface:15000";

    # Pieces are appended in the exact order the bootstrap has always
    # received them; optional ones are skipped when their setting is off.
    my $bsargs = " -v $VERBOSITY ";
    $bsargs .= " $APP_BOOTSTRAP_ARGS " if $APP_BOOTSTRAP_ARGS;
    $bsargs .= "--onenodehub " if $ONE_NODE_PER_HUB;
    $bsargs .= "--schema $MERC_SCHEMA ";
    $bsargs .= "--hostname $iface --histograms --buckets 35 ";
    $bsargs .= ($SLOWDOWN > 1) ? " --slowdown-factor $SLOWDOWN " : " ";
    $bsargs .= "--nservers $NSERVERS" if $opt_waitjoin;
    $bsargs .= " --sched-rr " if $RUNWITHSUDO;
    $bsargs .= " --policy ident-map --ident-map $BOOTSTRAP_IDENTMAP "
        if $BOOTSTRAP_IDENTMAP;

    my @remote_args = ($TOPDIR, $LIBRARY_PATH, $MODE, $BOOTSTRAP_PROG,
                       $bsargs, $RUNWITHSUDO);

    return Run($ssh, \&RunBootstrap, \@remote_args,
               "$LOG_DIR/OutputLog.bootstrap.out",
               "bootstrap ($host)");
}

# Build the per-instance argument fragment shared by StartProgram and
# StartMultiple.
#
#   $login    - "user@host:iface" of the instance
#   $vindex   - virtual server index (selects the load broadcast port)
#   $mercPort - mercury port of the instance
#
# With --joinlocations the instance gets the schema as a string plus a join
# location for every hub; locations are handed out round robin, so each call
# rotates @JOIN_LOCATIONS by one.  Otherwise it gets the bootstrap address.
# The application's own per-instance arguments ($APP_INDIV_ARGS) follow.
# Returns the fragment as a string.
sub GetSomeCommonArgs {
    my ($login, $vindex, $mercPort) = @_;
    my $args = "";

    if ($BCAST_LI) {
        $args .= " --loadinfo-bcast-port " .
            ($BASE_BROADCAST_LOAD_PORT + $vindex) . " ";
    }

    if (!defined $opt_joinlocations) {
        $args .= " --bootstrap '$BOOTSTRAP_ADDR' ";
    }
    else {
        # round robin: take the head of the list and put it back at the tail
        my $loc = shift @JOIN_LOCATIONS;
        push @JOIN_LOCATIONS, $loc;

        # "attr:min:max:true" per hub            XXX join all hubs!
        my $ss = " --schema-str " .
            join(",", map { join(":", @{$_}) . ":true" } @SCHEMA_INFO);
        # every hub joins at the same location
        my $jl = " --join-locations " .
            join(",", map { $_->[0] . ":" . $loc } @SCHEMA_INFO);

        $args .= " $ss $jl ";
    }

    $args .= $APP_INDIV_ARGS->($login, $vindex, $mercPort, \@logins, $VIRTUAL_SERVERS);

    $args .= " --sched-rr" if ($RUNWITHSUDO);
     # $args .= " --use-poll ";
    $args .= ($SLOWDOWN > 1) ? " --slowdown-factor $SLOWDOWN " : " ";

    return $args;
}

# Start one application instance on the host behind $ssh.
#
#   $login  - "user@host:iface" of the target host
#   $ssh    - connection handle for that host
#   $vindex - virtual server index; selects the port from @MERC_PORTS
#   $args   - role specific argument string (master or slave)
#
# Adds hostname, port and the per-instance arguments, then starts RunProgram
# through Run().  The output log is named after the interface and the
# instance's mercury port + 5000.  Dies when no port is configured for the
# index.  Returns whatever Run() returns (false on failure).
sub StartProgram
{
    my ($login, $ssh, $vindex, $args) = @_;
    my ($user, $host, $iface) = SplitLogin($login);

    if ($USE_WASPNET) {
        # each vserver gets its own address, so no need for vports
        $iface = GetVServerAddr($iface, $vindex);
        $login = JoinLogin($user, $host, $iface);
        $vindex = 0;
    }

    # An index beyond the port list would otherwise yield an empty "--port"
    # value and a log file named after port 5000.
    my $mercPort = $MERC_PORTS[$vindex];
    tdie "no mercury port for vserver index $vindex on $host (" .
        scalar(@MERC_PORTS) . " ports configured; see --mercports)"
        if !defined $mercPort;

    $args .= " --hostname $iface --port $mercPort";
    $args .= GetSomeCommonArgs($login, $vindex, $mercPort);

    tinfo "* Starting $APP_EXE on: ($host) $iface:$mercPort";

    #tinfo "*      merc-port   : $mercPort";
    #tinfo "*      direct-port : " . ($mercPort+5000);
    #tinfo "*      term-port   : " . ($mercPort+10000);

    return Run($ssh, \&RunProgram,
               [$TOPDIR, $LIBRARY_PATH, "./$PROGRAM", $args, $MODE,
                $LOG_DIR, $iface, $mercPort, $vindex, $RUNWITHSUDO ],
               "$LOG_DIR/OutputLog.$iface:".($mercPort+5000).".out",
               "$host:$mercPort");
}

# StartMultiple($login, $ssh, $base_args, @instances)
#
# Launches several instances on one host through a single Run() call.
# Each element of @instances is an array reference whose first element is the
# instance's vindex.  The referenced arrays are modified in place: the
# interface address is prepended, and the port, the output-log path and the
# full argument string are appended, before the list is handed to
# \&RunMultiple.
#
# Returns whatever Run() returns.
sub StartMultiple
{
    my ($login, $ssh, $base_args, @instances) = @_;
    my ($user, $host, $first_addr) = SplitLogin($login);

    # Address used for the current instance; only changes under waspnet.
    my $addr = $first_addr;

    for my $inst (@instances) {
        my ($slot) = @$inst;

        if ($USE_WASPNET) {
            # each vserver gets its own address, so no need for vports
            $addr  = GetVServerAddr($first_addr, $slot);
            $login = JoinLogin($user, $host, $addr);
            $slot  = 0;
        }

        my $port    = $MERC_PORTS[$slot];
        my $cmdline = $base_args
                    . " --hostname $addr --port $port"
                    . GetSomeCommonArgs($login, $slot, $port);

        tinfo("* Starting $APP_EXE on: ($host) $addr:$port");

        my $outlog = sprintf("%s/OutputLog.%s:%s.out",
                             $LOG_DIR, $addr, $port + 5000);

        # Resulting layout: (addr, <original elements...>, port, log, args)
        unshift @$inst, $addr;
        push @$inst, $port, $outlog, $cmdline;
    }

    my @run_args = ($TOPDIR, $LIBRARY_PATH, "./$PROGRAM", $MODE,
                    $LOG_DIR, $RUNWITHSUDO, @instances);

    return Run($ssh, \&RunMultiple, [ @run_args ],
               "$LOG_DIR/RunLog.$first_addr.out", "$host");
}

# InitServer($login, $ssh)
#
# Sets up waspnet on the server behind $ssh.  On the remote side this runs
# $TOPO2WASPNET for the server's address, saves its output as
# /tmp/<addr>.waspnet and then executes that file with "sudo sh".
#
# Returns the third element of ExecRemoteFunc()'s result list; the remote
# sub itself yields 1 on success and undef if either step fails.
sub InitServer
{
    my ($login, $ssh) = @_;

    # NOTE: ToHost() yields the interface field of the login string.
    my $host = ToHost($login);

    tdie("invalid server: $host : $ssh") unless $host && $ssh;

    # Executed remotely: everything it needs arrives through its argument
    # list, nothing is captured from the enclosing scope.
    my $remote_setup = sub {
        my ($topo2waspnet, $waspnetdir, $latfile, $slowdown, $inbound,
            $outbound, $rtt, $nhosts, $vservers, $addr) = @_;

        # Optional switches are only emitted for true values.
        my $cmd = "$topo2waspnet -d $waspnetdir -n $nhosts -v $vservers ";
        $cmd .= " -y $slowdown " if $slowdown;
        $cmd .= " -I $inbound "  if $inbound;
        $cmd .= " -O $outbound " if $outbound;
        $cmd .= " -L $rtt "      if $rtt;
        $cmd .= " $latfile $addr";

        my $script = "/tmp/$addr.waspnet";

        # psystem() returning true is treated as failure here.
        if ( psystem("$cmd > $script") ) {
            twarn("failed to execute '$cmd' on $addr; see above messages");
            return(undef);
        }
        if ( psystem("sudo sh $script > $script.out 2>&1") ) {
            twarn("failed to execute waspnet script:");
            psystem("cat $script.out 1>&2");
            return(undef);
        }

        return 1;
    };

    my @remote_args = ($TOPO2WASPNET, $WASPNET_DIR,
                       "$TOPDIR/$APP_TOPODIR/$LATENCY_FILE", $SLOWDOWN,
                       $WASPNET_IN, $WASPNET_OUT, $WASPNET_LAT,
                       $HOSTS, $VIRTUAL_SERVERS, $host);

    my $ret = (ExecRemoteFunc($ssh, $remote_setup, [ @remote_args ],
                              -print => 1, -timeout => 300))[2];

    return $ret;
}

# CleanServer($ssh)
#
# Prepares the server behind $ssh for a fresh run.  Remotely it:
#   1. if a log directory was passed ($CREATE_LOGDIR is set), removes its
#      *.log / *.out files, (re)creates it and makes it world-writable,
#      using sudo when $USE_SUDO is set;
#   2. sends signal 1 and then signal 9 to bootstrap, the program, gdb and
#      valgrind via killall (sudo killall when $RUNWITHSUDO is set);
#   3. polls "ps ax" every 5 seconds until the program no longer shows up.
sub CleanServer
{
    my ($ssh) = @_;

    ExecRemoteFunc($ssh, sub {
        my ($exe, $logdir, $usesudo, $runwithsudo) = @_;

        if ($logdir) {
            my $sudo = $usesudo ? "sudo " : "";
            foreach my $step ("rm -f $logdir/*.log",
                              "rm -f $logdir/*.out",
                              "mkdir -p $logdir",
                              "chmod 777 $logdir") {
                psystem($sudo . $step);
            }
        }
#       psystem("killall screen       >/dev/null 2>&1");

        my $killall = $runwithsudo ? "sudo killall" : "killall";

        # Padding is kept so the command strings stay exactly as before.
        my @victims = ("bootstrap ", "$exe      ", "gdb       ", "valgrind  ");

        # Polite pass (-1) first, then the forced pass (-9); there is no
        # pause between the two passes.
        foreach my $sig ("-1", "-9") {
            foreach my $victim (@victims) {
                psystem("$killall $sig ${victim}>/dev/null 2>&1");
            }
        }

        sleep 1;

        # $exe is used both as the grep argument and as the match pattern.
        while (`ps ax | grep $exe | grep -v grep` =~ $exe) {
            tinfo("$exe not dead yet, waiting...");
            sleep 5;
        }
    }, [$PROGRAM, ($CREATE_LOGDIR ? $LOG_DIR : undef), $USE_SUDO, $RUNWITHSUDO],
                   -print => 1, -timeout => 60 );
}

# Don't clean the logs here, please!
# StopServer($ssh)
#
# Kills bootstrap, the program, gdb and valgrind on the server behind $ssh:
# first with signal 1, then with signal 9, using "sudo killall" when
# $RUNWITHSUDO is set.  Log files are left untouched.
sub StopServer
{
    my ($ssh) = @_;

    ExecRemoteFunc($ssh, sub {
        my ($exe, $runwithsudo) = @_;

        my $killall = $runwithsudo ? "sudo killall" : "killall";

        # Padding is kept so the command strings stay exactly as before.
        my @victims = ("bootstrap ", "$exe      ", "gdb       ", "valgrind  ");

        my @commands;
        foreach my $sig ("-1", "-9") {
            foreach my $victim (@victims) {
                push @commands, "$killall $sig ${victim}>/dev/null 2>&1";
            }
        }

        # The final command is issued separately so that its psystem()
        # result remains the value of this remote sub.
        my $final = pop @commands;
        foreach my $cmd (@commands) {
            psystem($cmd);
        }
        return psystem($final);
    }, [$PROGRAM, $RUNWITHSUDO], -print => 1, -timeout => 60 );
}

# ResolveIP($host)
#
# Returns $host as a dotted-quad IPv4 string.  A value that already is a
# dotted quad is returned unchanged; anything else is looked up with
# Net::hostent's gethost() and replaced by the first address found.
#
# Dies with "bad hostname: ..." if the lookup fails or yields no address.
# ($? is included in the message; perlfunc documents that failed gethost*()
# calls report h_errno through it.)
sub ResolveIP($)
{
    my $host = shift;

    # The pattern is anchored on both ends: a name that merely *contains*
    # something like 1.2.3.4 (e.g. "node-10.0.0.1.example.org") is a hostname
    # and must still be resolved rather than passed through as an address.
    if ($host !~ /\A\d+\.\d+\.\d+\.\d+\z/) {
        my $hent = gethost($host);
        die "bad hostname: $host -- $?" if
            !defined $hent || @{$hent->addr_list} < 1;
        $host = inet_ntoa($hent->addr_list->[0]);
    }

    return $host;
}

# SplitLogin($login)
#
# Breaks a "user@host:iface" string into its three fields and returns them
# as the list ($user, $host, $iface).  Dies with "bad login: ..." when the
# string does not have that shape or when any field is false.
# The fields are returned exactly as written; no name resolution is done.
sub SplitLogin($)
{
    my ($spec) = @_;
    my ($who, $machine, $addr);

    # user: no '@';  host and iface: no ':'
    if ($spec =~ /^([^\@]+)\@([^:]+):([^:]+)$/) {
        ($who, $machine, $addr) = ($1, $2, $3);
    }

    unless ($who && $machine && $addr) {
        die "bad login: $spec";
    }

    #$host  = ResolveIP($host);
    #$iface = ResolveIP($iface);

    return ($who, $machine, $addr);
}

# JoinLogin($user, $host, $iface)
#
# Inverse of SplitLogin: returns the string "user@host:iface".
sub JoinLogin($$$)
{
    my ($who, $machine, $addr) = @_;
    return join("", $who, "\@", $machine, ":", $addr);
}

# ToHost($login)
#
# Returns the interface field (the part after ':') of a "user@host:iface"
# login string.  Despite the name, this is not the host field.
# NOTE(review): presumably intentional, since the iface is the address the
# instances are addressed by -- confirm with callers.
sub ToHost($)
{
    my ($login) = @_;
    return (SplitLogin($login))[2];
}

# LoadVServerMap($topo)
#
# Reads the vserver topology file $topo and fills %VSERVER_MAP.
# Each "node <n> <addr> <vaddr>" line maps "<addr>:<k>" to <vaddr>, where
# <k> counts (from 0) how many node lines for <addr> have been seen so far.
# Reading stops at the first line of the form "<n>,<n> <number>"; any other
# line is reported with twarn.  Dies (tdie) if the file cannot be opened.
sub LoadVServerMap($)
{
    my $topo = shift;
    my %vserv_count = ();

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode or pipe, and no global bareword handle is used.
    open(my $fh, "<", $topo) or tdie("can't open vserver topology $topo: $!");

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$fh>) {
        if ($line =~ /^node\s+\d+\s+([^\t ]+)\s+([^\t ]+)/) {
            my ($addr, $vaddr) = ($1, $2);
            my $index = $vserv_count{$addr} || 0;
            $vserv_count{$addr}++;
            $VSERVER_MAP{"$addr:$index"} = $vaddr;
        } elsif ($line =~ /^\d+,\d+\s+[\d\.]+/) {
            # First line of this shape ends the node list; the rest of the
            # file is not needed here.
            last;
        } else {
            twarn("bad line in $topo: $line");
        }
    }
    close($fh);
}

# RemapVServerLogins(@logins)
#
# Expands every "user@host:iface" login into $VIRTUAL_SERVERS logins, one per
# vserver index 0 .. $VIRTUAL_SERVERS-1, with the iface replaced by the
# address GetVServerAddr() returns for that index.  Returns the expanded
# list, grouped by input login in input order.
sub RemapVServerLogins(@)
{
    my @expanded;

    for my $entry (@_) {
        my ($who, $machine, $addr) = SplitLogin($entry);

        my $slot = 0;
        while ($slot < $VIRTUAL_SERVERS) {
            my $vaddr = GetVServerAddr($addr, $slot);
            push @expanded, JoinLogin($who, $machine, $vaddr);
            $slot++;
        }
    }

    return @expanded;
}

# GetVServerAddr($iface, $vindex)
#
# Looks up the vserver address stored in %VSERVER_MAP under "$iface:$vindex"
# and returns it.  Calls tdie if no entry is defined for that key.
sub GetVServerAddr($$)
{
    my ($iface, $vindex) = @_;

    my $key    = "$iface:$vindex";
    my $mapped = $VSERVER_MAP{$key};

    unless (defined $mapped) {
        tdie("no vserver for $key (should you have specified hostnames/ipaddrs? check the topo file)");
    }
    return $mapped;
}

