#!/usr/bin/perl

use strict;
use Travertine;
use lib "planetlab";
use PlanetLabConf;

# The experiment file is the first (and only required) command-line argument.
our $EXP = shift(@ARGV);
tdie "missing exp" unless $EXP;

# $VERSION is the string a healthy node's response must contain (see isok).
# $PORT is handed to TermExec.pl via -b; $MERCPORT is not defined in this
# file (presumably supplied by one of the imported modules).
our $VERSION = "0.9.1";
our $PORT    = 10000 + $MERCPORT;

# detect dead nodes

# Read the experiment file.  Every line must hold three tab-separated fields:
# a login, a title wrapped in single quotes, and a third non-empty field
# (captured as $ser; only its presence is checked here).
# The three-argument form of open keeps characters in $EXP from being
# interpreted as part of the open mode.
our $INPUT;
open($INPUT, '<', $EXP) or tdie "can't open $EXP: $!";

our %seen;      # hosts already recorded, used to reject duplicates
our @logins;    # [user, host] pairs for every accepted non-bootstrap line
while (my $line = <$INPUT>) {
    chomp $line;
    my ($login, $title, $ser) = ($line =~ /^([^\t]+)\t'([^\t]+)'\t([^\t]+)$/);
    tdie "bad line: $line" if !defined $login or !defined $title or !$ser;

    # Lines whose title mentions "bootstrap" are not tracked.
    next if $title =~ /bootstrap/;

    # Logins look like user@host:iface; the part after the colon has to be
    # there for the pattern to match but is not used afterwards.
    my ($user, $host) = ($login =~ /(.*)\@(.*):(.*)/);

    if ($user && $host) {
        tdie "can't handle multiple vservers on a host!" if $seen{$host};
        push @logins, [$user, $host];
        $seen{$host} = 1;
    } else {
        twarn "bad login: $login";
    }
}
close($INPUT);

# Run TermExec.pl with the "QV" command against the experiment file and
# capture everything it prints (presumably a per-node version query; the
# responses are later matched against $VERSION).
my $cmd = "$ENV{HOME}/Merc/run/TermExec.pl -o -b $PORT -p $TERMPASS $EXP QV";
my $out = `$cmd`;
# Backticks return undef when the command could not be started.  Without
# this check %resps would stay empty and the script would go on to report
# that nobody is dead.
tdie "can't run $cmd: $!" if !defined $out;
my @lines = split(/\n/, $out);

# Group the output by host: a line beginning with user@host:iface selects the
# current host, and every line beginning with "quake2" or "NO RESPONSE" is
# appended to that host's entry in %resps.
my %resps;
my $curr;
foreach my $line (@lines) {
    if ($line =~ /^[\w\d_-]+\@([\w\d_\-\.]+):([\w\d_\-\.]+)/) {
        $curr = $1;
    }
    if ($line =~ /^(quake2|NO RESPONSE)/) {
        # A response line before any login line means the output is not in
        # the expected layout.
        tdie "bad format: $out" if !$curr;
        $resps{$curr} .= $line;
    }
}

# Returns true when the response text recorded for the given host contains
# $VERSION.  Dies (via tdie) when no response text was recorded for the host.
sub isok($)
{
    my ($host) = @_;
    tdie "no response from $host?!" if !$resps{$host};
    # \Q...\E makes the dots in the version string match literally instead of
    # acting as regex wildcards (otherwise "0.9.1" would also accept "0.911").
    return $resps{$host} =~ /\Q$VERSION\E/;
}

# A host is dead when its recorded response fails the isok() check.
my @dead = grep { !isok($_) } keys %resps;

###############################################################################

# pull logs from dead nodes

# Nothing to do when every host passed the check.
unless (@dead) {
    tinfo "nobody is dead!";
    exit 0;
}

my $dead_report = "dead nodes: " . join(" ", @dead);
tinfo $dead_report;

# (disabled) early exit right after reporting the dead nodes
#exit 1;

tinfo "pulling logs...";

# PlanetLabPullLogs.pl receives the dead hosts as one comma-separated -H value.
my $pull_cmd = "$ENV{HOME}/Merc/run/planetlab/PlanetLabPullLogs.pl -H "
    . join(",", @dead);
psystem($pull_cmd);

###############################################################################

# reboot dead nodes

tinfo "rebooting...";

# Forward whatever command-line arguments remain after the experiment file.
# The original text used the shell's "$*" here, but inside a Perl string that
# names the long-removed multiline-match variable and is a compile-time fatal
# error from Perl 5.30 on.  Like the shell's unquoted $*, the arguments are
# joined with single spaces and not re-quoted.
my $forwarded_args = join(" ", @ARGV);

# PlanetLabRun.pl is given the dead hosts as one comma-separated, single-quoted
# -R value, followed by QuakeConf.pl and its options.  The output file name
# embeds this process's pid ($$).
$cmd = join(" ",
    "./planetlab/PlanetLabRun.pl -R '" . join(",", @dead) . "'",
    "QuakeConf.pl",
    $forwarded_args,
    "--output /tmp/plab.reboot.$$.exp",
    "--bootstrap $BOOTSTRAP:15000",
    "--nobootstrap",
    "--nomaster",
    "--moresleep",
    "--timelimit 5184000000",
    "--nofight",
    "--nbots 1",
    "--nmons 0",
    "--maxclients 4",
    "--map big_map",
    "--logenableonly QuakeStatsLog,GUIDLog,AggregateMessageLog,RUsageLog,DiscoveryLatLog,RepConnSendLog,RepConnRecvLog,RepConnSendDroppedLog,RepConnRecvDroppedLog",
    "--logsampleparams QuakeStatsLog=0.1/1,DiscoveryLatLog=0.02/60,RepConnSendLog=0.1/1,RepConnRecvLog=0.1/1",
);

psystem($cmd);

tinfo "done!";
