#!/usr/bin/perl
#
# Generate aggregate bandwidth stats for a MessageLog;
# Modified version of Jeff's MessageLogTimeSeries.pl script
# 
# $Id: ExpMsgLogTmSeries.pl 2266 2005-10-15 00:52:57Z ashu $
#
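# -S time  - use this as the start time (before skipping)
# -s num   - skip the first num seconds of the trace (default 120)
# -l num   - the length of time to aggregate over, in seconds
#            (default 600; the remainder of the trace is ignored)
# -e epoch - size of each aggregation epoch (sec -- floating point; default 1)
# -t type  - {bwidth, proto, pps} (default bwidth)
#            bwidth: bandwidth without protocol overhead,
#            proto:  bandwidth with protocol overhead,
#            pps:    packets per second
# -a       - absolute count (not rate per second)
# -b       - the log is binary
# -A       - the logs are in the aggregated text format, where each line also
#            carries a sample count; this overrides -b
# -w num   - slowdown factor (default 1); the epoch is multiplied by it, and
#            when it is greater than 1 the skip time and length are too
# -T       - synchronize all the files: measure the skip time from the
#            earliest first-line timestamp observed across all the files
# -d       - print detailed stats (i.e., raw buckets) about each log instead of a summary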

use strict;
use File::Basename qw(dirname);
use Getopt::Std;
use Statistics::Descriptive;

# Option variables filled in by getopts() below (one per command-line switch).
our ($opt_S, $opt_s, $opt_l, $opt_e, $opt_t, $opt_a,
     $opt_b, $opt_T, $opt_A, $opt_w, $opt_d);

# Directory containing this script; Common.pl is loaded from it and the
# ParseMessageLog helper is run from it.  File::Basename::dirname is used
# rather than shelling out to `dirname $0`, which passed $0 unquoted to a
# shell and therefore broke on paths with spaces or shell metacharacters.
our $basedir = dirname($0);
require "$basedir/Common.pl";

# Per-message overhead, in bytes, added to the payload size in "proto" mode.
# NOTE(review): 28 presumably stands for IP (20) + UDP (8) headers -- confirm.
our $PROTO_OVERHEAD_BYTES = 28;

getopts("S:s:l:e:t:abTAw:d");

# Time window and aggregation parameters (see the option list at the top of
# the file for the meaning of each switch).
our $START    = defined $opt_S ? $opt_S : undef;
our $SKIPTIME = defined $opt_s ? $opt_s : 120;
our $LENGTH   = defined $opt_l ? $opt_l : 600;
our $EPOCH    = defined $opt_e ? $opt_e : 1;
our $TYPE     = defined $opt_t ? $opt_t : "bwidth";
our $DETAILS_ONLY = defined $opt_d;

# Boolean switches.
our $ABSOLUTE   = defined $opt_a;
our $LOG_BINARY = defined $opt_b;
our $DO_SYNC    = defined $opt_T;
our $AGGLOGS    = defined $opt_A;

# Aggregated logs are always read as text, so -A overrides -b.
$LOG_BINARY = 0 if $AGGLOGS;

# A slowdown factor above 1 stretches the skip time and window length by the
# same factor; the epoch itself is scaled where it is used, in ProcessFile.
our $SLOWDOWN = defined $opt_w ? $opt_w : 1;
if ($SLOWDOWN > 1.0) {
    $SKIPTIME *= $SLOWDOWN;
    $LENGTH   *= $SLOWDOWN;
}

###############################################################################

# Message-type table built by the Common.pl helper from this script's path;
# ProcessFile looks message types up in it to recognise MATCH messages.
our %MsgTypes = ParseMessageHeader ($0);

# Each returned hash is keyed by direction, then by message class, and holds
# one Statistics::Descriptive object with one sample per input file.
my ($expstats, $ninefifthstats, $nineninthstats) = ProcessInput(@ARGV);

# In details mode ProcessInput has already printed the raw buckets.
exit 0 if $DETAILS_ONLY;

# One output row per (message class, direction, summary kind); the label
# suffix names which per-file figure the row summarises.
my $row_format = "%-15s %12.3f %12.3f %12.3f %12.3f %12.3f\n";
my @summaries = (
    [ "MEAN", $expstats       ],
    [ "95%",  $ninefifthstats ],
    [ "99%",  $nineninthstats ],
);

printf("%-15s %12s %12s %12s %12s %12s\n", "Type", "mean", "stddev", "min", "max", "median");
print "-" x 80, "\n";

for my $msgtype ("TOTAL", "ROUTE", "MATCH", "OM") {
    for my $dir ("IN", "OUT") {
        for my $summary (@summaries) {
            my ($suffix, $table) = @$summary;
            my $s = $table->{$dir}->{$msgtype};

            printf($row_format,
                   "$msgtype:$dir:$suffix",
                   $s->mean(),
                   $s->standard_deviation(),
                   $s->min(),
                   $s->max(),
                   $s->median());
        }
    }
}

###############################################################################

# Process every log file named in the argument list.
#
# Arguments: the list of log file names.
#
# Returns three hash references (trimmed means, 95th percentiles, 99th
# percentiles).  Each is keyed by direction ("IN"/"OUT") and then by message
# class ("ROUTE"/"MATCH"/"OM"/"TOTAL"), and holds a
# Statistics::Descriptive::Full object that receives one sample per file.
#
# With -d ($DETAILS_ONLY) the per-epoch OUT buckets of each file are printed
# to STDOUT instead, and the returned objects stay empty.
sub ProcessInput(@)
{
    my @files = @_;

    # Common start time for all files: an explicit -S wins, otherwise -T
    # asks for the earliest first timestamp across the files.  When neither
    # applies $start stays undef and ProcessFile uses each file's own first
    # timestamp.
    my $start;
    if ($START) {
        $start = $START;
    }
    elsif ($DO_SYNC) {
        $start = GetAbsoluteStart(@files);
    }

    my @dirs     = ("IN", "OUT");
    my @msgtypes = ("ROUTE", "MATCH", "OM", "TOTAL");

    my (%expstats, %ninefifthstats, %nineninthstats);
    foreach my $dir (@dirs) {
        foreach my $msgtype (@msgtypes) {
            $expstats{$dir}->{$msgtype}       = Statistics::Descriptive::Full->new();
            $ninefifthstats{$dir}->{$msgtype} = Statistics::Descriptive::Full->new();
            $nineninthstats{$dir}->{$msgtype} = Statistics::Descriptive::Full->new();
        }
    }

    my %seen = ();
    foreach my $f (@files) {
        # The node is identified by a dotted quad embedded in the file name.
        # Fall back to the file name itself when there is none; using $1
        # after a failed match would pick up a stale or undefined capture.
        my $node = ($f =~ /(\d+\.\d+\.\d+\.\d+)/) ? $1 : $f;
        if (!exists $seen{$node}) {
            print STDERR "processing node $node...\n";
            $seen{$node} = 1;
        }

        my $fstats = ProcessFile($f, $start);

        if ($DETAILS_ONLY) {
            # Dump the raw per-epoch buckets for the OUT direction, one
            # column per message class and one row per bucket.
            my %data  = ();
            my $count = 0;
            my $dir   = "OUT";
            foreach my $msgtype (@msgtypes) {
                $data{$msgtype} = [ $fstats->{$dir}->{$msgtype}->get_data() ];
                # ProcessFile adds to every class on each flush, so all the
                # counts agree; the last one read is used.
                $count = $fstats->{$dir}->{$msgtype}->count();
                print "OUT:$msgtype\t";
            }
            print "\n";

            for (my $i = 0; $i < $count; $i++) {
                foreach my $msgtype (@msgtypes) {
                    printf("%.3f\t", $data{$msgtype}->[$i]);
                }
                print "\n";
            }
        }
        else {
            foreach my $dir (@dirs) {
                foreach my $msgtype (@msgtypes) {
                    my $s = $fstats->{$dir}->{$msgtype};

                    # A file that contributed no buckets adds no sample.
                    next if $s->count() == 0;

                    $expstats{$dir}->{$msgtype}->add_data($s->trimmed_mean(0.05));
                    # percentile() is forced into scalar context so only the
                    # value itself is added.
                    $ninefifthstats{$dir}->{$msgtype}->add_data(scalar $s->percentile(95));
                    $nineninthstats{$dir}->{$msgtype}->add_data(scalar $s->percentile(99));
                }
            }
        }
    }

    return (\%expstats, \%ninefifthstats, \%nineninthstats);
}

#
# Make one pass through all the files to get the earliest first-line
# timestamp observed in the logs.
#
# Arguments: the list of log file names.
# Returns:   the smallest timestamp found on the first line of any file, or
#            0 when no file yields a parsable timestamp.
# Dies when a file (or the pipe used to decode it) cannot be opened.
#
sub GetAbsoluteStart(@)
{
    my @files = @_;
    my $absolute_start;    # undef until some file yields a timestamp

    foreach my $f (@files) {
        # The list form of pipe open and three-argument open keep the file
        # name away from the shell, so names with spaces or metacharacters
        # are handled safely.
        my $fh;
        if ($LOG_BINARY) {
            open($fh, '-|', "$basedir/ParseMessageLog", $f)
                or die "can't open pipe from file $f";
        }
        elsif ($f =~ /\.gz$/) {
            open($fh, '-|', 'gunzip', '-c', $f)
                or die "cant gunzip on $f: $!";
        }
        else {
            open($fh, '<', $f) or die "can't open $f";
        }

        my $line = <$fh>;    # only the first line is needed
        close $fh;

        # An empty file has nothing to contribute.
        next unless defined $line;

        # Capture in list context so a failed match yields undef rather
        # than a stale $1 left over from an earlier successful match.
        my $time;
        if ($AGGLOGS) {
            ($time) = $line =~ m/(\d+\.\d+)/;
        }
        else {
            ($time) = $line =~ m/[0-9A-F]{8}?\t(\d+\.\d+)/;
        }

        if (!defined $time) {
            print STDERR "no timestamp on first line of $f; ignored for sync\n";
            next;
        }

        if (!defined $absolute_start or $time < $absolute_start) {
            $absolute_start = $time;
        }
    }

    #print STDERR "absolute start = $absolute_start\n";
    return defined $absolute_start ? $absolute_start : 0;
}

# Return the $perc-th percentile (nearest-rank method) of the samples held
# by $stats.
#
# Arguments:
#   $stats - any object whose get_data() method returns the list of samples
#   $perc  - percentile to look up, from 0 to 100
#
# Returns the selected sample, or nothing (undef in scalar context) when
# there are no samples.
sub Percentile($$) {
    my ($stats, $perc) = @_;

    my @sorted = sort { $a <=> $b } $stats->get_data();
    my $n = scalar @sorted;
    return unless $n;

    # Nearest rank: the smallest 1-based rank r with r >= perc/100 * n.
    # The previous code indexed with int($perc / $n), which is not a rank
    # at all and divided by zero on empty data.
    my $rank  = $perc * $n / 100;
    my $index = int($rank);
    $index++ if $index < $rank;    # round up to the next whole rank
    $index--;                      # convert to a 0-based index

    # Clamp so that 0 selects the minimum and 100 or more the maximum.
    $index = 0      if $index < 0;
    $index = $n - 1 if $index > $n - 1;

    return $sorted[$index];
}

# Record stats for each epoch bucket of one log file -
# ultimately, we need to report mean, median, 95th and stddev for this guy.
#
# Arguments:
#   $f     - log file name (binary with -b, gzip-compressed when it ends in
#            ".gz", plain text otherwise)
#   $start - reference start time, or undef to use the first timestamp
#            parsed from this file
#
# Returns a hash reference keyed by direction ("IN"/"OUT") and then by
# message class ("TOTAL"/"MATCH"/"OM"/"ROUTE").  Each entry is a
# Statistics::Descriptive::Full object holding one sample per epoch that
# contained at least one message inside the window
# [$start + $SKIPTIME, $start + $SKIPTIME + $LENGTH].
#
# Expected tab-separated line formats, as implied by the regexes below:
#   normal:     8 hex digits, time, direction, (unused field), type, size
#   aggregated: time, direction, type, size, sample count, trailing float
# Direction 0 is counted as "IN", anything else as "OUT".
#
# Dies when the file cannot be opened or $TYPE is not a known type.
#
# NOTE(review): epochs in which no message was seen produce no sample (they
# are not zero-filled), which matches the previous behaviour.
sub ProcessFile($$)
{
    my ($f, $start) = @_;

    my @dirs  = ("IN", "OUT");
    my @types = ("TOTAL", "MATCH", "OM", "ROUTE");

    my %stats   = ();    # per-epoch samples, returned to the caller
    my %buckets = ();    # running totals for the epoch being accumulated
    foreach my $dir (@dirs) {
        foreach my $type (@types) {
            $stats{$dir}->{$type}   = Statistics::Descriptive::Full->new();
            $buckets{$dir}->{$type} = 0;
        }
    }

    # The list form of pipe open and three-argument open keep the file name
    # away from the shell.
    my $fh;
    if ($LOG_BINARY) {
        open($fh, '-|', "$basedir/ParseMessageLog", $f)
            or die "can't open pipe from file $f";
    }
    elsif ($f =~ /\.gz$/) {
        open($fh, '-|', 'gunzip', '-c', $f)
            or die "cant gunzip on $f: $!";
    }
    else {
        open($fh, '<', $f) or die "can't open $f";
    }

    # Move the accumulated totals into the stats objects and reset them.
    my $flush = sub {
        foreach my $dir (@dirs) {
            foreach my $type (@types) {
                $stats{$dir}->{$type}->add_data($buckets{$dir}->{$type});
                $buckets{$dir}->{$type} = 0;
            }
        }
    };

    my $s_epoch    = $EPOCH * $SLOWDOWN;    # epoch width in log time
    my $prev_index = -1;                    # -1: nothing accumulated yet

    while (my $line = <$fh>) {
        chomp $line;

        my ($time, $dir, $type, $size, $samples);

        if ($AGGLOGS) {
            # Skip unparsable lines; reading $1..$5 after a failed match
            # would silently re-count the previous line's values.
            if ($line !~ m/(\d+\.\d+)\t(\d+)\t(\d+)\t(\d+)\t(\d+)\t\s+(\d+\.\d+)/) {
                print STDERR "bad line: $line\n";
                next;
            }
            ($time, $dir, $type, $size, $samples) = ($1, $2, $3, $4, $5);
        }
        else {
            if ($line !~ m/([0-9A-F]{8}?)\t(\d+\.\d+)\t(\d+)\t(\d+)\t(\d+)\t(\d+)/) {
                print STDERR "bad line: $line\n";
                next;
            }
            ($time, $dir, $type, $size) = ($2, $3, $5, $6);
        }

        # Restrict to the window [start + skip, start + skip + length].
        $start = $time if !defined $start;
        next if $time < $start + $SKIPTIME;
        last if defined $LENGTH && $time > $start + $SKIPTIME + $LENGTH;

        $dir = $dir == 0 ? "IN" : "OUT";

        # Close the previous epoch BEFORE accounting for this message, so
        # the first message of an epoch is not credited to the one before.
        my $index = int( ($time - ($start + $SKIPTIME)) / $s_epoch );
        if ($index != $prev_index) {
            $flush->() if $prev_index != -1;
            $prev_index = $index;
        }

        # Contribution of this line: kilobits (size * 8 / 1000) for the
        # bandwidth types, a message count for pps.
        my $inc;
        if ($TYPE eq 'bwidth') {
            $inc = $size*8/1000;
        }
        elsif ($TYPE eq 'proto') {
            # Aggregated lines stand for $samples messages, each of which
            # carries its own protocol overhead.
            my $msgs = $AGGLOGS ? $samples : 1;
            $inc = ($size + $PROTO_OVERHEAD_BYTES * $msgs)*8/1000;
        }
        elsif ($TYPE eq 'pps') {
            $inc = $AGGLOGS ? $samples : 1;
        }
        else {
            die "unknown type: $TYPE!";
        }

        # Unless -a was given, report a per-second rate.
        if (!$ABSOLUTE) {
            $inc /= $EPOCH;
        }

        # Classify the message; anything unclassified counts only in TOTAL.
        if (IsAppMsg($type)) {
            $buckets{$dir}->{"OM"} += $inc;
        }
        elsif ($MsgTypes{$type} =~ /MATCH/) {
            $buckets{$dir}->{"MATCH"} += $inc;
        }
        elsif (IsRoutingMsg($type)) {
            $buckets{$dir}->{"ROUTE"} += $inc;
        }
        $buckets{$dir}->{"TOTAL"} += $inc;
    }

    # Emit the epoch that was still being accumulated when the input ended
    # or the window closed.
    $flush->() if $prev_index != -1;

    close $fh;

    return \%stats;
}
