#!/usr/bin/perl
#
# Rescale the epoch of a time series by averaging over a new epoch size.
# The new epoch must be larger than the old epoch of course.
#
# Usage: ./TimeSeriesAvg.pl [-e epoch] [-C] [-F mult] [-G mult]
#                           [-M min] [-X max] <time_series>
#
# E.G., To rescale a timeseries that is in 0.1 sec epochs to 5 second epochs,
# the script will average each 50 lines. Each field will be averaged 
# individually. (The average is bucketized, not moving, so the number of
# points is reduced)

use strict;
use Statistics::Descriptive;
use Getopt::Std;
use vars qw($opt_e $opt_C $opt_F $opt_G $opt_M $opt_X);

# Command-line options:
#   -e <epoch>  size of the new epoch, in the same units as the input
#               timestamps (defaults to 1 when omitted or given as 0)
#   -C          convert from bytes to kbps instead of averaging
#   -F <mult>   drop rows with a field below  mean - mult * std_dev
#   -G <mult>   drop rows with a field above  mean + mult * std_dev
#   -M <min>    drop rows with a field below this absolute value
#   -X <max>    drop rows with a field above this absolute value
#
# getopts() reports an unrecognised switch on STDERR and returns false;
# stop there instead of running with a half-parsed configuration.
getopts("e:CF:G:M:X:")
    or die 'Usage: ' . $0
         . ' [-e epoch] [-C] [-F mult] [-G mult] [-M min] [-X max] <time_series>'
         . "\n";

my $EPOCH = $opt_e || 1;

# The epoch is added to timestamps and, with -C, used as a divisor, so it
# has to be a strictly positive number. Strings such as "abc" or "0.0" are
# true in Perl and would otherwise slip past the "|| 1" default above.
if ($EPOCH !~ /^(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?$/ || $EPOCH <= 0) {
    die "$0: epoch (-e) must be a positive number, got '$EPOCH'\n";
}

my $CONVERT = $opt_C; # convert from bytes to kbps

# Sometimes we want to skip datapoints that lie too far from the mean of
# their column: more than F standard deviations below it, or more than G
# standard deviations above it. -M / -X give absolute lower / upper limits
# instead. Note that we skip the entire "line" of the log if ANY of the
# fields on the line fall outside these filter ranges.
my $MINFILTER = $opt_F;    # drop values below mean - F * std_dev
my $MAXFILTER = $opt_G;    # drop values above mean + G * std_dev
my $ABSMINFILTER = $opt_M; # drop values below this absolute minimum
my $ABSMAXFILTER = $opt_X; # drop values above this absolute maximum

# Accumulated output: one [ timestamp, value, value, ... ] row per closed
# epoch, plus one statistics object per value column (index = field - 1)
# that collects that column's bucket values for the std_dev filters.
my @buckets;
my @stats;

# State of the epoch currently being accumulated.
my $lasttime;       # timestamp at which the current epoch started
my @lastvals;       # running per-column sums for the current epoch
my $count = 0;      # number of input lines added to @lastvals so far

# Read tab-separated lines whose first field begins with a timestamp made
# of digits and dots; every other line is ignored.
while (<>) {
    chomp $_;
    my @fields = split(/\t/, $_);
    next unless defined $fields[0] && $fields[0] =~ /^([\d\.]+)/;

    my $time = $1;
    $lasttime = $time if !defined $lasttime;

    # The first line at or past the end of the current epoch closes it.
    if ($time >= $lasttime + $EPOCH) {
        # With -C the column sums are turned into a rate (*8/1000, divided
        # by the epoch length; kbps if the epoch is in seconds and the
        # fields are byte counts). Otherwise each sum is divided by the
        # number of lines seen, giving the plain mean.
        my @newvals = map { ($CONVERT ? ($_*8/1000)/$EPOCH
                                      : $_/$count) } @lastvals;

        # The bucket is labelled with the timestamp of the line that
        # closed it, i.e. the first line of the following epoch.
        push @buckets, [ $time, @newvals ];

        for my $i (0 .. $#newvals) {
            $stats[$i] = Statistics::Descriptive::Full->new()
                if !defined $stats[$i];
            $stats[$i]->add_data( $newvals[$i] );
        }

        # Start a fresh epoch at the current line.
        $lasttime = $time;
        @lastvals = ();
        $count = 0;
    }

    # Fold this line's value fields (everything after the timestamp) into
    # the running sums of the current epoch.
    for my $i (1 .. $#fields) {
        $lastvals[$i-1] += $fields[$i];
    }
    $count++;
}

# NOTE: an epoch is only closed when a later line arrives at or beyond its
# end, so whatever has been accumulated when the input runs out is not
# turned into a bucket.

# Print the buckets as "time<TAB>val<TAB>val...", dropping every row in
# which at least one field falls outside the configured absolute limits or
# the limits derived from that column's mean and standard deviation.

# The mean/std_dev limits depend only on the column, not on the row, so
# work them out once per column. An entry stays undef when the matching
# filter was not requested, which also keeps an unset threshold out of the
# arithmetic.
my (@lower, @upper);
if (defined $MINFILTER || defined $MAXFILTER) {
    for my $j (0 .. $#stats) {
        my $mean = $stats[$j]->mean();
        my $sd   = $stats[$j]->standard_deviation();
        $lower[$j] = $mean - $MINFILTER * $sd if defined $MINFILTER;
        $upper[$j] = $mean + $MAXFILTER * $sd if defined $MAXFILTER;
    }
}

BUCKET: for my $bucket (@buckets) {
    my ($time, @vals) = @{$bucket};

    # One offending field is enough to discard the whole row.
    for my $j (0 .. $#vals) {
        my $v = $vals[$j];
        next BUCKET if defined $ABSMINFILTER && $v < $ABSMINFILTER;
        next BUCKET if defined $ABSMAXFILTER && $v > $ABSMAXFILTER;
        next BUCKET if defined $lower[$j]    && $v < $lower[$j];
        next BUCKET if defined $upper[$j]    && $v > $upper[$j];
    }

    print "$time\t" . join("\t", @vals) . "\n";
}
