#!/usr/bin/perl
#
# Compute the cost incurred by a particular run of a pub sub simulation.
# The cost is the tuple (in kbps per server):
#
# total    - total units transfered in the system (sum of the below)
# pub/sub register/unregister:
# sending  - total pub/sub units sent by the app in the system (first hop)
# routing  - total units routed (this is approx as sending*(log(N) - 2))
# storing  - total cost due to storing at nodes (last routing hop)
# matching - total units trasfered due to matching
# pinging  - total overhead units of pinging to check whether we are actually
#            interested in an object we got a match for. this is
#            matching*ttl*ping_rate
#
# The cost model we use is the following:
#
# There are 3 operations that occur in our system:
#
# (1) reg   (r) - register a pub or sub
# (2) unreg (u) - unregister a pub or sub explicitly
# (3) match (m) - a single pub was matched with a single sub
#
# The log we read tells us when the application was /forced/ to register
# a new pub or sub. It *must* do this when the current position of the
# object is no longer within the bbox of the old pub or the new visible bbox
# is not entirely contained within the old sub bbox. Just before this
# happens (if an old pub/sub was present in the system) the log tells us
# we sent an explicit unregister (also when objects are deleted).
#
# Each forced pub/sub incurs the normal send/route/store cost. It may
# incur the cost multiple times if the TTL expires. Each unreg incurs a cost 
# if the ttl on the old one + some fudge has not expired.
#
# Each match incurs 1 matching message from one RP to the receiver. /Then/
# the receiver incurs the cost of "pinging" the object which it believes it
# is interested in, and it must continue to ping for TTL amount of time.
# Why? Because the pub/sub match tells him that he is "potentially" interested
# in this object for TTL amount of time (min(sub_ttl, pub_ttl)) and no one
# will tell him otherwise *until* either the pub or sub expires and a new one
# is submitted retry the match. There is a special case for items: we don't
# have to ping in the case the matched thingy is an item because the item
# can not change its pub, therefore we can test locally if we are interested
# in it.
#
###############################################################################

use strict;
use POSIX qw(floor ceil);
use Getopt::Std;
use vars qw($opt_s $opt_l 
	    $opt_t
	    $opt_n $opt_X $opt_Y $opt_m $opt_y $opt_O
	    $opt_p $opt_f $opt_u $opt_g $opt_r $opt_T
	    $opt_i $opt_S $opt_C);

getopts("s:l:t:n:XYm:y:Op:f:u:g:r:Ti:S:C");

###############################################################################
#
# Constants
#

# size units are in messages (assuming pub or sub = 1)
# bytes = header flags (x y z)*2 guid sid + extra_unneeded_label_junk
#our $MSGSIZE   = 71 + (5*(4+1)+4);
# time units are in frames or (units/frame)
our $FPS       = 10;

# program used to turn the binary logs into text
our $DECOMP = "$ENV{HOME}/id-source/scripts/PubSubSimParser";

###############################################################################
#
# Options
#

# number of initial frames to skip
our $SKIPFRAMES = $opt_s || 0;
# number of maximum frames to examine
our $MAXFRAMES  = $opt_l || 99999999;

# which types to examine (m = missile, p = player, i = item)
#
# this restricts us to looking at costs incurred because it was sent /by/
# an object of this type or /to/ an object of this type. But not generating
# cost (e.g., routing/storage) by another originating type
our %EXAMINE_TYPES;
if (defined $opt_t) {
    %EXAMINE_TYPES = map { $_ => 1 } split(/,/, $opt_t);
} else {
    %EXAMINE_TYPES = ( 'p' => 1,
		       'o' => 1,
		       'm' => 1,
		       'i' => 0 );
}

# which types to examine matches from (e.g., when a match occurs from this
# type, send it to me).
our %MATCH_TYPES;
if (defined $opt_y) {
    %MATCH_TYPES = map { $_ => 1 } split(/,/, $opt_y);
} else {
    %MATCH_TYPES = ( 'p' => 1,
		     'o' => 1,
		     'm' => 1,
		     'i' => 1 );
}

# number of servers
our $NSERVERS     = $opt_n || 8;

# do we want to use the xhub?
our $USE_XHUB     = !defined $opt_X;
# do we want to use the yhub?
our $USE_YHUB     = !defined $opt_Y;
our $NHUBS        = $USE_XHUB + $USE_YHUB;
# do we send pubs (p) to multiple hubs or subs (s)?
die "not enough hubs!" if $NHUBS < 1;
our $MULTHUB_TYPE = $opt_m || 'p';
die "bad type $MULTHUB_TYPE!" if $MULTHUB_TYPE ne 'p' && $MULTHUB_TYPE ne 's';

our $ASSOC_OWNER = !defined $opt_O;

sub CustomizeTTLs($$)
{
    my ($default, $custstr) = @_;
    my %opts = map { split('=', $_) } split(',', $custstr);
    foreach my $k (keys %opts) {
	die "non-positive ttl: $k = $opts{$k}!" if $opts{$k} <= 0;
	$default->{$k} = $opts{$k};
    }
}

our %PUB_TTLS     = ( 'p' => 1,
		      'o' => 1,
		      'i' => 300,
		      'm' => 1 );
if (defined $opt_p) {
    CustomizeTTLs(\%PUB_TTLS, $opt_p);
}
# don't actually bother unreg if expires in x frames
our $PUB_TTL_FUDGE = $opt_f || 2;

our %SUB_TTLS      = ( 'p' => 1,
		       'o' => 1,
		       'i' => 300, # irrelevant, don't sub
		       'm' => 1 );
if (defined $opt_u) {
    CustomizeTTLs(\%SUB_TTLS, $opt_u);
}
# don't actually bother unreg if expires in x frames
our $SUB_TTL_FUDGE = $opt_g || 2;

# simulate simple caching
our $CACHING = !defined $opt_C;

# number of pings per frame
our $PING_RATE     = 1;

# the dimension to stripe on
our $STRIPE_DIM = undef;
if (defined $opt_i) {
    if ($opt_i eq 'x') {
	$STRIPE_DIM = 0;
    } elsif ($opt_i eq 'y') {
	$STRIPE_DIM = 1;
    } else {
	$STRIPE_DIM = 2;
    }
}

# stripping the hubs:
our @STRIPES = (1, 1, 1);
@STRIPES = split(/,/, $opt_S) if defined $opt_S;
die "bad stripes: @STRIPES" if !$STRIPES[0] || !$STRIPES[1] || !$STRIPES[2];
die "too many hubs to use stripes!" if $NHUBS != 1;

# only print total cost
our $ONLY_PRINT_TOTAL = defined $opt_T;

###############################################################################

if (@ARGV != 1) {
    print "  usage: PubSubSimComputeCost.pl [options] <PubSubCostLog>\n\n";
    print "  options:\n";
    print "    -s frames    number of initial frames to skip ($SKIPFRAMES)\n";
    print "    -l frames    max number of frames to examine ($MAXFRAMES)\n";
    print "    -t types     types to examine (p,o,m,i) (" . 
	join(",", keys %EXAMINE_TYPES) . ")\n";
    print "    -y types     types to examine matches from (p,o,m,i) (" .
	join(",", keys %MATCH_TYPES) . ")\n";
    print "    -n nservers  number of servers in the system ($NSERVERS)\n";
    print "    -X           don't use x-hub (" . (!$USE_XHUB) . ")\n";
    print "    -Y           don't use y-hub (" . (!$USE_YHUB) . ")\n";
    print "    -m {p,s}     type to send to multiple hubs (pub,sub) ($MULTHUB_TYPE)\n";
    print "    -O           disable object from merging subs with the owner\n";
    print "    -p type=ttl  pub ttl in # frames for diff types (" .
	join(",", map { "$_=$PUB_TTLS{$_}" } keys %PUB_TTLS) . ")\n";
    print "    -f frames    pub ttl fudge (allow to expire within) ($PUB_TTL_FUDGE)\n";
    print "    -u type=ttl  sub ttl in # frames (" . 
	join(",", map { "$_=$SUB_TTLS{$_}" } keys %SUB_TTLS) . ")\n";
    print "    -g frames    sub ttl fudge (allow to expire within) ($SUB_TTL_FUDGE)\n";
    print "    -r 1/frames  pinging rate in pings per frame ($PING_RATE)\n";
    print "    -T           only print the total cost ($ONLY_PRINT_TOTAL)\n";
    print "\n";
    exit 1;
}

my $file;
if ($ARGV[0] =~ /\.gz$/) {
    $file = open(F, "gunzip -c $ARGV[0] | $DECOMP |");
} else {
    $file = open(F, "$DECOMP $ARGV[0] |");
}

die "can't open $ARGV[0]" if !$file;

###############################################################################
#
# Process simulation log
#

# costs initiated by a pub
my %pub = ( 'rsending' => 0,
	    'rrouting' => 0,
	    'rstoring' => 0,
	    'usending' => 0,
	    'urouting' => 0,
	    'ustoring' => 0,
	    'matching' => 0,
	    'pinging'  => 0 );
# costs initiated by a sub
my %sub = %pub;

# remembers the last pub/sub for each object
#
# guid => [ [x,y,z]_min, [x,y,z]_max ]
my %last_pub;
my %last_sub;

my $startframe;
my $endframe;

my $f;
while (<F>) {
    chomp $_;
    my ($frameno, $guid, $owner, $type, $class, $op, $min, $max, $life,
	$match_guid, $match_type) =
	split(/\t/, $_);

    my @min = split(/,/, $min);
    my @max = split(/,/, $max);
    my $bbox = [ \@min, \@max ];

    # HACK: z-dim is messed up on big_map4 -- not exactly even, pretend it is
    #print ">> $min[2]\t$max[2]\n";
    $min[2] = int($min[2]/0.25 + 0.5)*0.25;
    $max[2] = int($max[2]/0.25 + 0.5)*0.25;
    #print "<< $min[2]\t$max[2]\n";
    # HACK

    my $count  = 1;
    my $xrange = $max[0] - $min[0];
    my $yrange = $max[1] - $min[1];
    my $xcached = 0;
    my $ycached = 0;

#    if ($CACHING) {
#	my @last = (-1,-1);
#	if (defined $last_pub{$guid}) {
#	    $last[0] = $last_pub{$guid}->[0]->[0];
#	    $last[1] = $last_pub{$guid}->[0]->[1];
#
#	    $xcached = $xcached ||
#		( abs($min[0]-$last[0]) < 1/$NSERVERS );
#	    $ycached = $ycached ||
#		( abs($min[1]-$last[1]) < 1/$NSERVERS );
#	}
#	if (defined $last_sub{$guid}) {
#	    $last[0] = $last_sub{$guid}->[0]->[0];
#	    $last[1] = $last_sub{$guid}->[0]->[1];
#
#	    $xcached = $xcached ||
#		( abs($min[0]-$last[0]) < 1/$NSERVERS );
#	    $ycached = $ycached ||
#		( abs($min[1]-$last[1]) < 1/$NSERVERS );
#	}
#
#	if (!$xcached) {
#	    $xcached = 0;
#	}
#	if (!$ycached) {
#	    $ycached = 0;
#	}
#
#	print "$xcached $ycached\n";
#    }

    if (defined $STRIPE_DIM) {
	my $range;
	# if using stripes, then the total range is "expanded" by lining
	# up each of the stripes.
	if ($STRIPE_DIM == 0) {
	    $xrange = $xrange/($STRIPES[0]*$STRIPES[1]*$STRIPES[2]);
	} elsif ($STRIPE_DIM == 1) {
	    $yrange = $yrange/($STRIPES[0]*$STRIPES[1]*$STRIPES[2]);
	} else {
	    die "don't know what to do with z-dim";
	}

	# but we might need to be sent to multiple stripes -- count them
	# by figuring out how many "pieces" our bbox was chopped into.
	for (my $i=0; $i<3; $i++) {
	    next if $i == $STRIPE_DIM;

	    my $stripe_width = 1/$STRIPES[$i];
	    my $pieces = 1;
	    for (my $j=int($min[$i]/$stripe_width); $j<$STRIPES[$i]; $j++) {
		my $line = $j*$stripe_width;
		next if $min[$i] >= $line;
		last if $max[$i] <= $line;
		$pieces++ if $line > $min[$i] && $line < $max[$i];
	    }
	    $count *= $pieces;
	}

	#if ($min[2] != $max[2]) {
	#    print "$count\n";
	#}
    }
    
    die "bad line: $_!"
	if !defined $frameno || !defined $type || !defined $owner ||
	!defined $class || !defined $op || !defined $min || !defined $max;

    my $nhubs;
    my $ttl;
    my $fudge;

    # skip frames we don't want
    if (!defined $startframe) {
	$startframe = $frameno;
    }
    if ($startframe + $SKIPFRAMES > $frameno) {
	next;
    }
    $endframe = $frameno;
    if ($frameno - ($startframe + $SKIPFRAMES) > $MAXFRAMES) {
	last;
    }

    if (! $EXAMINE_TYPES{$type} ) {
	next;
    }

    my $xub;
    if ($class eq 'p') {
	$xub = \%pub;
	$ttl = $PUB_TTLS{$type};
	$fudge = $PUB_TTL_FUDGE;
    } else {
	$xub = \%sub;
	$ttl = $SUB_TTLS{$type};
	$fudge = $SUB_TTL_FUDGE;
    }

    if ($class eq $MULTHUB_TYPE) {
	$nhubs = $NHUBS;
    } else {
	$nhubs = 1;
    }

    # since we record stuff on the same "round" iteratively, some lifetimes 
    # are marked as 0, but really should be 1 (we really want the stuff from
    # this round to look at stuff from the last round, but the delta is
    # so small it hardly really matters).
    #
    # XXX: we could be off by 1 for larger lifetimes as well... oh well
    # in reality this should be fudge...
    $life = max($life, 1);

    if ($op eq 'r') {
	if ($ASSOC_OWNER && $owner != 0 && defined $last_sub{$owner}) {
	    # if we have an owner (e.g., we are a missile) and our
	    # owner's subscription contains ours, then we should just
	    # use their subscription and not resend another one
	    if (BBoxContains($last_sub{$owner}, $bbox)) {
		next;
	    }
	}

	for (my $i=0; $i<$count; $i++) {
	    # someone registering an item
	    recordReg($xub, $nhubs, $xrange, $yrange);
	}

	if ($class eq 'p') {
	    $last_pub{$guid} = [ \@min, \@max ];
	} else {
	    $last_sub{$guid} = $bbox;
	}
    } elsif ($op eq 'u') {
	die "bad line: $_!" if !defined $life;

	# if we had set the ttl shorter than
	# how long we actually wanted the xub, then we had to resubmit
	# it a couple of times...
	while ($life > $ttl) {
	    for (my $i=0; $i<$count; $i++) {
		recordReg($xub, $nhubs, $xrange, $yrange);
	    }
	    $life -= $ttl;
	}

	# when the life drops below the ttl, then when we want to change
	# it, we either have to explicitly unregister it (record) or
	# let it expire ("fudge").
	#
	# XXX:
	# right now the code does not take into account costs incurred
	# by letting it expire (extraneous matches, etc.), so the fudge
	# should be set low
	if ($life < $ttl - $fudge) {
	    for (my $i=0; $i<$count; $i++) {
		recordUnreg($xub, $nhubs, $xrange, $yrange);
	    }
	}

	if ($class eq 'p') {
	    $last_pub{$guid} = undef;
	} else {
	    $last_sub{$guid} = undef;
	}
    } elsif ($op eq 'm') {
	die "bad line: $_!" 
	    if !defined $life || !defined $match_type;

	if (!$MATCH_TYPES{$match_type}) {
	    next;
	}

	# matching an item to someone
	my($sub_remainder, $pub_remainder);
	my $pub_type;

	# $type always refers to the type of the receiver (i.e., owner of the
	# subscripcation)
	# $match_type is the type of the sender (i.e., owner of the
	# publication)
	if ($op eq 'p') {
	    # this means the pub is new and the life refers to the sub
	    $pub_remainder = $PUB_TTLS{$match_type};
	    $sub_remainder = $SUB_TTLS{$type} - ($life % $SUB_TTLS{$type});
	} else {
	    # this means the sub is new and life refers to the pub
	    $pub_remainder = $PUB_TTLS{$type} - ($life % $PUB_TTLS{$type});
	    $sub_remainder = $SUB_TTLS{$match_type};
	}

	recordMatch( $xub, $xrange, $yrange, 
		     # if the pub was from an item, the cost of pinging is
		     # 0. This is because items can not move, so we can
		     # test locally whether we are interested in items or not.
		     $match_type eq 'i' ? 0 :
		     # otherwise, we must query the owner of the item
		     # during the interval min(pub_ttl, sub_ttl), because
		     # the match tells us that we may be interested in
		     # the object for this amount of time, and we will not
		     # learn otherwise until either a new sub is submitted
		     # or a new pub is submitted.
		     min($pub_remainder, $sub_remainder) );
    }
}
close(F);

my $interval = $endframe - ($startframe + $SKIPFRAMES);
die "not long enough!" if $interval <= 0;

my $total = 0;

# normalize costs by interval and print results
foreach my $k (sort { $a cmp $b } keys %pub) {
    $pub{$k} = normalize($pub{$k}, $interval);
    $total += $pub{$k};

    print sprintf("%20s\t%12.5f   pps/server\n", "PUB_$k", $pub{$k})
	if !$ONLY_PRINT_TOTAL;
}
foreach my $k (sort { $a cmp $b } keys %sub) {
    $sub{$k} = normalize($sub{$k}, $interval);
    $total += $sub{$k};

    print sprintf("%20s\t%12.5f   pps/server\n", "SUB_$k", $sub{$k})
	if !$ONLY_PRINT_TOTAL;
}

print sprintf("%20s\t%12.5f   pps/server\n", "TOTAL", $total)
    if !$ONLY_PRINT_TOTAL;

if ($ONLY_PRINT_TOTAL) {
    print sprintf("%.5f\n", $total);
}

close(F);

# done!
exit(0);

###############################################################################
#
# Cost Computation: (i.e., how many messages sent per operation)
#

#
# Application registration of a publication or subscription
#
sub recordReg($$$$) {
    my ($xub, $nhubs, $xrange, $yrange) = @_;

    # (1) app incurs sending cost:
    #     one per each hub
    $xub->{rsending} += $nhubs;
    # (2) merc incurs routing cost:
    #     log(N) - 2 for each hub
    #
    #     in reality it may be cheaper because of caching
    $xub->{rrouting} += $nhubs*max( 0, log($NSERVERS)/log(2) - 2);
    # (3) storage cost at the appropriate number of nodes:
    #     store at k nodes in each hub, where k = the range of the xub
    #     this assumes ranges are unif distributed.
    my $xcost = max( 1, ceil($xrange*$NSERVERS) );
    my $ycost = max( 1, ceil($yrange*$NSERVERS) );
    $xub->{rstoring} += $nhubs > 1 ? $xcost + $ycost : 
	# if we're not using one of the hubs, we pick the cost of the other
	min($USE_XHUB ? $xcost : 1e100, $USE_YHUB ? $ycost : 1e100);
}

#
# Application unregistration of a publication or subscription
#
sub recordUnreg($$$$) {
    my ($xub, $nhubs, $xrange, $yrange) = @_;

    # (1) app incurs sending cost:
    #     one per each hub
    $xub->{usending} += $nhubs;
    # (2) merc incurs routing cost:
    #     log(N) - 2 for each hub
    $xub->{urouting} += $nhubs*max( 0, log($NSERVERS)/log(2) - 2);
    # (3) storage cost at the appropriate number of nodes:
    #     store at k nodes in each hub, where k = the range of the xub
    #     this assumes ranges are unif distributed.
    my $xcost = max( 1, ceil($xrange*$NSERVERS) );
    my $ycost = max( 1, ceil($yrange*$NSERVERS) );
    $xub->{ustoring} += $nhubs > 1 ? $xcost + $ycost : 
	# if we're not using one of the hubs, we pick the cost of the other
	min($USE_XHUB ? $xcost : 1e100, $USE_YHUB ? $ycost : 1e100);
}

#
# A pub was matched with a sub in the system
#
sub recordMatch($$$$) {
    my($xub, $xrange, $yrange, $ttl_remaining) = @_;

    # actually its an easy optimization to ensure there is only 1
    # match message -- just make the guy who sees that he contains
    # the minimum point of overlap send the match, no one else has to.

    # each match incurs 1 pub_match message per matched RP -- if the
    # match is more than a point, it matches at a range of servers.
    # optimistically assume that the match occurred at the most selective
    # hub 
    #
    # XXX: (this might not be the case -- it is possible that the xub
    # was sent to a less selective hub for *itself* but that overlapped
    # a larger portion of the corresponding pub/sub)
    #my $most_sel;
    #if (!$USE_XHUB) {
#	$most_sel = $yrange;
    #} elsif (!$USE_YHUB) {
#	$most_sel = $xrange;
#    } else {
#	$most_sel = min($xrange, $yrange);
#    }
    $xub->{matching} += 1; #max(1, ceil( $most_sel*$NSERVERS ));
    # but now the receiver has to "ping" the remote node to see if
    # he is actually interested in it -- must continue to ping
    # regardless if whether "currently" interested or not because
    # the pub or sub won't expire for the TTL and we won't get
    # another notification whether we are interested or not!
    # this is the primary "overhead" from too many unneeded matches
    #
    # in reality we can improve this by having a more complicated
    # protocol between the pinger and the pingee, but lets keep it simple
    $xub->{pinging}  += $PING_RATE*$ttl_remaining;
}

#
# Normalize cost from packets per frame total to pps per server
#
sub normalize($$) {
    my ($x, $n) = @_;

    # ppf total => kbps per server
    return ( $x/($n/$FPS) )/$NSERVERS; 
}

###############################################################################

sub min($$)
{
    $_[0] < $_[1] ? $_[0] : $_[1];
}

sub max($$)
{
    $_[0] > $_[1] ? $_[0] : $_[1];
}

sub BBoxContains($$)
{
    my $box1 = shift;
    my $box2 = shift;

    for (my $i=0; $i<3; $i++) {
	if ($box1->[0]->[$i] > $box2->[0]->[$i] || 
	    $box1->[1]->[$i] < $box2->[1]->[$i]) {
	    return 0;
	}
    }

    return 1;
}
