#!/usr/local/bin/perl

# USAGE NOTES:
# Script that will produce a ranked list of candidate pointer
# pairs for pragma independent and optionally annotate source code.
# By default just tries to compile *.c
#
#  -c compile with memory profiling
#  -cc location of memory profiling compiler
#  -r run instrumented executable
#  -rr location of instrumented executable
#  -l list pointer pairs (assumes existance of data files)
#  -a annotate source code
#  -copt options for compilation
#  -lopt options for linking
#  -ropt options for executing executable

$files = "*.c"; #files to compile
$support = "mchk.a"; #runtime support library

$do_compile = 0;
$gcc = "/usr0/local/bin";
$do_run = 0;
$bin = "./a.out";
$do_list = 0;
$do_annotate = 0;
$copt = " -O2 -funroll-loops -w";
$lopt = "";
$ropt = "";

while ($ARGV[0] =~ /^[-+]/) {
    $_ = $ARGV[0];
  OPTION:
    {
	/^-c$/ && ($do_compile = 1, last OPTION);
	/^-cc$/ && ($gcc = $ARGV[1], shift, last OPTION);
	/^-r$/ && ($do_run = 1, last OPTION);
	/^-rr$/ && ($bin = $ARGV[1], shift, last OPTION);
	/^-l$/ && ($do_list = 1, last OPTION);
	/^-a$/ && ($do_annotate = 1, last OPTION);
	/^-copt$/ && ($copt = $ARGV[1], shift, last OPTION);
	/^-lopt$/ && ($lopt = $ARGV[1], shift, last OPTION);
	/^-ropt$/ && ($ropt = $ARGV[1], shift, last OPTION);

	print STDERR "Illegal flag $_\n";
	print STDERR "annotate_indep is a script that will produce a\n";
	print STDERR "a ranked list of candidate pointer pairs for\n";
	print STDERR "pragma independent and optionally annotate source\n";
	print STDERR "code.  Will compile *.c files in current directory.\n\n";
	print STDERR "-c compile with memory profiling\n";
	print STDERR "-cc location of memory profiling compiler\n";
	print STDERR "-r run instrumented executable\n";
	print STDERR "-rr location of instrumented executable\n";
	print STDERR "-l list pointer pairs (assumes existence of data files)\n";
	print STDERR "-a annotate source code\n";
	print STDERR "-copt options for compilation\n";
	print STDERR "-lopt options for linking\n";
	print STDERR "-ropt options for executing\n";
	exit 1;
    }
    shift(@ARGV);
}


$do_compile || $do_run || $do_annotate || $do_list || die("Must specify at least one of -c -r and -a\n");

$cflags = " -fprof-mem  -Wl,--wrap,exit "; #mandatory compiler options


$postfix = ""; # <c file><postfix>, set to "" for overwriting files

$visual = 1; # 0 - no output, 1 - status output, 2 - debuggable.


sub psystem {
  # system and print
  if ($_[0])
    {
      print $_[0] . "\n";
      $_ = system(@_[0]);
      return $_;
    }
  return "";
}

sub cpsystem {
  # check psystem
  my($command, $error) = @_[0..1];
  my($ret) = &psystem($command);
  if ($ret) {
    die "Error $ret: " . $error . "\n";
  }
}

###--------- Display configuration
if ($visual)
  {
    print "#pragma independent perl script\n";
    if ($do_compile) { print "* will run custom GCC in $gcc\n"; }
    if ($do_run) { print "* will run executable $bin\n"; }
    if ($do_annotate) { print "* will annotate source code\n"; }
    print "===============================\n";
  }

###--------- Pass through custom compiler
if ($do_compile)
  {    

    print "Running gcc...\n" if $visual;
    (-e $support) || die("Runtime support library $support not found. \nPlease change script variable \$support to correct path.\n");
    psystem("$gcc $copt $cflags $files $largs $support -o $bin > gcc.out 2>&1");
  }

%idPtr  = (); # pid -> [ ptr1 * ptr2 * score ]
%ffuncs = (); # file * func -> [ pid_0 ... ]

open(GCC, "<gcc.out") || die "can't open gcc.out. Please run with -c\n";

###--------- Accumulate the wisdom of the compiler

# Parse file for the following input form:
#
#file:function
#===================
#pid: ptr1 	ptr2	score
#pid:...
#Pairs: n
GCCLOOP:
while ( $read = <GCC> )
  {
    chop($read);
    next GCCLOOP if (!$read);
    next GCCLOOP if ($read =~ /arc profiling/);

    ($file, $func) = split(':', $read);
    print "file = $file, function = $func\n" if ($visual>1);
    chop($read = <GCC>); # ===================

    $pairs = 0;
    while ($read = <GCC> )
      {
	chop($read);
	next GCCLOOP if (!$read);

	if ($read =~ /Pairs:\s+([\d]+)/)
	  {
	    # might be useful to know, albeit unlikely.
	    if ($visual && $pairs != $1) {
	      print "pair count mismatch for ($file,$func)\n";
	      print "expected $1 got $pairs\n";
	    }
	    next GCCLOOP;
	  }

	if ($read =~ /^([\d]+):\s+(.+)\s+(.+)\s+([\d]+)/)
	  {
	    $pairs++;
	    # strings are easier than dealing with the other
	    # record data structures in perl, and we've
	    # exercised the invariant that there are no spaces
	    # in these values.
	    #
	    # And to get file,func -> [ id ] requires some
	    # perl ref magic.  We later split by file and
	    # func (when we can verify which are non-conflicting
	    # pairs.
	    if (defined $idPtr{$1})
	      {
		$idPtr{$1}->[2] += $4;
		push @{$ffuncs{"$file $func"}}, $1;
	      }
	    else
	      {
		push @{$idPtr{$1}}, $2;  # ptr 1
		push @{$idPtr{$1}}, $3;  # ptr 2
		push @{$idPtr{$1}}, $4;  # score

		push @{$ffuncs{"$file $func"}}, $1;
	      }
	  }
      }
  }

close(GCC);

###------------  Simulate the custom compiler output

if ($do_run)
  {
    print "Running $bin ...\n" if $visual;
    if(! -f "$bin") {die "No executable $bin\n";}
    
    psystem("$bin $ropt 2> run.out");
  }


###------------  Bias score by execution frequency
open(SIM, "<run.out") || die "can't open run.out.  Please run with -r\n";

# Look for the following from the simulator output:
#
#Conflict: pid
while ( $read = <SIM> )
  {
    chop($read);
    next if (!$read);

    if ($read =~ /^([\d]+):\s+([\d]+)/)
      {
	$idPtr{$1}->[2] *= $2;
      }
    last if ($read =~ /sim\: \*\* simulation statistics \*\*/);
  }

close(SIM);

###------------  Eliminate conflicted data
open(SIM, "<run.out") || die "can't open run.out\n";

# Look for the following from the simulator output:
#
#Conflict: pid
while ( $read = <SIM> )
  {
    chop($read);
    next if (!$read);
    if ($read =~ /^Conflict: ([\d]+)/)
      {
	print "Bad pair! $1 (@{$idPtr{$1}})\n" if ($visual>1); 
	delete $idPtr{$1};
      }
    last if ($read =~ /sim\: \*\* simulation statistics \*\*/);
  }

close(SIM);


if($do_annotate)
{
###------------  Restructure the data
%funcList = (); # file -> [func ...]
%funcIds  = (); # func -> [pid ...]
foreach $function (keys %ffuncs)
  {
    my @ids = @{$ffuncs{$function}};
    my %vids; # valid ids

    while($id = shift @ids )
      {
	$vids{$id} = 1 if defined $idPtr{$id};
      }

    if (scalar(%vids))
      {
	($file, $func) = split (/\s/, $function);
	push @{$funcList{$file}}, $func;
	$funcIds{$func} = \%vids;
      }
  }

###-----------  Annotate
if ($visual)
  {
    print "State of pair data\n";
    print "------------------\n";
  }

FILELOOP:
foreach $file (keys %funcList)
  {
    print "In file $file:\n" if $visual;

    my(@functions) = @{$funcList{$file}};
    my(%pragma) = ();

    # function by function
    for my $func (@functions)
      {
	my($sumscore) = 0;
	my(%ids) = %{$funcIds{$func}};
	print "  Function $func (",scalar(keys %ids),"):\n" if $visual;

	foreach $id (keys(%ids))
	  {
	    my($pstr, $lineno);
	    my @pid = @{$idPtr{$id}};


	    # p -> file:n1:p1:file:n2:p2
	    my @p = split(/\:/,$pid[0].':'.$pid[1]);;

	    # line is (in general) AT LEAST below the second declaration
	    my @s = ($p[1] > $p[4]) ? (1,4) : (4,1);
	    $lineno = $p[ $s[0] ];
	    $pstr   = "#pragma independent $p[2] $p[5] /* score: $pid[2] */";

	    print "      $p[2] $p[5]  score: $pid[2]\n";
	    push @{$pragma{$lineno}}, $pstr;
	    push @{$pragma{$lineno}}, $p[ $s[0]+1 ];
	    push @{$pragma{$lineno}}, $p[ $s[1]+1 ];
	    # sometimes global values are 'defined' as the end of the file;
	    # and often these values (like stdout) are never directly refered
	    # to.  We'll put the other linenumber to fallback on.
	    push @{$pragma{$lineno}}, $p[ $s[1] ];

	    print "      ($p[ $s[0]+1 ] $p[ $s[0] ]) ($p[ $s[1]+1 ] $p[ $s[1] ]) #$id $lineno \n" if ($visual>1);

	    $sumscore += $pid[2];
	  }
	# Write out sorted list of ranked independence
	foreach $line(sort 
{
my @pid1 = @{$idPtr{$a}};
my @pid2 = @{$idPtr{$b}};
$pid1[2] <=> $pid2[2];
} 
keys(%ids))
	  {
	    my @pid = @{$idPtr{$line}};
	    my @p =  split(/\:/,$pid[0].':'.$pid[1]);
	    my $line = "$file:$func:$p[2] ($p[1]) $p[5] ($p[4]): score: $pid[2]\n";

	    push @all_scores,$line;
	  }
	
	print "    Total Score: $sumscore\n" if $visual;
      }


    # read in source $file
    if (!open(SOURCE, "<$file"))
      {
	print "Warning: Can not open file $file!\n";
	next FILELOOP;
      }
    my (@src) = <SOURCE>;
    close(SOURCE);

    # DETERMINE appropriate pragma line number
    #   How the "magic" works:  find the first line after the
    #     declaration (as given by gcc in the ptr statement) where
    #     the given ptr is used, and stick the pragma the line before.
    #   This avoids the complications of the declaration being in the
    #     header of the function (resulting in #pragma between foo() and
    #     the opening { [illegal].
    #   Note that while we strip out existing #pragma from the annotated
    #     source, it still exists at this time, and so it will trip this
    #     magic.  We can only hope that the existing #pragma was put in
    #     an appropriate place (to avoid this results in a linenumber
    #     bookkeeping nightmare far worse than is already here.

    my (@newpragma) = ();
    foreach my $lo (keys(%pragma))
      {
	my @prags = @{$pragma{$lo}};
	my $flag = 0;
	
	while (@prags)
	  {
	    my $nlo = $lo;
	    my $str = shift @prags;
	    my $p1  = shift @prags;
	    my $p2  = shift @prags;
	    my $ml  = shift @prags; #min line number

	    if ($flag) { $nlo = $ml; }
	    
	    # p1 is the one that corresponds with $lo
	    $comment = 0;
	    $line = 0;
	    $offset = 0;
	    while ($nlo < $#src)
	      {
		$comment++ if $src[$line] =~ /\/\*/;
		$comment-- if $src[$line] =~ /\*\//;
		last if !$comment and $line >= $nlo and ($src[$nlo] =~ /$p1/);
		if ($line == $nlo) { $nlo++; }
		$line++;

	      }
	    $nlo++;
	    if ($nlo >= $#src)
	      {
		if (!$flag)
		  {
		    print "---could not find reference to $p1 or $p2 in $file starting at $lo\n" if ($visual>1);
		    # Fallback and shift variable to look for.
		    @prags = ($str,$p2,$p1,$ml,@prags);
		    $flag = 1;
		    redo;
		  }
		else { print "+++Could not find $p1 or $p2 in $file ($ml,$lo).  Giving up.\n"; $flag = 0;}
	      }
	    else
	      {
		  while($src[$nlo+$offset] =~ /\*\//) {
			$offset = $offset+1;
  		  }
		  $nlo += $offset;
		  $offset = 0;
		  while($src[$nlo+$offset] =~ /\\$/) {
			$offset = $offset+1;
  		  }

		  if($offset > 0) { $offset = $offset + 2; }
		$nlo += $offset;		

		print "---$p1 $p2, [",($lo<$nlo)?$lo:$ml,"->$nlo] f$flag $offset: $src[$nlo]" if ($visual>1);
		push @{$newpragma{$nlo-1}}, $str;
		$flag = 0;
	      }
	  }
      }
    %pragma = ();

    # WRITE the annotated file
    open(ANN, ">$file$postfix") || die "Can not open $file.ann\n";

    $line = 1;
    print ANN "/* #pragma annotated lines:  ";
    print ANN join(", ", sort(keys(%newpragma))), " */\n";
    $endif = 0;
    for my $srcline (@src)
      {
	# STRIP OUT previous #pragma independent blocks
	if ($endif && $srcline =~ /\#endif/)     { $endif--; next; }
	if ($srcline =~ /\#ifdef INDEP_PRAGMA/)  { $endif++; next; }
	next if ($srcline =~ /\#pragma independent/);

	print ANN $srcline;
      }
    continue
      {
	if (defined $newpragma{$line})
	  {
	    my @prags = @{$newpragma{$line}};
	    print ANN "#ifdef INDEP_PRAGMA\n";
	    while($p = shift @prags) {
	      print ANN "$p\n";
	    }
	    print ANN "#endif\n";
	    delete $newpragma{$line};
	  }
	$line++;	
      }
    }
}

if($do_list)
{
print sort {
  $a =~ /score: ([\d]+)/;
  my $score1 = $1;
  $b =~ /score: ([\d]+)/;
  my $score2 = $1;
  $score1 <=> $score2;
} @all_scores;
}
