#!/usr/local/bin/perl

# Scoring metric of the input files.  Per the original note: "m" for meteor,
# "b" for bleu.  Changed with "-m <metric>" on the command line.
$metric = "b";

# Walk a copy of the command line so @ARGV itself is left untouched.
# "-m" consumes the next word as the metric; every other word is taken as
# a score file, which must already exist.
my @pending = @ARGV;
while (@pending) {
    my $arg = shift @pending;

    if ($arg eq "-m") {
        # A trailing "-m" with nothing after it is silently ignored.
        $metric = shift @pending if @pending;
        next;
    }

    push @sitefiles, $arg;
    die "$arg does not exist\n" unless -e $arg;
}



# No score files were named on the command line: fall back to a hard-coded
# pair of *.t_score files.  The commented-out lines are alternative input
# sets kept for reference.
unless (@sitefiles) {

    # *.b_score globs
    #@sitefiles = </shared/data/Chinese/Translations/dryrun-chinese07-text/hyps/*.top.b_score>;
    #@sitefiles = </shared/data/Chinese/Translations/dryrun-chinese07-text/hyps/*.bot.b_score>;
    #@sitefiles = </shared/data/Chinese/Translations/eval-chinese07-text/hyps/*.top.b_score>;
    #@sitefiles = </shared/data/Chinese/Translations/eval-chinese07-text/hyps/*.bot.b_score>;
    #@sitefiles = </shared/data/Chinese/Translations/eval-chinese07-text/hyps/*.dev.b_score>;
    #@sitefiles = </shared/data/Chinese/GALE_GNG07/GnGEvalSplit/teamdata/chinese/*dev_hyp.mtf.b_score>;
    #@sitefiles = </shared/data/Chinese/Translations/eval-arabic07-text/hyps/*.top.b_score>;
    #@sitefiles = </shared/data/Chinese/Translations/eval-arabic07-text/hyps/*.bot.b_score>;

    # *.m_score globs
    #@sitefiles = </shared/data/Chinese/Translations/dryrun-chinese07-text/hyps/*.top.m_score>;
    #@sitefiles = </shared/data/Chinese/Translations/dryrun-chinese07-text/hyps/*.bot.m_score>;
    #@sitefiles = </shared/data/Chinese/Translations/eval-chinese07-text/hyps/*.top.m_score>;
    #@sitefiles = </shared/data/Chinese/Translations/eval-chinese07-text/hyps/*.bot.m_score>;
    #@sitefiles = </shared/data/Chinese/Translations/eval-chinese07-text/hyps/*.dev.m_score>;
    #@sitefiles = </shared/data/Chinese/GALE_GNG07/GnGEvalSplit/teamdata/chinese/*dev_hyp.mtf.m_score>;
    #@sitefiles = </shared/data/Chinese/Translations/eval-arabic07-text/hyps/*.top.m_score>;
    #@sitefiles = </shared/data/Chinese/Translations/eval-arabic07-text/hyps/*.bot.m_score>;

    # Files currently compared, in this order.
    @sitefiles = (
        "/shared/data/Chinese/GALE_GNG07/GnGEvalSplit/teamdata/chinese/rosetta_dev_hyp.mtf.t_score",
        "/shared/data/Chinese/Translations/eval-chinese07-text/hyps/MEMT-9487-planA-eval-chinese07-text-877109119.hyp.dev.t_score",
    );

    # Other *.t_score candidates that are currently switched off.
    #push @sitefiles, "/shared/data/Chinese/Translations/eval-chinese07-text/hyps/transfer-planA-eval-chinese07-text.hyp.dev.t_score";
    #push @sitefiles, "/shared/data/Chinese/Translations/eval-chinese07-text/hyps/MEMT-9487-planA-eval-chinese07-text-335055070.hyp.dev.t_score";
    #push @sitefiles, "/shared/data/Chinese/Translations/eval-chinese07-text/hyps/MEMT-9487-planA-eval-chinese07-text-490712.hyp.dev.t_score";
}

=comment
$reference = "/shared/data/Chinese/GALE_GNG07/GnGEvalSplit/gng07.dev.ref.sgm";
open(REF, "$reference") or die $!;
while ($line = <REF>) {
    if ($line =~ m/<seg[^>]*>([^<]+)</) {
	$ref = $1;
	$ref =~ s/\&quot;/\"/g;
	push @refs, $ref;
    }
}
close(REF);
=cut

# Pattern that decides which winning systems get a detailed per-sentence
# listing; it is applied as a regex to the winner's file name.  "." matches
# every non-empty name, i.e. all winners are listed.
# Narrower choices that have been used:
#$target = "/shared/data/Chinese/Translations/eval-chinese07-text/hyps/transfer-planA-eval-chinese07-text.hyp.top";
#$target = "transfer";
#$target = "EBMT";
#$target = "smt";
#$target = "rosetta";
$target = ".";

#$oracle = "/afs/cs/user/eepeter/sitecompare/oracles/eval-arabic07-text-bot-bleu-oracle.txt";

# Load every score file and record, per sentence, the best score and the
# system(s) that reached it.
#
# Each input line is "<score>\t<translation>".  Line N of every file is
# treated as describing the same sentence N.
#
# Results are left in package globals for the code further down:
#   @{"<path without .?_score>"}        that system's scores, by sentence
#   @{"<path without .?_score>-trans"}  that system's translations, by sentence
#   @maxscores   best score seen for each sentence
#   @maxsites    for each sentence, an array ref of the system paths
#                (suffix stripped) that share the best score
@maxscores = ();
@maxsites = ();
$maxtie = 0;
foreach $scorefile (sort @sitefiles) {
    print "Comparing $scorefile\n";

    # The system is identified by the score file path minus its
    # two-character-tagged suffix (".b_score", ".t_score", ...).
    ($sitefile = $scorefile) =~ s/\.._score$//;
    #print "Site file: $sitefile\n";

    open(my $sf, '<', $scorefile) or die "Cannot open $scorefile: $!";
    my $count = 0;
    while (my $line = <$sf>) {
        chomp($line);

        # Limit of 2: only the first tab separates score from text, so tabs
        # inside the translation are preserved.
        my ($score, $trans) = split(/\t/, $line, 2);

        # Metric "t" is stored negated so that the "largest value wins"
        # comparison below selects the smallest original score.
        $score = -$score if $metric eq "t";

        # Symbolic references: the arrays are named after the system path.
        ${$sitefile}[$count] = $score;
        ${$sitefile . "-trans"}[$count] = $trans;

        # The first score seen for a sentence is its initial maximum, whatever
        # its magnitude (no numeric sentinel that a low score could undercut).
        if (!defined($maxscores[$count]) || $score > $maxscores[$count]) {
            $maxscores[$count] = $score;
            $maxsites[$count] = [$sitefile];
        } elsif ($score == $maxscores[$count]) {
            push @{$maxsites[$count]}, $sitefile;
        }
        $count++;
    }
    close($sf);
}

# Derive the "default" system from the first score file as listed (the list
# is not sorted here):
#   $defaultsite, $fortrans  its path with the ".?_score" suffix removed
#   $defaultfile             the last path component of that path
# NOTE(review): when the path contains no "/", the match below fails and
# $defaultfile receives whatever $1 held before (possibly undef).
($defaultsite = $sitefiles[0]) =~ s/\.._score$//;
$fortrans = $defaultsite;
$defaultsite =~ m/\/([^\/]+)$/;
$defaultfile = $1;
#print "$sitefiles[0]  Default file $defaultfile, $defaultsite\n";

#open(ORACLE, "> $oracle") or die $!;

# Tally, per system file name, how often it is the sole best system
# (%maxcounts) and how often it shares the best score (%maxties).
# $sentenceties counts the sentences that have more than one best system.
$sentenceties = 0;
foreach my $winners (@maxsites) {
    # A sentence with no recorded winner has nobody to credit.
    next unless $winners && @{$winners};

    my $tied = @{$winners} > 1;
    $sentenceties++ if $tied;

    foreach my $site (@{$winners}) {
        # Key the tallies by the last path component.  A name without any
        # "/" is used whole, rather than picking up a stale $1 from a
        # failed match.
        my ($justfile) = $site =~ m/([^\/]+)$/;
        $justfile = $site unless defined $justfile;

        if ($tied) {
            $maxties{$justfile}++;
        } else {
            $maxcounts{$justfile}++;
        }
    }
}

#=comment
# Per-sentence detail report.
#
# For every sentence, each best-scoring system whose file name matches
# $target gets a block of output: a header with the sentence index, the
# winner, the best score and "mindiff", followed by one line per system
# (name, score, translation) and a "Reference:" line.  When several systems
# tie and more than one matches $target, the sentence is listed once per
# matching winner.
#
# Scores are shown as they appeared in the score files: values that were
# negated on load for metric "t" are negated back, all others are printed
# unchanged.
#
# @refs is not filled by any active code in the file as shown (its loader
# sits inside a POD block), so the reference text prints empty.
for (my $i = 0; $i < @maxsites; $i++) {
    my @winners = @{ $maxsites[$i] || [] };

    # Smallest non-zero gap between the best score and any system's score on
    # this sentence; stays at 100 when no smaller gap exists.  It is the same
    # for every tied winner, so it is computed once per sentence.
    my $mindiff = 100;
    foreach my $scorefile (@sitefiles) {
        (my $site = $scorefile) =~ s/\.._score$//;
        my $gap = $maxscores[$i] - ${$site}[$i];
        $mindiff = $gap if $gap != 0 and $gap < $mindiff;
    }

    foreach my $winner (@winners) {
        # Last path component of the winner; a name without "/" is used whole.
        my ($justfile) = $winner =~ m/([^\/]+)$/;
        $justfile = $winner unless defined $justfile;

        # Translation for the (currently disabled) oracle output: the
        # winner's own text, or the default system's when the winner has none.
        my $besttrans = ${$winner . "-trans"}[$i];
        if (!defined($besttrans) || $besttrans eq "") {
            $besttrans = ${$fortrans . "-trans"}[$i];
        }
        #print ORACLE "$besttrans\n";

        next unless $justfile =~ m/$target/;

        my $best = ($metric eq "t") ? -$maxscores[$i] : $maxscores[$i];
        print "\n$i $justfile ", $best, " (mindiff $mindiff)\n";

        # One line per system, in command-line (unsorted) order.  A separate
        # loop variable is used so the walk over tied winners is not disturbed.
        foreach my $scorefile (@sitefiles) {
            (my $site = $scorefile) =~ s/\.._score$//;

            # Short display name: the text before "-planA" in the file name,
            # else the text before the last ".", else the whole path.
            my $systemname;
            if ($site =~ m/\/([^\/]+)(?=-planA)/i) {
                $systemname = $1;
            } elsif ($site =~ m/\/([^\/]+)(?=\.)/i) {
                $systemname = $1;
            } else {
                $systemname = $site;
            }

            my $shown = ($metric eq "t") ? -${$site}[$i] : ${$site}[$i];
            print $systemname, "\t", $shown, "\t", ${$site . "-trans"}[$i], "\n";
        }
        print "Reference:\t1.0000\t" . (defined($refs[$i]) ? $refs[$i] : "") . "\n";
    }
}
#=cut
#close(ORACLE);

# Summary: one line per system with its number of outright wins, the share of
# all sentences that represents (percent, one decimal, truncated) and its
# number of shared wins.  Systems that only ever tied are listed too, with
# zero outright wins, and a missing tie count is shown as 0 instead of blank.
print "\n";
my $total = scalar(@maxsites);
my %reported = map { $_ => 1 } (keys %maxcounts, keys %maxties);
foreach my $system (sort keys %reported) {
    my $wins = $maxcounts{$system} || 0;
    my $ties = $maxties{$system} || 0;
    # $total cannot be 0 when a system has been tallied, but guard the
    # division anyway.
    my $percent = $total ? int(1000 * $wins / $total) / 10 : 0;
    print "$system\t$wins of ", $total, " ($percent), Ties $ties\n";
}

print "Sentence with ties: $sentenceties\n";
