#!/usr/local/bin/perl

use File::Temp "tempdir";

# Directory that holds the METEOR scorer.  scoreMETEOR passes it to perl as
# the -I include path and runs "$meteordir/meteor.pl" from it.
# Previous location, kept for reference:
#$meteordir = "/afs/cs.cmu.edu/project/avenue-1/Avenue/Transfer/weights/meteor/meteor_version_0.4.3";
$meteordir = "/shared/code/meteor-0.5.1";

# Set to 1 by the -p command line flag.  No code visible in this file reads
# it afterwards.
$printrefs = 0;

# Example arguments for mt03:
#   -n file -r ~/avenue/Transfer/Chinese/mt03_chinese_evlset_v0-ref.sgm -p
# NOTE(review): the argument parser in this file recognizes -n1/-n2, not -n,
# so this example looks stale -- confirm the intended invocation.

# Read command line arguments.
#
# Recognized options:
#   -r  FILE   reference file (must exist)                     -> $srcref
#   -n1 FILE   first n-best hypothesis file (must exist)       -> $transfile1
#   -n2 FILE   second n-best hypothesis file (must exist)      -> $transfile2
#   -o  FILE   scored n-best hypothesis file                   -> $scorefile
#   -b  FILE   best of the hypotheses, output to this file     -> $oraclefile
#   -s  NAME   name of a predefined test/dev set               -> $set
#   -p         sets $printrefs to 1
#
# Unrecognized arguments and options that lack their value are reported on
# STDERR and skipped instead of being dropped silently.
{
    # Options whose value names an input file that must already exist.
    my %inputfile = (
        "-r"  => \$srcref,
        "-n1" => \$transfile1,
        "-n2" => \$transfile2,
    );
    # Options whose value is stored without any check.
    my %plainvalue = (
        "-o" => \$scorefile,
        "-b" => \$oraclefile,
        "-s" => \$set,
    );

    my $argi = 0;
    while ($argi < @ARGV) {
        my $opt = $ARGV[$argi++];

        if ($opt eq "-p") {
            $printrefs = 1;
            next;
        }

        my $target = $inputfile{$opt} || $plainvalue{$opt};
        if (!$target) {
            warn "Ignoring unrecognized argument '$opt'\n";
            next;
        }

        # Every value option uses the same bounds check.
        if ($argi >= @ARGV) {
            warn "Option $opt requires a value; ignoring it\n";
            last;
        }

        my $value = $ARGV[$argi++];
        if ($inputfile{$opt} and !-e $value) {
            die "$value does not exist\n";
        }
        $$target = $value;
    }
}

# Predefined test/dev sets selectable with -s.  Each entry supplies the
# reference file and the source language; a known set overrides any -r file.
my %knownsets = (
    "h1"      => ["/temuco/usr0/eepeter/Hebrew/e1.ref.sgm.new", "Hebrew"],
    "mt03"    => ["/afs/cs.cmu.edu/project/avenue-1/Avenue/Transfer/Chinese/mt03_chinese_evlset_v0-ref.sgm", "Chinese"],
    "mt03dev" => ["/afs/cs.cmu.edu/project/avenue-1/Avenue/Transfer/Chinese/mt03_chinese_devset-ref.sgm", "Chinese"],
    "june02"  => ["/temuco/usr0/rreynold/eval/TidesEval-Chinese-June2002-Ref.sgm", "Chinese"],
);

my $setname = defined $set ? $set : "";
if (exists $knownsets{$setname}) {
    ($srcref, $srclang) = @{$knownsets{$setname}};
} elsif ($setname eq "" and $srcref eq "") {
    die "Must specify test/dev name or reference set file to use\n";
} elsif ($setname ne "") {
    # An unknown set name used to be ignored without any notice.
    warn "Unknown test/dev set '$setname'; relying on the -r reference file\n";
}


if ($transfile1 eq "" or $transfile2 eq "") {
    printHelp();
    die "Must specify the n-best hypothesis files\n";
}
if ($srcref eq "") {
    printHelp();
    die "Must specify the reference file\n";
}
if ($scorefile eq "") {
    # Default score file sits next to the first hypothesis file.  (The
    # original read the never-assigned $transfile and produced ".score".)
    $scorefile = $transfile1 . ".score";
}

# Fall back to a generic source language only when the selected set did not
# provide one; the original overwrote the set's language unconditionally.
if (!defined($srclang) or $srclang eq "") {
    $srclang = "Source";
}
$totalsents = 0;

$refdir = tempdir( CLEANUP => 1 );  # Where to store the individual sentence reference files


createSentRefs();
# Need to score sentences

# Peek at the first line of the first hypothesis file to decide which input
# format it uses.  The handle is closed again before scoring because the
# scoring subs open the file themselves.
my $firstline;
{
    my $sniff;
    if ($transfile1 =~ m/\.gz$/) {
        # List-form pipe open: the file name never passes through a shell.
        open($sniff, "-|", "gunzip", "-c", $transfile1)
            or die "Cannot run gunzip on $transfile1: $!\n";
    } else {
        open($sniff, "<", $transfile1) or die "Cannot read $transfile1: $!\n";
    }
    $firstline = <$sniff>;
    # Result deliberately ignored: gunzip may exit non-zero after an early close.
    close($sniff);
}

if (defined($firstline) and $firstline =~ m/SrcSent/i) {
    # xfer n-best format: compare the two hypothesis files.
    scoreAll($transfile1, $transfile2);
} else {
    # .out.debug format (also chosen for an empty file): score the first file only.
    scoreAllOut($transfile1);
}

exit;

# Print a one-line usage summary to STDOUT.
# The option list mirrors what the argument parser at the top of the file
# accepts: -r or -s for the references, -n1/-n2 for the two hypothesis
# files, and the optional -o, -b and -p.
sub printHelp {
    print "Usage: scorenbest.pl (-r reference | -s setname) -n1 hypotheses1 -n2 hypotheses2 [-o outputfile] [-b overallbest] [-p]\n";

}

# Deals with input file with xfer n-best format.
#
# Scores the top-ranked hypothesis of every sentence in both files with
# METEOR and prints, for each position where the two scores differ, one line
#   "> INDEX\tSCORE1\tSCORE2"   when the first file scored higher, or
#   "< INDEX\tSCORE1\tSCORE2"   when the second file scored higher.
# INDEX is the 0-based position in the list of scored sentences, not the
# sentence id found in the file.
#
# Arguments: $transfile1, $transfile2 - hypothesis files, optionally .gz
# Side effects: appends the scores to the globals @trans1scores and
#               @trans2scores; prints to STDOUT.
sub scoreAll {
    my($transfile1, $transfile2) = @_;

    push @trans1scores, _scoreTopHypotheses($transfile1);
    push @trans2scores, _scoreTopHypotheses($transfile2);

    my $count1 = scalar(@trans1scores);
    my $count2 = scalar(@trans2scores);
    if ($count1 != $count2) {
        # Scores are paired by position, so a mismatch means the tail of the
        # longer list is not compared at all.
        warn "Scored $count1 sentences in $transfile1 but $count2 in $transfile2; "
            . "comparing only the first " . ($count1 < $count2 ? $count1 : $count2) . "\n";
    }

    my $shared = $count1 < $count2 ? $count1 : $count2;
    for my $idx (0 .. $shared - 1) {
        my($first, $second) = ($trans1scores[$idx], $trans2scores[$idx]);
        if ($second < $first) {
            print "> $idx\t$first\t$second\n";
        } elsif ($second > $first) {
            print "< $idx\t$first\t$second\n";
        }
    }
}

# Return the METEOR scores of the top-ranked (rank 0) hypotheses in one xfer
# n-best file, in file order.
sub _scoreTopHypotheses {
    my($transfile) = @_;
    my($fh, @scores);

    if ($transfile =~ m/\.gz$/) {
        # List-form pipe open: the file name never passes through a shell.
        open($fh, "-|", "gunzip", "-c", $transfile)
            or die "Cannot run gunzip on $transfile: $!\n";
    } else {
        open($fh, "<", $transfile) or die "Cannot read $transfile: $!\n";
    }

    # Package global kept from the original code (counts SrcSent lines);
    # nothing visible in this file reads it.
    $currentsent = 0;

    while (my $line = <$fh>) {
        if ($line =~ m/^SrcSent/i) {
            $currentsent++;
            next;
        }

        # Only "<sentence id> 0<TAB><translation>" lines are scored.  One
        # regex both selects and parses the line, so a line can no longer
        # pass the filter and then fail to parse.  Blank lines, markup and
        # "Overall:" lines never match and are skipped.
        next unless $line =~ m/^(\d+)\s0\t(.*)$/;
        my($sentcount, $trans) = ($1, $2);

        # Calculate a METEOR score for the sentence
        push @scores, scoreMETEOR($sentcount, $trans);
    }

    close($fh);

    return @scores;
}


# Deals with input file in format used by .out.debug
#
# Copies the input to the global $scorefile, replacing each "tl: TEXT" line
# with "tl: METEOR\tTEXT".  "N:" header lines, "sl:" lines and blank lines
# are copied unchanged; every other line is dropped.  When the global
# $oraclefile is set, the best-scoring translation of each sentence is
# written there (one per line, indexed by sentence id) and that file is then
# scored with scoreXfer.
#
# Argument: $transfile - hypothesis file, optionally .gz
sub scoreAllOut {
    my($transfile) = shift;
    my($sentcount, $ncount, $trans, $meteor);
    my(@maxsents);
    my(@maxscores);
    my($nbest, $out);

    if ($transfile =~ m/\.gz$/) {
        # List-form pipe open: the file name never passes through a shell.
        open($nbest, "-|", "gunzip", "-c", $transfile)
            or die "Cannot run gunzip on $transfile: $!\n";
    } else {
        open($nbest, "<", $transfile) or die "Cannot read $transfile: $!\n";
    }

    open($out, ">", $scorefile) or die "Cannot write $scorefile: $!\n";

    while (my $line = <$nbest>) {

        if ($line =~ m/^(\d+):/i) {
            # New sentence: reset the per-sentence bookkeeping.
            $sentcount = $1;
            $maxscores[$sentcount] = -1;
            $ncount = 0;
            print $out $line;
            next;
        } elsif ($line =~ m/^sl:/) {
            print $out $line;
            next;
        } elsif ($line =~ m/^tl:\s(.*)$/) {
            $trans = $1;
        } elsif ($line =~ m/^\s*$/) {
            print $out $line;
            next;
        } else {
            next;
        }

        if ($ncount == 0) { print STDERR "Processing sentence $sentcount\n"; }

        # Calculate a METEOR score for the sentence
        $meteor = scoreMETEOR($sentcount, $trans);

        print $out "tl: $meteor\t$trans\n";

        # Strictly greater: on ties the earliest hypothesis is kept.
        if ($meteor > $maxscores[$sentcount]) {
            $maxscores[$sentcount] = $meteor;
            $maxsents[$sentcount] = $trans;
        }
        $ncount++;

    }

    close($nbest);
    # Buffered write errors only surface at close, so check it.
    close($out) or die "Cannot finish writing $scorefile: $!\n";

    if ($oraclefile ne "") {
        open(my $oracle, ">", $oraclefile) or die "Cannot write $oraclefile: $!\n";
        # Sentence ids with no translation yield an empty line, which keeps
        # line N aligned with sentence id N.
        foreach my $best (@maxsents) {
            print $oracle "$best\n";
        }
        close($oracle) or die "Cannot finish writing $oraclefile: $!\n";
        scoreXfer($oraclefile);
    }

}



# Score a particular sentence from the n-best list
#
# Writes $trans as a one-segment SGML test set, runs
# "$meteordir/meteor.pl" against "$refdir/refs$sentindex.txt" and returns
# the value of the last "Score: ..." line it prints (0 if there is none).
#
# Arguments:
#   $sentindex - sentence index; selects the reference file and labels the
#                hypothesis document
#   $trans     - hypothesis text
# Reads the globals $refdir, $meteordir and $srclang.
sub scoreMETEOR {
    my($sentindex, $trans) = @_;
    my($sysid) = "xfer";
    my($meteor) = 0;

    # Keep the scratch file in this run's private directory so concurrent
    # runs cannot overwrite each other's hypothesis (the original used the
    # fixed path /tmp/hypsent.sgm).  Fall back to /tmp if $refdir is unset.
    my $workdir = (defined($refdir) and $refdir ne "") ? $refdir : "/tmp";
    my $hypfile = "$workdir/hypsent.sgm";

    # Write out to a mini-document.  The setid uses $sentindex; the original
    # interpolated the unrelated global $sentnum here.
    open(my $hyp, ">", $hypfile) or die "Cannot write $hypfile: $!\n";
    print $hyp "<tstset setid=\"xfer_sent$sentindex\" srclang=\"$srclang\" trglang=\"English\">\n";
    print $hyp "<DOC docid=\"SENT$sentindex\" sysid=\"$sysid\">\n";
    print $hyp "<seg id=0> $trans </seg>\n";
    print $hyp "</DOC>\n</tstset>\n";
    close($hyp) or die "Cannot finish writing $hypfile: $!\n";

    my $reference = "$refdir/refs$sentindex.txt";

    # Score against reference file for just that sentence index.  The command
    # goes through the shell because of the stderr redirection.
    my $meteorcommand = "perl -I$meteordir $meteordir/meteor.pl -s $sysid -r $reference -t $hypfile 2> /dev/null |";
    open(my $score, $meteorcommand) or die "Cannot run METEOR: $!\n";
    # $line is lexical so the caller's variables are not clobbered.
    while (my $line = <$score>) {
        if ($line =~ m/^Score: (.*)$/) {
            $meteor = $1;
        }
    }
    close($score);

    return $meteor;
}


# Score a complete hypothesis file with METEOR and BLEU against $srcref and
# print one line "FILE\tMETEOR\tMODIFIED_BLEU\tBLEU" followed by a blank line.
#
# Argument: $arg - hypothesis file.  A name ending in "sgm" is used as is; a
#           name ending in "txt" is first converted with mkref2tst.pl.  Any
#           other name is reported on STDERR and not scored.
# Reads the global $srcref.  Scores that a tool does not report are printed
# as empty fields.
sub scoreXfer {
    my($arg) = shift;
    my($hypfile);
    my($sysid) = "xfer";
    # Lexical, so a failed run cannot show a score left over from an earlier call.
    my($meteor, $bleu, $modified) = ("", "", "");

    if ($arg =~ m/sgm$/i) {
        $hypfile = $arg;
    } elsif ($arg =~ m/txt$/i) {
        $hypfile = "/tmp/xfer-hypfile.sgm";
        `perl /afs/cs.cmu.edu/project/avenue-1/Avenue/Transfer/mkref2tst.pl $arg $srcref $sysid > $hypfile`;
        if ($? != 0) {
            warn "scoreXfer: converting $arg with mkref2tst.pl failed (status $?)\n";
        }
    } else {
        # The original fell through with $hypfile undefined and ran both
        # scorers with an empty -t argument.
        warn "scoreXfer: cannot score $arg: expected a file name ending in sgm or txt\n";
        return;
    }

    # METEOR
    my($meteorcommand) = "perl -I/avenue/usr2/shared/Evaluation/MEMT /avenue/usr2/shared/Evaluation/MEMT/meteor.pl -d 10 -s $sysid -r $srcref -t $hypfile 2> /dev/null |";
    open(my $score, $meteorcommand) or die "Cannot run METEOR: $!\n";
    while (my $line = <$score>) {
        if ($line =~ m/^Score,(.*)$/) {
            $meteor = $1;
        }
    }
    close($score);

    # Bleu
    my $evalRoot = "/afs/cs.cmu.edu/usr/joy/Eval/evaluationScripts";
    # "$evalRoot/nist-v11/mteval-v11a-cmufix_b.pl"
    open($score, "perl $evalRoot/bleuv09/bleu-v09e.pl -s $sysid -t $hypfile -r $srcref |")
        or die "Cannot run BLEU: $!\n";
    while (my $line = <$score>) {
        # Match and capture in one step so a line without a value cannot
        # pick up a capture left over from an earlier match.
        if ($line =~ m/^BLEU,(.+)$/) {
            $bleu = $1;
        } elsif ($line =~ m/^Modified BLEU,(.+)$/) {
            $modified = $1;
        }
    }
    close($score);
    print "$arg\t$meteor\t$modified\t$bleu\n\n";


}


# Create appropriate ref file for each sent
#
# Reads the SGML reference file $srcref, groups its <seg> lines by the most
# recently seen sysid="..." attribute, and writes one file
# "$refdir/refs<N>.txt" per sentence position N (0-based) holding that
# sentence from every reference set.
#
# Reads the globals $srcref, $refdir and $srclang.  Rebuilds the global
# %refsets and leaves the global $sentnum equal to the number of files
# written.
sub createSentRefs {
    my($sentcount) = 0;
    my($sysid) = "";   # segments seen before any sysid are grouped under ""

    # Start clean so a second call does not append to the first call's data.
    %refsets = ();

    open(my $ref, "<", $srcref) or die "$srcref: $!";
    while (my $line = <$ref>) {
        $line =~ s/[\r\n]*$//;
        if ($line =~ m/sysid=\"(\w+)\"/) {
            $sysid = $1;
        }
        if ($line =~ m/<seg/) {
            # Strip the opening tag whatever its attributes look like
            # (id=1, id="1", ...); the original only handled an unquoted
            # numeric id.
            $line =~ s/<seg\b[^>]*>\s*//;
            $line =~ s/\s*<\/seg>//;
            push @{$refsets{$sysid}}, $line;
        }
    }
    close($ref);

    # Cover every sentence position that any reference set provides.  The
    # original used the size of whichever sysid sorted last.
    foreach my $id (keys %refsets) {
        my $have = scalar(@{$refsets{$id}});
        $sentcount = $have if $have > $sentcount;
    }

    # $sentnum stays a package global, as in the original, because other code
    # in this file may read it.
    for ($sentnum = 0; $sentnum < $sentcount; $sentnum++) {
        my $reffile = "$refdir/refs$sentnum.txt";
        open(my $newref, ">", $reffile) or die "$reffile: $!";
        print $newref "<refset setid=\"XFER.ref\" srclang=\"$srclang\" trglang=\"English\">\n";
        foreach my $id (sort keys %refsets) {
            my $segment = $refsets{$id}[$sentnum];
            # A reference set shorter than the longest one has nothing for
            # this position; skip it rather than emit an empty reference.
            next unless defined $segment;
            print $newref "<DOC docid=\"SENT$sentnum\" sysid=\"$id\">\n";
            print $newref "<seg id=0> " . $segment . " </seg>\n";
            print $newref "</DOC>\n";
        }
        print $newref "</refset>\n";
        close($newref) or die "$reffile: $!";
    }
}


# Given a set of weights, extract the best sentence from each n-best set
#
# For each sentence i the candidate j with the highest weighted sum
#   probweight*probs[i][j] + ruleweight*rulescores[i][j]
#     + fragweight*fragpens[i][j] + lenweight*lenpens[i][j]
# is selected (the first one wins on ties).  The n-best file named by the
# global $transfile is then read and the translation of every selected
# "<sentence> <rank>\t<translation>" line is printed, one per line.
#
# Arguments: $probweight, $ruleweight, $fragweight, $lenweight
# Reads the globals @probs, @rulescores, @fragpens, @lenpens and $transfile.
sub extractBest {
    my($probweight, $ruleweight, $fragweight, $lenweight) = @_;
    my($totalsents) = scalar(@fragpens);
    my(@maxindexes) = ();

    for my $i (0 .. $totalsents - 1) {
        # No numeric sentinel: weighted scores may be negative, so the
        # original start value of -1 could reject every candidate.
        my($maxscore, $maxindex) = (undef, -1);
        my $candidates = $probs[$i] ? scalar(@{$probs[$i]}) : 0;
        for my $j (0 .. $candidates - 1) {
            my $thisscore = ($probweight * $probs[$i][$j]) +
                ($ruleweight * $rulescores[$i][$j]) +
                ($fragweight * $fragpens[$i][$j]) +
                ($lenweight * $lenpens[$i][$j]);

            if (!defined($maxscore) or $thisscore > $maxscore) {
                $maxscore = $thisscore;
                $maxindex = $j;
            }
        }
        push @maxindexes, $maxindex;
    }

    if (!defined($transfile) or $transfile eq "") {
        die "extractBest: no n-best file set in \$transfile\n";
    }
    open(my $nbest, "<", $transfile) or die "Cannot read $transfile: $!\n";
    while (my $line = <$nbest>) {
        chomp($line);
        # Use the captures of this match; the original re-parsed the
        # never-assigned $transline and so printed nothing useful.
        if ($line =~ m/^(\d+)\s+(\d+)\t(.*)$/) {
            my($sentcount, $ncount, $trans) = ($1, $2, $3);
            if (defined($maxindexes[$sentcount]) and $maxindexes[$sentcount] == $ncount) {
                # The line was chomped, so restore the separator.
                print "$trans\n";
            }
        }
    }
    close($nbest);


}



# Extract the sentence with the highest METEOR score from each n-best set
#
# For each sentence i the candidate j with the largest $meteorscores[i][j]
# is selected (the first one wins on ties).  The n-best file named by the
# global $transfile is then read and the translation of every selected
# "<sentence> <rank>\t<translation>" line is printed, one per line.
#
# Takes no arguments.  Reads the globals @meteorscores and $transfile.
sub extractBestMETEOR {
    my(@maxindexes) = ();

    # Iterate over the array that is actually compared; the original took
    # its loop bounds from @fragpens and @probs.
    for my $i (0 .. $#meteorscores) {
        my($maxscore, $maxindex) = (-1, -1);
        my $candidates = $meteorscores[$i] ? scalar(@{$meteorscores[$i]}) : 0;
        for my $j (0 .. $candidates - 1) {
            if ($meteorscores[$i][$j] > $maxscore) {
                # Remember the winning score itself; the original stored the
                # never-assigned $thisscore here.
                $maxscore = $meteorscores[$i][$j];
                $maxindex = $j;
            }
        }
        push @maxindexes, $maxindex;
    }

    if (!defined($transfile) or $transfile eq "") {
        die "extractBestMETEOR: no n-best file set in \$transfile\n";
    }
    open(my $nbest, "<", $transfile) or die "Cannot read $transfile: $!\n";
    while (my $line = <$nbest>) {
        chomp($line);
        # Use the captures of this match; the original re-parsed the
        # never-assigned $transline and so printed nothing useful.
        if ($line =~ m/^(\d+)\s+(\d+)\t(.*)$/) {
            my($sentcount, $ncount, $trans) = ($1, $2, $3);
            if (defined($maxindexes[$sentcount]) and $maxindexes[$sentcount] == $ncount) {
                # The line was chomped, so restore the separator.
                print "$trans\n";
            }
        }
    }
    close($nbest);


}
