#!/usr/local/bin/perl
#
# Collect the unique n-best hypotheses, with one feature line each, from one
# or more decoder output files and write them, grouped by sentence number, to
# /tmp/<basename of the first argument>.hyps2.
#
# Usage: <script> nbest-file [nbest-file ...]     (files may be gzip-compressed)
#
# Input records, as recognised by the patterns below:
#   - an optional header line starting with "SrcSent" (case-insensitive),
#   - a hypothesis line  "<sentence-number> <n-best-rank>\t<translation>",
#   - a score line of comma-separated "Label: value" fields, e.g.
#       Overall: -8.57367, Prob: -168.426, Rules: 2.69743, Frag: 0.291667, Length: 0.605446, Words: 12,24
#   - trace lines, terminated by a blank line (or end of file),
#   - one further line, which is read and ignored.
#
# Output: for every sentence index from 0 to the highest one seen, the
# lower-cased translation and a line
#   "<Prob> <Rules * rule count> <TransSGT> <TransTGS> <trace line count> <target length>"
# for each distinct hypothesis, followed by an empty line.

use strict;
use warnings;

die "Include name of hypothesis file\n" if @ARGV == 0;

# Accumulated output text per sentence, indexed by sentence number.
my @nbests = ();

foreach my $transfile (@ARGV) {

    # List-form pipe open: the file name is passed straight to gunzip and is
    # never interpreted by a shell.
    my $nbest_fh;
    if ($transfile =~ m/\.gz$/) {
        open($nbest_fh, '-|', 'gunzip', '-c', $transfile)
            or die "Cannot run gunzip on $transfile: $!";
    }
    else {
        open($nbest_fh, '<', $transfile)
            or die "Cannot open $transfile: $!";
    }

    # Translations already stored; cleared for every file and again at every
    # "SrcSent" header line.
    my %seentrans = ();

    RECORD:
    while (!eof($nbest_fh)) {

        # Skip blank lines in front of the record.
        my $line;
        while (defined($line = <$nbest_fh>)) {
            last if $line !~ m/^\s*$/;
        }
        last RECORD if !defined $line;    # only blank lines were left

        my $transline;
        if ($line =~ m/^SrcSent/i) {
            print STDERR ".";             # progress indicator, one per header
            $transline = <$nbest_fh>;
            %seentrans = ();
        }
        else {
            $transline = $line;
        }
        next RECORD if !defined $transline || $transline =~ m/^\s*$/;

        chomp($transline);
        my ($sentcount, $ncount, $trans)
            = ($transline =~ m/^(\d+) (\d+)\t(.*)$/);

        # A line that does not look like a hypothesis has no sentence number.
        # Storing it would append to sentence 0, so the record is consumed
        # below (to keep the reader in step) but not stored.
        my $parsed_ok = defined $sentcount;
        if ($parsed_ok) {
            $trans = lc($trans);
        }
        else {
            warn "Skipping unparseable hypothesis line in $transfile: $transline\n";
        }

        my $scores = <$nbest_fh>;
        $scores = '' if !defined $scores;
        chomp($scores);

        # Every field falls back to 0 when absent, so a value can never leak
        # in from the previous hypothesis.
        my $prob          = $scores =~ m/Prob: ([^,]+)/     ? $1 : 0;
        my $rulescore     = $scores =~ m/Rules: ([^,]+)/    ? $1 : 0;
        my $transsgtscore = $scores =~ m/TransSGT: ([^,]+)/ ? $1 : 0;
        my $transtgsscore = $scores =~ m/TransTGS: ([^,]+)/ ? $1 : 0;

        # "Words: <source length>,<target length>"; only the target length is
        # written out.  Capturing through a list assignment avoids picking up
        # stale $1/$2 when the field is missing.
        my ($srclen, $tgtlen) = ($scores =~ m/Words: (\d+),(\d+)/);
        $tgtlen = 0 if !defined $tgtlen;

        # Replace negative infinity with a large finite penalty.
        $transsgtscore = -1000 if $transsgtscore =~ m/-inf/;
        $transtgsscore = -1000 if $transtgsscore =~ m/-inf/;

        # Read the trace line by line up to the terminating blank line,
        # counting the lines and the "(<name>,<digits>" groups found in them.
        my $tracecount = 0;
        my $rulecount  = 0;
        while (defined(my $trace = <$nbest_fh>)) {
            chomp($trace);
            last if $trace =~ m/^\s*$/;
            $tracecount++;
            my @rules = ($trace =~ m/\(([^\(\),\'\" ]+,\d+)/g);
            $rulecount += scalar(@rules);
        }

        # One more line is consumed after the trace terminator and ignored.
        # NOTE(review): presumably a second separator line -- confirm against
        # the decoder's output format before changing this.
        my $ignored = <$nbest_fh>;

        next RECORD if !$parsed_ok;

        if (!$seentrans{$trans}) {
            $nbests[$sentcount] .= $trans . "\n";
            $nbests[$sentcount] .= join(" ",
                $prob, $rulescore * $rulecount,
                $transsgtscore, $transtgsscore,
                $tracecount, $tgtlen) . "\n";
        }
        $seentrans{$trans} = 1;
    }

    # For the gunzip pipe a false return also covers a non-zero exit status.
    close($nbest_fh)
        or warn "Problem closing $transfile: " . ($! || "exit status $?") . "\n";
}


# The output file is named after the last path component of the first argument.
my ($basename) = ($ARGV[0] =~ m/([^\/]+)$/);
die "Cannot derive an output file name from '$ARGV[0]'\n" if !defined $basename;

print "Nbest count: ", scalar(@nbests), "\n";

my $scorefile = $basename . ".hyps2";
open(my $score_fh, '>', "/tmp/$scorefile")
    or die "Cannot write /tmp/$scorefile: $!";
foreach my $entry (@nbests) {
    # Sentence numbers that never occurred still produce an empty line.
    print {$score_fh} (defined $entry ? $entry : ''), "\n";
}
close($score_fh) or die "Error closing /tmp/$scorefile: $!";

print STDERR "\n";

# Length score for a hypothesis, computed from the target/source length ratio.
#
# The score is log10 of the product of two logistic terms in the ratio: one
# that rises with the ratio and one that falls with it.  Both are positioned
# relative to a fixed mean and standard deviation of the ratio.
#
# Arguments: source length, target length (source length must be non-zero).
# Returns:   the base-10 logarithm of the combined term (never positive).
sub getLengthPenaltyHat {
    my ($srclen, $tgtlen) = @_;

    # Fixed statistics of the length ratio.
    my ($mean, $stddev) = (1.2071, 0.2826);

    # Multiples of the standard deviation below and above the mean at which
    # the two logistic terms are placed, and the shared steepness constant.
    my ($below, $above, $steepness) = (0.5, 2.5, 5);

    my $slope     = $steepness / $stddev;
    my $low_edge  = $slope * ($mean - $below * $stddev) - $steepness;
    my $high_edge = $slope * ($mean + $above * $stddev) - $steepness;

    my $scaled = $slope * ($tgtlen / $srclen);

    my $rising_denominator  = 1.0 + exp(-$scaled + $low_edge);
    my $falling_denominator = 1.0 + exp($scaled - $high_edge);
    my $bonus = 1.0 / ($rising_denominator * $falling_denominator);

    # Convert the natural logarithm to base 10.
    return log($bonus) / log(10);
}
