#!/usr/bin/perl
#
# Report alignment coverage statistics for an elicitation tool file.
#
# Usage: <script> ELICITATION_FILE
#
# The input is scanned for records that start with a line reading exactly
# "newpair" and are followed by three lines:
#
#     srcsent: <source sentence>
#     tgtsent: <target sentence>
#     aligned: <alignment pairs such as ((1,1),(2 3,2))>
#
# Each alignment pair is "(SRC,TGT)", where SRC and TGT are space-separated,
# 1-based word positions. Records whose source or target sentence is empty
# are skipped. The script prints the number of sentences, the number of
# source/target words, and how many words on each side take part in no
# alignment pair (as a count and as a percentage of that side's words).

use strict;
use warnings;

die "Must include name of elicitation tool file" if @ARGV == 0;

my $etfile = $ARGV[0];

# Running totals over all usable sentence pairs. They start at 0 so that the
# summary prints numbers even when nothing was counted.
my $sentencecount = 0;
my $srccounttotal = 0;
my $tgtcounttotal = 0;
my $srcnull       = 0;
my $tgtnull       = 0;

# Read the next line from $fh and return its content with the line ending,
# trailing spaces and a leading "$label:" prefix removed. Returns the empty
# string at end of file, so a truncated record is treated as an empty field.
sub read_field {
    my ($fh, $label) = @_;

    my $text = <$fh>;
    return '' unless defined $text;

    $text =~ s/[\r\n ]*$//;
    # Anchor the label at the start of the line and make the blank after the
    # colon optional: an empty field has already lost its trailing blank to
    # the substitution above and would otherwise keep its label.
    $text =~ s/^\s*\Q$label\E:\s*//;
    return $text;
}

# Count the word positions 1..$word_count that are not mentioned in any of
# the alignment specs. Each spec is a whitespace-separated list of 1-based
# positions; tokens that are not positive integers within range are ignored.
sub count_unaligned {
    my ($word_count, @specs) = @_;

    my %is_aligned;
    for my $spec (@specs) {
        next unless defined $spec;
        # split ' ' discards leading whitespace, so "(1, 2)" is handled too.
        for my $position (split ' ', $spec) {
            next unless $position =~ /^\d+$/;
            next if $position < 1 or $position > $word_count;
            $is_aligned{ $position + 0 } = 1;    # "+ 0" normalises "01" to 1
        }
    }

    return scalar grep { !$is_aligned{$_} } 1 .. $word_count;
}

# Return 100 * $part / $total, or 0 when $total is 0 (nothing was counted).
sub percentage {
    my ($part, $total) = @_;
    return $total ? 100 * $part / $total : 0;
}

open(my $et, '<', $etfile) or die "Cannot open $etfile: $!";
while (my $line = <$et>) {
    $line =~ s/[\r\n ]*$//;
    next unless $line eq "newpair";

    # All three lines are consumed before deciding whether to skip the pair.
    my $srcsent = read_field($et, 'srcsent');
    my $tgtsent = read_field($et, 'tgtsent');
    my $aligned = read_field($et, 'aligned');

    next if $srcsent eq "" or $tgtsent eq "";
    $sentencecount++;

    # split ' ' ignores leading whitespace, so it never yields an empty word.
    my @srcwords = split(' ', $srcsent);
    my @tgtwords = split(' ', $tgtsent);

    my $srccount = scalar(@srcwords);
    my $tgtcount = scalar(@tgtwords);

    $srccounttotal += $srccount;
    $tgtcounttotal += $tgtcount;

    # Innermost "(src,tgt)" groups of the alignment string.
    my @pairs = ($aligned =~ m/\(([^,\(\)]+,[^\)]+)\)/g);

    my (@srcspecs, @tgtspecs);
    for my $pair (@pairs) {
        # Only the first two comma-separated fields are used.
        my ($srcalign, $tgtalign) = split(/,/, $pair);
        push @srcspecs, $srcalign;
        push @tgtspecs, $tgtalign;
    }

    $srcnull += count_unaligned($srccount, @srcspecs);
    $tgtnull += count_unaligned($tgtcount, @tgtspecs);
}
close($et);

print "Number of sentences      = $sentencecount\n";
print "Number of source words   = $srccounttotal\n";
print "Number of target words   = $tgtcounttotal\n";
print "Source words aligned to NULL = $srcnull = ", percentage($srcnull, $srccounttotal), "\n";
print "Target words not aligned = $tgtnull =  ", percentage($tgtnull, $tgtcounttotal), "\n";
