#!/usr/local/bin/perl

use strict;
use warnings;

# Counts how often each lexical entry of the form (ID,N 'text') occurs in the
# translation output files named on the command line, and reports for every
# entry the text and lexicon file recorded for its ID in the lexicon files
# that the transfer init file loads via "loadlex" lines.
#
# Output (STDOUT): one tab-separated line per distinct entry
#     count <TAB> entry <TAB> lexicon text <TAB> lexicon file
# sorted by descending count, then a blank line, then one line per lexicon
# file with its number of distinct entries (types) and total count (tokens).
# Entries whose ID is not found in any lexicon are tallied under an empty
# file name.  Progress and warnings go to STDERR.

die "Need name of file to use\n" unless @ARGV;

my $initfile = "/afs/cs/project/avenue-1/Avenue/Transfer/Chinese/xfer3.ini";

my @lexfiles = read_lexicon_paths($initfile);
my ($idsrcfile, $idsrclex) = load_lexicon_index(@lexfiles);
my $lexfreq = count_lexical_entries(@ARGV);
print_report($lexfreq, $idsrcfile, $idsrclex);

# Returns the list of paths named on "loadlex" lines of the init file $path.
# Anything from an optional ';' to the end of the line is dropped first.
# Dies if the init file cannot be opened.
sub read_lexicon_paths {
    my ($path) = @_;
    my @paths;

    open(my $init, '<', $path) or die "Cannot open $path: $!";
    while (my $line = <$init>) {
        next unless $line =~ m/^loadlex/;
        $line =~ s/\s*;.*$//;

        # Only use the capture when the match succeeded; otherwise $1 would
        # still hold whatever an earlier match captured.  The non-greedy
        # capture drops trailing whitespace, which three-argument open()
        # (unlike the two-argument form) would not strip from the name.
        if ($line =~ m/^loadlex\s+(\S.*?)\s*$/) {
            push @paths, $1;
        }
        else {
            warn "Ignoring malformed loadlex line: $line";
        }
    }
    close($init);

    return @paths;
}

# Scans each lexicon file for lines containing "{ID}".  For every such line
# the following line is read as well and searched for "|: [text]".
# Returns two hash references keyed by the upper-cased ID:
#   1. ID -> short name (last path component) of the lexicon file
#   2. ID -> the bracketed text, set only when the second line matched
# Files that cannot be opened are skipped with a warning.
sub load_lexicon_index {
    my (@paths) = @_;
    my (%file_of, %lex_of);

    foreach my $lexfile (@paths) {
        # Fall back to the full name when there is no '/' to split on.
        my $lexshort = ($lexfile =~ m/\/([^\/]+)$/) ? $1 : $lexfile;
        print STDERR "$lexshort\n";

        my $lex;
        unless (open($lex, '<', $lexfile)) {
            warn "Cannot open $lexfile: $!\n";
            next;
        }
        while (my $line = <$lex>) {
            next unless $line =~ m/\{([^\}]+)\}/;

            # Both tables use the same upper-cased key so that a single
            # lookup key finds the file and the text together.
            my $id = uc($1);
            $file_of{$id} = $lexshort;

            my $posline = <$lex>;
            last unless defined $posline;    # id line was the last line
            if ($posline =~ m/\|:\s*\[([^\]]*)\]/) {
                $lex_of{$id} = $1;
            }
        }
        close($lex);
    }

    return (\%file_of, \%lex_of);
}

# Counts lexical entries in the given translation files and returns a hash
# reference mapping each distinct "(ID,N 'text')" string to its count.
# A record starts at a line matching /^\d+\s+0\t/; the next line is skipped,
# and every following line that contains '(' is scanned for entries.
# Files that cannot be opened are skipped with a warning.
sub count_lexical_entries {
    my (@paths) = @_;
    my %freq;

    foreach my $path (@paths) {
        my $trans;
        unless (open($trans, '<', $path)) {
            warn "Cannot open $path: $!\n";
            next;
        }

        my $line = <$trans>;
        while (defined $line) {
            unless ($line =~ m/^\d+\s+0\t/) {
                $line = <$trans>;
                next;
            }

            my $scores = <$trans>;    # read only to skip it; never used
            $line = <$trans>;
            while (defined $line and $line =~ m/\(/) {
                my @lexids = ($line =~ m/(\([^,\(\) ]+,\d+\s+\'[^\']*\'\))/ig);
                $freq{$_}++ foreach @lexids;
                $line = <$trans>;
            }
            # $line is now the first line without '('.  It is deliberately
            # NOT replaced here: the outer loop re-tests it, so a record
            # header directly after an entry block is not lost.
        }
        close($trans);
    }

    return \%freq;
}

# Prints the per-entry table and the per-lexicon-file summary to STDOUT.
#   $freq    - hash ref: entry string -> count
#   $file_of - hash ref: upper-cased ID -> lexicon file short name
#   $lex_of  - hash ref: upper-cased ID -> lexicon text
sub print_report {
    my ($freq, $file_of, $lex_of) = @_;
    my (%types_in, %tokens_in);

    # Ties are broken by entry text so the output order is reproducible.
    my @by_count = sort { $freq->{$b} <=> $freq->{$a} or $a cmp $b }
                   keys %$freq;

    foreach my $lexid (@by_count) {
        # Same ID character class as the extraction pattern, so every
        # counted entry yields its own ID here.
        my $lextype = ($lexid =~ m/\(([^,\(\) ]+,\d+)\s/) ? uc($1) : '';

        my $srclex  = defined $lex_of->{$lextype}  ? $lex_of->{$lextype}  : '';
        my $srcfile = defined $file_of->{$lextype} ? $file_of->{$lextype} : '';

        print "$freq->{$lexid}\t$lexid\t$srclex\t$srcfile\n";
        $types_in{$srcfile}++;
        $tokens_in{$srcfile} += $freq->{$lexid};
    }

    print "\n";
    foreach my $srcfile (sort keys %types_in) {
        print $srcfile, "\t", $types_in{$srcfile}, "\t", $tokens_in{$srcfile}, "\n";
    }

    return;
}
