#!/usr/local/bin/perl

use strict;
use warnings;

# Report how often each "CNP,<digits>" token occurs in an nbest file.
#
# Usage: <script> NBEST_FILE
#
# How the input is consumed:
#   * A line matching /^\d+ 0\t/ starts one counted entry (a "sentence").
#   * The line immediately after that header is read and ignored
#     (presumably a scores line -- confirm against the file format).
#   * Every following line, up to the next blank line or end of file, is
#     scanned for CNP,<digits> tokens and each occurrence is tallied.
#   * Lines outside such an entry are skipped.
#
# Output (STDOUT): one "<token> <count>" line per distinct token, most
# frequent first (ties broken by token text so runs are reproducible),
# followed by a summary line with the sentence count, the total token
# count and the tokens-per-sentence ratio.

die "Need name of nbest file\n" if @ARGV == 0;

my $nbest = $ARGV[0];

# Three-argument open: the file name is never interpreted as a mode or pipe.
open(my $nbest_fh, '<', $nbest)
    or die "Cannot open nbest file '$nbest': $!\n";

my $sentcount = 0;    # number of header lines seen
my $npcount   = 0;    # total CNP,<digits> occurrences
my %npfreq;           # token text => number of occurrences

while (defined(my $line = <$nbest_fh>)) {
    next unless $line =~ m/^\d+ 0\t/;
    $sentcount++;

    # Consume and drop the line that follows the header; it is never scanned.
    my $skipped = <$nbest_fh>;

    # Scan the rest of the entry. End of file ends the entry just like a
    # blank line does, and is tested explicitly instead of relying on undef
    # matching the blank-line pattern.
    while (defined(my $entry_line = <$nbest_fh>)) {
        last if $entry_line =~ m/^\s*$/;

        my @nps = $entry_line =~ m/(CNP,\d+)/g;
        $npcount += scalar(@nps);
        $npfreq{$_}++ for @nps;
    }
}

close($nbest_fh) or warn "Error closing nbest file '$nbest': $!\n";

# Highest count first; equal counts are ordered by token text.
my @ranked = sort { $npfreq{$b} <=> $npfreq{$a} or $a cmp $b } keys %npfreq;
foreach my $np (@ranked) {
    print "$np $npfreq{$np}\n";
}

# Guard the ratio: a file without any header line has zero sentences, and
# dividing by that would abort the script before the summary is printed.
my $nps_per_sent = $sentcount ? $npcount / $sentcount : 0;

print "Sent count $sentcount, NP count $npcount, $nps_per_sent NPs/Sent\n";
