#!/usr/local/bin/perl

use strict;
use warnings;

# Report the most frequent out-of-lexicon words in a transcript.
#
# Usage: perl <this script> [TRANSCRIPT_FILE]
#
# Reads the lexicon (one entry per line), tokenises the transcript on
# whitespace, counts every token that is not a lexicon entry, and prints the
# $top_n most frequent ones as "word<TAB>count" lines encoded as GBK.
#
# TRANSCRIPT_FILE defaults to "asrbn.trans.txt".  Earlier revisions selected
# the input by editing the source; the candidates listed there were
# wl.trans.txt, ng.trans.txt, asrbc.trans.txt and asrbn.trans.txt.

my $lexicon_file    = "../lexlist-all.txt";
my $transcript_file = @ARGV ? $ARGV[0] : "asrbn.trans.txt";
my $top_n           = 20;

# --- Load the lexicon -------------------------------------------------------
my %in_lexicon;
open(my $lexicon_fh, "<:encoding(UTF-8)", $lexicon_file)
    or die "Cannot open lexicon '$lexicon_file': $!";
while (my $entry = <$lexicon_fh>) {
    # Strip the line terminator, including the "\r" of CRLF files; a stray
    # "\r" would otherwise keep the entry from ever matching a token.
    $entry =~ s/[\r\n]+\z//;
    $in_lexicon{$entry} = 1;
}
close($lexicon_fh);

# --- Count tokens that are missing from the lexicon -------------------------
my %count_of;
open(my $transcript_fh, "<:encoding(UTF-8)", $transcript_file)
    or die "Cannot open transcript '$transcript_file': $!";
while (my $line = <$transcript_fh>) {
    # split ' ' skips leading whitespace, so an indented line does not yield
    # an empty first field that would be counted as an unknown "word".
    for my $word (split ' ', $line) {
        next if $in_lexicon{$word};
        $count_of{$word}++;
    }
}
close($transcript_fh);

# --- Rank and print ---------------------------------------------------------
# Highest count first; ties are broken by the word itself so that the output
# (and which words make the cut-off) is the same on every run.
my @ranked_words =
    sort { $count_of{$b} <=> $count_of{$a} or $a cmp $b } keys %count_of;

# Keep at most $top_n entries.
$#ranked_words = $top_n - 1 if @ranked_words > $top_n;

binmode(STDOUT, ":encoding(gbk)")
    or die "Cannot set GBK encoding on STDOUT: $!";
for my $word (@ranked_words) {
    print $word, "\t", $count_of{$word}, "\n";
}
