#!/usr/local/bin/perl

## Combine and rescore the lexicon

use strict;
use warnings;

binmode STDOUT,":utf8";

# Both lexicon files are required.  Fail early with a usage message instead
# of handing an undefined file name to loadFile().
my $file1Name = shift;
my $file2Name = shift;
unless(defined $file1Name and defined $file2Name){
	die("Usage: $0 <lexicon-file-1> <lexicon-file-2>\n");
}

# Tables filled in by loadFile():
#   %lexicon   - one hash per entry with the fields
#                pos, urd, eng, count, form, lex and key
#   %srcCounts - for each source ('urd') string, the denominator used
#                below when computing the rule score
my %lexicon = ();
my %srcCounts = ();

# Entries whose key is listed (one key per line) in the file "bad-keys" in
# the current directory are left out of the output.  The file is decoded as
# UTF-8 because loadFile() decodes the lexicon files as UTF-8: a key read as
# raw bytes would never compare equal to a decoded key containing non-ASCII
# characters.
my %badKeys = ();
open(my $badKeysFh, "<:encoding(utf-8)", "bad-keys")
	or die("Couldn't find bad-keys file: $!\n");
while(my $badKey = <$badKeysFh>){
	chomp $badKey;
	$badKeys{$badKey} = 1;
}
close $badKeysFh;

loadFile($file1Name);
loadFile($file2Name);

# Print every surviving entry, sorted by its lexicon key, with the score
# recomputed as count / srcCounts for the entry's source string.
foreach my $lkey (sort keys %lexicon){
	my $entry = $lexicon{$lkey};
	my ($pos, $urd, $eng, $count, $form, $lex) =
		@{$entry}{qw(pos urd eng count form lex)};

	# An entry without a usable key is printed as "{}" and looked up under
	# the empty string, exactly as before, but without the
	# "uninitialized value" warnings.
	my $key = defined $entry->{'key'} ? $entry->{'key'} : '';
	next if(exists $badKeys{$key});

	my $ruleScore = $count/$srcCounts{$urd};

	# $form and $lex still carry the line ending they were read with, so
	# no extra newline is printed after them.
	print "{$key}\n";
	print "$pos\::$pos |: [\"$urd\"] -> [\"$eng\"]\n";
	print "(\n\t(*score* $ruleScore) ;$count $srcCounts{$urd}\n";
	print "$form\t(X1::Y1)\n$lex)\n\n";
}

## Load one lexicon file into the file-level %lexicon and %srcCounts hashes.
##
## Argument: the path of the file; it is read as UTF-8.
## Returns the result of closing the file.  Dies if the file cannot be
## opened.  Prints a "Loading <file>" progress line to STDERR, and a warning
## for every record that has to be dropped.
##
## Each record is nine lines long:
##   1    {key}
##   2    POS::POS |: ["source"] -> ["target"]
##   3    (read and ignored)
##   4    a line containing "(*score* S) ;COUNT TOTAL"
##   5    form line  (stored as read, line ending included)
##   6    (read and ignored)
##   7    lex line   (stored lower-cased, line ending included)
##   8-9  (read and ignored)
##
## Records whose target is "-" are skipped.  An entry is identified by
## POS, source and target; when the same entry is seen again its COUNT is
## added to the stored count and the other fields are overwritten.
sub loadFile{
	my $fileName = shift;
	print STDERR "Loading $fileName\n";
	open(my $in, "<:encoding(utf-8)", $fileName)
		or die("Couldn't open the file $fileName: $!\n");

	RECORD:
	while(defined(my $header = <$in>)){
		chomp $header;
		# A byte-order mark at the start of the file would otherwise hide
		# the first key.
		$header =~ s/^\x{FEFF}//;

		# Anything that is not a "{key}" header is skipped one line at a
		# time, so a stray or blank line cannot shift the following
		# records out of step.  The capture is only used after a
		# successful match; a failed match would leave $1 holding a value
		# from an earlier match.
		next RECORD unless($header =~ /^\{(.*)\}$/);
		my $key = $1;

		my $line = <$in>;
		last RECORD unless(defined $line);	# header was the last line
		chomp $line;
		$line =~ s/[\x{FEFF}\x{202B}\x{202C}\x{202A}]//g;
		next RECORD unless($line =~ /^(.+)::(.+) \|: \[\"(.+)\"\] -> \[\"(.+)\"\]$/);
		my ($pos, $urd, $engRaw) = ($1, $3, $4);

		# Always consume the remaining seven lines of the record, so the
		# reader stays aligned even when the record is then rejected.
		# scalar() matters: in list context <$in> would read the whole
		# rest of the file.
		my @rest;
		push @rest, scalar(<$in>) for 1 .. 7;

		next RECORD if($engRaw eq '-');

		my ($scoreLine, $form, $lex) = @rest[1, 2, 4];
		unless(defined $scoreLine and defined $form and defined $lex){
			warn "Truncated record {$key} at end of $fileName; ignored\n";
			last RECORD;
		}
		unless($scoreLine =~ /\(\*score\* (.+)\) ;(\d+) (\d+)/){
			warn "No score/count line for {$key} near line $. of $fileName; record ignored\n";
			next RECORD;
		}
		my $count = $2;

		# NOTE(review): the lookup key uses the target as written, while
		# the stored 'eng' field is lower-cased, so targets differing only
		# in case stay separate entries - confirm that this is intended.
		my $lkey = "${pos}__${urd}__${engRaw}";

		# The first time an entry is seen, the source total gets one
		# extra on top of the entry's count.
		$srcCounts{$urd} += 1 unless(defined $lexicon{$lkey});
		$srcCounts{$urd} += $count;

		my $entry = ($lexicon{$lkey} ||= {});
		$entry->{'count'} += $count;
		$entry->{'pos'} = $pos;
		$entry->{'urd'} = $urd;
		$entry->{'eng'} = lc($engRaw);
		$entry->{'key'} = $key;
		$entry->{'form'} = $form;
		$entry->{'lex'} = lc($lex);
	}
	return close($in);
}
