#!/usr/local/bin/perl

## Induce pos tags for LEX

use strict;
use warnings;

# Both output streams carry decoded (wide-character) text: STDOUT gets the
# rewritten lexicon, STDERR gets the diagnostics. Without a UTF-8 layer on
# STDERR every diagnostic would trigger a "Wide character in print" warning.
binmode STDOUT,":utf8";
binmode STDERR,":utf8";

# Maps the tags found in the POS dictionary (keys) to the tags written into
# the lexicon (values).
my %posmapP2L = (NOUN => 'N',OTHER => 'LEX',ADV => 'RB',PRON => 'PRP',VERB => 'V',FW => 'FW',ADJ => 'JJ',CONJ => 'CC',NUM => 'CD',WHQ => 'WH',POST => 'IN', DET => 'DET');

# Command-line arguments: the lexicon file, then the POS dictionary file.
my $fileName = shift;
my $posFile = shift;
# Fail early on missing arguments: three-argument open() treats an undefined
# file name as a request for an anonymous temporary file, so the script would
# otherwise run "successfully" on empty input.
die("Usage: $0 <lexicon file> <pos dictionary file>\n")
	unless defined $fileName and defined $posFile;

# Filled by loadLex(): source word => target word => list of tags.
my %lexicon = ();
loadLex($fileName);

# Load the POS dictionary into %posDict: word => flat list holding one
# (tag, count, third field) triple per dictionary line for that word.
# Each line is read as a word followed by whitespace-separated triples.
# NOTE(review): the meaning of the third field of a triple is not visible
# here; it is only compared numerically (> 20 below, > 1 by the consumer).
open(my $posFh,"<:encoding(utf8)" ,$posFile) or die("Couldn't open pos dictionary $posFile: $!\n");
my %posDict = ();
while(my $line = <$posFh>){
	chomp $line;
	# split ' ' ignores leading whitespace, so an indented line cannot
	# produce an empty word in $tokens[0] and shift every column by one.
	my @tokens = split ' ', $line;
	# A usable entry needs the word plus one complete triple; blank or
	# truncated lines would otherwise store undef values (and, for blank
	# lines, an undef key).
	next if @tokens < 4;
	my $pnum = $#tokens/3;
	if($pnum > 1 and $tokens[3] > 20){
		# Several candidate tags: keep the one with the highest count.
		# The comparison is strict, so the earliest candidate wins ties.
		my $pos = '';
		my $pmax = 0;
		for my $i (1 .. int($pnum)){
			if($tokens[$i*3-1] > $pmax){
				$pmax = $tokens[$i*3-1];
				$pos = $tokens[$i*3-2];
			}
		}
		# The winner replaces the first triple's tag and count; the first
		# triple's third field ($tokens[3]) is left as it was.
		$tokens[1] = $pos;
		$tokens[2] = $pmax;
	}
	# Only the first triple is stored. A word that occurs on several lines
	# accumulates several triples, which the consumer treats as ambiguous.
	push @{$posDict{$tokens[0]}}, @tokens[1..3];
}
close $posFh;

# Rewrite the lexicon on STDOUT. Lines of the form
#   TAG::TAG |: ["source"] -> ["target"]
# get their tag replaced when the POS dictionary holds exactly one
# triple for the source word; every other line is copied through unchanged.
# Diagnostics go to STDERR.
open(my $lexFh,"<:encoding(utf8)" ,$fileName) or die("Couldn't open the file $fileName: $!\n");
while(my $line = <$lexFh>){
	chomp $line;
	unless($line =~ /^(.+)::(.+) \|: \[\"(.+)\"\] -> \[\"(.+)\"\]$/){
		print $line,"\n";
		next;
	}
	# Copy the captures immediately: any later successful match resets them.
	# NOTE(review): the second tag ($2) is not carried over; the output
	# rule uses the same tag on both sides of '::'.
	my ($ptag, $urd, $eng) = ($1, $3, $4);
	# A quote inside either side means the greedy pattern may have split the
	# line in the wrong place, so such lines are passed through untouched.
	if($urd =~ /"/ || $eng =~ /"/){
		print $line,"\n";
		next;
	}
	my $entry = $posDict{$urd};
	if(defined $entry){
		my ($pos, $count, $third) = @{$entry}[0..2];
		# Retag only when there is exactly one triple, the tag is neither
		# OTHER nor FW, and the triple's third field is greater than 1.
		if(@{$entry} == 3 and $pos ne 'OTHER' and $pos ne 'FW' and $third > 1){
			# A VERB is accepted only when the source word ends in
			# U+0646 U+0627 (presumably the infinitive ending; confirm).
			unless($pos eq 'VERB' and $urd !~ /\x{0646}\x{0627}$/){
				if(defined $posmapP2L{$pos}){
					$ptag = $posmapP2L{$pos};
					print STDERR "Assigned $urd $eng: $ptag $count $third\n";
				}
				else{
					# An unmapped tag used to turn $ptag into undef and
					# emit a rule with an empty tag; keep the original.
					print STDERR "No lexicon tag mapped for $pos, keeping $ptag: $urd $eng\n";
				}
			}
		}
		else{
			print STDERR "$urd $eng: ", join(' ',@{$entry}),"\n";
		}
	}
	print "${ptag}::${ptag} |: [\"${urd}\"] -> [\"${eng}\"]\n";
}
close $lexFh;


# loadLex(FILE)
# Reads the lexicon FILE and fills the file-level %lexicon with
#   source word => target word => list of tags (first tag of each rule).
# Only lines of the form  TAG::TAG |: ["source"] -> ["target"]  whose source
# and target contain no double quote are used; all others are ignored.
# A source/target pair seen more than once is reported on STDERR, so that
# the diagnostic does not end up inside the lexicon written to STDOUT.
# Dies if FILE cannot be opened.
sub loadLex{
	my $fileName = shift;
	open(my $fh,"<:encoding(utf8)" ,$fileName) or die("Couldn't open the file $fileName: $!\n");
	while(my $line = <$fh>){
		chomp $line;
		next unless $line =~ /^(.+)::(.+) \|: \[\"(.+)\"\] -> \[\"(.+)\"\]$/;
		# Copy the captures before running any further match on them.
		my ($tag, $tag2, $src, $tgt) = ($1, $2, $3, $4);
		next if $src =~ /"/ || $tgt =~ /"/;
		push @{ $lexicon{$src}{$tgt} }, $tag;
		print STDERR "Duplicate\n $tag $tag2 $src $tgt\n" if @{ $lexicon{$src}{$tgt} } > 1;
	}
	close $fh;
}