#!/usr/local/bin/perl

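## Filters a lexical rules file (first command-line argument) against a
## monolingual POS lexicon: rule entries whose tag conflicts with the lexicon
## are dropped, the remaining entries are printed to STDOUT with an added
## "(x0 lex)" line, and three counters are reported on STDERR at the end.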

use strict;
use warnings;

# Both output streams carry UTF-8 text.
for my $stream (\*STDOUT, \*STDERR) {
	binmode($stream, ":utf8");
}

# Translation from the tag names read out of the POS lexicon to the tag
# names that the rule entries are compared against.
my %posmap = (
	NOUN  => 'N',
	OTHER => 'LEX',
	ADV   => 'RB',
	PRON  => 'PRP',
	VERB  => 'V',
	FW    => 'FW',
	ADJ   => 'JJ',
	CONJ  => 'CC',
	NUM   => 'CD',
	WHQ   => 'WH',
	POST  => 'IN',
	DET   => 'DET',
);

# Phrase-level labels; a rule entry whose tag is listed here is never
# checked against the POS dictionary.
my %nt = map { $_ => '1' } qw(NP VP PP ADJP ADVP S WHNP);

# Build the POS dictionary from the monolingual lexicon:
#   $posDict{word}{mapped_tag} = field that follows the tag on the lexicon line.
# Each lexicon line is whitespace-separated: the word first, then groups of
# three fields per reading, of which only the first two (tag, next field) are
# used here.  Tags are translated through %posmap before being stored.
my %posDict = ();
my $lexiconPath = '/afs/cs/project/avenue-1/Avenue/Urdu-MT/data/mono-pos-lexicon.filtered.num';
open(my $lexicon, "<:encoding(utf8)", $lexiconPath)
	or die("Couldn't open the POS lexicon $lexiconPath: $!\n");
while (my $record = <$lexicon>) {
	chomp $record;
	my @array = split /\s+/, $record;
	for (my $i = 1; $i <= $#array; $i += 3) {
		my $tag = $posmap{$array[$i]};
		# A tag with no %posmap entry used to be stored under the empty
		# key (with an "uninitialized value" warning); report and skip it.
		unless (defined $tag) {
			warn "Skipping unmapped POS tag '$array[$i]' for '$array[0]'\n";
			next;
		}
		$posDict{$array[0]}{$tag} = $array[$i+1];
	}
}
close $lexicon;

# Filter the rules file named by the first command-line argument.
#
# A line of the form
#     UTAG::ETAG |: ["uword" ] -> ["eword" ]
# starts a rule entry.  The entry is checked against %posDict (skipped when
# the word is unknown or UTAG is listed in %nt):
#   * word has exactly one dictionary tag: the entry is dropped (and counted)
#     when UTAG differs from it;
#   * otherwise: the entry is dropped unless ETAG is one of the word's tags.
# For a kept entry the header is printed, then the 1st and 4th of the next
# four input lines are copied and the 2nd and 3rd discarded, and finally an
# "(x0 lex)" line is appended.  Every other input line is echoed unless it is
# exactly "1".
#
# STDERR receives: <dropped single-tag mismatches> <one-word entries>
# <one-word entries whose target side contains '" '>.
#
# NOTE(review): when a header is dropped, the lines that follow it are still
# handled by the fallback echo at the bottom of the loop - confirm that is
# intended.
my $fileName = shift;
my $inc = shift;	# second argument is accepted but not used anywhere below
die("Usage: $0 rules-file [inc]\n") unless defined $fileName;

my $count = 0;
my $countOne = 0;
my $countOne2P = 0;

open(my $rules, "<:encoding(utf8)", $fileName) or die("Couldn't open the file $fileName: $!\n");
RULE:
while (my $entry = <$rules>) {
	chomp $entry;
	if ($entry =~ /^(.+)::(.+) \|: \[\"(.+)\" \] -> \[\"(.+)\" \]$/) {
		# Copy the captures right away so later pattern matches cannot
		# disturb them.
		my ($utag, $etag, $uword, $eword) = ($1, $2, $3, $4);

		if (defined $posDict{$uword} and !defined $nt{$utag}) {
			my @tags = keys %{ $posDict{$uword} };
			if (@tags == 1) {
				my $dictTag = $tags[0];
				if ($utag ne $dictTag) {
					# 'FW' and 'LEX' are what the lexicon tags FW and
					# OTHER look like after translation; the dictionary
					# stores translated tags, so the previous test
					# against 'OTHER' could never succeed.  For such an
					# uninformative tag, adopt the rule's tag so later
					# entries for this word are compared against it.
					# Replacing only when the tags differ also avoids
					# emptying the word's tag set when they are equal.
					if ($dictTag eq 'FW' || $dictTag eq 'LEX') {
						delete $posDict{$uword}{$dictTag};
						$posDict{$uword}{$utag} = 1;
					}
					# NOTE(review): the current entry is still dropped
					# even when its tag was just adopted (unchanged
					# from the previous behaviour) - confirm.
					$count++;
					next RULE;
				}
			}
			else {
				next RULE unless defined $posDict{$uword}{$etag};
			}
		}

		print $entry, "\n";

		# Copy line 1, discard lines 2 and 3, copy line 4 of the entry
		# body.  Guard each print so a truncated file does not print undef.
		my $kept = <$rules>;
		print $kept if defined $kept;
		my $skipped = <$rules>;
		$skipped = <$rules>;
		$kept = <$rules>;
		print $kept if defined $kept;

		# A double quote inside the captured source side means more than
		# one quoted token was matched.
		if ($uword =~ /"/) {
			print "  ;((x0 lex) = multi)\n";
		}
		else {
			$countOne++;
			print "  ((x0 lex) = $uword)\n";
			$countOne2P++ if $eword =~ /" /;
		}
		next RULE;
	}
	print $entry, "\n" unless $entry eq '1';
}
close $rules;
print STDERR $count," $countOne $countOne2P\n";
