#!/usr/local/bin/perl

## Generate a dict file for use with GIZA

use strict;
use warnings;

# Command-line entry point.
#
# Usage:
#   <script> ldc <ldcFile>
#   <script> pl  <srcVCB> <tgtVCB> <dictFile>
#   <script> rev <srcVCB> <tgtVCB> <dictFile>
#
# ldc mode: for every line of <ldcFile> that has exactly three
# whitespace-separated fields, print "<field3> <field1>" to STDOUT.
#
# pl / rev mode: load both vocabulary files with loadVCB (word => id),
# read <dictFile> as whitespace-separated word pairs ("src tgt" for 'pl',
# "tgt src" for 'rev'), lowercase both words, and print one
# "<target id> <source id>" line to STDOUT for every distinct pair whose
# two words are both in the vocabularies. Target words that are not in the
# target vocabulary are echoed to STDERR, and a summary of unknown words
# is printed to STDERR at the end.
my $mode = shift;
my $usage = "Usage: $0 ldc <ldcFile>\n"
	. "       $0 (pl|rev) <srcVCB> <tgtVCB> <dictFile>\n";

# An unknown mode used to fall through to the pl/rev branch and silently
# produce no output; fail early with a usage message instead.
die($usage) unless(defined $mode and $mode =~ /\A(?:ldc|pl|rev)\z/);

if($mode eq 'ldc'){
	my $ldcFile = shift;
	die($usage) unless(defined $ldcFile);

	# Three-arg open: the file name is taken literally, never as a mode or pipe.
	open(my $ldcFh, '<', $ldcFile) or die("Couldn't open the file $ldcFile: $!\n");
	while(my $line = <$ldcFh>){
		chomp $line;
		my @tokens = split /\s+/, $line;
		# Lines with more than three fields were always skipped; lines with
		# fewer would print an undefined third field, so skip those as well.
		next unless(@tokens == 3);
		print "$tokens[2] $tokens[0]\n";
	}
	close $ldcFh;
}
else{
	my $srcVCB = shift;
	my $tgtVCB = shift;
	my $dictFile = shift;
	die($usage) unless(defined $srcVCB and defined $tgtVCB and defined $dictFile);

	my %srcMap = ();
	my %tgtMap = ();
	loadVCB(\%srcMap,$srcVCB);
	loadVCB(\%tgtMap,$tgtVCB);

	open(my $dictFh, '<', $dictFile) or die("Couldn't open the file $dictFile: $!\n");

	# NOTE(review): "$dictFile.giza" has always been created/truncated here but
	# nothing is ever written to it; the dictionary goes to STDOUT. The open is
	# kept so that side effect is unchanged, and the handle is now closed
	# instead of leaked. Confirm whether the output was meant to go to this file.
	open(my $gizaFh, '>', "$dictFile.giza") or die("Couldn't open the file $dictFile.giza for writing\n");
	close $gizaFh;

	my %dict = ();	# target id => { source id => 1 }
	my $uc = 0;	# dictionary lines whose source word is not in %srcMap
	my $ec = 0;	# dictionary lines whose target word is not in %tgtMap
	while(my $line = <$dictFh>){
		chomp $line;
		my ($first, $second) = split /\s+/, $line;
		# Blank or one-field lines carry no word pair.
		next unless(defined $first and defined $second);

		# 'pl' lines are "src tgt", 'rev' lines are "tgt src".
		my ($src, $tgt) = ($mode eq 'pl') ? ($first, $second) : ($second, $first);
		$src = lc($src);
		$tgt = lc($tgt);

		my $srcKnown = defined $srcMap{$src};
		my $tgtKnown = defined $tgtMap{$tgt};
		$uc++ unless($srcKnown);
		unless($tgtKnown){
			# This counter was never incremented before, so the summary
			# always reported 0 unknown target words.
			$ec++;
			print STDERR $tgt,"\n";
		}

		next unless($srcKnown and $tgtKnown);
		$dict{$tgtMap{$tgt}}{$srcMap{$src}} = 1;
	}
	close $dictFh;

	# Sort both levels numerically so the output order is deterministic.
	foreach my $t (sort {$a <=> $b} keys %dict){
		foreach my $s (sort {$a <=> $b} keys %{$dict{$t}}){
			print "$t $s\n";
		}
	}
	print STDERR "Unknown English $ec Unknown Urdu $uc\n";
}

# Load a vocabulary file into a hash, keyed by word.
#
# Every line is split on whitespace; the first field is taken as the id and
# the second as the word, and the hash gets word => id (any further fields
# are ignored). If a word appears more than once, the last id wins. Lines
# with fewer than two fields (e.g. blank lines) are skipped.
#
# Arguments:
#   $hashRef - reference to the hash to fill
#   $vcbFile - path of the vocabulary file to read
# Returns 1.
# Dies if the file cannot be opened.
sub loadVCB{
	my $hashRef = shift;
	my $vcbFile = shift;
	# Three-arg open with a lexical handle: the file name is taken literally
	# and no global handle is shared with the caller.
	open(my $vcbFh, '<', $vcbFile) or die("Couldn't open the file $vcbFile: $!\n");
	# Read into a named variable so the caller's $_ is left untouched.
	while(my $line = <$vcbFh>){
		chomp $line;
		my ($id, $word) = split /\s+/, $line;
		next unless(defined $id and defined $word);
		$hashRef->{$word} = $id;
	}
	close $vcbFh;
	return 1;
}