#!/usr/bin/perl -- -*- coding: utf-8 -*-

use English;
$OUTPUT_AUTOFLUSH = 1;
use lib "/afs/cs.cmu.edu/project/avenue-1/Avenue/Transfer/Arabic/Aragen-PAK";
use Aragen;

use IO::Socket::INET;
use Net::hostent;

my $bwdir = "/shared/code/Arabic/Morph/bama_ver2/data";

$diacritics = "FNKaui~o\`";
#use Lingua::EN::Inflect qw(PL_N);

use POSIX ":sys_wait_h";
#if (scalar(@ARGV) == 1 and $ARGV[0] =~ m/^\d+$/) {
#    $serverport = $ARGV[0];
#} else {
#    $serverport = 2006;
#}

#open2( \*Reader,\*Writer,"$basedir/morphg -cf $basedir/verbstem.list");
#Writer->autoflush();
#sleep 1;

#print Writer "dog+s_V\n";
#$got = <Reader>;
#print "Got $got\n";

my $mode="generate";
my $debug = "NO";
my $debuglexicon = "NO";
my $directory= "directory=$bwdir/";

$encoding = "buckwalter";
print "Directory $directory\n";
&Aragen::initialize($mode,$debug,$debuglexicon,$encoding,$directory);

print STDERR "# Using [Aragen 2.0] By Nizar Habash. Copyright (c) 2004 University of Maryland College Park.\n";

$| = 1;

$serverport = 4500;
$featureset = "h";
for ($i = 0; $i < @ARGV; $i++) {
    if ($ARGV[$i] eq "-fs" and $i+1 < @ARGV) {
	$featureset = $ARGV[$i+1];
    } elsif ($ARGV[$i] eq "-p" and $i+1 < @ARGV) {
	$serverport = $ARGV[$i+1];
    } elsif ($ARGV[$i] eq "-h") {
	print "Usage: genmorphArabic.pl -fs [h|p|c] -p portnum
  -fs  The feature set to use (h = Hebrew input)
  -p   port number \n";
    }
}

$person = "person";
$gender = "gender";
$case = "case";
$nom = "nom"; # nominative
$acc = "acc"; # accusative
$poss = "poss"; #possessive

# /afs/cs/user/cmonson/Research/Data/Arabic/arabic_resources/bama_ver2/data

%posmap = ("N", "N",
	   "PRO", "PN",
	   "ADJ", "AJ",
	   "ADV", "AV",
	   "V", "V",
	   "PREP", "P");

if ($featureset eq "Hebrew" or $featureset eq "h") {
    $person = "per";
    $gender = "gen";
    $masc = "masculine";
    $fem = "feminine";

    $case = "case";
    $nom = "nominative"; 
    $acc = "accusative"; 
    $gen = "genetive";
    $poss = "possessive";
    $pron = "pronominal"; 
    $reflex = "reflex"; 

    $num = "num";
    $sg = "singular";
    $pl = "plural";
    $dual = "dual";

    $tense = "tense";
    $past = "past";
    $future = "future";
    $infinitive = "inifinitive";
    $imperative = "imperative";
    $present = "present";


} 

%fsmap = ("+FEM", "($gender $fem)",
	  "+MASC", "($gender $masc)",

	  "F", "($gender $fem)",
	  "M", "($gender $masc)",

	  "+SG", "($num $sg)",
	  "+DU", "($num $dual)",
	  "+PL", "($num $pl)",

	  "S", "($num $sg)",
	  "D", "($num $dual)",
	  "P", "($num $pl)",

	  "1", "($person 1)",
	  "2", "($person 2)",
	  "3", "($person 3)",

	  "+NOM", "($case $nom)",
	  "+ACC", "($case $acc)",
	  "+GEN", "($case $gen)",

	  "+INDEF", "(def -)",
	  "+DEF", "(def +)",

	  "+POSS", "(poss +)",

	  "+PV", "(aspect perfect)",
	  "+IV", "(aspect imperfect)",	
	  "+CV", "(apsect imperative)",

	  "+PASS", "(voice passive)",

	  "+MOOD:I", "(mood inicative)",
	  "+MOOD:S", "(mood subjunctive)",
	  "+MOOD:J", "(mood jussive)");
		
	


my %cache = ();

$server = IO::Socket::INET->new(LocalPort => $serverport,
				Type => SOCK_STREAM,
				Reuse => 1,
				Listen => 12)
    or die "Couldn't be a tcp server on port $serverport: $!\n";

print STDERR "Starting Arabic Generation morphology server on port $serverport\n\n"; 

$SIG{CHLD} = 'IGNORE';
#$SIG{CHLD} = sub { wait };

#$morphinited = 0;

while ($client = $server->accept()) {
    if ($kidpid = fork) {
	close $client;
	next;
    }

    defined($kidpid) or die "cannot fork: $!";

    close $server;
    
    $hostinfo = gethostbyaddr($client->peeraddr);
    printf "[Connect from %s]\n", $hostinfo->name || $client->peerhost;

    while ($fs = <$client>) {
	$fs =~ s/[\r\n]*$//;
	if ($fs eq "**EXIT**") {
	    last;
	}
	print "Processing $fs\n";

	# Cache will only last for this connection
	if (defined($cache{$fs})) {
	    print $client $cache{$fs}, "\n";
	} else {
	    $genfss = &getGenMatches($fs);
	    if ($genfss eq "") { $genfss = $fs; }
	    print "Returning $genfss\n";
	    $cache{$fs} = $genfss;
	    print $client "$genfss\n";
	}

    }
    print "\n[Closing out connection]\n\n";
    close($client);
    exit;
}

close($server);


sub getGenMatches {
    my($fs) = shift;
    my($pos, $lex);
    my($derivation, $genfs, $genfss);


    if ($fs =~ m/\(POS\s+([^\)]+)\)/i) {
	$pos = $1;
	# Do any POS mapping?
	# ...
    } else {
	if ($fs =~ m/\(LEX\s+([^\)]+)\)/i) {
	    $lex = $1;
	    $lex = &normalize($lex);
	    $fs =~ s/\(LEX\s+([^\)]+)\)/"\(lex \"$lex\"\)"/ie;
	}
	return $fs;
    }


    if ($fs =~ m/\(LEX\s+\"\(\"\)/i or $fs =~ m/\(LEX\s+\"\)\"\)/i) {     # Deal with parens
	return $fs;
    } elsif ($fs =~ m/\(LEX\s+([^\)]+)\)/i) {
	$lex = $1;
    } else {
	return $fs; # No lex found
    }

    if ($lex =~ m/\s/) {
	$lex = &normalize($lex);
	$fs =~ s/\(LEX\s+([^\)]+)\)/"\(lex \"$lex\"\)"/ie;
	return $fs;
    }
    if ($lex =~ m/^\"$/) {
	$fs =~ s/\(LEX\s+([^\)]+)\)/(lex \"\\\"\")/i;
	return $fs;
    }

    if (uc($lex) eq "*PRO*") {
	return $fs;
    }

    if (uc($pos) eq "UNK") {
	$lex = lc($lex);
	# make Hebrew romanization more Arabic-like
	$lex =~ tr/abgdhwzx\@iklmns\&pcqr\$t/AbjdhwzHTyklmnsEfSqrst/; 
	$lex = &normalize($lex);
	$fs =~ s/\(LEX\s+([^\)]+)\)/"\(LEX \"$lex\"\)"/ie;
	return $fs;
    }

    if (uc($pos) eq "ADJ" or uc($pos) eq "PRO" or uc($pos) eq "N" or
	uc($pos) eq "ADV") {
	
	@combos = ();

	# Translate other feature/values
	if ($fs =~ m/\($gender\s+$masc\)/i) {
	    $combos[0][0] = "+MASC";
	} elsif ($fs =~ m/\($gender\s+$fem\)/i) {
	    $combos[0][0] = "+FEM";
	} else {
	    $combos[0][0] = "+MASC";
	    $combos[0][1] = "+FEM";
	    #$combos[0][2] = "";
	}

	if ($fs =~ m/\($num\s+$sg\)/i) {
	    $combos[1][0] = "+SG";
	} elsif ($fs =~ m/\($num\s+$dual\)/i) {
	    $combos[1][0] = "+DU";
	} elsif ($fs =~ m/\($num\s+$pl\)/i) {
	    $combos[1][0] = "+PL";
	} else {
	    $combos[1][0] = "+SG";
	    $combos[1][1] = "+DU";
	    $combos[1][2] = "+PL";
	    #$combos[1][3] = "";
	}
	
	
	if ($fs =~ m/\($case\s+$nom\)/i) {
	    $combos[2][0] = "+NOM";
	} elsif ($fs =~ m/\($case\s+$acc\)/i) {
	    $combos[2][0] = "+ACC";
	} elsif ($fs =~ m/\($case\s+$poss\)/i) {
	    $combos[2][0] = "+GEN";
	} else {
	    $combos[2][0] = "+NOM";
	    $combos[2][1] = "+ACC";
	    $combos[2][2] = "+GEN";
	    #$combos[2][3] = "";
	}
	
	if ($fs =~ m/\(def\s+\-\)/i) {
	    $combos[3][0] = "+INDEF";
	} elsif ($fs =~ m/\(def\s+\+\)/i) {
	    $combos[3][0] = "+DEF";
	} else {
	    $combos[3][0] = "+INDEF";
	    $combos[3][1] = "+DEF";
	}

	if ($fs =~ m/\(possession\s+poss\)/i) {
	    $combos[4][0] = "+POSS";
	} else {
	    $combos[4][0] = "+POSS";
	    $combos[4][1] = "";
	}

	# Run through all possible combinations
	$allpaths = 0;
	@pathcounter = (0, 0, 0, 0, 0);

	while (!$allpaths) {
	    
	    @newfs = ();
	    $agfs = "[$lex POS:$pos";
	    for ($pathindex = 0; $pathindex < 5; $pathindex++) {
		#print "$pathindex $pathcounter[$pathindex] $combos[$pathindex][$pathcounter[$pathindex]]\n";
		next if $combos[$pathindex][$pathcounter[$pathindex]] eq "";
		$agfs .= " " . $combos[$pathindex][$pathcounter[$pathindex]];
		if (defined($fsmap{$combos[$pathindex][$pathcounter[$pathindex]]}) and
		    $fsmap{$combos[$pathindex][$pathcounter[$pathindex]]} ne "") {
		    push @newfs, $fsmap{$combos[$pathindex][$pathcounter[$pathindex]]};
		}
	    }

	    $agfs .= "]";

	    print "Sending $agfs\n";
	    
	    my @genlexs = ();
	    @genlexs = &Aragen::AragenProcess($agfs);
	    print " Received: ", join(" ", @genlexs), "\n";
	    foreach $genlex (@genlexs) {
		$genfs = $fs;
		my $fullform = $genlex;
		$genlex = &normalize($genlex);
		
		$genfs =~ s/\(LEX ([^\)]+)\)/"(lex \"$genlex\")"/ie;
		$addfs = " (fullform \"$fullform\")";
		foreach $newpair (@newfs) {
		    #print "$newpair\n";
		    $escaped = $newpair;
		    $escaped =~ s/(\(|\)|\+)/\\$1/g;
		    if ($fs !~ m/$escaped/i) {
			$addfs .= " $newpair";
		    }
		}
		$genfs =~ s/\)$/"$addfs\)"/ie;

		$genfss .= " $genfs";    
	    }

	    $pathindex = 0;
	    $allpaths = 1;
	    while ($pathindex < 5) {
		if ($pathcounter[$pathindex]+1 < scalar(@{$combos[$pathindex]})) {
		    $pathcounter[$pathindex]++;
		    $allpaths = 0;
		    last;
		} else {
		    $pathcounter[$pathindex] = 0;
		    $pathindex++;
		}
	    }
	}


    } elsif (uc($pos) eq "V") {

	@combos = ();

	# Translate other feature/values
	if ($fs =~ m/\(aspect\s+perfect\)/i) {
	    $combos[0][0] = "+PV";
	} elsif ($fs =~ m/\(aspect\s+imperfect\)/i) {
	    $combos[0][0] = "+IV";
	} elsif ($fs =~ m/\(aspect\s+imperative\)/i) {
	    $combos[0][0] = "+CV";
	} else {
	    $combos[0][0] = "+PV";
	    $combos[0][1] = "+IV";
	    $combos[0][2] = "+CV";
	    #$combos[0][3] = "";
	}

	if ($fs =~ m/\(voice\s+passive\)/i) {
	    $combos[1][0] = "+PASS";
	} else {
	    $combos[1][0] = "+PASS";
	    $combos[1][1] = "";
	}
	
	
	if ($fs =~ m/\(mood\s+indicative\)/i) {
	    $combos[2][0] = "+MOOD:I";
	} elsif ($fs =~ m/\(mood\s+subjunctive\)/i) {
	    $combos[2][0] = "+MOOD:S";
	} elsif ($fs =~ m/\(mood\s+jussive\)/i) {
	    $combos[2][0] = "+MOOD:J";
	} else {
	    $combos[2][0] = "+MOOD:I";
	    $combos[2][1] = "+MOOD:S";
	    $combos[2][2] = "+MOOD:J";
	}
	
	if ($fs =~ m/\($person\s+1\)/i) {
	    $combos[3][0] = "1";
	} elsif ($fs =~ m/\($person\s+2\)/i) {
	    $combos[3][0] = "2";
	} elsif ($fs =~ m/\($person\s+3\)/i) {
	    $combos[3][0] = "3";
	} else {
	    $combos[3][0] = "1";
	    $combos[3][1] = "2";
	    $combos[3][2] = "3";
	}


	if ($fs =~ m/\($gender\s+$masc\)/i) {
	    $combos[4][0] = "M";
	} elsif ($fs =~ m/\($gender\s+$fem\)/i) {
	    $combos[4][0] = "F";
	} else {
	    $combos[4][0] = "M";
	    $combos[4][1] = "F";
	}

	if ($fs =~ m/\($num\s+$sg\)/i) {
	    $combos[5][0] = "S";
	} elsif ($fs =~ m/\($num\s+$dual\)/i) {
	    $combos[5][0] = "D";
	} elsif ($fs =~ m/\($num\s+$pl\)/i) {
	    $combos[5][0] = "P";
	} else {
	    $combos[5][0] = "S";
	    $combos[5][1] = "D";
	    $combos[5][2] = "P";
	}


	# Run through all possible combinations
	$allpaths = 0;
	@pathcounter = (0, 0, 0, 0, 0, 0);

	while (!$allpaths) {
	    
	    @newfs = ();
	    $agfs = "[$lex POS:$pos";
	    for ($pathindex = 0; $pathindex < scalar(@pathcounter); $pathindex++) {
		#print "$pathindex $pathcounter[$pathindex] $combos[$pathindex][$pathcounter[$pathindex]]\n";
		next if $combos[$pathindex][$pathcounter[$pathindex]] eq "";
		$agfs .= " " . $combos[$pathindex][$pathcounter[$pathindex]];
		if (defined($fsmap{$combos[$pathindex][$pathcounter[$pathindex]]}) and
		    $fsmap{$combos[$pathindex][$pathcounter[$pathindex]]} ne "") {
		    push @newfs, $fsmap{$combos[$pathindex][$pathcounter[$pathindex]]};
		}
	    }

	    $agfs =~ s/(\S) (\S) (\S)$/S:$1$2$3/;  # Group the subject values together

	    $agfs .= "]";

	    print "Sending $agfs\n";
	    
	    my @genlexs = ();
	    @genlexs = &Aragen::AragenProcess($agfs);
	    print " Received: ", join(" ", $genlexs);

	    foreach $genlex (@genlexs) {
		$genfs = $fs;
		my $fullform = $genlex;
		$genlex = &normalize($genlex);
		
		$genfs =~ s/\(LEX ([^\)]+)\)/"(lex \"$genlex\")"/ie;
		$addfs = " (fullform \"$fullform\")";
		foreach $newpair (@newfs) {
		    #print "$newpair\n";
		    $escaped = $newpair;
		    $escaped =~ s/(\(|\)|\+)/\\$1/g;
		    if ($fs !~ m/$escaped/i) {
			$addfs .= " $newpair";
		    }
		}
		$genfs =~ s/\)$/"$addfs\)"/ie;

		$genfss .= " $genfs";    
	    }

	    $pathindex = 0;
	    $allpaths = 1;
	    while ($pathindex < scalar(@pathcounter)) {
		if ($pathcounter[$pathindex]+1 < scalar(@{$combos[$pathindex]})) {
		    $pathcounter[$pathindex]++;
		    $allpaths = 0;
		    last;
		} else {
		    $pathcounter[$pathindex] = 0;
		    $pathindex++;
		}
	    }
	}
	

    } else { # some other part of speech
	$lex = &normalize($lex);
	$fs =~ s/\(LEX\s+([^\)]+)\)/"\(lex \"$lex\"\)"/ie;
	return $fs;
    }
    
    return $genfss;

}


sub normalize {
    my $arabic = shift;
    $arabic =~ s/[$diacritics]//g; # Remove "diacritics"
    $arabic =~ s/[A\>\<\|]/A/g;    # "Ambiguate" Alif Hamza
    $arabic =~ s/[Yy]/y/g;         # "Ambiguate" Alif maqsura forms

    return $arabic;
}
