5a6,7
> use strict;
>
9c11
< $javaexec = "/afs/cs.cmu.edu/user/cmonson/java"; # or just "java" if in your path
---
> my($javaexec) = "/afs/cs.cmu.edu/user/cmonson/java"; # or just "java" if in your path
11c13
< $dirdelim = ":"; # or ";" for Windows
---
> my($dirdelim) = ":"; # or ";" for Windows
14c16
< $morphdir = "/afs/cs.cmu.edu/project/avenue-1/Avenue/Mapu-MT/Morph/";
---
> my($morphdir) = "/afs/cs.cmu.edu/project/avenue-1/Avenue/Mapu-MT/Morph/";
32c34
<
---
> my(@classpaths);
36,42d37
< # Default server port number is 5781;
< # Include port number as argument if want different port
< if (scalar(@ARGV) == 1 and $ARGV[0] =~ m/^\d+$/) {
< $serverport = $ARGV[0];
< } else {
< $serverport = 5781;
< }
49a45,49
> my($HUMAN_READABLE) = $FALSE; # machine readable unless -humanReadable is given (matches the usage text)
> my($SERVERPORT) = 5781;       # default port, as documented in the usage text; override with -port
>
> parseCommandLine(@ARGV);      # may overwrite both defaults above
>
52c52,53
< $server = IO::Socket::INET->new(LocalPort => $serverport,
---
> my($server);
> $server = IO::Socket::INET->new(LocalPort => $SERVERPORT,
56c57
< or die "Couldn't be a tcp server on port $serverport: $!\n";
---
> or die "Couldn't be a tcp server on port $SERVERPORT: $!\n";
58c59
< print "Starting Mapudungun morphology server on port $serverport\n"; #; $cachecount words cached\n";
---
> print STDOUT "Starting Mapudungun morphology server on port $SERVERPORT\n"; #; $cachecount words cached\n";
60,61c61
< #$SIG{CHLD} = sub { wait };
< $SIG{CHLD} = 'IGNORE';
---
> $SIG{CHLD} = sub { wait };
64,68c64,66
< $jarfile = $morphdir . "stable/MapudungunMorphologyAnalysis.jar";
<
<
< $stemDataFile = $morphdir . "MapudungunStemLexicon.txt";
< $suffixDataFile = $morphdir . "MapudungunSuffixLexicon.txt";
---
> my($jarfile) = $morphdir . "stable/MapudungunMorphologyAnalysis.jar";
> my($stemDataFile) = $morphdir . "activelyWorking/MapudungunStemLexicon-Azumchefi-Reconciled.txt";
> my($suffixDataFile) = $morphdir . "activelyWorking/MapudungunSuffixLexicon-Azumchefi-withSpanishGlosses.txt";
71a70
> my(@args);
73a73
> my($childpid, $childsOut, $childsIn);
85a86,91
> my($client, $kidpid);
> my($sentence);
> my(@words);
> my($wordPlusNewLine);
> my($resultFromMorphologyAnalyzer);
> my($toWriteToClient);
102,104d107
<
< print "\nExplicitly exiting...\n\n";
<
107a111,112
> $toWriteToClient = "";
>
122c127
< foreach $word (@words) {
---
> foreach my $word (@words) {
132,133d136
<
< print "got past sending the the word to the java morphological analyzer\n";
142,148c145,151
< # If the morphology analyzer couldn't analyze this word return a simple f-structure
< if ($resultFromMorphologyAnalyzer =~ /^\s*$/) {
< $uniqueMorphemeFStructuresWithTheirSpanTags = "( ( SPANSTART $spanTagBase ) ( SPANEND "
< . ($spanTagBase + 1)
< . " ) ( lex " . uc($word) . " ) ( pos lex ) ) ";
< $spanTagBase++;
< } else {
---
> if ($HUMAN_READABLE) {
>
> $toWriteToClient .= doHumanReadable($resultFromMorphologyAnalyzer);
>
> } else { # produce machine interpretable output
>
> my($uniqueMorphemeFStructuresWithTheirSpanTags);
150,156c153,154
< getUniqueMorphemeFStructuresAndAssignTheirSpanTags($resultFromMorphologyAnalyzer,
< $word,
< $spanTagBase);
< }
< print "After assigning Span Tags the f-structures are : |";
< print $uniqueMorphemeFStructuresWithTheirSpanTags;
< print "|\n";
---
> doMachineReadable($resultFromMorphologyAnalyzer, $word, $spanTagBase);
> $toWriteToClient .= $uniqueMorphemeFStructuresWithTheirSpanTags;
158c156
< $allUniqueCompleteFStructures .= $uniqueMorphemeFStructuresWithTheirSpanTags;
---
> }
161a160,170
> if ( ! $HUMAN_READABLE) {
> # post-processing to get output into the exact form that the transfer engine expects
> print $toWriteToClient;
> $toWriteToClient =~ s/\s+/ /g;
> $toWriteToClient =~ s/\" \)/\"\)/;
> $toWriteToClient =~ s/\( LEX\s+\)/\( LEX \"\"\)/;
>
> $toWriteToClient =~ s/[\r\n]*$/ /;
> $toWriteToClient =~ s/\( LEX /\( lex /;
> $toWriteToClient =~ s/\( POS /\( pos /;
> }
163,167c172,177
< # post-processing to get output into the exact form that the transfer engine expects
< print $allUniqueCompleteFStructures;
< $allUniqueCompleteFStructures =~ s/\s+/ /g;
< $allUniqueCompleteFStructures =~ s/\" \)/\"\)/;
< $allUniqueCompleteFStructures =~ s/\( LEX\s+\)/\( LEX \"\"\)/;
---
> if ( ! $HUMAN_READABLE) {
> print "\nThe Final f-structure of this sentence is:\n\n\t";
> } else {
> print "\nThe Human readable morphological analysis is:\n\n";
> }
> print "$toWriteToClient\n\n";
169,171c179,183
< $allUniqueCompleteFStructures =~ s/[\r\n]*$/ /;
< $allUniqueCompleteFStructures =~ s/\( LEX /\( lex /;
< $allUniqueCompleteFStructures =~ s/\( POS /\( pos /;
---
> # Send feature structures back to xfer engine OR human readable text back to client
> # that needs human readable text.
> print $client "$toWriteToClient\n";
>
> }
173,174c185,186
< print "\nThe Final f-structure of this sentence is:\n\n\t";
< print "$allUniqueCompleteFStructures\n\n";
---
> print "[Connection closed]\n";
> }
176,178c188,189
< # Send feature structures back to xfer engine
< print $client "$allUniqueCompleteFStructures\n";
< }
---
> sub doMachineReadable {
> my($resultFromMorphologyAnalyzer, $word, $spanTagBase) = @_;
180,182c191,208
< print "[Connection closed]\n";
< close($client);
< exit;
---
> # If the morphology analyzer couldn't analyze this word return a simple f-structure
> my($uniqueMorphemeFStructuresWithTheirSpanTags);
> if ($resultFromMorphologyAnalyzer =~ /^\s*$/) {
> $uniqueMorphemeFStructuresWithTheirSpanTags = "( ( SPANSTART $spanTagBase ) ( SPANEND "
> . ($spanTagBase + 1)
> . " ) ( lex " . uc($word) . " ) ( pos lex ) ) ";
> $spanTagBase++;
> } else {
> ($uniqueMorphemeFStructuresWithTheirSpanTags, $spanTagBase) =
> getUniqueMorphemeFStructuresAndAssignTheirSpanTags($resultFromMorphologyAnalyzer,
> $word,
> $spanTagBase);
> }
> print "After assigning Span Tags the f-structures are : |";
> print $uniqueMorphemeFStructuresWithTheirSpanTags;
> print "|\n";
>
> return ($uniqueMorphemeFStructuresWithTheirSpanTags, $spanTagBase);
184a211,212
> sub doHumanReadable {
>   my($resultFromMorphologyAnalyzer) = @_;
186c214
< print "I should never get here in the perl morphology analyzer wrapper because I loop forever\n";
---
>   my($humanReadable) = "";
188c216,273
< use strict;
---
>   # Builds the human readable rendering of one word's analyzer output: for each
>   # semi-colon separated analysis, a line of hyphen-joined lex values followed by
>   # one line per morpheme (lex, then pos and spanish for the first morpheme or
>   # spanish-gloss for the others).  Progress is also echoed to STDOUT.
>
>   if ($resultFromMorphologyAnalyzer =~ /^\s*$/) {
>     return "lo siento, pero aún no puedo analizar esta palabra\n";
>   }
>
>   # The analyses returned from the Mapudungun java-implemented analyzer
>   # are semi-colon separated
>   my(@analyses) = split /\s*;\s*/, $resultFromMorphologyAnalyzer;
>
>   foreach my $analysis (@analyses) {
>
>     print STDOUT "\nDoing the analysis: $analysis\n\n";
>
>     my(@morphemes) = split /\)\s*,\s*\(/, $analysis; # can't split just on , because the spanish-gloss may have commas in it
>
>     # A field that is missing from a morpheme is rendered as the empty string;
>     # reading $1 after a failed match would repeat an earlier capture instead.
>     my($firstMorpheme) = $TRUE;
>     foreach my $morpheme (@morphemes) {
>       print STDOUT " morpheme: $morpheme\n";
>       my($lex) = ($morpheme =~ /\(\s*lex \s*(\S+)\s*\)/) ? $1 : "";
>       if ($firstMorpheme) {
>         $firstMorpheme = $FALSE;
>       } else {
>         $humanReadable .= "-";
>       }
>       $humanReadable .= $lex;
>     }
>     $humanReadable .= " \n"; # an extra return between analyses but \n\n means end of morphology to the server
>     # so the space in " \n" is very important
>
>     $firstMorpheme = $TRUE;
>     foreach my $morpheme (@morphemes) {
>       my($lex) = ($morpheme =~ /\(\s*lex \s*(\S+)\s*\)/) ? $1 : "";
>       $humanReadable .= " $lex";
>       my($Sp);
>       if ($firstMorpheme) {
>         $firstMorpheme = $FALSE;
>         my($pos) = ($morpheme =~ /\(\s*pos \s*(\S+)\s*\)/) ? $1 : "";
>         $humanReadable .= ", $pos";
>
>         $Sp = ($morpheme =~ /\(\s*spanish \s*(.+?)\s*\)/) ? $1 : ""; # +? is the non-greedy '+' quantifier
>       } else {
>         $Sp = ($morpheme =~ /\(\s*spanish-gloss \s*(.+?)\s*\)/) ? $1 : ""; # +? is the non-greedy '+' quantifier
>       }
>       $humanReadable .= ", $Sp\n";
>     }
>     $humanReadable .= " \n"; # an extra return between analyses but \n\n means end of morphology to the server
>     # so the space in " \n" is very important
>   }
>   $humanReadable .= "\n\n";
>
>   return $humanReadable;
> }
207c292
< # are comma separated
---
> # are semi-colon separated
376c461,500
< no strict;
---
> # Parse the command-line flags in the argument list: -h/-help prints the usage
> # summary and exits, -port <digits> sets $SERVERPORT, and -humanReadable sets
> # $HUMAN_READABLE.  Any other argument is reported on STDERR and skipped.
> sub parseCommandLine {
>   my(@argv) = @_;
>   my($index) = 0;
>   while ($index < scalar(@argv)) {
>     my($arg) = $argv[$index];
>     if ($arg =~ /^((-h)|(-help))$/) {
>       print STDERR "argv[$index] = " . $arg . "\n";
>       printUsageAndExit();
>     } elsif ($arg =~ /^-port$/) {
>       $index++;
>       my($port) = ($index < scalar(@argv)) ? $argv[$index] : undef;
>       if (defined($port) and $port =~ /^\d+$/) { # anchored at both ends: digits only
>         $SERVERPORT = $port;
>       } else {
>         print STDERR "\nThe -port flag requires a numeric port to be specified.\n";
>         print STDERR (defined($port) ? "Instead " . $port . " was specified\n\n"
>                                      : "Instead nothing followed the -port flag\n\n");
>         printUsageAndExit();
>       }
>     } elsif ($arg =~ /^-humanReadable$/) {
>       $HUMAN_READABLE = $TRUE;
>     } else {
>       print STDERR "Ignoring unrecognized argument: " . $arg . "\n";
>     }
>     $index++;
>   }
> }
>
> # Print the command-line usage summary to STDERR and terminate with status 0.
> sub printUsageAndExit {
>   print STDERR "\nUSAGE: perl mapu_morph.pl [-port <number>] [-humanReadable]\n\n";
>   print STDERR " The default port number is: 5781\n";
>   print STDERR " Use the -humanReadable flag if the output is to be interpreted by humans.\n";
>   print STDERR " The default is machine readable which sports SPAN tags and *lots* of parentheses\n\n";
>   print STDERR " Exiting...\n\n";
>   exit(0);
> }