5a6,7 > use strict; > 9c11 < $javaexec = "/afs/cs.cmu.edu/user/cmonson/java"; # or just "java" if in your path --- > my($javaexec) = "/afs/cs.cmu.edu/user/cmonson/java"; # or just "java" if in your path 11c13 < $dirdelim = ":"; # or ";" for Windows --- > my($dirdelim) = ":"; # or ";" for Windows 14c16 < $morphdir = "/afs/cs.cmu.edu/project/avenue-1/Avenue/Mapu-MT/Morph/"; --- > my($morphdir) = "/afs/cs.cmu.edu/project/avenue-1/Avenue/Mapu-MT/Morph/"; 32c34 < --- > my(@classpaths); 36,42d37 < # Default server port number is 5781; < # Include port number as argument if want different port < if (scalar(@ARGV) == 1 and $ARGV[0] =~ m/^\d+$/) { < $serverport = $ARGV[0]; < } else { < $serverport = 5781; < } 49a45,49 > my($HUMAN_READABLE) = $TRUE; > my($SERVERPORT) = 5782; > > parseCommandLine(@ARGV); > 52c52,53 < $server = IO::Socket::INET->new(LocalPort => $serverport, --- > my($server); > $server = IO::Socket::INET->new(LocalPort => $SERVERPORT, 56c57 < or die "Couldn't be a tcp server on port $serverport: $!\n"; --- > or die "Couldn't be a tcp server on port $SERVERPORT: $!\n"; 58c59 < print "Starting Mapudungun morphology server on port $serverport\n"; #; $cachecount words cached\n"; --- > print STDOUT "Starting Mapudungun morphology server on port $SERVERPORT\n"; #; $cachecount words cached\n"; 60,61c61 < #$SIG{CHLD} = sub { wait }; < $SIG{CHLD} = 'IGNORE'; --- > $SIG{CHLD} = sub { wait }; 64,68c64,66 < $jarfile = $morphdir . "stable/MapudungunMorphologyAnalysis.jar"; < < < $stemDataFile = $morphdir . "MapudungunStemLexicon.txt"; < $suffixDataFile = $morphdir . "MapudungunSuffixLexicon.txt"; --- > my($jarfile) = $morphdir . "stable/MapudungunMorphologyAnalysis.jar"; > my($stemDataFile) = $morphdir . "activelyWorking/MapudungunStemLexicon-Azumchefi-Reconciled.txt"; > my($suffixDataFile) = $morphdir . "activelyWorking/MapudungunSuffixLexicon-Azumchefi-withSpanishGlosses.txt"; 71a70 > my(@args); 73a73 > my($childpid, $childsOut, $childsIn); 85a86,91 > my($client, $kidpid); > my($sentence); > my(@words); > my($wordPlusNewLine); > my($resultFromMorphologyAnalyzer); > my($toWriteToClient); 102,104d107 < < print "\nExplicitly exiting...\n\n"; < 107a111,112 > $toWriteToClient = ""; > 122c127 < foreach $word (@words) { --- > foreach my $word (@words) { 132,133d136 < < print "got past sending the the word to the java morphological analyzer\n"; 142,148c145,151 < # If the morphology analyzer couldn't analyze this word return a simple f-structure < if ($resultFromMorphologyAnalyzer =~ /^\s*$/) { < $uniqueMorphemeFStructuresWithTheirSpanTags = "( ( SPANSTART $spanTagBase ) ( SPANEND " < . ($spanTagBase + 1) < . " ) ( lex " . uc($word) . " ) ( pos lex ) ) "; < $spanTagBase++; < } else { --- > if ($HUMAN_READABLE) { > > $toWriteToClient .= doHumanReadable($resultFromMorphologyAnalyzer); > > } else { # produce machine interpretable output > > my($uniqueMorphemeFStructuresWithTheirSpanTags); 150,156c153,154 < getUniqueMorphemeFStructuresAndAssignTheirSpanTags($resultFromMorphologyAnalyzer, < $word, < $spanTagBase); < } < print "After assigning Span Tags the f-structures are : |"; < print $uniqueMorphemeFStructuresWithTheirSpanTags; < print "|\n"; --- > doMachineReadable($resultFromMorphologyAnalyzer, $word, $spanTagBase); > $toWriteToClient .= $uniqueMorphemeFStructuresWithTheirSpanTags; 158c156 < $allUniqueCompleteFStructures .= $uniqueMorphemeFStructuresWithTheirSpanTags; --- > } 161a160,170 > if ( ! $HUMAN_READABLE) { > # post-processing to get output into the exact form that the transfer engine expects > print $toWriteToClient; > $toWriteToClient =~ s/\s+/ /g; > $toWriteToClient =~ s/\" \)/\"\)/; > $toWriteToClient =~ s/\( LEX\s+\)/\( LEX \"\"\)/; > > $toWriteToClient =~ s/[\r\n]*$/ /; > $toWriteToClient =~ s/\( LEX /\( lex /; > $toWriteToClient =~ s/\( POS /\( pos /; > } 163,167c172,177 < # post-processing to get output into the exact form that the transfer engine expects < print $allUniqueCompleteFStructures; < $allUniqueCompleteFStructures =~ s/\s+/ /g; < $allUniqueCompleteFStructures =~ s/\" \)/\"\)/; < $allUniqueCompleteFStructures =~ s/\( LEX\s+\)/\( LEX \"\"\)/; --- > if ( ! $HUMAN_READABLE) { > print "\nThe Final f-structure of this sentence is:\n\n\t"; > } else { > print "\nThe Human readable morphological analysis is:\n\n"; > } > print "$toWriteToClient\n\n"; 169,171c179,183 < $allUniqueCompleteFStructures =~ s/[\r\n]*$/ /; < $allUniqueCompleteFStructures =~ s/\( LEX /\( lex /; < $allUniqueCompleteFStructures =~ s/\( POS /\( pos /; --- > # Send feature structures back to xfer engine OR human readable text back to client > # that needs human readable text. > print $client "$toWriteToClient\n"; > > } 173,174c185,186 < print "\nThe Final f-structure of this sentence is:\n\n\t"; < print "$allUniqueCompleteFStructures\n\n"; --- > print "[Connection closed]\n"; > } 176,178c188,189 < # Send feature structures back to xfer engine < print $client "$allUniqueCompleteFStructures\n"; < } --- > sub doMachineReadable { > my($resultFromMorphologyAnalyzer, $word, $spanTagBase) = @_; 180,182c191,208 < print "[Connection closed]\n"; < close($client); < exit; --- > # If the morphology analyzer couldn't analyze this word return a simple f-structure > my($uniqueMorphemeFStructuresWithTheirSpanTags); > if ($resultFromMorphologyAnalyzer =~ /^\s*$/) { > $uniqueMorphemeFStructuresWithTheirSpanTags = "( ( SPANSTART $spanTagBase ) ( SPANEND " > . ($spanTagBase + 1) > . " ) ( lex " . uc($word) . " ) ( pos lex ) ) "; > $spanTagBase++; > } else { > ($uniqueMorphemeFStructuresWithTheirSpanTags, $spanTagBase) = > getUniqueMorphemeFStructuresAndAssignTheirSpanTags($resultFromMorphologyAnalyzer, > $word, > $spanTagBase); > } > print "After assigning Span Tags the f-structures are : |"; > print $uniqueMorphemeFStructuresWithTheirSpanTags; > print "|\n"; > > return ($uniqueMorphemeFStructuresWithTheirSpanTags, $spanTagBase); 184a211,212 > sub doHumanReadable { > my($resultFromMorphologyAnalyzer) = @_; 186c214 < print "I should never get here in the perl morphology analyzer wrapper because I loop forever\n"; --- > my($humanReadable) = ""; 188c216,273 < use strict; --- > my($lex, $pos, $Sp); > > if ($resultFromMorphologyAnalyzer =~ /^\s*$/) { > return "lo siento, pero aśn no puedo analizar esta palabra\n"; > } > > # The analyses returned from the Mapudungun java-implemented analyzer > # are semi-colon separated > my(@analyses) = split /\s*;\s*/, $resultFromMorphologyAnalyzer; > > foreach my $analysis (@analyses) { > > print STDOUT "\nDoing the analysis: $analysis\n\n"; > > my(@morphemes) = split /\)\s*,\s*\(/, $analysis; # can't split just on , because the spanish-gloss may have commas in it > > my($firstMorpheme) = $TRUE; > foreach my $morpheme (@morphemes) { > print STDOUT " morpheme: $morpheme\n"; > $morpheme =~ /\(\s*lex \s*(\S+)\s*\)/; > $lex = $1; > if ($firstMorpheme) { > $firstMorpheme = $FALSE; > } else { > $humanReadable .= "-"; > } > $humanReadable .= $lex; > } > $humanReadable .= " \n"; # an extra return between analyses but \n\n means end of morphology to the server > # so the space in " \n" is very important > > $firstMorpheme = $TRUE; > foreach my $morpheme (@morphemes) { > $morpheme =~ /\(\s*lex \s*(\S+)\s*\)/; > $lex = $1; > $humanReadable .= " $lex"; > if ($firstMorpheme) { > $firstMorpheme = $FALSE; > $morpheme =~ /\(\s*pos \s*(\S+)\s*\)/; > $pos = $1; > $humanReadable .= ", $pos"; > > $morpheme =~ /\(\s*spanish \s*(.+?)\s*\)/; # +? is the non-greedy '+' quantifier > $Sp = $1; > $humanReadable .= ", $Sp\n"; > } else { > $morpheme =~ /\(\s*spanish-gloss \s*(.+?)\s*\)/; # +? is the non-greedy '+' quantifier > $Sp = $1; > $humanReadable .= ", $Sp\n"; > } > } > $humanReadable .= " \n"; # an extra return between analyses but \n\n means end of morphology to the server > # so the space in " \n" is very important > } > $humanReadable .= "\n\n"; > > return $humanReadable; > } 207c292 < # are comma separated --- > # are semi-colon separated 376c461,500 < no strict; --- > sub parseCommandLine { > my(@argv) = @_; > > my($index) = 0; > while ($index < scalar(@argv)) { > if ($argv[$index] =~ /^((-h)|(-help))$/) { > print STDERR "argv[$index] = " . $argv[$index] . "\n"; > printUsageAndExit(); > } elsif ($argv[$index] =~ /^-port$/) { > $index++; > if ($index < scalar(@argv)) { > if ($argv[$index] =~ /^\d+/) { > $SERVERPORT = $argv[$index]; > } else { > print STDERR "\nThe -port flag requires a numeric port to be specified.\n"; > print STDERR "Instead " . $argv[$index] . " was specified\n\n"; > printUsageAndExit(); > } > } else { > print STDERR "\nThe -port flag requires a numeric port to be specified.\n"; > print STDERR "Instead nothing followed the -port flag\n\n"; > printUsageAndExit(); > } > } elsif ($argv[$index] =~ /^-humanReadable$/) { > $HUMAN_READABLE = $TRUE; > } > > $index++; > } > } > > sub printUsageAndExit { > print STDERR "\nUSAGE: perl mapu_morph.pl [-port ] [-humanReadable]\n\n"; > print STDERR " The default port number is: 5781\n"; > print STDERR " Use the -humanReadable flag if the output is to be interpreted by humans.\n"; > print STDERR " The default is machine readable which sports SPAN tags and *lots* of parenthses\n\n"; > print STDERR " Exiting...\n\n"; > exit(0); > } >