#!/usr/local/bin/perl -w -- -*- coding: utf-8 -*-

# Spanish Morphological Generator Server
# March, 2005 by Erik Peterson

use IO::Socket::INET;
use Net::hostent;
use FileHandle;
use POSIX ":sys_wait_h";

#use Encode;

#use POSIX qw(locale_h);
#setlocale(LC_CTYPE, "es_ES");
#setlocale(LC_COLLATE, "es_ES");

$utf8 = 0;

sub ucLatin1 {
    my($word) = shift;
    my($ucword) = $word;
    my(%lc2uc) = ("a", "A", "b", "B", "c", "C", "d", "D", "e", "E",
		  "f", "F", "g", "G", "h", "H", "i", "I", "j", "J",
		  "k", "K", "l", "L", "m", "M", "n", "N", "o", "O",
		  "p", "P", "q", "Q", "r", "R", "s", "S", "t", "T",
		  "u", "U", "v", "V", "w", "W", "x", "X", "y", "Y",
		  "z", "Z",
		  "\xe0", "\xc0", "\xe1", "\xc1", "\xe2", "\xc2", "\xe3", "\xc3", 
		  "\xe4", "\xc4", "\xe5", "\xc5", "\xe6", "\xc6", "\xe7", "\xc7", 
		  "\xe8", "\xc8", "\xe9", "\xc9", "\xea", "\xca", "\xeb", "\xcb", 
		  "\xec", "\xcc", "\xed", "\xcd", "\xee", "\xce", "\xef", "\xcf", 
		  "\xf0", "\xd0", "\xf1", "\xd1", "\xf2", "\xd2", "\xf3", "\xd3", 
		  "\xf4", "\xd4", "\xf5", "\xd5", "\xf6", "\xd6", "\xf7", "\xd7", 
		  "\xf8", "\xd8", "\xf9", "\xd9", "\xfa", "\xda", "\xfb", "\xdb", 
		  "\xfc", "\xdc", "\xfd", "\xdd", "\xfe", "\xde", "\xff", "\xdf", 
		  );
    $ucword =~ s/([a-z\xe0-\xff])/$lc2uc{$1}/eg;

    return $ucword;
}

# hex2utf8:  Take a string of 4 hex digits (0-9A-F) and convert it
# to the corresponding (1, 2, or 3 byte) UTF-8 representation.

sub hex2utf8 {
    my($hexchar) = shift;
    my($binchar, $retval, $bin1, $bin2, $bin3);
    
    if ($hexchar !~ m/^0x/) {
	$hexchar = "0x" . $hexchar;
    }
    #print STDERR $hexchar ."\n";
    $binchar = oct($hexchar);
    if ($binchar <= 127) {
	$retval = pack("C", $binchar);
    } elsif ($binchar <= 2047) {
	$bin1 = $binchar;
	$bin1 >>= 6;
	$bin1 |= 0xC0;
	$bin2 = $binchar;
	$bin2 &= 0x3F;
	$bin2 |= 0x80;
	$retval = pack("C2", $bin1, $bin2);
    } else {
	$bin1 = $binchar;
	$bin1 >>= 12;
	$bin1 |= 0xE0;
	$bin2 = $binchar;
	$bin2 &= 0x0FFF;
	$bin2 >>= 6;
	$bin2 |= 0x80;
	$bin3 = $binchar;
	$bin3 &= 0x003F;
	$bin3 |= 0x80;
	$retval = pack("C*", $bin1, $bin2, $bin3);
    }
    return $retval;
}

@accented = ("0xe0", "0xc0", "0xe1", "0xc1", "0xe2", "0xc2", "0xe3", "0xc3", 
	     "0xe4", "0xc4", "0xe5", "0xc5", "0xe6", "0xc6", "0xe7", "0xc7", 
	     "0xe8", "0xc8", "0xe9", "0xc9", "0xea", "0xca", "0xeb", "0xcb", 
	     "0xec", "0xcc", "0xed", "0xcd", "0xee", "0xce", "0xef", "0xcf", 
	     "0xf0", "0xd0", "0xf1", "0xd1", "0xf2", "0xd2", "0xf3", "0xd3", 
	     "0xf4", "0xd4", "0xf5", "0xd5", "0xf6", "0xd6", "0xf7", "0xd7", 
	     "0xf8", "0xd8", "0xf9", "0xd9", "0xfa", "0xda", "0xfb", "0xdb", 
	     "0xfc", "0xdc", "0xfd", "0xdd", "0xfe", "0xde", "0xff", "0xdf");

sub u8toLatin1 {
    my($u8str) = @_;
    my($l1str, $i);
    foreach $l1 (@accented) {
	$u82l1{hex2utf8($l1)} = chr(oct($l1));
    }
    for ($i = 0; $i < length($u8str); $i++) {
	if (vec($u8str, $i, 8) > 127) {
	    $l1str .= $u82l1{substr($u8str, $i, 2)};
	    $i++;
	} else {
	    $l1str .= substr($u8str, $i, 1);
	}
    }

    return $l1str;
}

sub Latin1tou8 {
    my($l1str) = @_;
    my($u8str, $i);
    foreach $l1 (@accented) {
	$l12u8{chr(oct($l1))} = hex2utf8($l1);
    }
    for ($i = 0; $i < length($l1str); $i++) {
	if (vec($l1str, $i, 8) > 127) {
	    $u8str .= $l12u8{substr($l1str, $i, 1)};
	} else {
	    $u8str .= substr($l1str, $i, 1);
	}
    }

    return $u8str;
}


# ARI: SET $spanishfile here when not running on Temuco or Avenue
$spanishfile = "maco-sp-girat";

# Let it run on Temuco and Avenue
if (-e "/shared/Spanish/maco-sp-girat") {  # For running on Avenue
    $spanishfile = "/shared/Spanish/maco-sp-girat";
} elsif (-e "/avenue/usr2/shared/Spanish/maco-sp-girat") {  # For running on Temuco
    $spanishfile = "/avenue/usr2/shared/Spanish/maco-sp-girat";
} elsif (!-e $spanishfile) {
    die "Can't find Spanish generation file: $spanishfile\n";
}

$serverport = 3130;

for ($i = 0; $i < @ARGV; $i++) {
    if ($ARGV[$i] =~ m/^\d+$/) {
	$serverport = $ARGV[0];
    } elsif ($ARGV[$i] eq "-u8") {
	print "Using UTF-8 mode\n";
	$utf8 = 1;
    }
}


sub initMorph {
    my($filepos) = 0;

    open(SPAN, $spanishfile) or die $!;
    $prevcite = "";
    while ($line = <SPAN>) {
	($citation, $rest) = split(/\#/, $line);
	if ($prevcite ne $citation) {
	    $citepos{ucLatin1($citation)} = $filepos;
	    #print "$citation\t$filepos\n";
	    $prevcite = $citation;
	}

	$filepos = tell(SPAN);
    }
    close(SPAN);
    print "Citation forms: ", scalar(keys %citepos), "\n";

    
    %fvs = ("A:2:Q", "(type qual)",
	    "A:3:A", "(degree appreciative)",
	    "A:4:M", "(gender masc)",
	    "A:4:F", "(gender fem)",
	    "A:5:S", "(number sg)",
	    "A:5:P", "(number pl)",
	    #"A:6:P", "(form part)",
	    # Nothing for ADV
	    # DET
	    "T:2:D", "(type def)",
	    "T:3:M", "(gender masc)",
	    "T:3:F", "(gender fem)",
	    "T:4:D", "(number sg)",
	    "T:4:D", "(number pl)",

	    "D:2:D", "(type dem)",
	    "D:2:P", "(type poss)",
	    "D:2:T", "(type interr)",
	    "D:2:E", "(type exclam)",
	    "D:2:I", "(type indef)",

	    "D:3:D", "(person 1)",
	    "D:3:D", "(person 2)",
	    "D:3:D", "(person 3)",

	    "D:4:M", "(gender masc)",
	    "D:4:F", "(gender fem)",

	    "D:5:S", "(number sg)",
	    "D:5:P", "(number pl)",

	    "D:7:1", "(possor ((person 1) (number sg)))",
	    "D:7:2", "(possor ((person 2) (number sg)))",
	    "D:7:0", "(possor ((person 3)))",
	    "D:7:4", "(possor ((person 1) (number pl)))",
	    "D:7:5", "(possor ((person 2) (number pl)))",

	    # N
	    "N:2:C", "(type common)",
	    "N:2:P", "(type proper)",
	    "N:3:M", "(gender masc)",
	    "N:3:F", "(gender fem)",
	    "N:4:S", "(number sg)",
	    "N:4:P", "(number pl)",
	    #"N:4:N", "(number inv)",
	    #"N:7:A", "(degree appreciative)",
	    # Verbs
	    "V:2:M", "(type main)",
	    "V:2:A", "(type aux)",
	    "V:2:S", "(type ser)",
	    "V:3:I", "(mood ind)",
	    "V:3:S", "(mood subj)",
	    "V:3:M", "(mood imper)",
	    "V:3:C", "(mood cond)",
	    "V:3:N", "(mood inf)",
	    "V:3:G", "(mood ger)",
	    "V:3:P", "(mood part)",
	    "V:4:P", "(tense pres)",
	    "V:4:I", "(tense imperf)",
	    "V:4:F", "(tense fut)",
	    "V:4:S", "(tense past)",
	    "V:4:C", "(tense cond)",
	    "V:5:1", "(person 1)",
	    "V:5:2", "(person 2)",
	    "V:5:3", "(person 3)",
	    "V:6:S", "(number sg)",
	    "V:6:P", "(number pl)",
	    "V:7:M", "(gender masc)",
	    "V:7:F", "(gender fem)",
	    # Pron
	    "P:2:P", "(type personal)",
	    "P:2:D", "(type dem)",
	    "P:2:X", "(type poss)",
	    "P:2:I", "(type indef)",
	    "P:2:T", "(type interr)",
	    "P:2:R", "(type rel)",
	    "P:3:1", "(person 1)",
	    "P:3:2", "(person 2)",
	    "P:3:3", "(person 3)",
	    "P:4:M", "(gender masc)",
	    "P:4:F", "(gender fem)",
	    "P:5:S", "(number sg)",
	    "P:5:P", "(number pl)",
	    "P:6:N", "(case nom)",
	    "P:6:A", "(case acc)",
	    "P:6:D", "(case dat)",
	    "P:6:O", "(case obl)",
	    "P:7:1", "(possor ((person 1) (number sg)))",
	    "P:7:2", "(possor ((person 2) (number sg)))",
	    "P:7:0", "(possor ((person 3)))",
	    "P:7:4", "(possor ((person 1) (number pl)))",
	    "P:7:5", "(possor ((person 2) (number pl)))",
	    "P:8:P", "(polite +)",
	    # Conj
	    "C:2:C", "(type coord)",
	    "C:2:S", "(type subord)",
	    # Num
	    "M:2:C", "(type card)",
	    "M:2:O", "(type ord)",
	    "M:3:M", "(gender masc)",
	    "M:3:F", "(gender fem)",
	    "M:4:S", "(number sg)",
	    "M:4:P", "(number pl)",
	    # Prep
	    "S:3:S", "(form simple)",
	    "S:3:C", "(form contract)",
	    "S:4:M", "(gender masc)",
	    "S:5:S", "(number sg)");

}


#$Expect::Log_Stdout = 0;
#$Expect::Multiline_Matching = 0;


$| = 1;

&initMorph;

$server = IO::Socket::INET->new(LocalPort => $serverport,
				Type => SOCK_STREAM,
				Reuse => 1,
				Listen => 12)
    or die "Couldn't be a tcp server on port $serverport: $!\n";

print "Starting Generation morphology server on port $serverport\n\n"; #"; $cachecount words cached\n\n";

# For Cygwin
#$SIG{CHLD} = 'IGNORE';

# For Linux
$SIG{CHLD} = sub { wait };

#$morphinited = 0;

while ($client = $server->accept()) {
    if ($kidpid = fork) {
	close $client;
	next;
    }

    defined($kidpid) or die "cannot fork: $!";

    close $server;
    
    $hostinfo = gethostbyaddr($client->peeraddr);
    printf "[Connect from %s]\n", $hostinfo->name || $client->peerhost;
    if ($hostinfo->name !~ m/cmu.edu$/i && $hostinfo->name !~ m/^localhost$/i) {
	print $client "[Not from CMU.  Closing out connection]\n\n";
	print "[Not from CMU.  Closing out connection]\n\n";
	close($client);
	exit;
    }


    while ($fs = <$client>) {
	if ($fs eq "**EXIT**") {
	    last;
	}
	
	$fs =~ s/[\r\n]*$//;
	
	if ($utf8 == 1) {
	    $fs = u8toLatin1($fs);
	}


	print "Processing $fs\n";

	$genfs = &getGenMatches($fs);

	$genfs = ucLatin1($genfs);
	print "Returning $genfs\n";


	if ($utf8 == 1) {
	    $genfs = Latin1tou8($genfs);
	}


	print $client "$genfs\n";

    }
    print "\n[Closing out connection]\n\n";
    close($client);
    exit;
}

close($server);


sub getGenMatches {
    my($fs) = shift;
    my($pos, $lex);
    my($derivation, $derivedfs, $derivedfss, $search, $search2);
    $derivedfs = "";
    $search2 = "";

    if ($fs =~ m/\(POS\s+([^\)]+)\)/i) {
	$pos = $1;
    } else {
	return $fs;
    }

    if ($fs =~ m/\(LEX\s+\)/) {
	$fs =~ s/\(LEX\s+\)/\(LEX \" \"\)/;
	return $fs;
    } elsif ($fs =~ m/\(LEX\s+([^\)]+)\)/i) {
	$lex = $1;
	if ($lex =~ m/ /) {
	    $fs =~ s/\(LEX\s+([^\)]+)\)/\(LEX \"$1\"\)/i;
	}
    } else {
	return $fs;
    }

    if (!defined($citepos{$lex})) { # Root not in lexicon
	return $fs;
    }

    if ($pos eq "ADJ") {
	$search = "A";
	if ($fs =~ m/\(TYPE\s+QUAL\)/i) {
	    $search .= "Q";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(DEGREE\s+APPRECIATIVE\)/i) {
	    $search .= "A";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(GENDER\s+MASC\)/i) {
	    $search .= "M";
	} elsif ($fs =~ m/\(GENDER\s+FEM\)/i) {
	    $search .= "F";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(NUMBER\s+SG\)/i) {
	    $search .= "S";
	} elsif ($fs =~ m/\(NUMBER\s+PL\)/i) {
	    $search .= "P";
	} else {
	    $search .= ".";
	}

#	$search .= ".";

	if ($fs =~ m/\(FORM\s+PART\)/i) {
	    $search .= "P";
	} else {
	    $search .= ".";
	}
	

    } elsif ($pos eq "ADV") {
	$search = "R";

	$search .= "....";

    } elsif ($pos eq "DET") { # Could map to two possible POSs in file
	$search = "T";
	if ($fs =~ m/\(TYPE\s+DEF\)/i) {
	    $search .= "D";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(GENDER\s+MASC\)/i) {
	    $search .= "M";
	} elsif ($fs =~ m/\(GENDER\s+FEM\)/i) {
	    $search .= "F";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(NUMBER\s+SG\)/i) {
	    $search .= "S";
	} elsif ($fs =~ m/\(NUMBER\s+PL\)/i) {
	    $search .= "P";
	} else {
	    $search .= ".";
	}

	$search .= ".";


	$search2 = "D";
	if ($fs =~ m/\(TYPE\s+DEM\)/i) {
	    $search2 .= "D";
	} elsif ($fs =~ m/\(TYPE\s+POSS\)/i) {
	    $search2 .= "P";
	} elsif ($fs =~ m/\(TYPE\s+INTERR\)/i) {
	    $search2 .= "T";
	} elsif ($fs =~ m/\(TYPE\s+EXCLAM\)/i) {
	    $search2 .= "E";
	} elsif ($fs =~ m/\(TYPE\s+INDEF\)/i) {
	    $search2 .= "I";
	} else {
	    $search2 .= ".";
	}

	if ($fs =~ m/\(PERSON\s+1\)/i) {
	    $search2 .= "1";
	} elsif ($fs =~ m/\(PERSON\s+2\)/i) {
	    $search2 .= "2";
	} elsif ($fs =~ m/\(PERSON\s+3\)/i) {
	    $search2 .= "3";
	} else {
	    $search2 .= ".";
	}

	if ($fs =~ m/\(GENDER\s+MASC\)/i) {
	    $search2 .= "M";
	} elsif ($fs =~ m/\(GENDER\s+FEM\)/i) {
	    $search2 .= "F";
	} else {
	    $search2 .= ".";
	}

	if ($fs =~ m/\(NUMBER\s+SG\)/i) {
	    $search2 .= "S";
	} elsif ($fs =~ m/\(NUMBER\s+PL\)/i) {
	    $search2 .= "P";
	} else {
	    $search2 .= ".";
	}

	$search2 .= ".";
	

    } elsif ($pos eq "N") {
	$search = "N";
	if ($fs =~ m/\(TYPE\s+COMMON\)/i) {
	    $search .= "C";
	} elsif ($fs =~ m/\(TYPE\s+PROPER\)/i) {
	    $search .= "P";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(GENDER\s+MASC\)/i) {
	    $search .= "M";
	} elsif ($fs =~ m/\(GENDER\s+FEM\)/i) {
	    $search .= "F";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(NUMBER\s+SG\)/i) {
	    $search .= "S";
	} elsif ($fs =~ m/\(NUMBER\s+PL\)/i) {
	    $search .= "P";
	} elsif ($fs =~ m/\(NUMBER\s+INV\)/i) {
	    $search .= "N";
	} else {
	    $search .= ".";
	}

	$search .= "...";


    # The choices in this 'elsif' are POS categories
    # in a transfer system, that all map to the
    # '$search' type 'V' in the Spanish generational
    # morphology (the macu-file)
    } elsif (($pos eq "V")       ||        # transfer system V maps to V in morphology
	     ($pos eq "AUX")     ||        # Auxiliary verbs, including possibly 'ser', 'estar', and 'haber'
	     ($pos eq "CAUS")    ||        # Causitive verbs, such as 'hacer'
	     ($pos eq "LIGHT-V") ||        # Spanish light verbs, such as 'tener' and 'hacer'
	     ($pos eq "CONSUMPTION-V")) {  # Consumption verbs include 'comer', 'beber', and 'tomar'
	                                   #   we need consumption verbs in the Mapudungun-Spanish
	                                   #   transfer system to translate verbalizations such as:
	                                   #   kofke-tu-n : I eat bread.
	$search = "V";
	if ($fs =~ m/\(TYPE\s+MAIN\)/i) {
	    $search .= "M";
	} elsif ($fs =~ m/\(TYPE\s+AUX\)/i) {
	    $search .= "A";
	} elsif ($fs =~ m/\(TYPE\s+SER\)/i) {
	    $search .= "S";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(MOOD\s+IND\)/i) {
	    $search .= "I";
	} elsif ($fs =~ m/\(MOOD\s+SUBJ\)/i) {
	    $search .= "S";
	} elsif ($fs =~ m/\(MOOD\s+IMPER\)/i) {
	    $search .= "M";
	} elsif ($fs =~ m/\(MOOD\s+COND\)/i) {
	    $search .= "C";
	} elsif ($fs =~ m/\(MOOD\s+INF\)/i) {
	    $search .= "N";
	} elsif ($fs =~ m/\(MOOD\s+GER\)/i) {
	    $search .= "G";
	} elsif ($fs =~ m/\(MOOD\s+PART\)/i) {
	    $search .= "P";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(TENSE\s+PRES\)/i) {
	    $search .= "P";
	} elsif ($fs =~ m/\(TENSE\s+IMPERF\)/i) {
	    $search .= "I";
	} elsif ($fs =~ m/\(TENSE\s+FUT\)/i) {
	    $search .= "F";
	} elsif ($fs =~ m/\(TENSE\s+PAST\)/i) {
	    $search .= "S";
	} elsif ($fs =~ m/\(TENSE\s+COND\)/i) {
	    $search .= "C";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(PERSON\s+1\)/i) {
	    $search .= "1";
	} elsif ($fs =~ m/\(PERSON\s+2\)/i) {
	    $search .= "2";
	} elsif ($fs =~ m/\(PERSON\s+3\)/i) {
	    $search .= "3";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(NUMBER\s+SG\)/i) {
	    $search .= "S";
	} elsif ($fs =~ m/\(NUMBER\s+PL\)/i) {
	    $search .= "P";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(GENDER\s+MASC\)/i) {
	    $search .= "M";
	} elsif ($fs =~ m/\(GENDER\s+FEM\)/i) {
	    $search .= "F";
	} else {
	    $search .= ".";
	}


    } elsif ($pos eq "PRON") {
	$search = "P";
	if ($fs =~ m/\(TYPE\s+PERSONAL\)/i) {
	    $search .= "P";
	} elsif ($fs =~ m/\(TYPE\s+DEM\)/i) {
	    $search .= "D";
	} elsif ($fs =~ m/\(TYPE\s+POSS\)/i) {
	    $search .= "X";
	} elsif ($fs =~ m/\(TYPE\s+INDEF\)/i) {
	    $search .= "I";
	} elsif ($fs =~ m/\(TYPE\s+INTERR\)/i) {
	    $search .= "T";
	} elsif ($fs =~ m/\(TYPE\s+REL\)/i) {
	    $search .= "R";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(PERSON\s+1\)/i) {
	    $search .= "1";
	} elsif ($fs =~ m/\(PERSON\s+2\)/i) {
	    $search .= "2";
	} elsif ($fs =~ m/\(PERSON\s+3\)/i) {
	    $search .= "3";
	} else {
	    $search .= ".";
	}
	
	if ($fs =~ m/\(GENDER\s+MASC\)/i) {
	    $search .= "M";
	} elsif ($fs =~ m/\(GENDER\s+FEM\)/i) {
	    $search .= "F";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(NUMBER\s+SG\)/i) {
	    $search .= "S";
	} elsif ($fs =~ m/\(NUMBER\s+PL\)/i) {
	    $search .= "P";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(CASE\s+NOM\)/i) {
	    $search .= "N";
	} elsif ($fs =~ m/\(CASE\s+ACC\)/i) {
	    $search .= "A";
	} elsif ($fs =~ m/\(CASE\s+DAT\)/i) {
	    $search .= "D";
	} elsif ($fs =~ m/\(CASE\s+OBL\)/i) {
	    $search .= "O";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(POSSOR PERSON\s+1\)/i &&
	    $fs =~ m/\(POSSOR NUMBER\s+SG\)/i) {
	    $search .= "1";
	} elsif ($fs =~ m/\(POSSOR PERSON\s+2\)/i &&
		 $fs =~ m/\(POSSOR NUMBER\s+SG\)/i) {
	    $search .= "2";
	} elsif ($fs =~ m/\(POSSOR PERSON\s+3\)/i) {
	    $search .= "3";
	} elsif ($fs =~ m/\(POSSOR PERSON\s+1\)/i &&
		 $fs =~ m/\(POSSOR NUMBER\s+PL\)/i) {
	    $search .= "4";
	} elsif ($fs =~ m/\(POSSOR PERSON\s+2\)/i &&
		 $fs =~ m/\(POSSOR NUMBER\s+PL\)/i) {
	    $search .= "5";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(POLITE\s+\+\)/i) {
	    $search .= "P";
	} else {
	    $search .= ".";
	}


    } elsif ($pos eq "CONJ") {
	$search = "C";
	if ($fs =~ m/\(TYPE\s+COORD\)/i) {
	    $search .= "C";
	} elsif ($fs =~ m/\(TYPE\s+SUBORD\)/i) {
	    $search .= "S";
	} else {
	    $search .= ".";
	}

	$search .= "..";

    } elsif ($pos eq "NUM") {
	$search = "M";

	if ($fs =~ m/\(TYPE\s+CARD\)/i) {
	    $search .= "C";
	} elsif ($fs =~ m/\(TYPE\s+ORD\)/i) {
	    $search .= "O";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(GENDER\s+MASC\)/i) {
	    $search .= "M";
	} elsif ($fs =~ m/\(GENDER\s+FEM\)/i) {
	    $search .= "F";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(NUMBER\s+SG\)/i) {
	    $search .= "S";
	} elsif ($fs =~ m/\(NUMBER\s+PL\)/i) {
	    $search .= "P";
	} else {
	    $search .= ".";
	}

	$search .= ".";

	$search .= "."; # What to do with POS?

    } elsif ($pos eq "INTERJ") {
	$search = "I";

    } elsif ($pos eq "ABBR") {
	$search = "Y";
      

    } elsif ($pos eq "PREP") {
	$search = "S";
	$search .= ".";

	if ($fs =~ m/\(FORM\s+SIMPLE\)/i) {
	    $search .= "S";
	} elsif ($fs =~ m/\(FORM\s+CONTRACT\)/i) {
	    $search .= "P";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(GENDER\s+MASC\)/i) {
	    $search .= "M";
	} else {
	    $search .= ".";
	}

	if ($fs =~ m/\(NUMBER\s+SG\)/i) {
	    $search .= "S";
	} else {
	    $search .= ".";
	}

    } elsif ($pos eq "PUNCT") {
	$search = "F";
    } else {
	return $fs;
    }

    #print "Search string $search\n";
    $derivedfss = "";

    $filepos = $citepos{$lex};

    # Search for derived forms with matching feature/values
    open(SPAN, $spanishfile) or die $!;
    seek(SPAN, $filepos, 0);

    print "SEARCH $search\n";

    while ($line = <SPAN>) {
	$line =~ s/[\r\n]*$//;
	($citation, $features, $derived) = ($line =~ m/^(\S+)\#(\S+)\s+(.*)$/);
	if (ucLatin1($citation) ne ucLatin1($lex)) {
	    last;
	}
	if (($features =~ m/^$search$/) or ($search2 ne "" and $features =~ m/^$search2$/)) {
	    #print "$line\n";
	    @derivations = split(/\s+/, $derived);
	    foreach $derivation (@derivations) {
		#print " Derivation $derivation\n";
		$derivedfs = $fs;
		# Change LEX to derived form
		$derivedfs =~ s/\(LEX ([^\)]+)\)/"(LEX $derivation)"/ie;
		# Add features not in original fs
		for ($i = 1; $i < length($search); $i++) {
		    if (substr($search, $i, 1) eq ".") {
			$newfeature = substr($features, 0, 1) . ":" . ($i+1) . ":" . substr($features, $i, 1);
			#print " New feature $newfeature\n";
			if (defined($fvs{$newfeature})) {
			    $newfv = $fvs{$newfeature};
			    #print " FValue: $newfv\n";
			    $derivedfs =~ s/\)$/" $newfv\)"/e;
			}
		    }
		}
		#print "Derived fs: $derivedfs\n";

		$derivedfss .= $derivedfs . " ";
	    }
	}
    }
    close(SPAN);

    if ($derivedfss eq "") { return $fs; }
    
    return $derivedfss;
}