;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; eng2spa transfer run script.
;; Loads grammar3.trf and lexicon3.trf, runs `transfile` on the
;; input-simulation corpus, then quits.
;; One command per line; everything after ';' on a line is a comment.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; FIRST, preprocess: /usr0/aria/eng2spa/bin/input-xfer-preprocessing.pl
;   ./input-xfer-preprocessing.pl < ../corpus/elicitation-EN.txt.rest >! ../corpus/elicitation-EN.txt.rest.clean
;; AND eliminate duplicates: cat elicitation-EN.txt.rest.clean | uniq > elicitation-EN.txt.rest.clean.uniq
;; NOTE(review): `uniq` only drops ADJACENT duplicate lines; confirm that is
;; sufficient for this corpus (non-adjacent repeats will survive).
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; --- Engine settings ---
latin1 ; to process accented characters correctly
normalizecase on
;topnode S
; default: SLword -> leave it for cases where TRword is same (proper names, num, etc.)
;unkpolicy [*copy*|*delete*|word] ; Set policy for handling unknown words
openclass V N ; Specify parts of speech to use for an unknown word

;; --- Rules: grammar and lexicon (version 3 active, version 2 kept for reference) ---
loadrules /usr0/aria/eng2spa/grammars/grammar3.trf
;loadrules /usr0/aria/eng2spa/grammars/grammar2.trf
loadrules /usr0/aria/eng2spa/lexicons/lexicon3.trf
;loadrules /usr0/aria/eng2spa/lexicons/lexicon2.trf
; lexicon.trf contains verb.trf and noun.trf

;; --- Output options ---
findall on
includesource on
showtrace src ; [off|top(node)|full(parse tree)|src(+alignments)]

;; --- Debugging (all disabled) ---
;grammardebug on
;parsedebug on
;; to see what features get passed uncomment next two lines
;transferdebug on
;fsdebug on
;sortrules on

;; --- Input: exactly one transfile is active; the others are alternative corpora ---
;transfile /usr0/aria/eng2spa/corpus/elicitation-EN.txt
;transfile /usr0/aria/eng2spa/corpus/EN-sentences.txt
;transfile /usr0/aria/eng2spa/corpus/EN-test-sentences.txt
;transfile /usr0/aria/eng2spa/corpus/test.txt
;transfile /usr0/aria/eng2spa/corpus/input-xfer
;transfile /usr0/aria/eng2spa/corpus/elicitation-EN.txt.rest.clean.uniq
transfile /usr0/aria/eng2spa/corpus/input-simulation
;trans Gaudi was a great artist

quit