package cmonson.morphologyChallengeUtilities;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


public class AnalyzeASpanishWordlistIntoAMorphoChallengeAnswerKeyUsingMacoAndFreeLingData {
	
	/*
	Read in Maco file                                 CHECK
	for each word form in the analyzed corpus
	match it against the maco file in two ways
		1) whole word match
		2) matching with a clitic on VALID verb forms, infinitive, participles, etc.
	Write out each analysis of each word form in Morpho Challenge format.
	*/
	
	/*
	 * A Word form may occur on/in more than one line of the Maco Lexicon file.
	 * This happens when a single word form can be analyzed both as a verb and
	 * as a noun, for example.
	 */
	private static class MacoWordFormData {
		// The word form this entry describes. There is no constructor here, so
		// it is assigned directly by whichever code builds the entry.
		String wordform;

		// Every analysis recorded for this word form, in the order it was added.
		List<MacoAnalysis> macoAnalyses = new ArrayList<MacoAnalysis>();

		/** Appends one more analysis to this word form's list of analyses. */
		public void addMacoAnalysis(MacoAnalysis macoAnalysis) {
			this.macoAnalyses.add(macoAnalysis);
		}

		/**
		 * Renders the entry as the word form followed by the string form of
		 * each analysis, every analysis preceded by a single space.
		 */
		@Override
		public String toString() {
			StringBuilder rendered = new StringBuilder();
			rendered.append(wordform);
			for (MacoAnalysis analysis : macoAnalyses) {
				rendered.append(" ").append(analysis.toString());
			}
			return rendered.toString();
		}
	}
	
	private static class MacoAnalysis {
		// Citation form exactly as passed to the constructor.
		String citationForm;

		// MACO feature string exactly as passed to the constructor.
		String macoFeatureString;

		/** Stores the two components of a single lexicon analysis. */
		private MacoAnalysis(String citationForm, String macoFeatureString) {
			this.macoFeatureString = macoFeatureString;
			this.citationForm = citationForm;
		}

		/** Renders the analysis as the citation form, '#', then the feature string. */
		@Override
		public String toString() {
			StringBuilder rendered = new StringBuilder();
			rendered.append(citationForm).append("#").append(macoFeatureString);
			return rendered.toString();
		}
	}
	

	
	private static class MorphoChallengeAnswerKeyEntry {
		
		// The word form this answer-key entry analyzes; set once in the constructor.
		String wordForm;
		
		// All analyses found for wordForm, from whole-word lexicon matches and
		// from clitic-stripped matches alike.
		Set<MorphoChallengeAnalysis> morphoChallengeAnalyses =
			new HashSet<MorphoChallengeAnalysis>();

		// Starts out true and is set to false by calculateMorphoChallengeAnalyses()
		// when neither the whole-word match nor the clitic match succeeded.
		private boolean isValid = true;

		// Maps a surface clitic sequence (e.g. "selo") to the list of clitic
		// features it contributes (e.g. "+clitic_se", "+clitic_lo").
		// Populated by initClitics().
		private Map<String, List<String>> cliticGroups;
		
		/**
		 * Builds the answer-key entry for one word form: sets up the clitic
		 * table and immediately computes every analysis the lexicon supports.
		 *
		 * @param wordForm the word form to analyze
		 * @param macoLexiconByWordForm the Maco lexicon, keyed by word form
		 */
		public MorphoChallengeAnswerKeyEntry(
				String wordForm,
				Map<String, MacoWordFormData> macoLexiconByWordForm) {

			this.wordForm = wordForm;

			// The clitic table must exist before the analyses are computed,
			// because clitic matching iterates over it.
			initClitics();

			calculateMorphoChallengeAnalyses(macoLexiconByWordForm);
		}

		
		/**
		 * Fills {@code cliticGroups} with every supported clitic sequence,
		 * mapping the surface string of the sequence to the ordered list of
		 * clitic features it is made of.
		 */
		private void initClitics() {
			// The eleven individual clitic features.
			final String lo  = "+clitic_lo";
			final String los = "+clitic_los";
			final String la  = "+clitic_la";
			final String las = "+clitic_las";
			final String le  = "+clitic_le";
			final String les = "+clitic_les";
			final String me  = "+clitic_me";
			final String te  = "+clitic_te";
			final String nos = "+clitic_nos";
			final String os  = "+clitic_os";
			final String se  = "+clitic_se";

			// Each row: the surface clitic sequence, followed by the features it
			// contributes, in surface order. Rows are listed in the order the
			// keys are inserted into the map.
			final String[][] cliticTable = {
				// single clitics
				{"lo",       lo},
				{"los",      los},
				{"la",       la},
				{"las",      las},
				{"le",       le},
				{"les",      les},
				{"me",       me},
				{"te",       te},
				{"nos",      nos},
				{"os",       os},
				{"se",       se},

				// me/te/nos/os + lo/la/los/las
				{"mela",     me, la},
				{"melas",    me, las},
				{"melo",     me, lo},
				{"melos",    me, los},
				{"tela",     te, la},
				{"telas",    te, las},
				{"telo",     te, lo},
				{"telos",    te, los},
				{"nosla",    nos, la},
				{"noslas",   nos, las},
				{"noslo",    nos, lo},
				{"noslos",   nos, los},
				{"osla",     os, la},
				{"oslas",    os, las},
				{"oslo",     os, lo},
				{"oslos",    os, los},

				// se + one other clitic
				{"seme",     se, me},
				{"sete",     se, te},
				{"senos",    se, nos},
				{"seos",     se, os},
				{"sela",     se, la},
				{"selas",    se, las},
				{"selo",     se, lo},
				{"selos",    se, los},
				{"sele",     se, le},
				{"seles",    se, les},

				// nos/os + le/les
				{"nosle",    nos, le},
				{"nosles",   nos, les},
				{"osle",     os, le},
				{"osles",    os, les},

				// se + me/te/nos/os + lo/la/los/las
				{"semela",   se, me, la},
				{"semelas",  se, me, las},
				{"semelo",   se, me, lo},
				{"semelos",  se, me, los},
				{"setela",   se, te, la},
				{"setelas",  se, te, las},
				{"setelo",   se, te, lo},
				{"setelos",  se, te, los},
				{"senosla",  se, nos, la},
				{"senoslas", se, nos, las},
				{"senoslo",  se, nos, lo},
				{"senoslos", se, nos, los},
				{"seosla",   se, os, la},
				{"seoslas",  se, os, las},
				{"seoslo",   se, os, lo},
				{"seoslos",  se, os, los},
			};

			cliticGroups = new HashMap<String, List<String>>();

			for (String[] row : cliticTable) {
				List<String> cliticFeatures = new ArrayList<String>();
				for (int column = 1; column < row.length; column++) {
					cliticFeatures.add(row[column]);
				}
				cliticGroups.put(row[0], cliticFeatures);
			}
		}


		/**
		 * Runs both matching strategies against the lexicon and marks this
		 * entry invalid when neither of them produced an analysis.
		 *
		 * @param macoLexiconByWordForm the Maco lexicon, keyed by word form
		 */
		private void 
		calculateMorphoChallengeAnalyses(
				Map<String, MacoWordFormData> macoLexiconByWordForm) {
			
			// Both strategies are always run (no short-circuiting): a word form
			// may have whole-word analyses AND clitic-stripped analyses.
			boolean wholeWordMatched =
				calculateCompleteMatchMorphoChallengeAnalyses(macoLexiconByWordForm);
			boolean cliticStrippedMatched =
				calculateCliticMatchingMorphoChallengeAnalyses(macoLexiconByWordForm);
			
			boolean anyAnalysisFound = wholeWordMatched || cliticStrippedMatched;
			if ( ! anyAnalysisFound) {
				isValid = false;
			}
		}

		/**
		 * Looks the whole word form up in the lexicon and converts each of its
		 * Maco analyses into a Morpho Challenge analysis.
		 *
		 * @param macoLexiconByWordForm the Maco lexicon, keyed by word form
		 * @return true if at least one analysis was built and stored; false if
		 *         the word form is not in the lexicon or no analysis could be built
		 */
		private boolean 
		calculateCompleteMatchMorphoChallengeAnalyses(
				Map<String, MacoWordFormData> macoLexiconByWordForm) {
			
			// Without a lexicon entry for the complete word form there is
			// nothing to do here.
			if ( ! macoLexiconByWordForm.containsKey(wordForm)) {
				return false;
			}
			
			MacoWordFormData lexiconEntry = macoLexiconByWordForm.get(wordForm);
			
			int analysesBuilt = 0;
			for (MacoAnalysis lexiconAnalysis : lexiconEntry.macoAnalyses) {
				MorphoChallengeAnalysis candidate = new MorphoChallengeAnalysis();
				if (candidate.incorporate(lexiconAnalysis, macoLexiconByWordForm)) {
					morphoChallengeAnalyses.add(candidate);
					analysesBuilt++;
				}
			}
			
			if (analysesBuilt == 0) {
				// The word form is in the lexicon, yet nothing usable came out of it.
				System.err.println();
				System.err.println("Odd... failed to successfully build a Morpho Challenge Analysis");
				System.err.println("  for the word form: " + wordForm);
			}
			
			return analysesBuilt > 0;
		}

		/**
		 * Tries to analyze the word form as a lexicon entry followed by one of
		 * the known clitic groups.
		 * <p>
		 * For every clitic group that the word form ends in, the remaining stem
		 * is looked up in the lexicon (a second time with the accents on a, e
		 * and i removed if the first lookup fails). Each analysis of the stem
		 * that permits clitic attachment becomes a Morpho Challenge analysis
		 * with the clitic features of the group appended.
		 *
		 * @param macoLexiconByWordForm the Maco lexicon, keyed by word form
		 * @return true if at least one clitic-based analysis was built and stored
		 */
		private boolean 
		calculateCliticMatchingMorphoChallengeAnalyses(
				Map<String, MacoWordFormData> macoLexiconByWordForm) {
			
			boolean successfullyBuiltAMorphoChallengeAnalysis = false;
			
			for (Map.Entry<String, List<String>> cliticEntry : cliticGroups.entrySet()) {
				String cliticGroup = cliticEntry.getKey();
				
				// The clitic groups are literal strings, so a plain suffix test is
				// all that is needed; no regular expression has to be compiled.
				if ( ! wordForm.endsWith(cliticGroup)) {
					continue;
				}
				String stem = 
					wordForm.substring(0, wordForm.length() - cliticGroup.length());
				
				// From here it is very similar to 
				// calculateCompleteMatchMorphoChallengeAnalyses()
				if ( ! macoLexiconByWordForm.containsKey(stem)) {
					
					// TODO: this de-accenting of Spanish after clitic stripping
					// could probably be improved, but this simple trick works quite
					// well for Spanish. I would estimate at least 95% of the time,
					// probably more.
					stem = stem.replace("á", "a");
					stem = stem.replace("é", "e");
					stem = stem.replace("í", "i");
					
					if ( ! macoLexiconByWordForm.containsKey(stem)) {
						continue;
					}
					
					System.err.println("After stripping accents, we found a clitic match for: " + stem + " from: " + wordForm);
					System.err.println();
				}
				
				MacoWordFormData macoWordFormDataOfStem = macoLexiconByWordForm.get(stem);
				
				for (MacoAnalysis macoAnalysisOfStem : macoWordFormDataOfStem.macoAnalyses) {
					
					if ( ! stemAllowsCliticAttachment(macoAnalysisOfStem)) {
						reportUnexpectedlyBlockedCliticAttachment(
								cliticGroup, stem, macoAnalysisOfStem);
						continue;
					}
					
					MorphoChallengeAnalysis morphoChallengeAnalysis =
						new MorphoChallengeAnalysis();
					
					boolean successfullyIncorporated =
						morphoChallengeAnalysis.incorporate(
								macoAnalysisOfStem, 
								macoLexiconByWordForm);
					
					morphoChallengeAnalysis.addFeatures(cliticEntry.getValue());
					
					if (successfullyIncorporated) {
						successfullyBuiltAMorphoChallengeAnalysis = true;
						morphoChallengeAnalyses.add(morphoChallengeAnalysis);
					}
				}
			}
			
			return successfullyBuiltAMorphoChallengeAnalysis;
		}
		
		/**
		 * Debugging aid for manually checking that clitic attachment is allowed
		 * everywhere it should be. Prints a report when a stem analysis that
		 * was refused clitic attachment is a verb form other than:
		 * <pre>
		 *   V.I.... : clitics can't attach to indicative forms
		 *   V.S.... : clitics can't attach to subjunctive forms
		 *   V.M.... : clitics can't attach to imperative forms
		 *   V.P.... : clitics can't attach to participle forms
		 * </pre>
		 * Non-verb analyses and the four verb moods above are silently ignored.
		 */
		private void 
		reportUnexpectedlyBlockedCliticAttachment(
				String cliticGroup, 
				String stem, 
				MacoAnalysis macoAnalysisOfStem) {
			
			String stemFeatures = macoAnalysisOfStem.macoFeatureString;
			
			if (    stemFeatures.matches("V......")  &&
			    ( ! stemFeatures.matches("V.I....")) &&
			    ( ! stemFeatures.matches("V.S....")) &&
			    ( ! stemFeatures.matches("V.M....")) &&
			    ( ! stemFeatures.matches("V.P...."))) {
				System.err.println();
				System.err.println(
						wordForm + " matched the clitic group: " + cliticGroup);
				System.err.println(
						" but the stem: " + stem + " analyzed as: " + 
						macoAnalysisOfStem.toString());
				System.err.println(
				" does not permit clitic attachment");
				System.err.println();
			}
		}
	
		/**
		 * Decides whether a clitic group may be attached to a stem with the
		 * given analysis. Only seven-slot verb tags whose third slot is 'N' or
		 * 'G' qualify (presumably infinitive and gerund -- confirm against the
		 * MACO tag documentation).
		 */
		private boolean stemAllowsCliticAttachment(MacoAnalysis macoAnalysisOfStem) {
			String stemFeatures = macoAnalysisOfStem.macoFeatureString;
			return stemFeatures.matches("V.N....")
				|| stemFeatures.matches("V.G....");
		}


		/**
		 * Formats this entry as one answer-key line using the morpheme-like
		 * rendering of each analysis: the word form, a tab, then the analyses
		 * separated by ", ".
		 */
		public String getMorphoChallengeAnalyses_morphemeLike() {
			StringBuilder line = new StringBuilder();
			line.append(wordForm).append("\t");
			
			// Empty before the first analysis, ", " before every later one.
			String separator = "";
			for (MorphoChallengeAnalysis analysis : morphoChallengeAnalyses) {
				line.append(separator);
				line.append(analysis.getMorphemeLikeString());
				separator = ", ";
			}
			
			return line.toString();
		}
		
		/**
		 * Formats this entry as one answer-key line using the feature-like
		 * rendering of each analysis: the word form, a tab, then the analyses
		 * separated by ", ".
		 */
		public String getMorphoChallengeAnalyses_featureLike() {
			StringBuilder line = new StringBuilder();
			line.append(wordForm);
			line.append("\t");
			
			boolean needsSeparator = false;
			for (MorphoChallengeAnalysis analysis : morphoChallengeAnalyses) {
				if (needsSeparator) {
					line.append(", ");
				}
				needsSeparator = true;
				
				line.append(analysis.getFeatureLikeString());
			}
			
			return line.toString();
		}	
		
		/**
		 * Renders both answer-key lines for this entry: the morpheme-like line,
		 * a platform line separator, then the feature-like line.
		 */
		@Override
		public String toString() {
			String morphemeLikeLine = getMorphoChallengeAnalyses_morphemeLike();
			String lineBreak        = String.format("%n");
			String featureLikeLine  = getMorphoChallengeAnalyses_featureLike();
			return morphemeLikeLine + lineBreak + featureLikeLine;
		}
	}
	
	private static class MorphoChallengeAnalysis {
		
		// A central repository for the official feature names that are output to
		// the morpho challenge style answer key. These constants are particularly
		// important when more than one MACO feature string encodes the same
		// morphosyntactic feature--especially when that morphosyntactic feature is
		// always encoded by the same morpheme, such as 's', 'a', and 'o' which 
		// consistently mark plural, feminine, and masculine respectively across
		// various MACO parts of speech (adjective, determiner, etc.)
		//
		private static final String SINGULAR      = "+sg";
		private static final String PLURAL        = "+pl";
		
		private static final String FIRST_PERSON  = "+1st";
		private static final String SECOND_PERSON = "+2nd";
		private static final String THIRD_PERSON  = "+3rd";
		
		private static final String MASCULINE     = "+masc";
		private static final String FEMININE      = "+fem";
		
		private static final String INDICATIVE    = "+indic";
		private static final String SUBJUNCTIVE   = "+subjunc";
		private static final String IMPERATIVE    = "+imperative";
		
		private static final String INFINITIVE    = "+inf";
		private static final String GERUND        = "+gerund";
		private static final String PARTICIPLE    = "+part";
		
		private static final String PRESENT       = "+pres";
		private static final String IMPERFECT     = "+imperf";
		private static final String FUTURE        = "+future";
		private static final String PAST          = "+past";
		private static final String CONDITIONAL   = "+cond";
		
		
		// a morpho challenge 'feature' is any string, that string
		// could be a stem, it could be a literal morpheme, it could
		// be an abstract string representation of one or more
		// morphosyntactic features.
		//
		// '...morphemeLike' analyses put all morphosyntactic features that are 
		// marked by a single morpheme as one 'feature' in any particular 
		// morpho-challenge style analysis of this MacoEntry.
		//
		// '...featureLike' analyses put each separate morphosyntactic feature as
		// a separate feature in any particular morpho-challenge style analysis of this
		// MacoEntry.
		//
		// For example the word 'canto' which is listed in the Maco-lexicon file as, 
		// 'cantar#VMIP1S0 canto', might get the morphemeLike and 
		// featureLike morpho-challenge style analyses of:
		//
		// morphemeLike: canto	cantar +1sgPresIndic
		// featureLike:  canto	cantar +1 +sg +Pres +Indic
		//
		// The point of this is that we might not expect our morphological
		// analyzer to identify 4 separate features for the single morpheme 'o'.
		// But if we use the featureLike analysis, then a segmentation algorithm will 
		// be penalized when 'canto' and 'hablamos' aren't analyzed as sharing the
		// +1, +Pres, and +Indic features.
		// 
		List<String> features_morphemeLike = new ArrayList<String>();
		List<String> features_featureLike  = new ArrayList<String>();
		
		/** Creates an analysis that does not contain any features yet. */
		public MorphoChallengeAnalysis() {
			// Nothing to do: both feature lists are created by their field
			// initializers.
		}
		
		/**
		 * Appends the given features, in order, to both the morpheme-like and
		 * the feature-like feature lists of this analysis.
		 *
		 * @param features the features to append to both lists
		 */
		public void addFeatures(List<String> features) {
			this.features_morphemeLike.addAll(features);
			this.features_featureLike.addAll(features);
		}

		/**
		 * Adds everything one Maco analysis contributes to this Morpho
		 * Challenge analysis: first the citation form, then the morphosyntactic
		 * features that are marked on the part of speech named by the first
		 * character of the Maco feature string.
		 * <p>
		 * Adjectives marked as participles are deliberately NOT filtered out.
		 * Nearly all of them also appear in the maco file as verbal
		 * participles, but some have no corresponding verb form, so the
		 * adjective analyses are kept.
		 *
		 * @param macoAnalysis the lexicon analysis to incorporate
		 * @param macoLexiconByWordForm the Maco lexicon, keyed by word form
		 * @return always true; unknown categories are reported on stderr but do
		 *         not make the incorporation fail
		 */
		public boolean 
		incorporate(
				MacoAnalysis macoAnalysis, 
				Map<String, MacoWordFormData> macoLexiconByWordForm) {
			
			incorporateCitationForm(macoAnalysis, macoLexiconByWordForm);
			
			String macoFeatureString = macoAnalysis.macoFeatureString;
			
			// Only the first character decides the part of speech, so dispatch on
			// it directly instead of compiling one regular expression per
			// category. An empty feature string falls through to the default
			// branch.
			char category = 
				(macoFeatureString.length() == 0) ? '\0' : macoFeatureString.charAt(0);
			
			switch (category) {
			case 'A':
				incorporateMorphoSyntacticFeaturesMarkedOnAdjective(macoFeatureString);
				break;
				
			case 'R':
				incorporateMorphoSyntacticFeaturesMarkedOnAdverb(macoFeatureString);
				break;
				
			case 'D':
				incorporateMorphoSyntacticFeaturesMarkedOnDeterminer(macoFeatureString);
				break;
				
			case 'N':
				incorporateMorphoSyntacticFeaturesMarkedOnNoun(macoFeatureString);
				break;
				
			case 'V':
				incorporateMorphoSyntacticFeaturesMarkedOnVerb(macoFeatureString);
				break;
				
			case 'P':
				incorporateMorphoSyntacticFeaturesMarkedOnPronoun(macoFeatureString);
				break;
				
			case 'C':
				incorporateMorphoSyntacticFeaturesMarkedOnConjunction(macoFeatureString);
				break;
				
			case 'I':
				incorporateMorphoSyntacticFeaturesMarkedOnInterjection(macoFeatureString);
				break;
				
			case 'S':
				incorporateMorphoSyntacticFeaturesMarkedOnPreposition(macoFeatureString);
				break;
				
			case 'Y':
				// do nothing. The feature 'Y' isn't even documented but it occurs in
				// the maco lexicon. Just ignore these entries, they look like computer
				// codes or maybe acronyms or something.
				break;
				
			default:
				System.err.println(" OOPS. We matched some strange thing in the maco lexicon:");
				System.err.println();
				System.err.println(macoAnalysis.toString());
				System.err.println();
				System.err.println();
				break;
			}
			
			return true;
		}

		/**
		 * Adds the citation form of the given Maco analysis, unchanged, as a
		 * feature of both the morpheme-like and the feature-like analysis.
		 * <p>
		 * Design note: an earlier approach treated adjectives tagged as
		 * participles (A....P) specially. It replaced their citation form with
		 * the corresponding verb ('-ado' to '-ar', '-ido' to '-er' or '-ir',
		 * whichever is in the lexicon) and aborted when no such verb existed.
		 * That was written under the impression that participles were never
		 * analyzed as verbs in the maco lexicon, which is wrong. Participles
		 * generally are analyzed as verbs, and sometimes also as adjectives.
		 * The special case was therefore dropped and the lexicon's own citation
		 * form is always used.
		 *
		 * @param macoAnalysis the analysis whose citation form is added
		 * @param macoLexiconByWordForm the Maco lexicon; not consulted here
		 */
		private void 
		incorporateCitationForm(
				MacoAnalysis macoAnalysis, 
				Map<String, MacoWordFormData> macoLexiconByWordForm) {
			
			String citationForm = macoAnalysis.citationForm;
			
			features_morphemeLike.add(citationForm);
			features_featureLike.add(citationForm);
		}

		/**
		 * Adds the features that Spanish marks with an explicit morpheme on
		 * adjectives: gender and plural number.
		 * <p>
		 * A Maco adjective tag has six slots:
		 * <pre>
		 *   1 Category  A for Adjective.
		 *   2 Type      Ignored. Marks ordinal adjectives ('third', ...).
		 *   3 Grade     Ignored. 'Grado', never actually used in the Maco file.
		 *   4 Gender    Masculine and feminine each get a feature.
		 *   5 Number    Plural gets a feature.
		 *   6 Function  Participle. Ignored here: nearly all adjectives marked
		 *               as participles also appear in the maco file as verbs
		 *               marked as participles, and the difference is semantic
		 *               or syntactic rather than morphological. Still, some
		 *               participles have no corresponding verb form, e.g.
		 *               ilimitado 'not limited' (only limitar exists, not
		 *               *ilimitar).
		 * </pre>
		 */
		private void 
		incorporateMorphoSyntacticFeaturesMarkedOnAdjective(String macoFeatureString) {
			
			// Slot 4: at most one of the two gender patterns can match.
			String genderFeature = null;
			if (macoFeatureString.matches("A..M..")) {
				genderFeature = MASCULINE;
			} else if (macoFeatureString.matches("A..F..")) {
				genderFeature = FEMININE;
			}
			if (genderFeature != null) {
				features_morphemeLike.add(genderFeature);
				features_featureLike.add(genderFeature);
			}
			
			// Slot 5: only plural is recorded.
			if (macoFeatureString.matches("A...P.")) {
				features_morphemeLike.add(PLURAL);
				features_featureLike.add(PLURAL);
			}
		}

		/**
		 * Adds nothing: beyond the citation form there are no features marked
		 * on Spanish adverbs.
		 * <p>
		 * A Maco adverb tag has two slots:
		 * <pre>
		 *   1 Category  R for Adverb.
		 *   2 Type      General or Negative. This is not a marked feature but
		 *               an inherent feature of particular adverbs.
		 * </pre>
		 */
		private void 
		incorporateMorphoSyntacticFeaturesMarkedOnAdverb(String macoFeatureString) {
			// Intentionally empty.
		}

		/**
		 * Adds the features that Spanish marks with an explicit morpheme on
		 * determiners: gender and plural number.
		 * <p>
		 * A Maco determiner tag has six slots:
		 * <pre>
		 *   1 Category          D for Determiner.
		 *   2 Type              Ignored. Signals unmarked inherent features of
		 *                       particular determiners: demonstrative,
		 *                       possessive, interrogative, exclamative,
		 *                       indefinite, article, and numeral.
		 *   3 Person            Ignored. Person of possessive determiners is
		 *                       inherent in the stem, not a separate morpheme.
		 *   4 Gender            Many determiners mark gender with a morpheme.
		 *   5 Number            Many determiners mark number with a morpheme.
		 *                       Singular is unmarked, so it gets no feature.
		 *   6 Possessor Number  Ignored. Like slot 3, inherent to the stem of
		 *                       possessive determiners.
		 * </pre>
		 */
		private void 
		incorporateMorphoSyntacticFeaturesMarkedOnDeterminer(String macoFeatureString) {
			
			boolean isMasculine = macoFeatureString.matches("D..M..");
			boolean isFeminine  = macoFeatureString.matches("D..F..");
			boolean isPlural    = macoFeatureString.matches("D...P.");
			
			// Slot 4: gender
			if (isMasculine) {
				features_morphemeLike.add(MASCULINE);
				features_featureLike.add(MASCULINE);
			}
			if (isFeminine) {
				features_morphemeLike.add(FEMININE);
				features_featureLike.add(FEMININE);
			}
			
			// Slot 5: number
			if (isPlural) {
				features_morphemeLike.add(PLURAL);
				features_featureLike.add(PLURAL);
			}
		}

		private void 
		incorporateMorphoSyntacticFeaturesMarkedOnNoun(String macoFeatureString) {
			// The maco feature string of a noun has seven slots, of which only
			// plural number (slot 4) yields an answer key feature.
			//
			// Slot 1: Category: N for Noun
			//
			// Slot 2: Type: Ignored. Common or Proper; there are no proper nouns
			//               in the maco file anyway.
			//
			// Slot 3: Gender: Ignored. Gender on nouns is inherent and not marked
			//                 with a separate morpheme.
			//
			// Slot 4: Number: Nouns mark plural with an explicit morpheme.
			//
			// Slot 5,6: Semantic Classification: Ignored.
			//
			// Slot 7: Grade: Appreciative: Ignored. Never even used in the maco
			//                lexicon file.
			if ( ! macoFeatureString.matches("N..P...")) {
				return; // not an explicitly plural noun: nothing to record
			}

			features_morphemeLike.add(PLURAL);
			features_featureLike.add(PLURAL);
		}

		private void 
		incorporateMorphoSyntacticFeaturesMarkedOnVerb(String macoFeatureString) {
			// The maco feature string of a verb has seven slots.
			//
			// Slot 1: Category: V for Verb
			//
			// Slot 2: Type: Principal, Auxiliary, or Semiauxiliary. An inherent
			//               feature of the stem, not a morphological feature,
			//               hence ignored.
			//
			// Slot 3: Mood: Indicative (I), Subjunctive (S) and Imperative (M) are
			//               expressed by a fusional morpheme, so for these moods
			//               ONE compound morphemeLike feature is assembled out of
			//               mood, tense, person and number. Infinitive (N),
			//               Gerund (G) and Participle (P) need no compound
			//               feature: infinitives and gerunds end up with a single
			//               inflectional feature, while participles get separate
			//               features for +part, for gender and, when plural, for
			//               plural.
			final boolean indicative  = macoFeatureString.matches("V.I....");
			final boolean subjunctive = macoFeatureString.matches("V.S....");
			final boolean imperative  = macoFeatureString.matches("V.M....");
			final boolean participle  = macoFeatureString.matches("V.P....");

			// true exactly when the pieces below must be fused into one feature
			final boolean fusional = indicative || subjunctive || imperative;

			// The compound feature starts with the mood as-is; every later
			// piece is appended with its '+' turned into '_'.
			final StringBuilder compoundFeature = new StringBuilder();

			// Slot 3 holds a single character, so exactly one branch can apply.
			if (indicative) {
				compoundFeature.append(INDICATIVE);
				features_featureLike.add(INDICATIVE);
			} else if (subjunctive) {
				compoundFeature.append(SUBJUNCTIVE);
				features_featureLike.add(SUBJUNCTIVE);
			} else if (imperative) {
				compoundFeature.append(IMPERATIVE);
				features_featureLike.add(IMPERATIVE);
			} else if (macoFeatureString.matches("V.N....")) {
				features_morphemeLike.add(INFINITIVE);
				features_featureLike.add(INFINITIVE);
			} else if (macoFeatureString.matches("V.G....")) {
				features_morphemeLike.add(GERUND);
				features_featureLike.add(GERUND);
			} else if (participle) {
				features_morphemeLike.add(PARTICIPLE);
				features_featureLike.add(PARTICIPLE);
			}

			// Slot 4: Tense: Present (P), Imperfect (I), Future (F), Past (S) or
			//                Conditional (C).
			// Slot 5: Person: 1, 2 or 3; agrees with the subject.
			//
			// Neither tense nor person is marked on infinitives, gerunds or
			// participles, so both are always part of the fusional morpheme and
			// hence of the compound morphemeLike feature. Meeting one of them
			// without a fusional mood is unexpected and only reported.
			//
			// Each row: { pattern, feature, name used in the diagnostic }
			final String[][] tenseAndPersonSlots = {
				{ "V..P...", PRESENT,       "present" },
				{ "V..I...", IMPERFECT,     "imperfect" },
				{ "V..F...", FUTURE,        "future" },
				{ "V..S...", PAST,          "past" },
				{ "V..C...", CONDITIONAL,   "conditional" },
				{ "V...1..", FIRST_PERSON,  "first person" },
				{ "V...2..", SECOND_PERSON, "second person" },
				{ "V...3..", THIRD_PERSON,  "third person" }
			};
			for (String[] slot : tenseAndPersonSlots) {
				if ( ! macoFeatureString.matches(slot[0])) {
					continue;
				}
				if (fusional) {
					compoundFeature.append(slot[1].replaceAll("\\+", "_"));
				} else {
					System.err.println("Should never get here!! (" + slot[2] + ")");
				}
				features_featureLike.add(slot[1]);
			}

			// Slot 6: Number: for main-form (non-participle) verbs number agrees
			//                 with the subject and belongs to the fusional
			//                 morpheme. For participles plural is marked with a
			//                 separate morpheme while singular is not marked.
			if (macoFeatureString.matches("V....S.")) {
				if (fusional) {
					compoundFeature.append(SINGULAR.replaceAll("\\+", "_"));
				}
				// singular is not explicitly marked for participles
				if ( ! participle) {
					features_featureLike.add(SINGULAR);
				}
			}
			if (macoFeatureString.matches("V....P.")) {
				if (fusional) {
					compoundFeature.append(PLURAL.replaceAll("\\+", "_"));
				} else {
					// plural gets a morphemeLike feature of its own
					features_morphemeLike.add(PLURAL);
				}
				features_featureLike.add(PLURAL);
			}

			// Slot 7: Gender: only marked for participles, where it is a separate
			//                 feature. Gender alongside a fusional mood is
			//                 unexpected and only reported.
			final String[][] genderSlots = {
				{ "V.....M", MASCULINE, "masculine" },
				{ "V.....F", FEMININE,  "feminine" }
			};
			for (String[] slot : genderSlots) {
				if ( ! macoFeatureString.matches(slot[0])) {
					continue;
				}
				if (fusional) {
					System.err.println("Should never get here!! (" + slot[2] + ")");
				} else {
					features_morphemeLike.add(slot[1]);
				}
				features_featureLike.add(slot[1]);
			}

			if (fusional) {
				features_morphemeLike.add(compoundFeature.toString());
			}
		}

		private void 
		incorporateMorphoSyntacticFeaturesMarkedOnPronoun(String macoFeatureString) {
			// Pronouns are very idiosyncratic, yet some of them mark gender and
			// number fairly consistently. For consistency with the other parts of
			// speech, gender and plural number produce morpho challenge features
			// across all types of pronoun. Every other maco feature is inherent
			// to the stem, not morphologically marked, and therefore ignored.
			//
			// The maco feature string of a pronoun has eight slots.
			//
			// Slot 1: Category: P for Pronoun
			//
			// Slot 2: Type: Personal, Demonstrative, Possessive, Indefinite,
			//               Interrogative, Relative, Numeral, Exclamative
			//
			// Slot 3: Person: 1, 2, 3
			//
			// Slot 4: Gender: Masculine and Feminine (recorded)
			//
			// Slot 5: Number: Plural is marked (recorded)
			//
			// Slot 6: Case: Nominative, Accusative, Dative, Oblique
			//
			// Slot 7: Possessor Number: Singular or Plural
			//
			// Slot 8: Politeness
			final boolean marksMasculine = macoFeatureString.matches("P..M....");
			final boolean marksFeminine  = macoFeatureString.matches("P..F....");
			final boolean marksPlural    = macoFeatureString.matches("P...P...");

			// Slot 4 holds a single character, so at most one gender applies.
			if (marksMasculine) {
				features_morphemeLike.add(MASCULINE);
				features_featureLike.add(MASCULINE);
			} else if (marksFeminine) {
				features_morphemeLike.add(FEMININE);
				features_featureLike.add(FEMININE);
			}

			if (marksPlural) {
				features_morphemeLike.add(PLURAL);
				features_featureLike.add(PLURAL);
			}
		}

		private void 
		incorporateMorphoSyntacticFeaturesMarkedOnConjunction(String macoFeatureString) {
			// Intentionally empty: conjunctions are not morphologically marked in
			// Spanish, so the maco feature string is not inspected and no feature
			// is added.
		}

		private void 
		incorporateMorphoSyntacticFeaturesMarkedOnInterjection(String macoFeatureString) {
			// Intentionally empty: interjections are not morphologically marked in
			// Spanish, so the maco feature string is not inspected and no feature
			// is added.
		}

		private void 
		incorporateMorphoSyntacticFeaturesMarkedOnPreposition(String macoFeatureString) {
			// Intentionally empty: prepositions are not morphologically marked in
			// Spanish. There are some obligatory contractions for masculine
			// singular preposition objects, but these do not require any markup,
			// so the maco feature string is not inspected and no feature is added.
		}

		// Renders the morphemeLike features as one string: the features in the
		// iteration order of features_morphemeLike, separated by single spaces.
		// Yields the empty string when there are no features.
		public String getMorphemeLikeString() {
			StringBuilder joined = new StringBuilder();

			// empty before the first feature, a single space before every later one
			String separator = "";
			for (String feature : features_morphemeLike) {
				joined.append(separator).append(feature);
				separator = " ";
			}
			return joined.toString();
		}

		// Renders the featureLike features as one string: the features in the
		// iteration order of features_featureLike, separated by single spaces.
		// Yields the empty string when there are no features.
		public String getFeatureLikeString() {
			StringBuilder joined = new StringBuilder();

			// empty before the first feature, a single space before every later one
			String separator = "";
			for (String feature : features_featureLike) {
				joined.append(separator).append(feature);
				separator = " ";
			}
			return joined.toString();
		}
		
		// Two-line rendering: the morphemeLike string, the platform line
		// separator, then the featureLike string.
		@Override
		public String toString() {
			return getMorphemeLikeString()
				+ String.format("%n")
				+ getFeatureLikeString();
		}
		
		// Hash code consistent with equals().
		//
		// equals() compares the two feature collections by mutual containsAll(),
		// i.e. as sets: neither the order of the features nor duplicates matter.
		// The hash is therefore computed over the DISTINCT features of each
		// collection (a Set's hash code does not depend on iteration order), so
		// that two analyses that are equal always land in the same hash bucket
		// no matter what concrete collection type holds the features.
		//
		// The two hashes are combined as 31 * a + b rather than multiplied, since
		// a product collapses to 0 whenever either factor is 0.
		@Override
		public int hashCode() {
			int morphemeLikeHash = new HashSet<String>(features_morphemeLike).hashCode();
			int featureLikeHash  = new HashSet<String>(features_featureLike).hashCode();
			return 31 * morphemeLikeHash + featureLikeHash;
		}
		
		// Two analyses are equal when both their morphemeLike and their
		// featureLike collections contain each other's elements. Any object that
		// is not a MorphoChallengeAnalysis (including null) is unequal.
		@Override
		public boolean equals(Object o) {
			if (o instanceof MorphoChallengeAnalysis) {
				MorphoChallengeAnalysis other = (MorphoChallengeAnalysis) o;

				boolean sameMorphemeLike =
					this.features_morphemeLike.containsAll(other.features_morphemeLike)
					&& other.features_morphemeLike.containsAll(this.features_morphemeLike);

				// the featureLike side is only examined when the morphemeLike side agrees
				return sameMorphemeLike
					&& this.features_featureLike.containsAll(other.features_featureLike)
					&& other.features_featureLike.containsAll(this.features_featureLike);
			}
			return false;
		}
	}
	
	
	// Input: the Spanish word list; opened by the constructor, consumed line
	// by line in analyze().
	private BufferedReader spanishWordlistToAnalyze_BufferedReader;
	// Output: the "-morphemeLike.txt" answer key; opened by the constructor,
	// written in writeMorphoChallengeAnswerKeys().
	private PrintWriter morphoChallengeAnswerKey_morphemeLike_PrintWriter;
	// Output: the "-featureLike.txt" answer key; opened by the constructor,
	// written in writeMorphoChallengeAnswerKeys().
	private PrintWriter morphoChallengeAnswerKey_featureLike_PrintWriter;
	// Input: the Maco lexicon; opened by the constructor, consumed in
	// readMacoLexicon().
	private BufferedReader macoLexiconBufferedReader;
	
	
	// The Maco lexicon indexed by inflected word form; filled by
	// readMacoLexicon() and handed to every MorphoChallengeAnswerKeyEntry
	// created in analyze().
	private Map<String, MacoWordFormData> macoLexiconByWordForm = 
		new TreeMap<String, MacoWordFormData>();
	
	// The valid answer key entries, keyed by word form; filled by analyze()
	// and written out, in the TreeMap's key order, by
	// writeMorphoChallengeAnswerKeys().
	private Map<String, MorphoChallengeAnswerKeyEntry> 
		morphoChallengeAnswerKeyEntriesByWordForm =
			new TreeMap<String, MorphoChallengeAnswerKeyEntry>();
	
	
	/**
	 * Opens the input and output files and reads the complete Maco lexicon.
	 *
	 * @param spanishWordlistFileToAnalyze the word list that analyze() will read
	 * @param prefixToMorphoChallengeAnswerKeyFileNames prefix of the two output
	 *        files; "-morphemeLike.txt" and "-featureLike.txt" are appended to it
	 * @param macoLexiconFile the Maco lexicon file, read in full before the
	 *        constructor returns
	 * @throws IOException if reading the Maco lexicon fails
	 */
	public 
	AnalyzeASpanishWordlistIntoAMorphoChallengeAnswerKeyUsingMacoAndFreeLingData(
			File spanishWordlistFileToAnalyze, 
			String prefixToMorphoChallengeAnswerKeyFileNames,
			File macoLexiconFile) throws IOException {
		
		final String latin1 = "ISO-8859-1";
		
		spanishWordlistToAnalyze_BufferedReader =
			openFileForReading(spanishWordlistFileToAnalyze);
		
		// the two answer key flavors share the prefix and differ in the suffix
		morphoChallengeAnswerKey_morphemeLike_PrintWriter = 
			openFileForWriting(
				new File(prefixToMorphoChallengeAnswerKeyFileNames + "-morphemeLike.txt"),
				latin1);
		morphoChallengeAnswerKey_featureLike_PrintWriter = 
			openFileForWriting(
				new File(prefixToMorphoChallengeAnswerKeyFileNames + "-featureLike.txt"),
				latin1);
		
		macoLexiconBufferedReader = openFileForReading(macoLexiconFile);
		readMacoLexicon();
	}

	/**
	 * Reads the Maco lexicon from {@code macoLexiconBufferedReader} and indexes
	 * it by word form in {@code macoLexiconByWordForm}.
	 *
	 * Every non-blank line is expected to look like
	 * <pre>
	 *   citationForm#macoFeatureString wordForm wordForm ...
	 * </pre>
	 * For each word form on the line one MacoAnalysis (citation form plus Maco
	 * feature string) is appended to the MacoWordFormData of that word form. A
	 * word form may occur on several lines, in which case it accumulates
	 * several analyses.
	 *
	 * NOTE(review): on a malformed line this method prints "EXITING..." but
	 * merely returns, so the caller carries on with the part of the lexicon
	 * read so far -- confirm whether that is intended.
	 *
	 * @throws IOException if reading from the lexicon file fails
	 */
	private void readMacoLexicon() throws IOException {
	
		System.err.println();
		System.err.println("Reading the Spanish Maco Lexicon...");
		System.err.println();
		
		// The patterns do not depend on the line, so compile them once here
		// instead of once per line of the lexicon.
		final Pattern blankLinePattern = Pattern.compile("^\\s*$");
		// Each line starts with a citation form before a '#' character
		final Pattern citationFormPattern = Pattern.compile("^\\s*([^#]+)(.*)$");
		// immediately after the '#' character is the Maco Feature String
		// followed by a space separated list of word forms of that citation
		// form and Maco Feature String
		final Pattern macoFeatureStringPattern = Pattern.compile("^#(\\S+)(.*)$");
		// one whitespace delimited word form
		final Pattern wordFormPattern = Pattern.compile("(\\S+)");
		
		int lineCounter = 0;
		String lineFromMacoLexicon;
		while ((lineFromMacoLexicon = macoLexiconBufferedReader.readLine()) != null) {
			
			// skip blank lines
			if (blankLinePattern.matcher(lineFromMacoLexicon).matches()) {
				continue;
			}
			
			// progress report
			lineCounter++;
			if ((lineCounter%100000) == 0) {
				System.err.println("  " + lineCounter + " " + lineFromMacoLexicon);
				System.err.flush();
			}
			
			Matcher citationFormMatcher = 
				citationFormPattern.matcher(lineFromMacoLexicon);
			if ( ! citationFormMatcher.matches()) {
				System.err.println("The Maco Lexicon file is bad, no citation form found");
				System.err.println("  EXITING...");
				System.err.println();
				return;
			}
			String citationForm = citationFormMatcher.group(1);
			String rest = citationFormMatcher.group(2);
			
			Matcher macoFeatureStringMatcher = macoFeatureStringPattern.matcher(rest);
			if ( ! macoFeatureStringMatcher.matches()) {
				System.err.println("The Maco Lexicon file is bad, no feature string found");
				System.err.println("  EXITING...");
				System.err.println();
				return;
			}
			String macoFeatureString = macoFeatureStringMatcher.group(1);
			rest = macoFeatureStringMatcher.group(2);
			
			// Get each word form with this citation form and this Maco feature string
			// There may be more than one inflected form with the same Maco feature
			// string. In Spanish this occurs for verbs with the future subjunctive--
			// there are two equally valid inflections for future subjunctive, one is
			// somewhat archaic and used in writing and the other is more common.
			Matcher wordFormMatcher = wordFormPattern.matcher(rest);
			while (wordFormMatcher.find()) {
				
				String wordForm = wordFormMatcher.group(1);
				
				MacoWordFormData macoWordFormData = macoLexiconByWordForm.get(wordForm);
				if (macoWordFormData == null) {
					macoWordFormData = new MacoWordFormData();
					// record the word form, otherwise MacoWordFormData.toString()
					// renders it as "null"
					macoWordFormData.wordform = wordForm;
					macoLexiconByWordForm.put(wordForm, macoWordFormData);
				}
				
				macoWordFormData.addMacoAnalysis(
					new MacoAnalysis(citationForm, macoFeatureString));
			}
		}
	}

	/**
	 * Opens a file for reading as ISO-8859-1 (latin 1) text.
	 *
	 * If the file cannot be opened a diagnostic is printed to stderr and the
	 * JVM is terminated with a non-zero exit status, so that a calling script
	 * can tell the failure from a successful run.
	 *
	 * @param fileToOpen the file to read
	 * @return a buffered reader over the file
	 */
	public BufferedReader openFileForReading(File fileToOpen) {
		
		BufferedReader bufferedReaderToReturn = null;
		
		try {
			bufferedReaderToReturn = 
				new BufferedReader(
						new InputStreamReader(
								new FileInputStream(fileToOpen),
							    "ISO-8859-1")); //latin 1
		}
		catch(FileNotFoundException e) {	
			System.err.println();
			System.err.println("  Sorry.  The file: " + fileToOpen.getAbsolutePath());
			System.err.println("    could not be read.  Here is the full Java error:");
			System.err.println();
			System.err.println(e.getMessage());
			System.err.println();
			System.err.println("  Did NOT successfully set the corpus path.");
			System.err.println();
			System.exit(1); // failure: must not report success to the caller
		}
		catch(Exception e) {
			System.err.println();
			System.err.println("  Sorry.  While opening the file: " + fileToOpen.getAbsolutePath());
			System.err.println("    an error was encountered.  Here is the full Java error:");
			System.err.println();
			System.err.println(e.getMessage());
			System.err.println();
			System.err.println("  Did NOT successfully set the corpus path.");
			System.err.println();
			System.exit(1); // failure: must not report success to the caller
		}
		
		return bufferedReaderToReturn;
	}
	
	/**
	 * Opens a file for writing text in the given encoding. The returned
	 * PrintWriter is created with autoflush enabled.
	 *
	 * If the file cannot be opened a diagnostic is printed to stderr and the
	 * JVM is terminated with a non-zero exit status, so that a calling script
	 * can tell the failure from a successful run.
	 *
	 * @param fileToOpen the file to write
	 * @param encoding the name of the character encoding, e.g. "ISO-8859-1"
	 * @return an autoflushing print writer over the file
	 */
	private PrintWriter openFileForWriting(File fileToOpen, String encoding) {

		PrintWriter printWriterToReturn = null;
		
		try {
			printWriterToReturn = 
				new PrintWriter(
						new BufferedWriter(
								new OutputStreamWriter(
										new FileOutputStream(fileToOpen),
										encoding)),
						true); // true to autoflush
			
		} catch (FileNotFoundException e) {
			System.err.println();
			System.err.println("Cannot set the output file:");
			System.err.println("  " + fileToOpen.getAbsolutePath());
			System.err.println();
			System.exit(1); // failure: must not report success to the caller

		} catch (IOException e) {
			// e.g. an unsupported encoding name
			System.err.println("Failed to open the output file because");
			System.err.println("  of the following internal error:");
			e.printStackTrace();
			System.err.println();
			System.exit(1); // failure: must not report success to the caller
		}
		
		return printWriterToReturn;
	}

	
	/**
	 * Command line entry point: reads the Maco lexicon, analyzes the Spanish
	 * word list and writes the two Morpho Challenge answer key files.
	 *
	 * @param args the command line arguments, which must be exactly:
	 *
	 * <path-to-SpanishWordListToAnalyze>
	 * <path-to-MorphoChallengeAnswerKeyOutputFile-PREFIX>
	 * <path-to-MacoLexiconFile>
	 *
	 * With any other number of arguments the usage message is printed and the
	 * JVM exits with a non-zero status.
	 * @throws IOException if reading the lexicon or the word list fails
	 */
	public static void main(String[] args) throws IOException {
		if (args.length != 3) {
			final String newline = String.format("%n");
			System.out.println("The command line must look like:");
			System.out.println(
			 "    java AnalyzeASpanishWordlistIntoAMorphoChallengeAnswerKeyUsingMacoAndFreeLingData " + newline +
			 "        <path-to-SpanishWordListToAnalyze> " + newline +
			 "        <path-to-MorphoChallengeAnswerKeyOutputFile-PREFIX>  <-- NOTE: Prefix" + newline +
			 "        <path-to-MacoLexiconFile>" + newline + newline);
			System.out.println("    Exiting...");
			System.out.println();
			System.out.println();
			System.exit(1); // usage error: must not report success to the caller
		}
		
		AnalyzeASpanishWordlistIntoAMorphoChallengeAnswerKeyUsingMacoAndFreeLingData analyzer = 
			new AnalyzeASpanishWordlistIntoAMorphoChallengeAnswerKeyUsingMacoAndFreeLingData(
					new File(args[0]), 
					args[1],
					new File(args[2]));
		
		analyzer.analyze();
		analyzer.writeMorphoChallengeAnswerKeys();
	}    
	
	/**
	 * Reads the Spanish word list line by line and builds one
	 * MorphoChallengeAnswerKeyEntry per word form from the Maco lexicon.
	 * Only entries that report themselves as valid are kept, keyed by word
	 * form, in {@code morphoChallengeAnswerKeyEntriesByWordForm}.
	 *
	 * Blank lines are skipped. On every other line the word form is the first
	 * whitespace delimited token; whatever follows it is ignored.
	 *
	 * NOTE(review): a non-blank line that starts with whitespace does not match
	 * the word form pattern; in that case the method reports a bad word list
	 * and returns, leaving the remaining lines unread -- confirm that this is
	 * intended.
	 *
	 * @throws IOException if reading from the word list fails
	 */
	private void analyze() throws IOException {
		System.err.println();
		System.err.println("Analyzing the Spanish wordlist:");
		
		// The patterns do not depend on the line, so compile them once here
		// instead of once per line of the word list.
		final Pattern blankLinePattern = Pattern.compile("^\\s*$");
		// Get the first (and likely only) alpha token on each line
		final Pattern wordFormPattern = Pattern.compile("^(\\S+)(.*)");
		
		String lineFromSpanishWordlistToAnalyze;
		
		int wordFormCounter = 0;
		while ((lineFromSpanishWordlistToAnalyze = 
					spanishWordlistToAnalyze_BufferedReader.readLine()) != null) {
			
			// skip blank lines
			if (blankLinePattern.matcher(lineFromSpanishWordlistToAnalyze).matches()) {
				continue;
			}
			
			Matcher wordFormMatcher = 
				wordFormPattern.matcher(lineFromSpanishWordlistToAnalyze);
			if ( ! wordFormMatcher.matches()) {
				System.err.println("The list of Spanish word forms is bad");
				System.err.println("  EXITING...");
				System.err.println();
				return;
			}
			String wordForm = wordFormMatcher.group(1);
			
			// progress report
			wordFormCounter++;
			if ((wordFormCounter%1000) == 0) {
				System.err.println("  " + wordFormCounter + " " + wordForm);
			}
			
			MorphoChallengeAnswerKeyEntry morphoChallengeAnswerKeyEntry =
				new MorphoChallengeAnswerKeyEntry(wordForm, macoLexiconByWordForm);
			
			if (morphoChallengeAnswerKeyEntry.isValid) {
				morphoChallengeAnswerKeyEntriesByWordForm.put(
					wordForm, 
					morphoChallengeAnswerKeyEntry);
			}
		}
	}

	/**
	 * Writes every collected answer key entry to the two output files: its
	 * morphemeLike rendering to the morphemeLike answer key and its featureLike
	 * rendering to the featureLike answer key. Entries are visited in the
	 * iteration order of {@code morphoChallengeAnswerKeyEntriesByWordForm}.
	 */
	private void writeMorphoChallengeAnswerKeys() {
		
		for (MorphoChallengeAnswerKeyEntry entry : 
			morphoChallengeAnswerKeyEntriesByWordForm.values()) {
			
			// render both flavors first, then print one to each output file
			final String morphemeLikeText =
				entry.getMorphoChallengeAnalyses_morphemeLike();
			final String featureLikeText =
				entry.getMorphoChallengeAnalyses_featureLike();
			
			morphoChallengeAnswerKey_morphemeLike_PrintWriter.println(morphemeLikeText);
			morphoChallengeAnswerKey_featureLike_PrintWriter.println(featureLikeText);
		}
	}	
}

