package edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive;

import info.jonclark.properties.PropertiesException;
import info.jonclark.properties.SmartProperties;
import info.jonclark.stat.F1HierarchicalCalculator;
import info.jonclark.util.ArrayUtils;
import info.jonclark.util.FileUtils;
import info.jonclark.util.FormatUtils;
import info.jonclark.util.StringUtils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.Map;
import java.util.TreeMap;
import java.util.Map.Entry;

/**
 * Scores the results of inductive feature detection against a gold standard.
 * <p>
 * The gold standard file and all evaluation options are read from the
 * <code>inference.eval.*</code> properties when the evaluator is constructed.
 * Result files are then scored with {@link #evaluateMorphemes(File)} and
 * {@link #evaluateExpression(File)}, each of which returns a plain-text report.
 * <p>
 * The {@link #agreed} / {@link #disagreed} counters are mutated by
 * {@link #evaluateExpression(File)} and accumulate across calls.
 * 
 * @author jon
 */
public class InductiveResultsEvaluator {

	/** When true, {@link #evaluateExpression(File)} adds per-row details to its report. */
	public static boolean VERBOSE = false;

	/** Text appended to every report line. */
	private static final String NEWLINE = "\n";

	// Feature names that may be filtered out of every file read by readFile(),
	// each guarded by the corresponding ignore* switch below.
	private final HashSet<String> featuresWithoutMinPairs;
	private final HashSet<String> featuresWithApproximateMinPairs;
	private final HashSet<String> featuresRequiringSubsententialComparisons;

	/** Negation of the <code>inference.eval.ignoreExtraRows</code> property. */
	private final boolean penalizeSpurious;
	private final boolean ignoreFeaturesWithoutMinPairs;
	private final boolean ignoreFeaturesWithApproximateMinPairs;
	private final boolean ignoreFeaturesRequiringSubsententialComparisons;
	/** When true, affix morphemes (leading or trailing "-") are stripped from every row read. */
	private final boolean ignoreAffixes;
	/** When false, gold affixes are matched as suffixes/prefixes of result morphemes. */
	private final boolean requireExactAffixMatches;
	/** Gold standard rows, keyed as produced by {@link #readFile(File)}. */
	private final Map<String, InductiveResultRow> goldRows;

	/** Number of evaluated rows (so far) for which the result listed at least one morpheme. */
	int agreed = 0;
	/** Number of evaluated rows (so far) for which the result listed no morphemes. */
	int disagreed = 0;

	/**
	 * Creates an evaluator configured from the <code>inference.eval.*</code>
	 * properties and loads the gold standard file.
	 * 
	 * @param props
	 *            configuration; must provide the gold standard file, the three
	 *            feature lists and the boolean switches read below
	 * @throws FileNotFoundException
	 *             if the gold standard file cannot be opened
	 * @throws IOException
	 *             if reading the gold standard file fails
	 * @throws PropertiesException
	 *             propagated from the property lookups
	 */
	public InductiveResultsEvaluator(SmartProperties props) throws FileNotFoundException,
			IOException, PropertiesException {

		File goldFile = props.getPropertyFile("inference.eval.goldStandard");

		featuresWithoutMinPairs =
				new HashSet<String>(
						Arrays.asList(props.getPropertyStringArray("inference.eval.featuresWithoutMinPairs")));
		featuresWithApproximateMinPairs =
				new HashSet<String>(
						Arrays.asList(props.getPropertyStringArray("inference.eval.featuresWithApproximateMinPairs")));
		featuresRequiringSubsententialComparisons =
				new HashSet<String>(
						Arrays.asList(props.getPropertyStringArray("inference.eval.featuresRequiringSubsententialComparisons")));

		penalizeSpurious = !props.getPropertyBoolean("inference.eval.ignoreExtraRows");
		ignoreFeaturesWithoutMinPairs =
				props.getPropertyBoolean("inference.eval.ignoreFeaturesWithoutMinPairs");
		ignoreFeaturesWithApproximateMinPairs =
				props.getPropertyBoolean("inference.eval.ignoreFeaturesWithApproximateMinPairs");
		ignoreFeaturesRequiringSubsententialComparisons =
				props.getPropertyBoolean("inference.eval.ignoreFeaturesRequiringSubsententialComparisons");
		ignoreAffixes = props.getPropertyBoolean("inference.eval.ignoreAffixes");
		requireExactAffixMatches =
				props.getPropertyBoolean("inference.eval.requireExactAffixMatches");

		// The gold file must be read last: readFile() consults the filter
		// settings assigned above.
		goldRows = readFile(goldFile);
	}

	/**
	 * Looks up a gold standard row.
	 * 
	 * @param valueName
	 *            key of the row, as stored by {@link #readFile(File)}
	 * @return the gold row stored under that key, or <code>null</code> if
	 *         there is none
	 */
	public InductiveResultRow getRowForFeatureValue(String valueName) {
		return goldRows.get(valueName);
	}

	/**
	 * Command-line entry point: evaluates the results file named in the given
	 * properties file, prints both reports to standard output and writes a
	 * timestamped <code>.results</code> file (configuration followed by both
	 * reports) into the configured log directory.
	 * 
	 * @param args
	 *            exactly one element: the path of the detection properties file
	 * @throws Exception
	 *             if the configuration or any of the files cannot be read or
	 *             written
	 */
	public static void main(String[] args) throws Exception {

		if (args.length != 1) {
			System.err.println("Usage: program <detection_properties_file>");
			System.exit(1);
		}

		File propsFile = new File(args[0]);
		SmartProperties props = new SmartProperties(propsFile);
		InductiveResultsEvaluator eval = new InductiveResultsEvaluator(props);

		File resultsFile = props.getPropertyFile("inference.eval.actualResults");
		// NOTE(review): the three baseline files are looked up but not
		// evaluated at the moment. The lookups are kept because removing them
		// might change which properties are required -- confirm before
		// deleting.
		File togetherFile = props.getPropertyFile("inference.eval.baselineTogether");
		File separateFile = props.getPropertyFile("inference.eval.baselineSeparate");
		File randomFile = props.getPropertyFile("inference.eval.baselineRandom");
		File logDir = props.getPropertyFile("global.logDir");

		String morphemeReport = eval.evaluateMorphemes(resultsFile);
		String expressionReport = eval.evaluateExpression(resultsFile);

		System.out.println(morphemeReport);
		System.out.println(expressionReport);

		// write out the config file that was used to generate these results
		// and the results themselves as a permanent record
		PrintWriter permanentRecord =
				new PrintWriter(logDir + "/" + FormatUtils.formatDateTimeShort(new Date())
						+ ".results");
		try {
			FileUtils.insertFile(propsFile, permanentRecord);
			permanentRecord.println(morphemeReport);
			permanentRecord.println(expressionReport);
		} finally {
			// close (and flush) the record even if copying the config fails
			permanentRecord.close();
		}
	}

	/**
	 * Scores, per gold row, whether the result file lists any morphemes for a
	 * feature value that the gold standard marks as "always" or "sometimes".
	 * <p>
	 * Gold rows marked unobserved are skipped. For every other gold row that
	 * has a result row, one observed and one expected outcome are recorded, and
	 * a correct outcome is recorded when the gold row is always/sometimes and
	 * the result row has at least one morpheme. As a side effect,
	 * {@link #agreed} or {@link #disagreed} is incremented for each such row.
	 * 
	 * @param resultsFile
	 *            the result file to score
	 * @return a text report: diagnostics (plus per-row details if
	 *         {@link #VERBOSE}) followed by the F1 report
	 * @throws FileNotFoundException
	 *             if the results file cannot be opened
	 * @throws IOException
	 *             if reading the results file fails
	 */
	public String evaluateExpression(File resultsFile) throws FileNotFoundException, IOException {

		StringBuilder builder = new StringBuilder();
		Map<String, InductiveResultRow> resultRows = readFile(resultsFile);
		F1HierarchicalCalculator expressionCalc = new F1HierarchicalCalculator();

		for (final Entry<String, InductiveResultRow> goldEntry : goldRows.entrySet()) {
			InductiveResultRow goldRow = goldEntry.getValue();
			InductiveResultRow resultRow = resultRows.get(goldEntry.getKey());

			if (resultRow == null) {
				builder.append("Row not found (no output was provided) for: " + goldEntry.getKey()
						+ NEWLINE);
				continue;
			}

			if (goldRow.unobserved) {
				if (!resultRow.unobserved) {
					builder.append("Ignoring extra result row." + NEWLINE);
				}
				continue;
			}

			String featureType = StringUtils.substringBefore(goldRow.name, "-");
			boolean goldExpressed = goldRow.always || goldRow.sometimes;
			boolean resultHasMorphemes = resultRow.morphemes.length > 0;

			if (resultHasMorphemes) {
				agreed++;
			} else {
				disagreed++;
			}

			if (goldExpressed && resultHasMorphemes) {
				expressionCalc.addCorrectOutcome(featureType, goldRow.name, goldRow.value);
			}

			if (VERBOSE) {
				builder.append(goldRow.name + " " + goldRow.value + NEWLINE);
				builder.append("GOLD STANDARD SAID: " + goldExpressed + NEWLINE);
				builder.append("RESULT ROW SAID: " + (resultRow.always || resultRow.sometimes)
						+ NEWLINE);
				builder.append(Arrays.toString(resultRow.morphemes) + NEWLINE);
				builder.append(NEWLINE);
			}

			expressionCalc.addObservedOutcome(featureType, goldRow.name, goldRow.value);
			expressionCalc.addExpectedOutcome(featureType, goldRow.name, goldRow.value);
		}

		builder.append(resultsFile.getAbsolutePath() + ": Expression report:" + NEWLINE);
		builder.append(expressionCalc.getF1Report(2) + NEWLINE);

		return builder.toString();
	}

	/**
	 * Scores the morphemes (and, internally, the "same-as" clustering) listed
	 * in a result file against the gold standard.
	 * <p>
	 * Only gold rows that are observed and marked always/sometimes are scored.
	 * Each gold morpheme counts as expected; it additionally counts as correct
	 * and observed when some result morpheme matches it (see
	 * {@link #morphemeMatches(String, String)}). When spurious output is
	 * penalized, every result morpheme that matched no gold morpheme counts as
	 * one additional observed outcome, and every result row without a gold
	 * counterpart is counted against the cluster score.
	 * 
	 * @param resultsFile
	 *            the result file to score
	 * @return a text report: per-morpheme diagnostics followed by the morpheme
	 *         F1 report
	 * @throws FileNotFoundException
	 *             if the results file cannot be opened
	 * @throws IOException
	 *             if reading the results file fails
	 */
	public String evaluateMorphemes(File resultsFile) throws FileNotFoundException, IOException {

		StringBuilder builder = new StringBuilder();
		Map<String, InductiveResultRow> resultRows = readFile(resultsFile);

		F1HierarchicalCalculator clusterCalc = new F1HierarchicalCalculator();
		F1HierarchicalCalculator morphemeCalc = new F1HierarchicalCalculator();

		// Walk the gold rows and compare each with its result row. Result rows
		// are removed as they are visited so that whatever remains afterwards
		// is spurious output.
		for (final Entry<String, InductiveResultRow> goldEntry : goldRows.entrySet()) {
			InductiveResultRow goldRow = goldEntry.getValue();
			InductiveResultRow resultRow = resultRows.get(goldEntry.getKey());

			if (resultRow == null) {
				builder.append("ERROR: " + "Row not found: " + goldEntry.getKey() + NEWLINE);
				continue;
			}

			// record that we've already analyzed this entry
			resultRows.remove(goldEntry.getKey());

			if (goldRow.unobserved || !(goldRow.always || goldRow.sometimes)) {
				continue;
			}

			// A row is correct in terms of clustering if it has the same
			// "same-as" set as the gold standard.
			// Calculator contexts: feature type (prefix of the feature name
			// before the first "-"), feature name, feature value.
			String featureType = StringUtils.substringBefore(goldRow.name, "-");
			if (ArrayUtils.equalsSetwise(goldRow.sameAs, resultRow.sameAs)) {
				clusterCalc.addCorrectOutcome(featureType, goldRow.name, goldRow.value);
			}
			clusterCalc.addObservedOutcome(featureType, goldRow.name, goldRow.value);
			clusterCalc.addExpectedOutcome(featureType, goldRow.name, goldRow.value);

			// covered[i] becomes true once result morpheme i has been matched
			// by some gold morpheme
			boolean[] covered = new boolean[resultRow.morphemes.length];
			String actualMorphemes = StringUtils.untokenize(resultRow.morphemes);

			for (final String goldMorpheme : goldRow.morphemes) {

				// the first matching result morpheme wins
				boolean matchFound = false;
				for (int i = 0; i < resultRow.morphemes.length; i++) {
					if (morphemeMatches(goldMorpheme, resultRow.morphemes[i])) {
						matchFound = true;
						covered[i] = true;
						break;
					}
				}

				String morphemeType = morphemeTypeOf(goldMorpheme);
				if (matchFound) {
					morphemeCalc.addCorrectOutcome(morphemeType, featureType, goldRow.name,
							goldRow.value);
					morphemeCalc.addObservedOutcome(morphemeType, featureType, goldRow.name,
							goldRow.value);

					builder.append("Morpheme MATCHED: GOLD: " + goldMorpheme + " ACTUAL: ("
							+ actualMorphemes + ") for " + goldRow.name + " " + goldRow.value
							+ NEWLINE);
				} else {
					builder.append("Morpheme not matched: GOLD: " + goldMorpheme + " ACTUAL: ("
							+ actualMorphemes + ") for " + goldRow.name + " " + goldRow.value
							+ NEWLINE);
				}
				morphemeCalc.addExpectedOutcome(morphemeType, featureType, goldRow.name,
						goldRow.value);
			}

			if (penalizeSpurious) {
				// Penalize spurious morphemes: only result morphemes that no
				// gold morpheme matched. Matched ones were already recorded as
				// observed above; counting them again would make even a
				// perfect result look half wrong.
				for (int i = 0; i < resultRow.morphemes.length; i++) {
					if (covered[i]) {
						continue;
					}
					morphemeCalc.addObservedOutcome(morphemeTypeOf(resultRow.morphemes[i]),
							featureType, goldRow.name, goldRow.value);
				}
			}
		}

		if (penalizeSpurious) {
			// penalize spurious rows: everything still left in resultRows had
			// no gold counterpart
			for (final Entry<String, InductiveResultRow> resultEntry : resultRows.entrySet()) {
				InductiveResultRow resultRow = resultEntry.getValue();

				if (resultRow.value.endsWith("-n/a")) {
					continue;
				}

				if (!resultRow.when.equals("")) {
					builder.append("ERROR: Row not analyzed: " + resultEntry.getKey() + NEWLINE);
				}
				String featureType = StringUtils.substringBefore(resultRow.name, "-");
				clusterCalc.addObservedOutcome(featureType, resultRow.name, resultRow.value);
				builder.append("found spurious row: " + resultRow.value + " (penalizing)"
						+ NEWLINE);
			}
		}

		// The cluster report is computed but deliberately switched off here.
		final boolean showClusterReport = false;
		if (showClusterReport) {
			builder.append(resultsFile.getAbsolutePath() + ": Cluster report:" + NEWLINE);
			builder.append(clusterCalc.getF1Report(2) + NEWLINE);
		}
		builder.append(resultsFile.getAbsolutePath() + ": Morpheme report:" + NEWLINE);
		builder.append(morphemeCalc.getF1Report(2) + NEWLINE);

		// we can't guarantee there's the same number of rows in each file
		// because extra rows will be added when "when" is non-blank

		return builder.toString();
	}

	/**
	 * Reads a gold standard or result file, one row per line.
	 * <p>
	 * Rows whose feature name is in one of the configured ignore lists (with
	 * the matching switch enabled) are dropped. If affixes are ignored, the
	 * row's affixes are cleared and morphemes starting or ending with "-" are
	 * removed. Each kept row is stored under its value; a row with a non-empty
	 * "when" is additionally stored under <code>value + "X" + when</code>.
	 * 
	 * @param file
	 *            the file to read
	 * @return the rows, sorted by key
	 * @throws FileNotFoundException
	 *             if the file cannot be opened
	 * @throws IOException
	 *             if reading fails
	 * @throws RuntimeException
	 *             rethrown from row parsing after the file name and line
	 *             number have been printed to standard error
	 */
	public Map<String, InductiveResultRow> readFile(File file) throws FileNotFoundException,
			IOException {

		TreeMap<String, InductiveResultRow> rows = new TreeMap<String, InductiveResultRow>();
		BufferedReader in = new BufferedReader(new FileReader(file));

		try {
			// 1-based number of the line being processed. It is advanced
			// before any filtering so that skipped rows cannot shift the line
			// numbers handed to later rows and error messages.
			int nLine = 0;
			String line;
			while ((line = in.readLine()) != null) {
				nLine++;
				try {
					InductiveResultRow row =
							new InductiveResultRow(line, file.getAbsolutePath(), nLine);

					if (isIgnoredFeature(row.name)) {
						continue;
					}

					if (ignoreAffixes) {
						row.affixes = new String[0];
						ArrayList<String> words = new ArrayList<String>();
						for (String morpheme : row.morphemes) {
							if (!isAffix(morpheme)) {
								words.add(morpheme);
							}
						}
						row.morphemes = words.toArray(new String[words.size()]);
					}

					if (!row.when.equals("")) {
						rows.put(row.value + "X" + row.when, row);
					}
					rows.put(row.value, row);
				} catch (RuntimeException e) {
					System.err.println("Error at " + file.getAbsolutePath() + ":" + nLine);
					throw e;
				}
			}
		} finally {
			// release the file handle even when a row fails to parse
			in.close();
		}
		return rows;
	}

	/** Tells whether rows of the named feature are filtered out by the current configuration. */
	private boolean isIgnoredFeature(String featureName) {
		if (ignoreFeaturesWithoutMinPairs && featuresWithoutMinPairs.contains(featureName)) {
			return true;
		}
		if (ignoreFeaturesWithApproximateMinPairs
				&& featuresWithApproximateMinPairs.contains(featureName)) {
			return true;
		}
		return ignoreFeaturesRequiringSubsententialComparisons
				&& featuresRequiringSubsententialComparisons.contains(featureName);
	}

	/** Tells whether a morpheme is written as an affix, i.e. starts or ends with "-". */
	private static boolean isAffix(String morpheme) {
		return morpheme.startsWith("-") || morpheme.endsWith("-");
	}

	/** Returns the morpheme category used as the top-level context of the morpheme score. */
	private static String morphemeTypeOf(String morpheme) {
		return isAffix(morpheme) ? "affix" : "word";
	}

	/**
	 * Tells whether a result morpheme satisfies a gold morpheme. Unless exact
	 * affix matches are required, a gold "-xyz" matches any result morpheme
	 * ending in "xyz" and a gold "xyz-" matches any result morpheme starting
	 * with "xyz"; everything else must be equal.
	 */
	private boolean morphemeMatches(String goldMorpheme, String resultMorpheme) {
		if (!requireExactAffixMatches) {
			if (goldMorpheme.startsWith("-")) {
				return resultMorpheme.endsWith(goldMorpheme.substring(1));
			}
			if (goldMorpheme.endsWith("-")) {
				return resultMorpheme.startsWith(goldMorpheme.substring(0,
						goldMorpheme.length() - 1));
			}
		}
		return goldMorpheme.equals(resultMorpheme);
	}
}
