/**
 * The AVENUE Project
 * Language Technologies Institute
 * School of Computer Science
 * (c) 2007 Carnegie Mellon University
 * 
 * Corpus Navigator
 * Written by Jonathan Clark
 */
package edu.cmu.cs.lti.avenue.navigation.featuredetection.deductive;

import static edu.cmu.cs.lti.avenue.navigation.featuredetection.deductive.RuleConstants.ALL;
import static edu.cmu.cs.lti.avenue.navigation.featuredetection.deductive.RuleConstants.DEFAULT;
import static edu.cmu.cs.lti.avenue.navigation.featuredetection.deductive.RuleConstants.NONE;
import info.jonclark.util.StringUtils;

import java.text.ParseException;

import edu.cmu.cs.lti.avenue.corpus.SentencePair;
import edu.cmu.cs.lti.avenue.trees.smart.TreeNode;

/**
 * Static helpers that decide whether a group of {@link Result}s may be
 * combined under a {@link Rule}: the sentence variables must be bound
 * consistently and, optionally, the source sentences must overlap enough to
 * satisfy the rule's overlap function.
 */
public class OverlapEvaluator {

	// Disabled pruning strategy, kept for reference.
	//
	// /**
	// * Though this function is very expensive to run, it may make some
	// * otherwise intractable lattices tractable. Checks lattice 4-grams to
	// * see if proper pairings exist
	// *
	// * @param rule
	// * @param lattice
	// * @throws ParseException
	// */
	// protected static void pruneResultLattice(Rule rule,
	// ArrayList<LexicalResult>[] lattice)
	// throws ParseException {
	//
	// int nPruned = 0;
	//
	// for (int x = 0; x <= lattice.length - 4; x++) {
	// for (int i = lattice[x].size() - 1; i >= 0; i--) {
	//
	// // can this element be legally paired with ANY element in the
	// // next position?
	// boolean hasMatch = false;
	//
	// for (int j = lattice[x + 1].size() - 1; j >= 0; j--) {
	// for (int k = lattice[x + 2].size() - 1; k >= 0; k--) {
	// for (int l = lattice[x + 3].size() - 1; l >= 0; l--) {
	// if (isProperPair(rule, false, lattice[x].get(i), lattice[x + 1].get(j),
	// lattice[x + 2].get(k), lattice[x + 3].get(l))) {
	// hasMatch = true;
	// break;
	// }
	// }
	// }
	// }
	// if (!hasMatch) {
	// lattice[x].remove(i);
	// nPruned++;
	// }
	// }
	// }
	//
	// System.out.println("Pruned " + nPruned);
	// }

	/**
	 * Ensures that each sentence variable corresponds to a unique sentence
	 * instance. (We don't want to compare a sentence to itself unless the user
	 * intended that. Likewise, we want to compare a sentence only to itself if
	 * that's what the user intended).
	 * <p>
	 * This method also checks that the overlap filter is satisfied.
	 * <p>
	 * Passing in a results array with null elements is fine; the null elements
	 * will be ignored, both by the variable check and by the overlap check.
	 * <p>
	 * You should always set checkOverlap to true unless you are just using this
	 * method as a preemptive pruning strategy and you plan to call it again
	 * later with checkOverlap true.
	 * <p>
	 * When the overlap is evaluated, every non-null result receives a comment
	 * of the form <code>editdistance=&lt;overlap&gt;</code>.
	 * 
	 * @param rule
	 *            the rule whose overlap function is applied; only consulted
	 *            when checkOverlap is true
	 * @param checkOverlap
	 *            whether to evaluate the overlap function after the sentence
	 *            variable check has passed
	 * @param results
	 *            the results to check against each other; elements may be null
	 * @return false if two matches from different results share a sentence
	 *         variable but not a sentence pair, or share a sentence pair but
	 *         not a sentence variable; otherwise true if checkOverlap is false
	 *         or the overlap function is NONE; otherwise whether the overlap
	 *         ratio equals 1.0 (ALL) or is at least .55 (DEFAULT). An
	 *         undefined (NaN) overlap satisfies neither ALL nor DEFAULT.
	 * @throws ParseException
	 *             if the rule's overlap function name is not one of NONE, ALL
	 *             or DEFAULT
	 */
	protected static boolean isProperPair(Rule rule, boolean checkOverlap, Result... results)
			throws ParseException {

		if (!hasConsistentSentenceVariables(results)) {
			return false;
		}

		if (!checkOverlap) {
			return true;
		}

		// the variable matching checks out
		// what about the overlap?
		TreeNode overlapFunction = rule.getOverlapFunction();
		String functionName = overlapFunction.getValues().get(0);

		// Compare by value: reference equality would silently depend on every
		// producer of these names having interned them.
		if (NONE.equals(functionName)) {
			return true;
		}

		float distance = getOverlap(results);
		for (Result result : results) {
			// null elements are documented as legal, so skip them here too
			if (result != null) {
				result.addComment("editdistance=" + distance);
			}
		}

		if (ALL.equals(functionName)) {
			return (distance == 1.0);
		} else if (DEFAULT.equals(functionName)) {
			return (distance >= .55);
		} else {
			throw new ParseException("Unknown overlap function name: " + functionName, -1);
		}
	}

	/**
	 * Checks every pattern match of each result against every pattern match of
	 * each later result: matches with the same sentence variable must refer to
	 * equal sentence pairs, and matches with different sentence variables must
	 * refer to unequal sentence pairs.
	 * <p>
	 * Each unordered pair of results is visited once (the inner index starts
	 * after the outer one). Matches belonging to the same result are not
	 * compared with each other. Null results are skipped.
	 */
	private static boolean hasConsistentSentenceVariables(Result... results) {

		for (int i = 0; i < results.length; i++) {

			Result resultA = results[i];
			if (resultA == null) {
				continue;
			}

			for (final SentencePatternMatch matchA : resultA.getPatternMatches()) {

				assert matchA.getSentenceVariable() == matchA.getSentenceVariable().intern() : "Sentence variable not interned.";

				for (int j = i + 1; j < results.length; j++) {

					Result resultB = results[j];
					if (resultB == null) {
						continue;
					}

					for (final SentencePatternMatch matchB : resultB.getPatternMatches()) {

						boolean sameVariable =
								sameToken(matchA.getSentenceVariable(), matchB.getSentenceVariable());
						boolean sameSentence =
								matchA.getSentencePair().equals(matchB.getSentencePair());

						// same variable => sentences must be the same;
						// different variable => sentences must not be the same
						if (sameVariable != sameSentence) {
							return false;
						}
					}
				}
			}
		}

		return true;
	}

	/**
	 * Computes how strongly the source sentences of the given results overlap.
	 * <p>
	 * The sentence pair of the first pattern match encountered is used as the
	 * backbone. Every later sentence pair is compared to it: the number of
	 * normalized source tokens the two share is added to a running match
	 * count, and the length of the longer of the two token arrays is added to
	 * a running token count. The result is the ratio of the two totals.
	 * <p>
	 * Each token is matched at most once, so the ratio never exceeds 1.0 even
	 * when a sentence repeats a token. For sentences without repeated tokens
	 * this equals counting every matching token pair.
	 * <p>
	 * Null results are skipped.
	 * 
	 * @return the overlap ratio, or NaN when there was nothing to compare
	 *         (fewer than two pattern matches, or only empty sentences)
	 */
	private static float getOverlap(Result... results) {

		int nTotalMatch = 0;
		int nTotalTokens = 0;

		SentencePair backbone = null;
		for (Result result : results) {
			if (result == null) {
				continue;
			}

			for (SentencePatternMatch match : result.getPatternMatches()) {

				SentencePair pair = match.getSentencePair();

				// use the first match as our backbone;
				if (backbone == null) {
					backbone = pair;
					continue;
				}

				String[] backboneTokens = backbone.getNormalizedSourceTokens();
				String[] pairTokens = pair.getNormalizedSourceTokens();

				nTotalMatch += countSharedTokens(backboneTokens, pairTokens);
				nTotalTokens += Math.max(backboneTokens.length, pairTokens.length);

				// TODO: F-Structure Overlap
			}
		}

		if (nTotalTokens == 0) {
			// Same value the bare 0/0 float division would produce, but
			// explicit: callers' comparisons against it are always false.
			return Float.NaN;
		}

		return (float) nTotalMatch / (float) nTotalTokens;
	}

	/**
	 * Counts the tokens two sentences have in common, pairing each token of
	 * one array with at most one token of the other.
	 */
	private static int countSharedTokens(String[] s1, String[] s2) {
		boolean[] consumed = new boolean[s2.length];
		int nShared = 0;

		for (String token : s1) {
			for (int l = 0; l < s2.length; l++) {
				if (!consumed[l] && sameToken(token, s2[l])) {
					consumed[l] = true;
					nShared++;
					break;
				}
			}
		}

		return nShared;
	}

	/**
	 * Null-safe value comparison of two strings. Gives the same answer as
	 * reference equality for interned strings, without requiring interning.
	 */
	private static boolean sameToken(String a, String b) {
		return a == b || (a != null && a.equals(b));
	}

	/**
	 * Token-level Levenshtein distance: the minimum number of token
	 * insertions, deletions and substitutions needed to turn s1 into s2.
	 * Modified from http://www.merriampark.com/ld.htm#JAVA
	 * <p>
	 * Tokens are compared by value, so they do not need to be interned.
	 * 
	 * @param s1
	 *            the first token sequence
	 * @param s2
	 *            the second token sequence
	 * @return the edit distance between the two token sequences
	 */
	private static int getEditDistance(String[] s1, String[] s2) {

		// Step 1: an empty sequence is as far away as the other one is long
		final int n = s1.length;
		final int m = s2.length;
		if (n == 0) {
			return m;
		}
		if (m == 0) {
			return n;
		}

		// d[i][j] = distance between the first i tokens of s1 and the first j
		// tokens of s2
		final int[][] d = new int[n + 1][m + 1];

		// Step 2: distance to/from the empty prefix
		for (int i = 0; i <= n; i++) {
			d[i][0] = i;
		}
		for (int j = 0; j <= m; j++) {
			d[0][j] = j;
		}

		// Steps 3-6: fill the matrix row by row
		for (int i = 1; i <= n; i++) {
			final String s_i = s1[i - 1];

			for (int j = 1; j <= m; j++) {
				final String t_j = s2[j - 1];

				// Step 5: substituting a token for an equal one is free
				final int cost = sameToken(s_i, t_j) ? 0 : 1;

				// Step 6: cheapest of deletion, insertion and substitution
				d[i][j] =
						Math.min(d[i - 1][j] + 1, Math.min(d[i][j - 1] + 1, d[i - 1][j - 1] + cost));
			}
		}

		// Step 7
		return d[n][m];
	}

	/**
	 * Small manual check: prints the token edit distance between two
	 * hard-coded example sentences.
	 */
	public static void main(String[] args) throws Exception {
		String[] s1 = StringUtils.tokenize("hi there");
		String[] s2 = StringUtils.tokenize("there hi bleh blah");
		StringUtils.internTokens(s1);
		StringUtils.internTokens(s2);
		System.out.println(getEditDistance(s1, s2));

	}
}
