package edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive;

import info.jonclark.io.StringTable;
import info.jonclark.util.ArrayUtils;
import info.jonclark.util.DebugUtils;
import info.jonclark.util.FormatUtils;
import info.jonclark.util.StringUtils;
import it.unimi.dsi.fastutil.ints.Int2ObjectRBTreeMap;
import it.unimi.dsi.fastutil.longs.Long2ObjectRBTreeMap;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map.Entry;

import edu.cmu.cs.lti.avenue.corpus.CorpusException;
import edu.cmu.cs.lti.avenue.corpus.SentencePair;
import edu.cmu.cs.lti.avenue.featurespecification.FeatureContext;
import edu.cmu.cs.lti.avenue.featurespecification.FeatureStructureException;
import edu.cmu.cs.lti.avenue.morphology.Segmenter;
import edu.cmu.cs.lti.avenue.morphology.SegmenterException;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.evidence.ArcEvidenceCluster;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.evidence.FeatureMarking;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.evidence.FeatureSimilarity;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.evidence.NodeEvidence;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.matrices.MatrixVisitor;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.matrices.TriangularMatrixEvidenceCluster;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.matrices.TriangularMatrixFloat;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.simulation.SimulatedArc;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.simulation.SimulatedNode;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.simulation.SimulationResult;

/**
 * A representation of a MinimalCluster that contains examples that differ from
 * each other by one (or nInteractions) feature(s). This class is the workhorse
 * of ruleless feature detection. If all feature values in a feature group have
 * been explored, this is a complete graph; otherwise, it is a partial graph
 * with some nodes possibly having no arcs.
 * 
 * @author jon
 */
public class FeatureExpressionGraph {

	// Counters of similarity arcs and difference arcs found by the most recent
	// counting pass. They are static, so all graphs share (and reset) them.
	public static int nSim = 0;
	public static int nDif = 0;

	// Per-step flags consulted by calculate(int): step N is only executed
	// while calculatedN is still false.
	private boolean calculated1 = false;
	private boolean calculated2 = false;
	private boolean calculated3 = false;
	private boolean calculated4 = false;
	private boolean calculated5 = false;
	private boolean calculated6 = false;
	private boolean calculated7 = false;

	// handed to the arc matrices together with every piece of evidence appended
	private final PlateauFunction distancePenalty;
	private FeatureInteraction featureInteraction;
	// the crossed values of featureInteraction; observations are filed under
	// the index of their value in this array
	private final String[] values;
	// valuesObserved[i] becomes true once values[i] took part in a comparison
	private boolean[] valuesObserved;

	// matrices of connections between values
	// each cell represents an arc in a graphical FEG
	private TriangularMatrixEvidenceCluster<FeatureMarking> timesObservedAsDifferent;
	private TriangularMatrixEvidenceCluster<FeatureSimilarity> timesObservedAsSame;
	private TriangularMatrixFloat percentObservedDifferent;

	// real observations, keyed by CompatibleObservations.makeKey(...)
	private final Long2ObjectRBTreeMap<CompatibleObservations> observations =
			new Long2ObjectRBTreeMap<CompatibleObservations>();
	// set by calculate(int), cleared by addObservation(...)
	private boolean calculated = false;
	// value pairs whose fraction of "different" evidence is below this
	// threshold are merged; pairs at or above it are split
	private final double expressionThreshold;

	// observations recorded through simulateObserving(...); same keying as
	// the real observations, emptied by endSimulation()
	private final Long2ObjectRBTreeMap<CompatibleObservations> simulatedObservations =
			new Long2ObjectRBTreeMap<CompatibleObservations>();
	private boolean inSimulation = false;

	// don't overflow memory with unused arraylist entries
	private static final int SMALL_SIZE = 1;

	// clusters of values produced by steps 3 and 4 of calculate(int)
	private HashSet<FeatureValueCluster> valueClusters = new HashSet<FeatureValueCluster>();
	// values that never took part in a comparison (filled by step 5)
	private ArrayList<FeatureValueInteraction> unobservedValues =
			new ArrayList<FeatureValueInteraction>();

	// used when extracting morphemes from arc evidence (step 7)
	private final Segmenter segmenter;

	// private ArrayList<EvidenceCluster> evidenceClusters = new
	// ArrayList<EvidenceCluster>();

	/**
	 * Returns the position (1-based) of the highest set bit of {@code n}, i.e.
	 * the number of low-order bits needed to hold its bit pattern. Zero yields
	 * 0; any negative value yields 32 because its sign bit is set.
	 */
	private static int width(int n) {
		return Integer.SIZE - Integer.numberOfLeadingZeros(n);
	}

	/**
	 * One recorded observation: a sentence pair together with the minimal pair
	 * mapping that was supplied when it was observed.
	 */
	private static class Observation {
		public SentencePair sent;
		public MinimalPairMapping mapping;

		public Observation(SentencePair sent, MinimalPairMapping mapping) {
			this.mapping = mapping;
			this.sent = sent;
		}
	}

	/**
	 * The observations that share one (lexCluster, minPair, featureContext)
	 * key, grouped by the index of the feature value each of them expresses.
	 */
	private static class CompatibleObservations {

		// Layout of the packed key, from high to low bits:
		// [lexCluster: 24 bits][minPair: 26 bits][featureContext: 14 bits]
		private static final int FEATURE_CONTEXT_BITS = 14;
		private static final int MIN_PAIR_BITS = 26;
		private static final int LEX_CLUSTER_BITS = 24;
		private static final int MIN_PAIR_OFFSET = FEATURE_CONTEXT_BITS;
		private static final int LEX_CLUSTER_OFFSET = MIN_PAIR_OFFSET + MIN_PAIR_BITS;

		private final Int2ObjectRBTreeMap<ArrayList<Observation>> observations =
				new Int2ObjectRBTreeMap<ArrayList<Observation>>();
		// shared result for value indices without observations; callers must
		// treat it as read-only
		private static final ArrayList<Observation> EMPTY_LIST = new ArrayList<Observation>(0);

		/**
		 * Combine 3 parameters into a single long without loss of information.
		 * <p>
		 * The packing allows for:
		 * <ul>
		 * <li>2^24 lexClusters = 16,777,216 (offset = 40)</li>
		 * <li>2^26 minPairs = 67,108,864 (offset = 14)</li>
		 * <li>2^14 featureContexts = 16,384 (offset = 0)</li>
		 * </ul>
		 * The three fields use exactly 64 bits. (An earlier version shifted
		 * lexCluster by 48, which pushed every lexCluster bit above the 16th
		 * out of the long and made such keys collide.)
		 * <p>
		 * Range violations, including negative arguments, are only detected
		 * when assertions are enabled.
		 * 
		 * @param lexCluster
		 *            non-negative, below 2^24
		 * @param minPair
		 *            non-negative, below 2^26
		 * @param featureContext
		 *            non-negative, below 2^14
		 * @return the packed key
		 */
		public static long makeKey(int lexCluster, int minPair, int featureContext) {

			if (DebugUtils.isAssertEnabled()) {
				// a negative int has width 32 and therefore fails every check
				assert width(lexCluster) <= LEX_CLUSTER_BITS : "lexCluster overflow";
				assert width(minPair) <= MIN_PAIR_BITS : "mpSize overflow";
				assert width(featureContext) <= FEATURE_CONTEXT_BITS : "fsSize overflow";
			}

			return ((long) lexCluster << LEX_CLUSTER_OFFSET)
					| ((long) minPair << MIN_PAIR_OFFSET) | (long) featureContext;
		}

		/**
		 * Files an observation under the given feature value index.
		 * 
		 * @param nValueIndex
		 *            The index of the feature value in the FeatureSpec's array
		 * @param sentence
		 *            the observed sentence pair
		 * @param minPairMapping
		 *            the mapping stored alongside the sentence
		 */
		public void addObservation(int nValueIndex, SentencePair sentence,
				MinimalPairMapping minPairMapping) {
			ArrayList<Observation> list = observations.get(nValueIndex);
			if (list == null) {
				// most value indices receive very few observations
				list = new ArrayList<Observation>(SMALL_SIZE);
				observations.put(nValueIndex, list);
			}
			list.add(new Observation(sentence, minPairMapping));
		}

		/**
		 * @param nValueIndex
		 *            The index of the feature value in the FeatureSpec's array
		 * @return the observations filed under that index; never null (an
		 *         empty list when there are none)
		 */
		public ArrayList<Observation> getObservations(int nValueIndex) {
			ArrayList<Observation> result = observations.get(nValueIndex);
			if (result != null) {
				return result;
			} else {
				return EMPTY_LIST;
			}
		}
	}

	/**
	 * Creates an empty graph for the given feature interaction.
	 * 
	 * @param featureInteraction
	 *            the interaction whose crossed values become the nodes
	 * @param expressionThreshold
	 *            fraction of "different" evidence at or above which two values
	 *            are treated as distinctly expressed
	 * @param segmenter
	 *            used when morphemes are extracted from arc evidence
	 * @param distancePenalty
	 *            passed along with all evidence appended to the arc matrices
	 */
	public FeatureExpressionGraph(FeatureInteraction featureInteraction,
			double expressionThreshold, Segmenter segmenter, PlateauFunction distancePenalty) {

		this.featureInteraction = featureInteraction;
		this.values = featureInteraction.crossedValues;

		this.expressionThreshold = expressionThreshold;
		this.distancePenalty = distancePenalty;
		this.segmenter = segmenter;
	}

	/**
	 * Records a real (non-simulated) observation and invalidates everything
	 * computed so far.
	 * <p>
	 * Besides {@code calculated}, the per-step flags are reset as well.
	 * Otherwise the next {@code calculate(...)} would find every step already
	 * marked as done and would keep returning results that ignore this
	 * observation. Resetting step 1 makes the next calculation start from
	 * fresh matrices, so earlier evidence is not counted twice.
	 * 
	 * @param sentence
	 *            the observed sentence pair
	 * @param featureValues
	 *            the feature values expressed; crossed to find the node
	 * @param nLexCluster
	 *            lexical cluster part of the compatibility key
	 * @param nMinPair
	 *            minimal pair part of the compatibility key
	 * @param nFeatureContext
	 *            feature context part of the compatibility key
	 * @param minPairMapping
	 *            mapping stored with the observation
	 * @throws FeatureStructureException
	 *             if the crossed value is not one of this graph's values
	 */
	public void addObservation(SentencePair sentence, String[] featureValues, int nLexCluster,
			int nMinPair, int nFeatureContext, MinimalPairMapping minPairMapping)
			throws FeatureStructureException {

		calculated = false;
		calculated1 = false;
		calculated2 = false;
		calculated3 = false;
		calculated4 = false;
		calculated5 = false;
		calculated6 = false;
		calculated7 = false;

		observe(sentence, featureValues, nLexCluster, nMinPair, nFeatureContext, minPairMapping,
				false);
	}

	/**
	 * Files one observation, either among the real observations or among the
	 * simulated ones.
	 * 
	 * @param simulate
	 *            true to store into the simulated observations
	 * @throws FeatureStructureException
	 *             if the crossed feature value does not belong to this graph
	 */
	private void observe(SentencePair sentence, String[] featureValues, int nLexCluster,
			int nMinPair, int nFeatureContext, MinimalPairMapping minPairMapping, boolean simulate)
			throws FeatureStructureException {

		// work out which of our crossed values this observation expresses
		final String crossedValue = StringUtils.untokenize(featureValues, FeatureInteraction.X);
		final int valueIndex = ArrayUtils.findInUnsortedArray(values, crossedValue);
		checkObservationSanity(nFeatureContext, crossedValue, valueIndex);

		// TODO: don't put hard bound between feature contexts if feature values
		// already sort this out
		// TODO: Show lex cluster, min pair, and feature context in final output

		final Long2ObjectRBTreeMap<CompatibleObservations> target =
				simulate ? this.simulatedObservations : this.observations;

		// fetch (or lazily create) the group of observations compatible with
		// this lexical cluster / minimal pair / feature context
		final long key = CompatibleObservations.makeKey(nLexCluster, nMinPair, nFeatureContext);
		CompatibleObservations compatible = target.get(key);
		if (compatible == null) {
			compatible = new CompatibleObservations();
			target.put(key, compatible);
		}

		compatible.addObservation(valueIndex, sentence, minPairMapping);
	}

	/**
	 * Rejects observations that cannot belong to this graph.
	 * 
	 * @param nFeatureContext
	 *            feature context id of the observation
	 * @param strCrossed
	 *            the crossed feature value (used in the error message)
	 * @param index
	 *            position of strCrossed among this graph's values, or -1
	 * @throws FeatureStructureException
	 *             if index is -1
	 * @throws RuntimeException
	 *             if this graph is tied to a specific feature context and the
	 *             observation comes from a different one
	 */
	private void checkObservationSanity(int nFeatureContext, String strCrossed, int index)
			throws FeatureStructureException {
		if (index == -1) {
			throw new FeatureStructureException("No match found for feature value interaction: "
					+ strCrossed);
		}

		// a graph typed as ANY accepts observations from every context
		if (featureInteraction.featureContext.id != FeatureContext.ANY.id
				&& featureInteraction.featureContext.id != nFeatureContext) {
			throw new RuntimeException("This FEG is not typed for featureContext: "
					+ nFeatureContext + " (expected: " + featureInteraction.featureContext.id
					+ ")");
		}
	}

	/**
	 * Put this FEG in simulation mode. While in this mode,
	 * simulateObserving(...) may be used to record observations separately
	 * from the real ones. The mode is left again with endSimulation().
	 */
	public void beginSimulation() {
		inSimulation = true;
	}

	/**
	 * Leaves simulation mode and discards every simulated observation.
	 */
	public void endSimulation() {
		inSimulation = false;
		simulatedObservations.clear();
	}

	/**
	 * Records an observation among the simulated observations only; the real
	 * observations are left untouched.
	 * <p>
	 * NOTE: Can only be called after a call to beginSimulation() and before its
	 * matching call to endSimulation().
	 * 
	 * @param sentence
	 *            the sentence pair to pretend to observe
	 * @param featureValues
	 *            the feature values it expresses
	 * @param nLexCluster
	 *            lexical cluster part of the compatibility key
	 * @param nMinPair
	 *            minimal pair part of the compatibility key
	 * @param nFeatureContext
	 *            feature context part of the compatibility key
	 * @param minPairMapping
	 *            mapping stored with the observation
	 * @throws FeatureStructureException
	 *             if the crossed value is not one of this graph's values
	 * @throws RuntimeException
	 *             if the graph is not in simulation mode
	 */
	public void simulateObserving(SentencePair sentence, String[] featureValues, int nLexCluster,
			int nMinPair, int nFeatureContext, MinimalPairMapping minPairMapping)
			throws FeatureStructureException {

		if (!inSimulation) {
			throw new RuntimeException("Must be in simulation mode to carry out this operation.");
		}

		final boolean simulated = true;
		observe(sentence, featureValues, nLexCluster, nMinPair, nFeatureContext, minPairMapping,
				simulated);
	}

	/**
	 * Reports which arcs and nodes the simulated observations would create.
	 * <p>
	 * Simulated observations only count where real observations with the same
	 * compatibility key exist. For every pair of distinct values (i, j) the
	 * number of new arcs is sim(i)*sim(j) + real(i)*sim(j) + sim(i)*real(j);
	 * pairs of two real observations are deliberately left out. Each new arc
	 * also counts as one piece of node evidence for both of its end points.
	 * 
	 * @return the simulated arcs and nodes
	 * @throws SegmenterException
	 * @throws CorpusException
	 */
	public SimulationResult getSimulationResult() throws SegmenterException, CorpusException {

		// only the first step is needed: it makes sure the matrix used for
		// iterating over value pairs exists
		final int INIT_MATRICES = 1;
		calculate(INIT_MATRICES);

		final int[] evidencePerNode = new int[this.values.length];
		final ArrayList<SimulatedArc> arcs = new ArrayList<SimulatedArc>();

		for (Entry<Long, CompatibleObservations> simEntry : simulatedObservations.entrySet()) {
			final long key = simEntry.getKey();

			final CompatibleObservations real = this.observations.get(key);
			if (real == null) {
				// this simulated observation is not compatible with any
				// existing real (non-simulated) observations and so will not
				// create any new NodeEvidence or ArcEvidence.
				continue;
			}
			final CompatibleObservations sim = simEntry.getValue();

			timesObservedAsDifferent.visit(new MatrixVisitor() {
				public void visit(int i, int j) {

					// we don't want arcs going to and from the same value
					if (i == j) {
						return;
					}

					final int realI = real.getObservations(i).size();
					final int realJ = real.getObservations(j).size();
					final int simI = sim.getObservations(i).size();
					final int simJ = sim.getObservations(j).size();

					// DO NOT add realI crossed with realJ
					final int newArcs = simI * simJ + realI * simJ + simI * realJ;

					// every arc is one piece of evidence for both end points
					evidencePerNode[i] += newArcs;
					evidencePerNode[j] += newArcs;

					if (newArcs > 0) {
						arcs.add(new SimulatedArc(featureInteraction.getName(), values[i],
								values[j], newArcs));
					}
				}
			});
		}

		assert evidencePerNode.length == values.length : "createNodes to values array length mismatch";

		final ArrayList<SimulatedNode> nodes = new ArrayList<SimulatedNode>();
		for (int v = 0; v < evidencePerNode.length; v++) {
			if (evidencePerNode[v] <= 0) {
				continue;
			}
			nodes.add(new SimulatedNode(featureInteraction.getName(), values[v],
					evidencePerNode[v]));
		}

		return new SimulationResult(arcs, nodes);
	}

	/**
	 * Runs the calculation through step 5 (finding unobserved values). The
	 * evidence-cluster and morpheme steps (6 and 7) are not run.
	 * 
	 * @throws SegmenterException
	 * @throws CorpusException
	 */
	public void calculate() throws SegmenterException, CorpusException {
		final int THROUGH_UNOBSERVED_VALUES = 5;
		calculate(THROUGH_UNOBSERVED_VALUES);
	}

	/**
	 * Runs the calculation steps 1..nStep, skipping every step that is already
	 * marked as done:
	 * <ol>
	 * <li>create the arc matrices</li>
	 * <li>compare observations and count similarities / differences</li>
	 * <li>compute percentages and merge unexpressed values into clusters</li>
	 * <li>break clusters that contain distinctly expressed values</li>
	 * <li>collect the values that were never observed</li>
	 * <li>create evidence clusters</li>
	 * <li>turn arc evidence into node (morpheme) evidence</li>
	 * </ol>
	 * The graph is marked as calculated only when at least step 5 was
	 * requested. That is the level the no-argument calculate() runs and the
	 * getters rely on. A partial run, such as the calculate(1) issued by
	 * getSimulationResult(), must not keep the getters from calculating.
	 * 
	 * @param nStep
	 *            the last step to run (values below 1 run nothing)
	 * @throws SegmenterException
	 * @throws CorpusException
	 */
	public void calculate(int nStep) throws SegmenterException, CorpusException {

		// TODO: calculate as we go instead of all-at-once so that we never have
		// to invalidate the calculations

		if (nStep >= 1) {
			if (!calculated1) {
				initializeUpperTriangularMatrices();
			}
		}

		// NOTES ON ITERATING OVER UPPER TRIANGULAR MATRIX W/O DIAGONAL:
		// i will never touch first feature value
		// j will never touch last feature value

		if (nStep >= 2) {
			assert calculated1 : "step 1 not calculated";
			if (!calculated2) {
				analyzeAndCountObservations();
			}
		}

		if (nStep >= 3) {
			assert calculated2 : "step 2 not calculated";
			if (!calculated3) {
				valueClusters.clear();
				calculatePercentagesAndCluster();
			}
		}

		if (nStep >= 4) {
			assert calculated3 : "step 3 not calculated";
			if (!calculated4) {
				int before = valueClusters.size();
				breakClusters();
				int after = valueClusters.size();
				assert after >= before : "lost value clusters";
			}

			if (DebugUtils.isAssertEnabled()) {
				for (final FeatureValueCluster cluster : valueClusters) {
					assert cluster.getFeatureInteractions().size() > 0 : "zero length cluster";
				}
			}
		}

		if (nStep >= 5) {
			assert calculated4 : "step 4 not calculated";
			// ensureUniqueClusters();
			if (!calculated5) {
				findUnobservedValues();
			}
		}

		if (nStep >= 6) {
			assert calculated5 : "step 5 not calculated";
			if (!calculated6) {
				createEvidenceClusters();
			}
		}

		if (nStep >= 7) {
			assert calculated6 : "step 6 not calculated";
			if (!calculated7) {
				analyzeMorphemes();
			}
		}

		// a partial run (fewer than 5 steps) leaves the flag as it was
		if (nStep >= 5) {
			calculated = true;
		}
	}

	/**
	 * Step 2 of calculate(int). Within every group of compatible observations,
	 * each observation of value i is compared with each observation of value j
	 * (i != j). Pairs with identical normalized target tokens are appended to
	 * timesObservedAsSame as a FeatureSimilarity; all other pairs are appended
	 * to timesObservedAsDifferent as a FeatureMarking. Both values of every
	 * compared pair are flagged as observed. The static counters are reset at
	 * the start and printed at the end.
	 * 
	 * @throws CorpusException
	 */
	@SuppressWarnings("unchecked")
	private void analyzeAndCountObservations() throws CorpusException {
		calculated2 = true;

		// restart the shared statistics for this counting pass
		FeatureExpressionGraph.nSim = 0;
		FeatureExpressionGraph.nDif = 0;
		TriangularMatrixEvidenceCluster.nEntries = 0;

		for (final CompatibleObservations compatible : observations.values()) {

			// walk over all value pairs of the triangular matrix
			timesObservedAsDifferent.visit(new MatrixVisitor() {
				public void visit(int i, int j) throws CorpusException {

					if (i != j) {
						for (final Observation first : compatible.getObservations(i)) {
							for (final Observation second : compatible.getObservations(j)) {

								valuesObserved[i] = true;
								valuesObserved[j] = true;

								final FeatureValueInteraction valueI =
										featureInteraction.featureValueInteractions[i];
								final FeatureValueInteraction valueJ =
										featureInteraction.featureValueInteractions[j];
								final String[] tokensI = first.sent.getNormalizedTargetTokens();
								final String[] tokensJ = second.sent.getNormalizedTargetTokens();

								if (!Arrays.equals(tokensI, tokensJ)) {
									// the target sides differ: a difference
									final FeatureMarking marking =
											new FeatureMarking(first.sent, second.sent, valueI,
													valueJ, first.mapping, second.mapping);
									timesObservedAsDifferent.append(i, j, marking, distancePenalty);
									nDif++;
								} else {
									// identical target sides: a similarity
									// TODO: keep a list only of unique evidence
									// clusters at this stage; count occurrences
									// of each instead of keeping them all around
									final FeatureSimilarity similarity =
											new FeatureSimilarity(first.sent, second.sent, valueI,
													valueJ);
									timesObservedAsSame.append(i, j, similarity, distancePenalty);
									nSim++;
								}
							}
						}
					}
				}
			});
		}

		System.out.println("MATRIX ARC ENTRIES: " + TriangularMatrixEvidenceCluster.nEntries);
		System.out.println("FEATURE SIMILARITY ARCS: " + FeatureExpressionGraph.nSim);
		System.out.println("FEATURE DIFFERENCE ARCS: " + FeatureExpressionGraph.nDif);
	}

	/**
	 * Step 3 of calculate(int). For every pair of distinct values (i, j) this
	 * sums the weighted frequencies of the difference evidence and of the
	 * similarity evidence on that arc and computes the fraction that is
	 * "different". When there is evidence, the fraction is stored in
	 * percentObservedDifferent, and then:
	 * <ul>
	 * <li>below expressionThreshold: the clusters currently holding either
	 * value are removed and combined into one merged cluster;</li>
	 * <li>otherwise: each value is (re-)filed in a cluster of its own, reusing
	 * the cluster that already held it if there was one.</li>
	 * </ul>
	 * Progress is printed to System.out.
	 * 
	 * @throws CorpusException
	 */
	private void calculatePercentagesAndCluster() throws CorpusException {
		calculated3 = true;

		// calculate percentages for upper triangular matrix
		// clustering phase 1: cluster unexpressed feature values

		// iterate over triangular matrix
		timesObservedAsDifferent.visit(new MatrixVisitor() {
			public void visit(int i, int j) {

				if (i == j)
					return;

				Collection<ArcEvidenceCluster<FeatureMarking>> differenceEvidence =
						timesObservedAsDifferent.get(i, j);
				Collection<ArcEvidenceCluster<FeatureSimilarity>> similarityEvidence =
						timesObservedAsSame.get(i, j);

				// total weighted evidence for "expressed differently" ...
				float nDiff = 0;
				for (final ArcEvidenceCluster<FeatureMarking> cluster : differenceEvidence) {
					nDiff += cluster.getWeightedFrequency();
				}
				// ... and for "expressed the same"
				float nSame = 0;
				for (final ArcEvidenceCluster<FeatureSimilarity> cluster : similarityEvidence) {
					nSame += cluster.getWeightedFrequency();
				}
				float nTotal = nDiff + nSame;
				// NOTE(review): computed before nTotal is checked, so with no
				// evidence this is 0/0 = NaN and the line below prints NaN
				float percent = (float) nDiff / (float) nTotal;

				// print(), not println(): the branch taken below finishes the line
				System.out.print("MERGING: For nodes " + values[i] + " & " + values[j]
						+ ", percentDifferent = " + FormatUtils.formatDouble2(percent) + "... ");

				// NOTE(review): a negative or NaN nTotal matches neither branch
				// and leaves the progress line without its line ending
				if (nTotal == 0) {
					System.out.println("Not merged (no data)");
				} else if (nTotal > 0) {
					percentObservedDifferent.set(i, j, percent);

					FeatureValueInteraction valueA = featureInteraction.featureValueInteractions[i];
					FeatureValueInteraction valueB = featureInteraction.featureValueInteractions[j];

					// we now need to find only the sentences that support this
					// hypothesis

					// NOTE(review): with nTotal > 0 the NaN / infinite cases look
					// unreachable for ordinary finite frequencies -- confirm
					if (Double.isNaN(percent)) {
						// ignore; this will be handled by unobserved value
						// clustering
					} else if (Double.isInfinite(percent)) {
						// ignore; this will be handled by unobserved value
						// clustering
					} else if (percent < expressionThreshold) {

						// NOTE(review): "Mering." looks like a typo for "Merging."
						System.out.println("Mering.");
						// these feature are unexpressed (expressed jointly /
						// similarly)

						// first, look for an existing feature cluster with
						// either
						// value and put them both in it
						// (if both values already share a cluster, the second
						// lookup finds nothing because the first removed it)
						FeatureValueCluster clusterA = getAndRemoveFeatureCluster(valueA);
						FeatureValueCluster clusterB = getAndRemoveFeatureCluster(valueB);

						FeatureValueCluster merged = new FeatureValueCluster();
						if (clusterA != null)
							merged.addCluster(clusterA);
						if (clusterB != null)
							merged.addCluster(clusterB);

						// now add all evidence collected for each of these
						// values
						merged.addSimilarityEvidence(similarityEvidence, valueA, valueB);
						merged.addSimilarityCounterEvidence(differenceEvidence);

						assert merged.getFeatureInteractions().size() > 0 : "zero length cluster: merged";

						valueClusters.add(merged);

					} else {

						System.out.println("Not merged.");

						// these feature are expressed distinctly, so put each
						// in its own feature cluster
						FeatureValueCluster clusterA = getAndRemoveFeatureCluster(valueA);
						if (clusterA == null)
							clusterA = new FeatureValueCluster();

						clusterA.addDifferenceEvidence(differenceEvidence, valueA);
						clusterA.addDifferenceCounterEvidence(similarityEvidence);
						assert clusterA.getFeatureInteractions().size() > 0 : "zero length cluster: clusterA";
						// clusterA must be back in the set before valueB is looked up
						valueClusters.add(clusterA);

						FeatureValueCluster clusterB = getAndRemoveFeatureCluster(valueB);
						if (clusterB == null)
							clusterB = new FeatureValueCluster();
						clusterB.addDifferenceEvidence(differenceEvidence, valueB);
						clusterB.addDifferenceCounterEvidence(similarityEvidence);
						assert clusterB.getFeatureInteractions().size() > 0 : "zero length cluster: clusterB";
						valueClusters.add(clusterB);
					}
				}
			}
		}); // end matrix visitor
	}

	/**
	 * Step 4 of calculate(int), clustering phase 2. Whenever two values are
	 * expressed distinctly (their percentage is at or above the expression
	 * threshold), every cluster containing both of them is removed and
	 * replaced by two clusters, each lacking one of the two values.
	 * 
	 * @throws CorpusException
	 */
	private void breakClusters() throws CorpusException {
		calculated4 = true;

		// walk over all value pairs of the triangular matrix
		timesObservedAsDifferent.visit(new MatrixVisitor() {
			public void visit(int i, int j) {

				if (i != j) {
					final double percentDifferent = percentObservedDifferent.get(i, j);
					System.out.print("SPLITTING: For nodes " + values[i] + " & " + values[j]
							+ ", percentDifferent = " + FormatUtils.formatDouble2(percentDifferent)
							+ "... ");

					// written as a negated >= so that NaN (no data) is not split
					if (!(percentDifferent >= expressionThreshold)) {
						System.out.println("Not splittable.");
						return;
					}

					System.out.println("Split.");

					// these two values must not co-occur in any cluster
					final FeatureValueInteraction first =
							featureInteraction.featureValueInteractions[i];
					final FeatureValueInteraction second =
							featureInteraction.featureValueInteractions[j];

					for (final FeatureValueCluster conflicting : getAndRemoveFeatureClusters(
							first, second)) {

						// replace the cluster by two copies, each missing one
						// of the two conflicting values
						final FeatureValueCluster withoutFirst = conflicting.subtract(first);
						final FeatureValueCluster withoutSecond = conflicting.subtract(second);
						assert withoutFirst.getFeatureInteractions().size() > 0 : "zero length cluster: subtractedA";
						assert withoutSecond.getFeatureInteractions().size() > 0 : "zero length cluster: subtractedB";
						valueClusters.add(withoutFirst);
						valueClusters.add(withoutSecond);
					}
				}
			}
		}); // end matrix visitor
	}

	// /**
	// * Looks for duplicate clusters and recombines them
	// */
	// private void ensureUniqueClusters() {
	//
	// // map every valueCluster to the first index at which it was encountered
	// HashMap<String, Integer> existing = new HashMap<String,
	// Integer>(valueClusters.size());
	//
	// for (int i = 0; i < valueClusters.size(); i++) {
	// FeatureValueCluster cluster = valueClusters.get(i);
	// String id = cluster.getId();
	// Integer nOther = existing.get(id);
	// if (nOther == null) {
	// // first instance
	// existing.put(id, i);
	// } else {
	// // merge this duplicate with the first cluster and delete this
	// // one
	// FeatureValueCluster originalCluster = valueClusters.get(nOther);
	// originalCluster.addCluster(cluster);
	// valueClusters.remove(i);
	// i--;
	// }
	// }
	//
	// for (final FeatureValueCluster cluster : valueClusters) {
	// cluster.pruneNonuniqueEvidence();
	// }
	// }

	/**
	 * Step 5 of calculate(int), clustering phase 3: rebuilds the list of
	 * values that were never flagged as observed.
	 */
	private void findUnobservedValues() {
		calculated5 = true;

		unobservedValues.clear();

		for (int v = 0; v < valuesObserved.length; v++) {
			if (!valuesObserved[v]) {
				unobservedValues.add(featureInteraction.featureValueInteractions[v]);
			}
		}
	}

	/**
	 * Finds the cluster containing the given value and takes it out of
	 * valueClusters. With assertions enabled it is checked that at most one
	 * cluster contains the value; without them the last match wins.
	 * 
	 * @param value
	 *            the value to look for
	 * @return the removed cluster, or null if no cluster contains the value
	 */
	private FeatureValueCluster getAndRemoveFeatureCluster(FeatureValueInteraction value) {
		FeatureValueCluster match = null;
		for (final FeatureValueCluster candidate : valueClusters) {
			if (!candidate.contains(value)) {
				continue;
			}
			assert match == null : "duplicate feature cluster encountered";
			match = candidate;
		}
		// removal happens after the loop so the set is not modified while
		// it is being iterated
		valueClusters.remove(match);
		return match;
	}

	/**
	 * Collects every cluster that contains both given values and removes those
	 * clusters from valueClusters.
	 * 
	 * @param valueA
	 *            first value
	 * @param valueB
	 *            second value
	 * @return the removed clusters (possibly empty, never null)
	 */
	private ArrayList<FeatureValueCluster> getAndRemoveFeatureClusters(
			FeatureValueInteraction valueA, FeatureValueInteraction valueB) {

		final ArrayList<FeatureValueCluster> holdingBoth = new ArrayList<FeatureValueCluster>();
		for (final FeatureValueCluster candidate : valueClusters) {
			final boolean containsBoth = candidate.contains(valueA) && candidate.contains(valueB);
			if (containsBoth) {
				holdingBoth.add(candidate);
			}
		}
		valueClusters.removeAll(holdingBoth);
		return holdingBoth;
	}

	/**
	 * Step 1 of calculate(int): allocates the observed-flags array and the
	 * three arc matrices, all sized by the number of values. The percentage
	 * matrix is created with NaN as its initial value.
	 */
	@SuppressWarnings("unchecked")
	private void initializeUpperTriangularMatrices() {
		calculated1 = true;

		final int nValues = values.length;
		assert nValues > 0 : "no values for feature";

		// create upper triangular matrix w/o diagonal
		this.valuesObserved = new boolean[nValues];
		this.timesObservedAsDifferent = new TriangularMatrixEvidenceCluster<FeatureMarking>(nValues);
		this.timesObservedAsSame = new TriangularMatrixEvidenceCluster<FeatureSimilarity>(nValues);
		this.percentObservedDifferent = new TriangularMatrixFloat(Float.NaN, nValues);
	}

	/**
	 * Step 7 of calculate(int): turn ArcEvidence into NodeEvidence.
	 * <p>
	 * For every difference-evidence arc of every observed value cluster, the
	 * words added on side A (respectively B) are looked up in the cluster's
	 * word map whenever the cluster contains that side's feature value. A
	 * NodeEvidence entry is created on first sight, and the arc is then
	 * attached to it.
	 * <p>
	 * The step is marked as done (calculated7), like all other steps.
	 * Otherwise every further calculate(7) would attach the same arcs to the
	 * node evidence again.
	 * 
	 * @throws SegmenterException
	 * @throws CorpusException
	 */
	private void analyzeMorphemes() throws SegmenterException, CorpusException {
		calculated7 = true;

		for (FeatureValueCluster cluster : getObservedValueClusters()) {
			for (ArcEvidenceCluster<FeatureMarking> arcEvidence : cluster.getDifferenceEvidence()) {

				// side A of the arc
				if (cluster.getFeatureInteractions().contains(arcEvidence.getFeatureValueA())) {

					// node evidence is keyed by the added words joined with "_"
					String addedWordsString =
							StringUtils.untokenize(arcEvidence.getAddedWordsA(), "_");
					NodeEvidence nodeEvidence = cluster.getWords().get(addedWordsString);
					if (nodeEvidence == null) {

						nodeEvidence =
								new NodeEvidence(arcEvidence.getAddedWordsA(),
										arcEvidence.getAddedMorphemesA(segmenter),
										arcEvidence.getPairA());
						cluster.getWords().put(addedWordsString, nodeEvidence);
					}
					nodeEvidence.addFeatureMarking(arcEvidence);
				}

				// side B of the arc
				if (cluster.getFeatureInteractions().contains(arcEvidence.getFeatureValueB())) {

					String addedWordsString =
							StringUtils.untokenize(arcEvidence.getAddedWordsB(), "_");
					NodeEvidence nodeEvidence = cluster.getWords().get(addedWordsString);
					if (nodeEvidence == null) {
						nodeEvidence =
								new NodeEvidence(arcEvidence.getAddedWordsB(),
										arcEvidence.getAddedMorphemesB(segmenter),
										arcEvidence.getPairB());
						cluster.getWords().put(addedWordsString, nodeEvidence);
					}
					nodeEvidence.addFeatureMarking(arcEvidence);
				}
			}
		}
	}

	/**
	 * Step 6 of calculate(int). Evidence clustering is currently switched off,
	 * so this only records that the step has run.
	 */
	private void createEvidenceClusters() {
		// Disabled implementation, for reference: clear evidenceClusters, then
		// pass every marking from getDifferenceEvidence() and from
		// getSimilarityCounterEvidence() of each value cluster to
		// clusterFeatureMarking(marking).
		calculated6 = true;
	}

	/**
	 * Currently does nothing; kept as the counterpart of the disabled evidence
	 * clustering.
	 * 
	 * @param marking
	 *            ignored
	 */
	private void clusterFeatureMarking(final FeatureMarking marking) {
		// Disabled implementation, for reference: add the marking to the first
		// evidence cluster for which isLike(marking) holds; if none matches,
		// start a new EvidenceCluster from the marking and store it.
	}

	/**
	 * Returns the clusters of feature values that are uniquely or jointly
	 * expressed, but none for values that are unobserved. A cluster holding a
	 * single value means that value is fully expressed; if only one cluster
	 * comes back, the observed values of the feature are not distinguished
	 * from each other. Triggers calculate() if the graph is not marked as
	 * calculated.
	 * 
	 * @return the internal cluster collection (not a copy)
	 * @throws SegmenterException
	 * @throws CorpusException
	 */
	public Collection<FeatureValueCluster> getObservedValueClusters() throws SegmenterException,
			CorpusException {
		if (calculated) {
			return valueClusters;
		}
		calculate();
		return valueClusters;
	}

	/**
	 * Gets a Collection of FeatureValueClusters for all feature values in this
	 * FEG *including* unobserved values: a copy of the observed clusters
	 * followed by one single-value cluster, flagged as unobserved, per
	 * unobserved value. If you don't want the unobserved values, consider using
	 * getObservedValueClusters().
	 * 
	 * @return a newly created list
	 * @throws SegmenterException
	 * @throws CorpusException
	 */
	public Collection<FeatureValueCluster> getAllValueClusters() throws SegmenterException,
			CorpusException {
		if (!calculated) {
			calculate();
		}

		final ArrayList<FeatureValueCluster> everything =
				new ArrayList<FeatureValueCluster>(valueClusters);

		for (final FeatureValueInteraction unobserved : unobservedValues) {
			final FeatureValueCluster placeholder = new FeatureValueCluster();
			placeholder.setUnobserved(true);
			placeholder.addInteraction(unobserved);
			everything.add(placeholder);
		}

		return everything;
	}

	/**
	 * Returns the FeatureInteraction this FEG was built for, i.e. the feature
	 * (or the several interacting features) whose crossed values are the
	 * nodes of this graph.
	 * 
	 * @return the feature interaction passed to the constructor
	 */
	public FeatureInteraction getFeatureInteractions() {
		return this.featureInteraction;
	}

	/**
	 * Not supported: sentences are no longer indexed by feature value.
	 * 
	 * @param featureValue
	 *            ignored
	 * @return never returns normally
	 * @throws Error
	 *             always
	 */
	public ArrayList<SentencePair> getSentencesForFeatureValue(String featureValue) {
		// the former lookup in sentencesByFeatureValue has been removed
		throw new Error("unsupported");
	}

	// Disabled accessor, kept for reference together with its description:
	//
	// Gets the clusters that uniquely show how the feature value clusters are
	// differentiated among each other.
	//
	// public ArrayList<EvidenceCluster> getEvidenceClusters() {
	// if (!calculated) {
	// calculate();
	// }
	//
	// return evidenceClusters;
	// }

	/**
	 * Frees the memory held by calculation results: the arc matrices are
	 * cleared, the observed-flags array is dropped, the value clusters are
	 * removed and all per-step flags are reset, so that a later calculate(...)
	 * starts over from step 1. The recorded observations and the list of
	 * unobserved values are kept.
	 * <p>
	 * Safe to call before anything was calculated; the matrices do not exist
	 * yet in that case and are simply skipped.
	 * <p>
	 * NOTE(review): the overall {@code calculated} flag is left untouched, so
	 * the getters do not recalculate by themselves after this call -- confirm
	 * that this is intended.
	 */
	public void releaseStrangleHoldOnMemory() {
		calculated1 = false;
		calculated2 = false;
		calculated3 = false;
		calculated4 = false;
		calculated5 = false;
		calculated6 = false;
		calculated7 = false;

		valuesObserved = null;

		// the matrices are only created by step 1 of calculate(int)
		if (timesObservedAsDifferent != null) {
			timesObservedAsDifferent.clear();
		}
		if (timesObservedAsSame != null) {
			timesObservedAsSame.clear();
		}
		if (percentObservedDifferent != null) {
			percentObservedDifferent.clear();
		}

		// observations.clear();
		valueClusters.clear();
		// unobservedValues.clear();
	}

	/**
	 * Renders percentObservedDifferent as a table. The first row and first
	 * column carry the 1-based value indices (the first column also the value
	 * names). Cells visited by the matrix hold the percentage (fraction times
	 * 100) or "X" when it is NaN or infinite; all other cells hold "--".
	 * 
	 * @return the table, one row per value plus the header row
	 * @throws CorpusException
	 */
	public StringTable getExpressionMatrixAsStringTable() throws CorpusException {

		final int nValues = values.length;
		final String[][] cells = new String[nValues + 1][nValues + 1];

		// header row: value indices along the top
		cells[0][0] = "";
		for (int col = 1; col <= nValues; col++) {
			cells[0][col] = "(" + col + ")";
		}

		// one row per value: label first, placeholders everywhere else
		for (int row = 1; row <= nValues; row++) {
			cells[row][0] = "(" + row + ") " + values[row - 1];
			Arrays.fill(cells[row], 1, nValues + 1, "--");
		}

		percentObservedDifferent.visit(new MatrixVisitor() {
			public void visit(int i, int j) {

				if (i != j) {
					final double fraction = percentObservedDifferent.get(i, j);
					final boolean unusable = Double.isNaN(fraction) || Double.isInfinite(fraction);
					// +1 skips the header row / label column
					cells[i + 1][j + 1] =
							unusable ? "X" : FormatUtils.formatDouble2(fraction * 100);
				}
			}
		});

		final StringTable table = new StringTable();
		for (final String[] row : cells) {
			table.addRow(row);
		}
		return table;
	}

	/**
	 * Builds a summary table with one row per observed value cluster. Each row
	 * has the comma-separated value names, "full" for a single-value cluster
	 * or "joint" otherwise, and the sizes of the four evidence collections.
	 * One "unobserved" row follows for each unobserved value.
	 * <p>
	 * The names are joined with a separator between elements rather than by
	 * trimming a trailing separator, so a cluster without any interaction
	 * yields an empty name instead of a StringIndexOutOfBoundsException.
	 * 
	 * @return the summary table, header row first
	 * @throws SegmenterException
	 * @throws CorpusException
	 */
	public StringTable getExpressionSummary() throws SegmenterException, CorpusException {
		Collection<FeatureValueCluster> clusters = getObservedValueClusters();

		StringTable table = new StringTable();
		table.addRow("Value Cluster", "Expression", "ForDiff", "ForSame", "NotDiff", "NotSame");

		for (final FeatureValueCluster cluster : clusters) {

			String diff = cluster.getDifferenceEvidence().size() + "";
			String same = cluster.getSimilarityEvidence().size() + "";
			String diffX = cluster.getDifferenceCounterEvidence().size() + "";
			String sameX = cluster.getSimilarityCounterEvidence().size() + "";

			StringBuilder featureValues = new StringBuilder();
			boolean first = true;
			for (final FeatureValueInteraction spec : cluster.getFeatureInteractions()) {
				if (!first) {
					featureValues.append(", ");
				}
				featureValues.append(spec.getName());
				first = false;
			}

			if (cluster.getFeatureInteractions().size() == 1) {
				table.addRow(featureValues.toString(), "full", diff, same, diffX, sameX);
			} else {
				table.addRow(featureValues.toString(), "joint", diff, same, diffX, sameX);
			}
		}

		for (final FeatureValueInteraction value : unobservedValues) {
			table.addRow(value.getName(), "unobserved", "0", "0", "0", "0");
		}

		return table;
	}

	/**
	 * Describes which values are fully expressed, jointly expressed or
	 * unobserved, with the evidence counts of every cluster. May trigger the
	 * calculation; checked exceptions raised by it are rethrown wrapped in a
	 * RuntimeException.
	 */
	@Override
	public String toString() {

		try {
			final Collection<FeatureValueCluster> clusters = getObservedValueClusters();

			// a single cluster and nothing unobserved: no value is distinguished
			if (clusters.size() == 1 && unobservedValues.isEmpty()) {
				return featureInteraction.getName() + " is completely unexpressed.\n";
			}

			final StringBuilder out = new StringBuilder();
			out.append(featureInteraction.getName()).append(":\n");

			for (final FeatureValueCluster cluster : clusters) {
				if (cluster.getFeatureInteractions().size() != 1) {
					for (final FeatureValueInteraction member : cluster.getFeatureInteractions()) {
						out.append(member.getName()).append(", ");
					}
					out.append("are jointly expressed.");
				} else {
					final FeatureValueInteraction only =
							cluster.getFeatureInteractions().iterator().next();
					out.append(only.getName()).append(" is fully expressed.");
				}

				// evidence statistics for this cluster
				out.append("(").append(cluster.getDifferenceEvidence().size());
				out.append(" support difference, ");
				out.append(cluster.getSimilarityEvidence().size());
				out.append(" support similar, ");
				out.append(cluster.getDifferenceCounterEvidence().size());
				out.append(" not different, ");
				out.append(cluster.getSimilarityCounterEvidence().size());
				out.append(" not similar)\n");
			}

			for (final FeatureValueInteraction unobserved : unobservedValues) {
				out.append(unobserved.getName()).append(" is unobserved.\n");
			}

			return out.toString();

		} catch (SegmenterException e) {
			throw new RuntimeException(e);
		} catch (CorpusException e) {
			throw new RuntimeException(e);
		}
	}
}
