package edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive;

import java.util.ArrayList;
import java.util.HashSet;

import edu.cmu.cs.lti.avenue.corpus.CorpusException;
import edu.cmu.cs.lti.avenue.corpus.SentencePair;
import edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive.LexicalCluster.LexClusterScores;

/**
 * Groups sentence pairs into {@link LexicalCluster}s, i.e. determines which
 * sentences are lexically compatible with each other.
 * <p>
 * Clusters are kept in creation order. A sentence pair is placed into the
 * first existing cluster that reports it as similar (via
 * {@link LexicalCluster#isLike}); when no cluster accepts it, a new cluster
 * seeded with that pair is appended.
 * 
 * @author jon
 */
public class LexicalClusterManager {

	/** All clusters created so far; a cluster's list position is its index. */
	private final ArrayList<LexicalCluster> clusters = new ArrayList<LexicalCluster>();

	/** Score thresholds handed to every cluster this manager creates. */
	private final LexClusterScores thresholds;

	/** Stopword set handed to every cluster this manager creates. */
	private final HashSet<String> stopwords;

	/**
	 * Creates a manager that initially holds no clusters.
	 * 
	 * @param thresholds
	 *            Percentage thresholds for vocabulary similarity; passed
	 *            unchanged to each new {@link LexicalCluster}
	 * @param stopwords
	 *            Stopword set passed unchanged to each new
	 *            {@link LexicalCluster} (presumably words ignored when
	 *            comparing vocabulary -- confirm in LexicalCluster)
	 */
	public LexicalClusterManager(LexClusterScores thresholds, HashSet<String> stopwords) {
		this.thresholds = thresholds;
		this.stopwords = stopwords;
	}

	/**
	 * Assigns the given sentence pair to a cluster and reports which one.
	 * <p>
	 * Existing clusters are tried in creation order; the pair is added to the
	 * first one whose {@code isLike} check succeeds. If none matches, a new
	 * cluster is created from the pair and appended.
	 * 
	 * @param pair
	 *            the sentence pair to place
	 * @return the zero-based index of the cluster that now contains the pair
	 * @throws CorpusException
	 *             propagated from the {@link LexicalCluster} operations
	 *             invoked here
	 */
	public int mapToCluster(SentencePair pair) throws CorpusException {

		int position = 0;
		for (LexicalCluster candidate : clusters) {
			if (candidate.isLike(pair)) {
				candidate.addSentence(pair);
				return position;
			}
			position++;
		}

		// nothing accepted the pair: the new cluster goes at the end of the
		// list, so its index equals the list size before the append
		final int newIndex = clusters.size();
		clusters.add(new LexicalCluster(thresholds, stopwords, pair));
		return newIndex;
	}

	/**
	 * @return the clusters in creation order; this is the manager's own
	 *         backing list, not a copy
	 */
	public Iterable<LexicalCluster> getClusters() {
		return clusters;
	}

	/**
	 * @return the number of clusters created so far
	 */
	public int getNumClusters() {
		return clusters.size();
	}
}
