package edu.cmu.cs.lti.avenue.navigation.featuredetection.inductive;

import info.jonclark.properties.SmartProperties;
import info.jonclark.util.HashUtils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Random;

/**
 * Command-line tool that derives three baseline result files from a gold
 * standard file of {@link InductiveResultRow}s:
 * <ol>
 * <li>"together": every row of a feature lists all other rows of that feature
 * in its <code>sameAs</code> field,</li>
 * <li>"separate": every row has an empty <code>sameAs</code>,</li>
 * <li>"random": rows of a feature are assigned to randomly chosen clusters and
 * <code>sameAs</code> lists the other members of the row's cluster.</li>
 * </ol>
 * Input and output locations are read from a properties file given as the
 * single command-line argument.
 */
public class InductiveBaselineCreator {

	/**
	 * Reads a result file line by line, builds one {@link InductiveResultRow}
	 * per line and groups the rows under <code>row.name</code>.
	 * 
	 * @param file
	 *            the file to read; its absolute path and the 1-based line
	 *            number are passed to each row's constructor
	 * @return a map from row name to the rows collected for that name (the
	 *         grouping itself is delegated to <code>HashUtils.append</code>)
	 * @throws FileNotFoundException
	 *             if the file cannot be opened
	 * @throws IOException
	 *             if reading or closing the file fails
	 */
	public static HashMap<String, ArrayList<InductiveResultRow>> readFile(File file)
			throws FileNotFoundException, IOException {

		HashMap<String, ArrayList<InductiveResultRow>> rows =
				new HashMap<String, ArrayList<InductiveResultRow>>();
		BufferedReader in = new BufferedReader(new FileReader(file));

		// close the reader even when reading or row construction fails
		try {
			int nLine = 0;
			String line;
			while ((line = in.readLine()) != null) {
				nLine++;
				InductiveResultRow row =
						new InductiveResultRow(line, file.getAbsolutePath(), nLine);
				HashUtils.append(rows, row.name, row);
			}
		} finally {
			in.close();
		}

		return rows;
	}

	/**
	 * Entry point. Expects exactly one argument: the path of the properties
	 * file naming the gold standard input and the three baseline outputs.
	 * Prints a usage message and exits with status 1 otherwise.
	 * 
	 * @param args
	 *            command-line arguments
	 * @throws Exception
	 *             if the properties or any of the data files cannot be read or
	 *             written
	 */
	public static void main(String[] args) throws Exception {
		if (args.length != 1) {
			System.err.println("Usage: program <detection_properties_file>");
			System.exit(1);
		}

		File propsFile = new File(args[0]);
		SmartProperties props = new SmartProperties(propsFile);
		File goldFile = props.getPropertyFile("inference.eval.goldStandard");
		File togetherFile = props.getPropertyFile("inference.eval.baselineTogether");
		File separateFile = props.getPropertyFile("inference.eval.baselineSeparate");
		File randomFile = props.getPropertyFile("inference.eval.baselineRandom");

		HashMap<String, ArrayList<InductiveResultRow>> rowsByFeature = readFile(goldFile);

		// clear irrelevant data from gold standard data
		resetRowDetails(rowsByFeature);

		// 1) cluster all values of each feature together
		writeTogetherBaseline(rowsByFeature, togetherFile);

		// 2) leave all values of each feature separate
		writeSeparateBaseline(rowsByFeature, separateFile);

		// 3) randomly choose a cluster to add each feature to (including the
		// option to create a new cluster)
		writeRandomBaseline(rowsByFeature, randomFile, new Random());
	}

	/**
	 * Overwrites every field of every row that the baselines do not carry over
	 * from the gold standard with a fixed default value.
	 */
	private static void resetRowDetails(
			HashMap<String, ArrayList<InductiveResultRow>> rowsByFeature) {

		for (ArrayList<InductiveResultRow> feature : rowsByFeature.values()) {
			for (InductiveResultRow value : feature) {
				value.always = true;
				value.sometimes = false;
				value.unobserved = false;

				value.when = "";
				value.comments = "";

				value.addedCWord = false;
				value.addedFWord = false;
				value.addedOther = false;

				value.markedOn = ' ';
				value.evidenceIds = new int[0];
				value.morphemes = new String[0];
				value.words = new String[0];
				value.affixes = new String[0];
				value.changedWord = false;
			}
		}
	}

	/**
	 * Writes the "together" baseline: each row's <code>sameAs</code> is set to
	 * the <code>value</code> of every row of the same feature that is not
	 * <code>equals</code> to it, then the row is printed.
	 */
	private static void writeTogetherBaseline(
			HashMap<String, ArrayList<InductiveResultRow>> rowsByFeature, File outFile)
			throws IOException {

		PrintWriter out = new PrintWriter(outFile);
		try {
			for (ArrayList<InductiveResultRow> feature : rowsByFeature.values()) {
				for (InductiveResultRow value : feature) {

					// Build a fresh, exactly-sized array per row so that rows
					// never share one array and no unfilled slots are left over.
					ArrayList<String> others = new ArrayList<String>();
					for (InductiveResultRow value2 : feature) {
						if (value.equals(value2) == false) {
							others.add(value2.value);
						}
					}

					value.sameAs = others.toArray(new String[others.size()]);
					out.println(value.toString());
				}
			}
			failOnWriteError(out, outFile);
		} finally {
			out.close();
		}
	}

	/**
	 * Writes the "separate" baseline: each row's <code>sameAs</code> is set to
	 * an empty array, then the row is printed.
	 */
	private static void writeSeparateBaseline(
			HashMap<String, ArrayList<InductiveResultRow>> rowsByFeature, File outFile)
			throws IOException {

		PrintWriter out = new PrintWriter(outFile);
		try {
			for (ArrayList<InductiveResultRow> feature : rowsByFeature.values()) {
				for (InductiveResultRow value : feature) {
					value.sameAs = new String[0];
					out.println(value.toString());
				}
			}
			failOnWriteError(out, outFile);
		} finally {
			out.close();
		}
	}

	/**
	 * Writes the "random" baseline: within each feature, the first row starts
	 * cluster 0 and every later row is placed, uniformly at random, into one of
	 * the existing clusters or into a new one. Each row's <code>sameAs</code>
	 * is set to the labels of its cluster that differ from its own
	 * <code>value</code>, then the row is printed.
	 */
	private static void writeRandomBaseline(
			HashMap<String, ArrayList<InductiveResultRow>> rowsByFeature, File outFile,
			Random rand) throws IOException {

		PrintWriter out = new PrintWriter(outFile);
		try {
			for (ArrayList<InductiveResultRow> feature : rowsByFeature.values()) {
				if (feature.isEmpty()) {
					// nothing to cluster or print for this feature
					continue;
				}

				// decide which feature values go in each cluster; drawing the
				// value nClusters means "open a new cluster"
				int nClusters = 1;
				int[] clusterAssignments = new int[feature.size()];
				clusterAssignments[0] = 0;
				for (int i = 1; i < feature.size(); i++) {
					clusterAssignments[i] = rand.nextInt(nClusters + 1);
					if (clusterAssignments[i] == nClusters) {
						nClusters++;
					}
				}

				// determine which values ended up in each cluster
				ArrayList<ArrayList<String>> clusters =
						new ArrayList<ArrayList<String>>(nClusters);
				for (int i = 0; i < nClusters; i++) {
					clusters.add(new ArrayList<String>());
				}
				for (int i = 0; i < feature.size(); i++) {
					clusters.get(clusterAssignments[i]).add(feature.get(i).value);
				}

				// output results
				for (int i = 0; i < feature.size(); i++) {
					InductiveResultRow value = feature.get(i);

					// exactly-sized list of the other labels in this row's
					// cluster (no unfilled slots if a label occurs twice)
					ArrayList<String> others = new ArrayList<String>();
					for (String strLabel : clusters.get(clusterAssignments[i])) {
						if (strLabel.equals(value.value) == false) {
							others.add(strLabel);
						}
					}

					value.sameAs = others.toArray(new String[others.size()]);
					out.println(value.toString());
				}
			}
			failOnWriteError(out, outFile);
		} finally {
			out.close();
		}
	}

	/**
	 * Flushes the writer and throws if any earlier write failed. Needed because
	 * {@link PrintWriter} records I/O errors internally instead of throwing.
	 */
	private static void failOnWriteError(PrintWriter out, File outFile) throws IOException {
		if (out.checkError()) {
			throw new IOException("Error while writing " + outFile.getAbsolutePath());
		}
	}
}
