package edu.cmu.cs.lti.letras.corpus;

import info.jonclark.log.LogUtils;
import info.jonclark.util.StringUtils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.logging.Logger;

import edu.cmu.cs.lti.letras.featurebitmaps.Feature;
import edu.cmu.cs.lti.letras.featurebitmaps.FeatureFactory;
import edu.cmu.cs.lti.letras.featurebitmaps.FeatureGroup;
import edu.cmu.cs.lti.letras.trees.SmartTree;
import edu.cmu.cs.lti.letras.trees.SmartTree.LabelMode;

public class Serializer {

	private static final Logger log = LogUtils.getLogger();

	public static ArrayList<SentencePair> loadSentencePairs(File elicitationCorpusFile)
			throws IOException, ParseException {
		ArrayList<SentencePair> sentencePairs = new ArrayList<SentencePair>();

		// TODO: Respect the encoding specified at the top of the file
		// TODO: Handle other possible tags besides newpair -- read header info

		BufferedReader in = new BufferedReader(new FileReader(elicitationCorpusFile));
		int nLine = 0;

		// advance to the first pair
		String corpusLine;
		while ((corpusLine = in.readLine()) != null && !(corpusLine.equals("newpair")))
			nLine++;

		// read file to exhaustion
		while (corpusLine != null) {

			nLine++;

			int id = -1;
			String[] eSentence = null;
			String[] fSentence = new String[0];
			String alignments = "";
			String context = "";
			String comment = "";
			SmartTree featureStructure = null;
			SmartTree constituentStructure = null;
			PhiPlusMapping phiPlusMapping = null;
			String fstLine = null;
			String cstLine = null;
			String myLine = elicitationCorpusFile + ":" + nLine;

			while ((corpusLine = in.readLine()) != null && !(corpusLine.equals("newpair"))) {

				nLine++;
				corpusLine = corpusLine.trim();

				try {
					if (corpusLine.equals("")) {
						continue;
					} else if (corpusLine.startsWith("(")) {
						featureStructure = SmartTree.parse(corpusLine.trim(), "f",
								LabelMode.LABEL_ODD_NODES);
					} else {

						String value = StringUtils.substringAfter(corpusLine, ":");
						value = value.trim();

						// System.out.println("reading: " + corpusLine);

						if (corpusLine.startsWith("sentid#")) {
							id = Integer.parseInt(value);
						} else if (corpusLine.startsWith("srcsent")) {
							eSentence = StringUtils.tokenize(value);
						} else if (corpusLine.startsWith("tgtsent")) {
							fSentence = StringUtils.tokenize(value);
						} else if (corpusLine.startsWith("aligned")) {
							alignments = value;
						} else if (corpusLine.startsWith("context")) {
							context = value;
						} else if (corpusLine.startsWith("comment")) {
							comment = value;
						} else if (corpusLine.startsWith("fstruct")) {
							featureStructure = SmartTree.parse(value.trim(), "f",
									LabelMode.LABEL_ODD_NODES);
						} else if (corpusLine.startsWith("cstruct")) {
							constituentStructure = SmartTree.parse(value.trim(), "n",
									LabelMode.LABEL_ALL_NODES);
						} else if (corpusLine.startsWith("phiplus")) {
							phiPlusMapping = PhiPlusMapping.deserialize(value);
						} else if (corpusLine.startsWith("srcline")) {
							fstLine = value;
						} else if (corpusLine.startsWith("cstline")) {
							cstLine = value;
						} else {
							log.warning("Unknown tag in elicitation corpus ("
									+ elicitationCorpusFile.getName() + ":" + nLine + "): "
									+ corpusLine);
						}
					}
				} catch (Throwable e) {
					throw new ParseException("Error while parsing file " + elicitationCorpusFile
							+ ":" + nLine + "\n" + StringUtils.getStackTrace(e), nLine);
				}
			}

			// any of these being null means we have an incomplete entry (or
			// the
			// file ended in the middle of an entry)
			// assert id != -1;
			if(eSentence == null)
				throw new ParseException("Error while parsing file (null eSentence): " + elicitationCorpusFile
						+ ":" + nLine, nLine);

			SentencePair sentencePair = SentencePairFactory.getInstance(id, eSentence, fSentence,
					alignments, context, comment, featureStructure, constituentStructure,
					phiPlusMapping, fstLine, cstLine, myLine);
			sentencePairs.add(sentencePair);

			// System.out.println("Adding: " +
			// StringUtils.untokenize(eSentence));
		} // end if not in multiply

		in.close();

		return sentencePairs;
	}

	public static ArrayList<FeatureGroup<Feature>> loadImplicationalUniversals(File universalsFile,
			double minProb) throws NumberFormatException, IOException {

		ArrayList<FeatureGroup<Feature>> implicationalUniverals = new ArrayList<FeatureGroup<Feature>>();

		BufferedReader in = new BufferedReader(new FileReader(universalsFile));

		String line;
		while ((line = in.readLine()) != null) {

			String[] tokens = StringUtils.split(line, "\t", 11);

			double prob = Double.parseDouble(tokens[0]);
			if (prob > minProb) {
				String strTrigger = tokens[1] + ": " + tokens[3];
				String strImplication = tokens[2] + ": " + tokens[4];

				Feature trigger = FeatureFactory.getInstance(strTrigger);
				Feature implication = FeatureFactory.getInstance(strImplication);

				implicationalUniverals.add(new FeatureGroup<Feature>(trigger, implication));
			}
		}

		return implicationalUniverals;
	}

	/**
	 * Load feature groups into an array list, enclosing each in a feature group
	 * that represents all features that either trigger the test or are fired by
	 * the test.
	 * 
	 * @param sentencePairs
	 * @return
	 * @throws IOException
	 */
	public static ArrayList<FeatureGroup<SentenceTest>> loadSentenceTests(File testsFile,
			HashSet<SentencePair> sentencePairs) throws IOException {
		ArrayList<FeatureGroup<SentenceTest>> sentenceTests = new ArrayList<FeatureGroup<SentenceTest>>();

		BufferedReader in = new BufferedReader(new FileReader(testsFile));

		// TODO: Handle other possible tags besides newtest
		String testLine;
		while ((testLine = in.readLine()) != null && !testLine.equals("newtest"))
			;

		while (testLine != null) {

			// send a group of sentences (1 for now)
			ArrayList<Feature> ifEqual = null;
			ArrayList<Feature> ifNotEqual = null;
			int[] required = null;
			while ((testLine = in.readLine()) != null && !(testLine.equals(""))) {

				String value = StringUtils.substringAfter(testLine, ":");
				value = value.trim();

				// System.err.println("reading: " + testLine);

				if (testLine.startsWith("newtest")) {
					continue;
				} else if (testLine.startsWith("ifequal")) {

					String[] features = StringUtils.tokenize(value, ",");
					ifEqual = new ArrayList<Feature>(features.length);
					for (String feature : features) {
						ifEqual.add(FeatureFactory.getInstance(feature.trim()));
					}

				} else if (testLine.startsWith("ifnoteq")) {

					String[] features = StringUtils.tokenize(value, ",");
					ifNotEqual = new ArrayList<Feature>(features.length);
					for (String feature : features) {
						ifNotEqual.add(FeatureFactory.getInstance(feature.trim()));
					}

				} else if (testLine.startsWith("require")) {
					required = StringUtils.toIntArray(StringUtils.tokenize(value));
				} else {
					log.warning("Unknown tag in elicitation corpus: " + testLine);
				}
			}

			// any of these being null means we have an incomplete entry (or the
			// file ended in the middle of an entry)
			assert ifEqual != null;
			assert ifNotEqual != null;
			assert required != null;

			SentenceTest test = new SentenceTest(required, ifEqual, ifNotEqual);

			ArrayList<Feature> allFeatures = new ArrayList<Feature>(ifEqual.size()
					+ ifNotEqual.size());
			allFeatures.addAll(ifEqual);
			allFeatures.addAll(ifNotEqual);

			FeatureGroup<SentenceTest> node = new FeatureGroup<SentenceTest>(allFeatures, test);
			sentenceTests.add(node);
		} // end while corpusLine != null

		in.close();

		return sentenceTests;
	}

}
