package edu.cmu.cs.lti.avenue.atavi;

import info.jonclark.util.ArrayUtils;
import info.jonclark.util.FileUtils;
import info.jonclark.util.LatexUtils;
import info.jonclark.util.StringUtils;

import java.io.File;
import java.io.IOException;

import edu.cmu.cs.lti.avenue.corpus.Corpus;
import edu.cmu.cs.lti.avenue.corpus.CorpusException;
import edu.cmu.cs.lti.avenue.corpus.SentencePair;
import edu.cmu.cs.lti.avenue.corpus.Serializer;
import edu.cmu.cs.lti.avenue.navigation.tools.UtfUtils;
import edu.cmu.cs.lti.avenue.projection.ConstituentStructureProjector;
import edu.cmu.cs.lti.avenue.projection.ProjectionAnalyzer;
import edu.cmu.cs.lti.avenue.projection.ProjectionConstraints;
import edu.cmu.cs.lti.avenue.projection.ProjectionFeatures;
import edu.cmu.cs.lti.avenue.projection.ordering.EndpointOrderingModel;
import edu.cmu.cs.lti.avenue.projection.ordering.OrderingModel;
import edu.cmu.cs.lti.avenue.trees.cfg.SyncCfgRule;
import edu.cmu.cs.lti.avenue.trees.smart.SmartTree;
import edu.cmu.cs.lti.avenue.trees.smart.TreeNode;
import edu.cmu.cs.lti.avenue.trees.smart.SmartTree.LabelDisplay;
import edu.cmu.cs.lti.avenue.trees.smart.SmartTree.LabelMode;

/**
 * Command-line driver and static helpers that attach a target-side
 * constituent structure to each {@link SentencePair} and write the pair's
 * trees, alignment and explanatory comments to per-sentence text files.
 */
public class AtaviWrapper {

	/** Directory into which {@link #project(SentencePair, boolean, boolean)} writes its files. */
	public static final File DEFAULT_OUT_DIR =
			new File("/Users/jon/Documents/workspace/letras/ATAVI/Corpus/Sentences");

	/**
	 * Builds a flat placeholder constituent structure: a root node with the
	 * value "S" and one child per token, where each child receives the values
	 * "LEX" and the token itself.
	 * 
	 * @param pair
	 *            the sentence pair whose display target tokens become the
	 *            children of the root
	 * @param target
	 *            true == target side / false == source side; selects which
	 *            c-structure label the tree is created with
	 * @return the newly created tree
	 */
	public static SmartTree generateDummyCStruct(SentencePair pair, boolean target) {

		String label = (target ? SmartTree.TARGET_C_STRUCT_LABEL : SmartTree.SOURCE_C_STRUCT_LABEL);
		SmartTree tree = SmartTree.createRootedTree(label, LabelMode.LABEL_ALL_NODES);
		TreeNode root = tree.getRootNode();
		root.addValue("S");

		// NOTE(review): the tokens are always taken from the target side, even
		// when target == false; only the label changes. The only caller in this
		// class passes true -- confirm whether source tokens were intended for
		// the source-side case.
		for (final String token : pair.getDisplayTargetTokens()) {
			TreeNode child = root.addChild();
			child.addValue("LEX");
			child.addValue(token);
		}

		return tree;
	}

	/**
	 * Loads a corpus and projects every sentence pair whose id lies in the
	 * half-open range {@code [first_sent, last_sent)}, i.e. the pair with id
	 * {@code last_sent} is NOT processed.
	 * 
	 * @param args
	 *            {@code <first_sent> <last_sent> <in_file> [--dummy]}; when
	 *            "--dummy" is present a dummy target structure is generated
	 *            instead of a projected one
	 * @throws Exception
	 *             if loading the corpus, projecting or writing a sentence fails
	 */
	public static void main(String[] args) throws Exception {

		final String usage = "Usage: program <first_sent> <last_sent> <in_file> [--dummy]";

		if (args.length < 3) {
			System.err.println(usage);
			System.exit(1);
		}

		String encoding = "UTF-8";

		int first;
		int last;
		try {
			first = Integer.parseInt(args[0]);
			last = Integer.parseInt(args[1]);
		} catch (NumberFormatException e) {
			// report bad sentence numbers as a usage error instead of a stack trace
			System.err.println("<first_sent> and <last_sent> must be integers: " + e.getMessage());
			System.err.println(usage);
			System.exit(1);
			return; // unreachable; lets the compiler see first/last as assigned
		}

		// 1) extract source tree, target tree, alignments, and comments from
		// each SentencePair
		Corpus corpus =
				Serializer.loadSentencePairs(new File(args[2]), encoding);

		boolean dummy = ArrayUtils.unsortedArrayContains(args, "--dummy");

		for (int i = 0; i < corpus.getSentences().size(); i++) {
			SentencePair pair = corpus.getSentences().get(i);
			int id = pair.getId();

			// lower bound inclusive, upper bound exclusive
			if (id >= first && id < last) {
				project(pair, dummy, true);
			}
		}
	}

	/**
	 * Sets the target constituent structure of {@code pair} (projected, or a
	 * dummy one), assembles the comment text and writes everything to
	 * {@link #DEFAULT_OUT_DIR}, using the pair's id in the file names.
	 * 
	 * @param pair
	 *            the sentence pair to process; its target constituent
	 *            structure is overwritten
	 * @param dummy
	 *            if true, use {@link #generateDummyCStruct(SentencePair, boolean)}
	 *            instead of running the projector
	 * @param addProjectionComments
	 *            if true, append the projection feature, rule and link
	 *            comments after the basic sentence comments
	 * @throws CorpusException
	 *             declared for the corpus/projection calls made here
	 * @throws IOException
	 *             if the output files cannot be written
	 */
	public static void project(SentencePair pair, boolean dummy, boolean addProjectionComments)
			throws CorpusException, IOException {

		if (dummy) {
			pair.setTargetConstituentStructure(generateDummyCStruct(pair, true));
		} else {
			OrderingModel orderingModel = new EndpointOrderingModel(true);
			ProjectionConstraints projectionConstraints = new ProjectionConstraints();
			ConstituentStructureProjector projector =
					new ConstituentStructureProjector(orderingModel, projectionConstraints);

			pair.setTargetConstituentStructure(projector.project(pair));
		}

		StringBuilder comments = new StringBuilder();
		comments.append(getBasicSentenceComments(pair));
		if (addProjectionComments) {
			comments.append(getProjectionFeaturesComments(pair));
			comments.append(getRulesAndLinksComments(pair));
		}

		writeAtaviSentence(DEFAULT_OUT_DIR, pair.getId(), pair, comments.toString());
	}

	/**
	 * Formats the sentence id, the normalized alignment and the pair's own
	 * comment, one per line.
	 * 
	 * @param pair
	 *            the sentence pair to describe
	 * @return three newline-terminated lines: "sentnum: ...", "ALIGN: ..." and
	 *         the pair's comment
	 */
	public static String getBasicSentenceComments(SentencePair pair) {
		StringBuilder comments = new StringBuilder();
		comments.append("sentnum: ").append(pair.getId()).append("\n");
		comments.append("ALIGN: ").append(pair.getNormalizedAlignment().toString()).append("\n");
		comments.append(pair.getComment()).append("\n");
		return comments.toString();
	}

	/**
	 * Formats the sentence-level projection statistics reported by
	 * {@link ProjectionAnalyzer#getSentenceLevelStatistics}, one labelled value
	 * per line.
	 * 
	 * @param pair
	 *            the sentence pair to analyze
	 * @return the newline-terminated feature lines
	 */
	public static String getProjectionFeaturesComments(SentencePair pair) {

		StringBuilder comments = new StringBuilder();
		ProjectionFeatures features = ProjectionAnalyzer.getSentenceLevelStatistics(pair);

		// alignment grouping (features.ag) is deliberately not reported: it
		// really isn't an interesting feature since we resolve almost all of
		// these now
		comments.append("REORDERINGS: ").append(features.r).append("\n");
		comments.append("RHS EDIT DISTANCE: ").append(features.ed).append("\n");
		// NOTE(review): "ALGINMENT" is misspelled, but the label is kept
		// byte-for-byte in case downstream tooling matches on it -- confirm
		// before correcting.
		comments.append("FAILURES B/C CROSS-CONSTITUENT ALGINMENT: ").append(features.dd).append("\n");
		comments.append("FAILURES B/C TARGET CONSTITUENT INTERRUPTION: ").append(features.di).append("\n");
		comments.append("FAILURES B/C UNALIGNED SOURCE WORD: ").append(features.dus).append("\n");
		comments.append("FAILURES B/C UNALIGNED TARGET WORD: ").append(features.dut).append("\n");
		comments.append("LEFTOVERS: ").append(features.lo).append("\n");

		return comments.toString();
	}

	/**
	 * Lists the learned rules ("RULE:" lines), the failed rules ("FAIL:"
	 * lines) and finally a single "LINKS:" line with the learned links
	 * separated by ", ".
	 * 
	 * @param pair
	 *            the sentence pair to analyze
	 * @return the newline-terminated rule, failure and link lines
	 */
	public static String getRulesAndLinksComments(SentencePair pair) {
		StringBuilder comments = new StringBuilder();

		for (final SyncCfgRule rule : ProjectionAnalyzer.getLearnedRules(pair, true)) {
			comments.append("RULE:  ").append(rule.toString()).append("\n");
		}
		for (final String rule : ProjectionAnalyzer.getFailedRules(pair)) {
			comments.append("FAIL:  ").append(rule).append("\n");
		}

		comments.append("LINKS: ");
		// emit the separator only BETWEEN links so the line does not end in a
		// dangling ", "
		String separator = "";
		for (final String link : ProjectionAnalyzer.getLearnedLinks(pair)) {
			comments.append(separator).append(link);
			separator = ", ";
		}
		comments.append("\n");

		return comments.toString();
	}

	/**
	 * Strips or replaces characters before the text is written out: removes
	 * "$" and "'", replaces ":" with "PUNC", then applies
	 * {@link LatexUtils#replaceLatexKillers} and
	 * {@link UtfUtils#replaceUnicodeCharsWith} with the replacement char 'x'.
	 * 
	 * @param str
	 *            the text to sanitize
	 * @return the sanitized text
	 */
	public static String removeLatexKillers(String str) {
		String cleaned = StringUtils.replaceFast(str, "$", "");
		cleaned = StringUtils.replaceFast(cleaned, ":", "PUNC");

		// make sure possessives don't kill alignments
		cleaned = StringUtils.replaceFast(cleaned, "'", "");

		cleaned = LatexUtils.replaceLatexKillers(cleaned);
		return UtfUtils.replaceUnicodeCharsWith(cleaned, 'x');
	}

	/**
	 * Writes four files for one sentence pair into {@code outDir}:
	 * "sen-i-chi.txt" (source tree), "sen-i-eng.txt" (target tree),
	 * "sen-i-align.txt" (transposed normalized alignment) and
	 * "sen-i-subalign.txt" (the comments wrapped in \starttyping /
	 * \stoptyping). The output directory is created if it does not exist yet.
	 * 
	 * @param outDir
	 *            the directory to write into
	 * @param i
	 *            the number used in the file names
	 * @param pair
	 *            the sentence pair; both constituent structures must be set
	 * @param comments
	 *            the comment text; must not be null
	 * @throws IOException
	 *             if the directory cannot be created or a file cannot be
	 *             written
	 */
	public static void writeAtaviSentence(File outDir, int i, SentencePair pair, String comments)
			throws IOException {

		// fail fast on a null comment instead of writing the text "null"
		if (comments == null) {
			throw new NullPointerException("comments must not be null");
		}

		// we already check the alignment sanity in SentencePair, but we should
		// make sure the trees reflect the original sentences
		pair.assertConstituentStructureSanity();

		String strSourceTree = pair.getSourceConstituentStructure().toString(LabelDisplay.DASHES);
		String strTargetTree = pair.getTargetConstituentStructure().toString(LabelDisplay.DASHES);
		String strAlignments = pair.getNormalizedAlignment().transpose().toString();
		// the comments are wrapped in a typing block and, unlike the trees and
		// alignments, are intentionally NOT passed through removeLatexKillers
		String strComments = "\\starttyping\n" + comments + "\\stoptyping\n";

		strSourceTree = removeLatexKillers(strSourceTree);
		strTargetTree = removeLatexKillers(strTargetTree);
		strAlignments = removeLatexKillers(strAlignments);

		ensureDirectoryExists(outDir);

		String prefix = "sen-" + i;
		FileUtils.saveFileFromString(new File(outDir, prefix + "-chi.txt"), strSourceTree);
		FileUtils.saveFileFromString(new File(outDir, prefix + "-eng.txt"), strTargetTree);
		FileUtils.saveFileFromString(new File(outDir, prefix + "-align.txt"), strAlignments);
		FileUtils.saveFileFromString(new File(outDir, prefix + "-subalign.txt"), strComments);
	}

	/**
	 * Creates {@code dir} (including missing parents) unless it already is a
	 * directory.
	 * 
	 * @param dir
	 *            the directory that must exist afterwards
	 * @throws IOException
	 *             if the directory does not exist and cannot be created
	 */
	private static void ensureDirectoryExists(File dir) throws IOException {
		// the trailing isDirectory() re-check tolerates another process
		// creating the directory between the first check and mkdirs()
		if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
			throw new IOException("Could not create output directory: " + dir.getAbsolutePath());
		}
	}
}
