package edu.cmu.cs.lti.avenue.navigation.tools;

import info.jonclark.util.StringUtils;

import java.io.File;

import edu.cmu.cs.lti.avenue.corpus.Corpus;
import edu.cmu.cs.lti.avenue.corpus.SentencePair;
import edu.cmu.cs.lti.avenue.corpus.Serializer;

/**
 * Command-line tool that loads a corpus of sentence pairs, replaces every
 * sentence pair that carries exactly one alternate with that alternate, and
 * writes the resulting corpus to a second file.
 * <p>
 * Usage: {@code program <in_file> <out_file>}
 */
public class AlternatePruner {

	/**
	 * Entry point.
	 * <p>
	 * Exits with status 1 when the argument count is not two, or as soon as a
	 * sentence pair with more than one alternate is encountered. In the latter
	 * case the process exits before the output file is written.
	 *
	 * @param args {@code args[0]} is the input corpus file, {@code args[1]} the
	 *            output file
	 * @throws Exception propagated from loading, transforming or saving the
	 *             corpus
	 */
	public static void main(String[] args) throws Exception {
		if (args.length != 2) {
			System.err.println("Usage: program <in_file> <out_file>");
			System.exit(1);
		}

		final File source = new File(args[0]);
		final File destination = new File(args[1]);
		final Corpus corpus = Serializer.loadSentencePairs(source, "UTF-8");

		int promoted = 0;
		for (int idx = 0; idx < corpus.getSentences().size(); idx++) {
			final SentencePair original = corpus.getSentences().get(idx);
			final int alternateCount = original.getAlternates().size();

			// Nothing to do for sentence pairs without alternates.
			if (alternateCount < 1) {
				continue;
			}

			// Ambiguous case: there is no rule for choosing among several
			// alternates, so the run is aborted (despite the "WARNING" label).
			if (alternateCount > 1) {
				System.err.println("WARNING: More than one alternate found: "
						+ original);
				System.exit(1);
			}

			corpus.getSentences().set(idx, promote(original));
			promoted++;
		}

		Serializer.saveSentencePairs(corpus, destination);
		System.out.println(promoted + " alternates replaced.");
	}

	/**
	 * Turns the first alternate of {@code original} into a regular sentence
	 * pair and records the substitution in its comment.
	 * <p>
	 * The alternate is modified in place: its alternate flag is cleared and its
	 * comment becomes
	 * {@code <stripped alternate comment> :: <original target> ==> <alternate target>}.
	 * A line describing the replacement is written to standard error.
	 *
	 * @param original sentence pair whose first alternate is to be promoted
	 * @return the (modified) alternate that should take the original's place
	 * @throws Exception propagated from the project helpers used here
	 */
	private static SentencePair promote(SentencePair original) throws Exception {
		final SentencePair replacement = original.getAlternates().get(0);

		final String strippedOriginalComment = UtfUtils.removeUnicodeChars(original.getComment());
		final String strippedAlternateComment = UtfUtils
				.removeUnicodeChars(replacement.getComment());

		System.err.println("Replacing original: " + strippedOriginalComment + "\t with: "
				+ strippedAlternateComment);

		// Clear the flag first; the display strings below are read afterwards.
		replacement.setAlternate(false);

		// NOTE(review): the stripped comment (not the alternate's raw comment)
		// is what gets stored — confirm this is intended and not just a
		// leftover of preparing the text for the console message above.
		final String mergedComment = strippedAlternateComment + " :: "
				+ original.getDisplayTargetSentence() + " ==> "
				+ replacement.getDisplayTargetSentence();
		replacement.setComment(mergedComment);

		return replacement;
	}
}
