package edu.cmu.cs.lti.avenue.navigation.tools;

import info.jonclark.util.ArrayUtils;
import info.jonclark.util.FileUtils;
import info.jonclark.util.StringUtils;

import java.io.File;
import java.nio.charset.Charset;
import java.util.ArrayList;

/**
 * Command-line tool that extracts sentences from an input file and prints each
 * one to standard output wrapped as {@code <s> sentence </s>} (presumably the
 * input format expected by the Charniak parser, going by the class name).
 * <p>
 * Usage: {@code program <in_file> [--ec-tgtsent] [--ec-srcsent] [--seg-format]}
 * <ul>
 * <li>{@code --seg-format}: sentences are the text between {@code "<seg "} and
 * {@code "</seg>"}</li>
 * <li>{@code --ec-tgtsent}: sentences are the text between {@code "tgtsent: "}
 * and the end of the line</li>
 * <li>{@code --ec-srcsent}: sentences are the text between {@code "srcsent: "}
 * and the end of the line</li>
 * <li>no flag: one sentence per line</li>
 * </ul>
 * If several flags are given, the first one in the order listed above wins.
 */
public class PrepForCharniak {

	/** Encoding used to read the input file, regardless of the selected format. */
	private static final Charset UTF8 = Charset.forName("UTF-8");

	/**
	 * Reads the file named by {@code args[0]}, splits it into sentences
	 * according to the format flags, normalizes some quote characters and
	 * prints one {@code <s> ... </s>} line per sentence.
	 * 
	 * @param args
	 *            the input file path followed by optional format flags
	 * @throws Exception
	 *             if the input file cannot be read
	 */
	public static void main(String[] args) throws Exception {

		if (args.length < 1) {
			System.err.println("Usage: program <in_file> [--ec-tgtsent] [--ec-srcsent] [--seg-format]");
			System.err.println("Default format: one sentence per line.");
			// Stop here: without an input file the args[0] access below would
			// fail with an ArrayIndexOutOfBoundsException.
			System.exit(1);
		}

		boolean ecTgtSent = ArrayUtils.unsortedArrayContains(args, "--ec-tgtsent");
		boolean ecSrcSent = ArrayUtils.unsortedArrayContains(args, "--ec-srcsent");
		boolean segFormat = ArrayUtils.unsortedArrayContains(args, "--seg-format");

		// Read the file once, always as UTF-8, so the default format is decoded
		// the same way as the flagged formats (the quote replacements below
		// depend on non-ASCII characters being decoded correctly).
		String contents = FileUtils.getFileAsString(new File(args[0]), UTF8);

		ArrayList<String> sentences;
		if (segFormat) {
			sentences = StringUtils.allSubstringsBetween(contents, "<seg ", "</seg>", false);
		} else if (ecTgtSent) {
			sentences = StringUtils.allSubstringsBetween(contents, "tgtsent: ", "\n", false);
		} else if (ecSrcSent) {
			sentences = StringUtils.allSubstringsBetween(contents, "srcsent: ", "\n", false);
		} else {
			sentences = ArrayUtils.toArrayList(StringUtils.tokenize(contents, "\n"));
		}

		for (String sentence : sentences) {
			// Presumably drops the remainder of the opening <seg ...> tag in
			// --seg-format input.
			// NOTE(review): this is applied to every format, so a '>' inside a
			// plain sentence is affected too -- confirm this is intended and
			// what substringAfter returns when no '>' is present.
			sentence = StringUtils.substringAfter(sentence, ">");
			// Left curly double quote becomes a straight quote followed by a space.
			sentence = StringUtils.replaceFast(sentence, "“", "\" ");
			// Collapse a doubled straight quote into a single one.
			sentence = StringUtils.replaceFast(sentence, "\"\"", "\"");
			// Right curly single quote becomes a space followed by a straight apostrophe.
			sentence = StringUtils.replaceFast(sentence, "’", " '");
			System.out.println("<s> " + sentence + " </s>");
		}
	}
}
