package edu.cmu.cs.lti.letras.tools;

import info.jonclark.util.StringUtils;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Iterator;
import java.util.Locale;
import java.util.TreeSet;

/**
 * A tool to list/count the features in the implicational universals data.
 * <p>
 * Reads a tab-separated file given as the single command-line argument. For
 * every line it builds two labels by joining fields 1 and 3, and fields 2 and
 * 4 (zero-based), with <code>": "</code>. The distinct labels are counted,
 * labels containing any excluded keyword are pruned, and the remaining labels
 * are printed in sorted order, each prefixed with <code>\item </code>.
 */
public class FeatureCounter {

	/**
	 * Lower-case substrings; any label containing one of them
	 * (case-insensitively) is removed before the final listing.
	 */
	private static final String[] EXCLUDED_KEYWORDS = { "consonant", "hand", "finger", "vowel",
			"voicing", "rhythm", "tone" };

	/** Number of fields a line must have, since indices 1 through 4 are read. */
	private static final int MIN_FIELDS = 5;

	/**
	 * Program entry point.
	 * 
	 * @param args
	 *            exactly one element: the path of the tab-separated input file
	 * @throws Exception
	 *             if the file cannot be opened or read
	 */
	public static void main(String[] args) throws Exception {
		if (args.length != 1) {
			System.err.println("Usage: program <file>");
			System.exit(1);
		}

		// TreeSet both de-duplicates the labels and keeps them sorted for output.
		TreeSet<String> features = new TreeSet<String>();
		int nLines = 0;

		// NOTE(review): FileReader decodes with the platform default charset;
		// confirm that matches the encoding of the data file.
		BufferedReader in = new BufferedReader(new FileReader(args[0]));
		try {
			String line;
			while ((line = in.readLine()) != null) {
				nLines++;

				String[] tokens = StringUtils.split(line, "\t", 11);

				// Blank or truncated lines would otherwise abort the whole run
				// with an ArrayIndexOutOfBoundsException; report and skip them.
				if (tokens.length < MIN_FIELDS) {
					System.err.println("Skipping malformed line " + nLines + ": expected at least "
							+ MIN_FIELDS + " tab-separated fields, found " + tokens.length);
					continue;
				}

				features.add(tokens[1] + ": " + tokens[3]);
				features.add(tokens[2] + ": " + tokens[4]);
			}
		} finally {
			// Release the file handle even when reading or parsing fails.
			in.close();
		}

		System.out.println("Read " + nLines + " lines.");
		System.out.println("Found " + features.size() + " unique features.");

		// Filter inapplicable ones. Iterator.remove() lets us prune in place
		// without copying the set or risking a ConcurrentModificationException.
		for (Iterator<String> it = features.iterator(); it.hasNext();) {
			if (isExcluded(it.next())) {
				it.remove();
			}
		}

		System.out.println("Found " + features.size() + " unique features after pruning.");

		for (String feature : features) {
			System.out.println("\\item " + feature);
		}
	}

	/**
	 * Tells whether a label mentions any of the excluded keywords.
	 * 
	 * @param feature
	 *            the label to test
	 * @return <code>true</code> if the lower-cased label contains at least one
	 *         entry of {@link #EXCLUDED_KEYWORDS}
	 */
	private static boolean isExcluded(String feature) {
		// Locale.ROOT keeps the match independent of the default locale
		// (e.g. Turkish lower-cases "I" to a dotless i, breaking "voicing").
		String lower = feature.toLowerCase(Locale.ROOT);
		for (String keyword : EXCLUDED_KEYWORDS) {
			if (lower.contains(keyword)) {
				return true;
			}
		}
		return false;
	}
}
