import java.io.File;
import java.io.PrintStream;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Random;
import java.util.Vector;
import java.util.regex.*;

import edu.cmu.hcii.problemreports.*;

/**
 * Interactive command-line tool for querying a table of problem reports.
 *
 * <p>{@link #main(String[])} loads one or more csv files into a single {@code Table} and then
 * starts {@link #readEvalPrint(Table)}, which reads one command per line from standard input.
 * Prompts and diagnostics are written to standard error; query results are written to
 * standard output.
 */
public class Analyze {

	/**
	 * Shape of a row filter argument: a field name made of letters, an equals sign and a
	 * double-quoted regular expression. Group 1 is the field name and group 2 is the text
	 * between the first and the last quote, so the regular expression may itself contain '='.
	 */
	private static final Pattern FIELD_EXPRESSION = Pattern.compile("([a-zA-Z]+)=\"(.*)\"");

	/**
	 * Loads every csv file named on the command line into one table, prints a short summary
	 * to standard error and starts the interactive prompt.
	 *
	 * @param args paths of the csv files to load; at least one is required, otherwise a
	 *             usage message is printed and the program exits with status 1
	 */
	public static final void main(String[] args) {

		if(args.length < 1) {
			System.err.println("You must supply at least one csv file to analyze.");
			// A missing argument is a failure, so report it with a non-zero status.
			System.exit(1);
		}

		Table table = new Table(new File(args[0]));
		for(int i = 1; i < args.length; i++)
			table.addRows(new File(args[i]));

		System.err.println("");
		// Every file contributes rows, so list all of them rather than only the first.
		for(String file : args)
			System.err.println("Read " + file);
		System.err.println("" + table.getColumnCount() + " columns:");
		System.err.println("" + table.getCommaSeparatedColumns());
		System.err.println("" + table.getRowCount() + " rows");
		System.err.println("");

		readEvalPrint(table);

	}

	/**
	 * Reads commands from standard input, one per line, and executes each against the table
	 * until the user quits or standard input ends. A final line that is not terminated by a
	 * newline is still executed.
	 *
	 * <p>NOTE: input is read one byte at a time and each byte is widened to a char, so
	 * non-ASCII input is not decoded.
	 *
	 * @param table the table that commands are run against
	 */
	public static void readEvalPrint(Table table) {

		StringBuilder input = new StringBuilder();
		try {

			System.err.print("\n\n\n> ");

			while(true) {

				int next = System.in.read();

				// read() returns -1 at end of input; stop instead of appending it forever.
				if(next < 0) {
					String pending = input.toString().trim();
					if(pending.length() > 0) {
						System.err.println("");
						execute(table, pending);
					}
					return;
				}

				char c = (char)next;
				if(c == '\n') {
					System.err.println("");
					execute(table, input.toString().trim());
					input.setLength(0);
					System.err.print("\n> ");
				}
				else input.append(c);

			}

		} catch(java.io.IOException e) { System.err.println("Error reading input"); }

	}

	/**
	 * Executes a single command line. Malformed numbers and regular expressions are reported
	 * on standard error instead of terminating the session.
	 *
	 * @param table the table to query
	 * @param input the trimmed command line; an empty line prints the help text
	 */
	private static void execute(Table table, String input) {

		// Split on runs of whitespace so repeated spaces do not produce empty arguments.
		String[] args = input.split("\\s+");
		String command = args[0];

		try {

			if(command.equalsIgnoreCase("q")) {

				System.exit(0);

			}
			// Any word starting with "s" selects sampling; kept as in the original so existing habits still work.
			else if(command.startsWith("s")) {

				if(args.length >= 3) {
					int seed = Integer.parseInt(args[1]);
					int n = Integer.parseInt(args[2]);
					Vector<FieldExpression> expressions = extractFieldExpressions(args);
					sample(System.out, table.filter(expressions), seed, n);
				}
				else System.err.println("s seed count");

			}
			else if(command.equals("w")) {

				// Default args
				String sort = "count";
				boolean expand = false;
				Vector<FieldExpression> expressions = new Vector<FieldExpression>();

				for(int i = 1; i < args.length; i++) {

					String arg = args[i];
					FieldExpression expression = null;
					if(arg.equals("+")) expand = true;
					else if(arg.equals("-")) expand = false;
					else if(arg.matches("count|tag|word")) sort = arg;
					else if((expression = parseFieldExpression(arg)) != null) {
						expressions.add(expression);
						System.err.println("Parsing " + arg);
					}
					else System.err.println("Don't understand " + arg);

				}

				Table filtered = table.filter(expressions);
				wordQuery(filtered, expressions, sort, expand);

			}
			else if(command.equals("f")) {

				if(args.length >= 2)
					frequency(System.out, table.filter(extractFieldExpressions(args)), args[1]);
				else System.err.println("f only takes one argument, the field name you want.");

			}
			else if(command.equals("c")) {

				System.err.println(table.getCommaSeparatedColumns());

			}
			else if(command.equals("r")) {

				// The argument is a regex wrapped in one delimiter character on each side
				// (normally double quotes), so it must be at least two characters long.
				if(args.length < 2 || args[1].length() < 2) {
					System.err.println("r \"regex\"");
				}
				else {

					String regex = args[1].substring(1, args[1].length() - 1);
					Pattern pattern = Pattern.compile(regex);

					for(Row row : table) {

						Matcher matcher = pattern.matcher(row.getSentence().toTagSequence());
						if(matcher.matches()) {
							// group(1) only exists when the regex declares a capturing group.
							if(matcher.groupCount() >= 1)
								System.err.println("Group is " + matcher.group(1));
							System.out.println(row.getSentence().toUntaggedString());
						}

					}

				}

			}
			else if(command.equals("g")) {

				Table matches = table.filter(extractFieldExpressions(args));
				for(Row row : matches)
					System.out.println(row.getSentence().toUntaggedString());

			}
			else if(command.equals("t")) {

				for(String[] tag : Tags.tags)
					System.out.println("" + tag[0] + "\t" + tag[1]);

			}
			else if(input.equals("") || input.matches("h|\\?|help")) {

				System.err.println("q                                          \t\tQuit this analysis program");
				System.err.println("c                                          \t\tShow the column names");
				System.err.println("w [field=\"regex\"]* [count|tag|word] [-|+]\t\tQuery words");
				System.err.println("s seed count [field=\"regex\"]*            \t\tSample [count] number of descriptions");
				System.err.println("f field [field=\"regex\"]*                 \t\tShow the frequency of values in [field]");
				System.err.println("r \"regex\"                                  \t\tPrint sentences whose tag sequence matches regex");
				System.err.println("g [field=\"regex\"]*                       \t\tGet sentences whose rows match...");
				System.err.println("t 					                     \t\tPrint the part of speech tags used...");

			}
			else System.err.println("Don't understand " + command + " (type h for help)");

		} catch(NumberFormatException e) {
			System.err.println("Expected a whole number (" + e.getMessage() + ")");
		} catch(PatternSyntaxException e) {
			// Raised by Pattern.compile above; presumably also by FieldExpression or
			// Table.filter if they compile the user's regex - not visible from here.
			System.err.println("Invalid regular expression (" + e.getMessage() + ")");
		}

	}

	/**
	 * Parses one argument of the form {@code field="regex"}.
	 *
	 * @param arg a single whitespace-free command argument
	 * @return the corresponding field expression, or null if the argument does not have
	 *         that form
	 */
	private static FieldExpression parseFieldExpression(String arg) {

		Matcher matcher = FIELD_EXPRESSION.matcher(arg);
		if(!matcher.matches()) return null;
		return new FieldExpression(matcher.group(1), matcher.group(2));

	}

	/**
	 * Collects every argument of the form {@code field="regex"}; all other arguments are
	 * ignored.
	 *
	 * @param args the arguments of a command, command name included
	 * @return the parsed field expressions, in argument order; possibly empty
	 */
	private static Vector<FieldExpression> extractFieldExpressions(String[] args) {

		Vector<FieldExpression> expressions = new Vector<FieldExpression>();
		for(String arg : args) {
			FieldExpression expression = parseFieldExpression(arg);
			if(expression != null) expressions.add(expression);
		}
		return expressions;

	}

	/**
	 * Prints the frequency of the "project" field and then prints {@code n} distinct randomly
	 * chosen sentences, cycling through the projects so each contributes in turn. Rows are
	 * drawn by rejection: random indices are generated until one is found that has not been
	 * printed yet and belongs to the project whose turn it is.
	 *
	 * <p>Nothing is sampled when {@code n} exceeds the number of rows or when no row has a
	 * project value. Projects that run out of unsampled rows are skipped, and sampling stops
	 * early once no project has rows left.
	 *
	 * @param stream where the frequency report, messages and sampled sentences are written
	 * @param table  the rows to sample from
	 * @param seed   seed for the random generator, so a sample can be reproduced
	 * @param n      number of sentences to sample
	 */
	public static void sample(PrintStream stream, Table table, int seed, int n) {

		Hashtable<String,Integer> projectTable = frequency(stream, table, "project");
		Vector<String> projects = new Vector<String>(projectTable.keySet());
		int rowCount = table.getRowCount();

		stream.println("Sampling " + n + " sentences from " + rowCount + " rows equally from...");

		if(n > rowCount) {
			stream.println("Can't sample more rows than there are rows in the table!");
			// Continuing would loop forever looking for rows that do not exist.
			return;
		}

		if(projects.isEmpty()) {
			if(n > 0) stream.println("No rows have a project value, so there is nothing to sample.");
			return;
		}

		// Unsampled rows left per project, so an exhausted project can be skipped
		// instead of being searched for endlessly.
		int[] remaining = new int[projects.size()];
		int available = 0;
		for(int p = 0; p < remaining.length; p++) {
			remaining[p] = projectTable.get(projects.get(p));
			available += remaining[p];
		}

		HashSet<Integer> rowsSampled = new HashSet<Integer>();
		Random generator = new Random(seed);
		int project = 0;
		while(n > 0) {

			if(available == 0) {
				stream.println("Only " + rowsSampled.size() + " rows could be sampled; no project has rows left.");
				break;
			}

			if(remaining[project] > 0) {

				String wanted = projects.get(project);
				int randomRow = generator.nextInt(rowCount);
				// Keep generating while we've already chosen the row or it's not the right project.
				// Comparing from the non-null project name tolerates rows without a project value.
				while(rowsSampled.contains(randomRow) || !wanted.equals(table.getRow(randomRow).getValueInColumnNamed("project")))
					randomRow = generator.nextInt(rowCount);
				rowsSampled.add(randomRow);
				stream.println(table.getRow(randomRow).getSentence().toUntaggedString());

				remaining[project]--;
				available--;
				n--;

			}

			// Pick the next project
			project = (project + 1) % projects.size();

		}

	}

	/**
	 * Runs a word query over every sentence of the table and prints the result, showing the
	 * query settings first and a progress bar while analyzing.
	 *
	 * @param table       the rows whose sentences are analyzed
	 * @param expressions the field expressions, echoed and handed to the {@code WordQuery}
	 * @param sortBy      sort order passed to the query result ("count", "tag" or "word")
	 * @param expand      passed to the query result; reported as expanding (true) or
	 *                    collapsing (false) sentence instances
	 */
	private static void wordQuery(Table table, Vector<FieldExpression> expressions, String sortBy, boolean expand) {

		System.out.println("Querying...");
		System.out.println("\t- " + (expand ? "expanding" : "collapsing") + " sentence instances");
		System.out.println("\t- sorting by " + sortBy);
		for(FieldExpression expression : expressions)
			System.out.println("\t- " + expression.fieldName + " must match " + expression.pattern);
		System.out.println("");

		WordQuery nouns = new WordQuery(expressions);
		int rowCount = table.getRowCount();
		// Rows per progress mark; zero for tables under 100 rows, which show no progress.
		int step = rowCount / 100;
		int i = 0;
		System.out.println("Analyzing " + rowCount + " sentences...");
		System.out.println("0%..............................................................................................100%");
		for(Row row : table) {

			i++;

			row.getSentence().analyze(nouns);
			if(step > 0 && i % step == 0)
				System.err.print(">");

		}

		System.out.println("");

		nouns.getResult(sortBy, expand).print();

	}

	/**
	 * Counts how often each value of a field occurs in the table and prints the counts.
	 * Rows whose value for the field is null are not counted.
	 *
	 * @param stream where the report is written
	 * @param table  the rows to count
	 * @param field  name of the column whose values are counted
	 * @return a map from each distinct non-null value to its number of occurrences
	 */
	private static Hashtable<String,Integer> frequency(PrintStream stream, Table table, String field) {

		// The initial capacity influences the table's iteration order, which in turn fixes
		// the project order used by sample(); keep it so seeded samples stay reproducible.
		Hashtable<String,Integer> freq = new Hashtable<String,Integer>(table.getRowCount());
		for(Row row : table) {

			String value = row.getValueInColumnNamed(field);
			if(value == null) continue;

			Integer count = freq.get(value);
			freq.put(value, count == null ? 1 : count + 1);

		}

		stream.println("");
		stream.println("" + table.getRowCount() + " rows");
		stream.println("" + freq.size() + " unique values for field " + field);
		for(String key : freq.keySet())
			stream.println("" + freq.get(key) + "\t\t" + key);

		return freq;

	}

}