package lexicon.analyse;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.StringTokenizer;

import javax.xml.bind.JAXBException;

import lexicon.dbUtils.InflectRecord;
import lexicon.dbUtils.PrefixRecord;
import lexicon.utils.Translate;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import corpus.CreateCorpusXML;

/**
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GPL
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.
 * 
 * This class is the main class of the morphological analyzer <br>
 * For running it follow the listed instructions: <br>
 * arguments to the pc program : pc inputFile outputFile arguments to the UNIX
 * program : No arguments, the input and output can be redirected <br>
 * See analyzer.properties as an example. <br>
 * The input file to be analyzed must be saved as UTF-8 file (you can use
 * notepad for creating it) it also must be a tokenized file <br>
 * run the program by : java -jar analyzer.jar <br>
 * <p>
 * Main Flow: <br>
 * load the inflections. For each line from the input file { <br>
 * verify if the token is a number (create the relevant analysis) <br>
 * verify if the token is a punctuation mark (create the relevant analysis) <br>
 * get all the relevant analyses (query the inflections table and create the
 * relevant analyses) <br>
 * scan the token and verify whether relevant prefix + base combinations exist
 * <br>
 * (query the inflections table and the prefixes table) } <br>
 * <p>
 * 
 * @author dalia bojan
 * @version 1.0
 */
public class XMLMorphAnalyzer extends MorphAnalyzer implements Constants {

	/**
	 * Attempts to analyze <code>base</code> as a gematria value preceded by
	 * <code>prefix</code>, emitting one XML prefix analysis for every prefix
	 * record that matches.
	 * <p>
	 * <code>pos</code> is always set to "quantifier" and
	 * <code>outputPattern</code> to <code>GEMATRIA</code>, even when no
	 * gematria value is found. Lookup and output failures are reported on
	 * standard output and do not abort the analysis.
	 * 
	 * @param base
	 *            the base looked up through <code>Data.getGimatrias</code>
	 * @param prefix
	 *            the prefix attached to the base; when it is longer than "h"
	 *            and ends with "h", that trailing "h" is stripped before the
	 *            prefix lookup and <code>baseDefinitnessi</code> is set to 2
	 * @return the current value of <code>foundBase</code>; it is set to
	 *         <code>true</code> here as soon as one analysis is attempted
	 */
	protected boolean analyzePrefixGimatriaAndInvertedCommas(String base,
			String prefix) {
		pos = "quantifier";
		outputPattern = GEMATRIA;
		int gimatriaVal = -1;
		try {
			gimatriaVal = Data.getGimatrias(base);
		} catch (Exception e2) {
			System.out
					.println("XMLAnalyzer:analyzePrefixGimatriaAndInvertedCommas Exception occured while getting Gimatria for base="
							+ base + " and prefix=" + prefix);
			e2.printStackTrace();
		}
		// -1 is treated as "no entry found"; it is also the value that remains
		// when the lookup above failed
		if (gimatriaVal == -1) {
			return foundBase;
		}

		if (prefix.equals("h")) {
			// a bare "h" prefix is emitted without any prefix record
			foundBase = true;
			pr = null;
			try {
				TextOutput.buildXMLPrefixOutput(pr, outputPattern,
						outputInflectionRec, createXML, suffixFunctioni,
						baseDefinitnessi, gimatriaVal, base);
			} catch (Exception e1) {
				System.out
						.println("XMLAnalyzer:analyzePrefixGimatriaAndInvertedCommas Exception occured while buildXMLPrefixOutput for base="
								+ base + " and prefix=" + prefix);
				e1.printStackTrace();
			}
			return foundBase;
		}

		if (prefix.endsWith("h")) {
			// look the prefix up without its trailing "h"
			prefix = prefix.substring(0, prefix.length() - 1);
			baseDefinitnessi = 2;
		}
		int prefixListSize = 0;
		try {
			prefixListSize = Data.getPrefixes(prefix);
		} catch (Exception e1) {
			System.out
					.println("XMLAnalyzer:analyzePrefixGimatriaAndInvertedCommas Exception occured while getting prefixes for base="
							+ base + " and prefix=" + prefix);
			e1.printStackTrace();
		}
		for (int j = 0; j < prefixListSize; j++) {
			foundBase = true;
			// the record comes straight from the lookup result; allocating a
			// placeholder PrefixRecord first would only create garbage
			pr = Data.analyzePrefixList(j);
			getPrefixesAttributes();
			try {
				TextOutput.buildXMLPrefixOutput(pr, outputPattern,
						outputInflectionRec, createXML, suffixFunctioni,
						baseDefinitnessi, gimatriaVal, base);
			} catch (Exception e) {
				System.out
						.println("XMLAnalyzer:analyzePrefixGimatriaAndInvertedCommas Exception occured while buildXMLPrefixOutput for base="
								+ base + " and prefix=" + prefix);
				e.printStackTrace();
			}
		}
		return foundBase;
	}

	/**
	 * Analyzes a token that consists of a prefix only and emits one XML
	 * prefix analysis per matching prefix record.
	 * <p>
	 * Side effects: <code>baseDefinitnessi</code> is reset to 1 (and to 0
	 * when a trailing "h" is stripped), <code>hebWord</code> is overwritten
	 * with the prefix text, and <code>outputPattern</code> is set to
	 * <code>PREFIXES</code> whenever an analysis is emitted. Failures are
	 * reported on standard output and do not propagate.
	 * 
	 * @param hebPrefix
	 *            the prefix text; everything up to and including the first
	 *            "=" (if any) is discarded before the analysis
	 */
	protected void handlePrefix(String hebPrefix) {
		int prefixListSize = 0;
		baseDefinitnessi = 1;

		// indexOf returns -1 when there is no "=", so substring(0) keeps the
		// whole text in that case
		int equallIndex = hebPrefix.indexOf("=");
		hebPrefix = hebWord = hebPrefix.substring(equallIndex + 1);
		String prefix = Translate.Heb2Eng(hebPrefix);
		if (prefix.equals("h")) {
			// a bare "h" is emitted without any prefix record
			try {
				outputPattern = PREFIXES;
				TextOutput.buildXMLPrefixOutput(null, outputPattern,
						outputInflectionRec, createXML, suffixFunctioni,
						baseDefinitnessi, -1, hebPrefix);
			} catch (Exception e) {
				System.out
						.println("XMLAnalyzer:handlePrefix Exception occured while buildXMLPrefixOutput for prefix="
								+ prefix);
				e.printStackTrace();
			}
			return;
		}

		if (prefix.endsWith("h")) {
			// the prefix list is searched without the trailing "h"; the
			// emitted surface (hebPrefix) still carries it
			prefix = prefix.substring(0, prefix.length() - 1);
			baseDefinitnessi = 0;
		}
		try {
			prefixListSize = Data.getPrefixes(prefix);
		} catch (Exception e1) {
			System.out
					.println("XMLAnalyzer:handlePrefix Exception occured while getting prefixes for prefix="
							+ prefix);
			e1.printStackTrace();
		}
		for (int j = 0; j < prefixListSize; j++) {
			outputPattern = PREFIXES;
			// taken directly from the lookup result; no placeholder instance
			// is needed
			pr = Data.analyzePrefixList(j);
			getPrefixesAttributes();
			try {
				TextOutput.buildXMLPrefixOutput(pr, outputPattern,
						outputInflectionRec, createXML, suffixFunctioni,
						baseDefinitnessi, -1, hebPrefix);
			} catch (Exception e) {
				System.out
						.println("XMLAnalyzer:handlePrefix Exception occured while buildXMLPrefixOutput for prefix="
								+ prefix);
				e.printStackTrace();
			}
		}
	}

	/**
	 * Analyzes the current <code>word</code> as a complete base, i.e. without
	 * splitting off any prefix.
	 * <p>
	 * Every inflection entry returned for <code>word</code> is loaded, its
	 * attributes are copied from <code>Data</code> into this analyzer, and an
	 * XML analysis is emitted when
	 * <code>validateByRulesWithoutPrefixes()</code> accepts it.
	 * <code>foundWord</code> is set when at least one entry exists and
	 * <code>foundBase</code> when at least one entry passes validation.
	 * Failures are reported on standard output and do not propagate.
	 */
	protected void analyzeBaseNoPrefix() {
		int listSize = 0;
		try {
			listSize = Data.getInflections(word);
		} catch (Exception e) {
			System.out
					.println("XMLAnalyzer:analyzeBaseNoPrefix Exception occured while getting inflections for word="
							+ word);
			e.printStackTrace();
		}
		if (listSize > 0)
			foundWord = true;
		for (int i = 0; i < listSize; i++) {
			try {
				outputInflectionRec = Data.analyzeInflectionList(i, word);
			} catch (Exception e1) {
				System.out
						.println("XMLAnalyzer:analyzeBaseNoPrefix Exception occured while analyzeInflectionList for word="
								+ word);
				e1.printStackTrace();
			}
			// copy the attributes of the entry just analyzed into this
			// analyzer before validating it
			baseQuantifierTypei = Data.getBaseQuantifierTypei();
			constructi = Data.getConstructi();
			baseDefinitnessi = Data.baseDefinitnessi;
			basePronounTypei = Data.basePronounTypei;
			suffixFunctioni = Data.getSuffixFunctioni();
			binyani = Data.getBinyani();
			tensei = Data.getTensei();
			outputPattern = Data.getOutputPattern();
			pos = Data.getPos();
			posi = Data.getPosi();
			scripti = Data.getScripti();
			if (validateByRulesWithoutPrefixes()) {
				foundBase = true;
				try {
					TextOutput.buildXMLOutput(Data.getOutputPattern(),
							outputInflectionRec, hebWord, constructi,
							suffixFunctioni, createXML);
				} catch (UnsupportedEncodingException e2) {
					System.out
							.println("XMLAnalyzer:analyzeBaseNoPrefix UnsupportedEncodingException occured while buildXMLOutput for word="
									+ word);
					e2.printStackTrace();
				} catch (JAXBException e2) {
					System.out
							.println("XMLAnalyzer:analyzeBaseNoPrefix JAXBException occured while buildXMLOutput for word="
									+ word);
					e2.printStackTrace();
				}
			}
		}
	}

	/**
	 * Runs the inherited URL check and, when the current token is a URL,
	 * emits a simple XML analysis for it.
	 * <p>
	 * Failures of either step are reported on standard output, with the
	 * token that triggered them, and do not propagate.
	 * 
	 * @return <code>true</code> when the inherited check classified the
	 *         token as a URL, <code>false</code> otherwise or when the check
	 *         itself failed
	 */
	protected boolean analyzeURL() {
		boolean isURL = false;
		try {
			isURL = super.analyzeURL();
		} catch (Exception e) {
			// report the offending token, as the sibling analyze* methods do
			System.out
					.println("XMLAnalyzer:analyzeURL Exception occured  for hebWord="
							+ hebWord);
			e.printStackTrace();
		}
		if (isURL) {
			try {
				TextOutput.buildSimpleXMLAnalysis(outputPattern, createXML,
						hebWord, -1);
			} catch (JAXBException e1) {
				System.out
						.println("XMLAnalyzer:analyzeURL JAXBException occured  for hebWord="
								+ hebWord);
				e1.printStackTrace();
			}
		}
		return isURL;
	}

	/**
	 * Runs the inherited foreign-token check and, when it succeeds, emits a
	 * simple XML analysis for the current token. Failures are reported on
	 * standard output and do not propagate.
	 * 
	 * @return <code>true</code> when the inherited check classified the
	 *         token as foreign, <code>false</code> otherwise or when the
	 *         check itself failed
	 */
	protected boolean analyzeForeign() {
		boolean foreignToken;
		try {
			foreignToken = super.analyzeForeign();
		} catch (Exception checkFailure) {
			foreignToken = false;
			System.out
					.println("XMLAnalyzer:analyzeForeign Exception occured  for hebWord="
							+ hebWord);
			checkFailure.printStackTrace();
		}
		if (!foreignToken) {
			return false;
		}
		try {
			TextOutput.buildSimpleXMLAnalysis(outputPattern, createXML,
					hebWord, -1);
		} catch (JAXBException outputFailure) {
			System.out
					.println("XMLAnalyzer:analyzeForeign JAXBException occured  for hebWord="
							+ hebWord);
			outputFailure.printStackTrace();
		}
		return true;
	}

	/**
	 * Runs the inherited number check and, when it succeeds, emits a simple
	 * XML analysis for the current token. Failures are reported on standard
	 * output and do not propagate.
	 * 
	 * @return <code>true</code> when the inherited check classified the
	 *         token as a number, <code>false</code> otherwise or when the
	 *         check itself failed
	 */
	protected boolean analyzeNumbers() {
		boolean numericToken;
		try {
			numericToken = super.analyzeNumbers();
		} catch (Exception checkFailure) {
			numericToken = false;
			System.out
					.println("XMLAnalyzer:analyzeNumbers Exception occured  for hebWord="
							+ hebWord);
			checkFailure.printStackTrace();
		}
		if (!numericToken) {
			return false;
		}
		try {
			TextOutput.buildSimpleXMLAnalysis(outputPattern, createXML,
					hebWord, -1);
		} catch (JAXBException outputFailure) {
			System.out
					.println("XMLAnalyzer:analyzeNumbers JAXBException occured  for hebWord="
							+ hebWord);
			outputFailure.printStackTrace();
		}
		return true;
	}

	/**
	 * Runs the inherited punctuation check and, when it succeeds, emits a
	 * simple XML analysis for the current token. Failures are reported on
	 * standard output and do not propagate.
	 * 
	 * @return <code>true</code> when the inherited check classified the
	 *         token as punctuation, <code>false</code> otherwise or when the
	 *         check itself failed
	 */
	protected boolean analyzePunctuations() {
		boolean punctuationToken;
		try {
			punctuationToken = super.analyzePunctuations();
		} catch (Exception checkFailure) {
			punctuationToken = false;
			System.out
					.println("XMLAnalyzer:analyzePunctuations Exception occured  for hebWord="
							+ hebWord);
			checkFailure.printStackTrace();
		}
		if (!punctuationToken) {
			return false;
		}
		try {
			TextOutput.buildSimpleXMLAnalysis(outputPattern, createXML,
					hebWord, -1);
		} catch (JAXBException outputFailure) {
			System.out
					.println("XMLAnalyzer:analyzePunctuations JAXBException occured  for hebWord="
							+ hebWord);
			outputFailure.printStackTrace();
		}
		return true;
	}

	/**
	 * Delegates to the inherited implementation without adding any XML
	 * output of its own.
	 * 
	 * @return whatever the inherited <code>noEntryInInflections()</code>
	 *         returns
	 */
	protected boolean noEntryInInflections() {
		return super.noEntryInInflections();
	}

	/**
	 * Analyzes a prefix + base split of the current word and emits one XML
	 * prefix analysis for every (inflection entry, prefix record) pair that
	 * <code>validateByRules()</code> accepts.
	 * <p>
	 * <code>foundWord</code> is set as soon as one pair is accepted. Lookup
	 * and output failures are reported on standard output and do not
	 * propagate.
	 * 
	 * @param base
	 *            the candidate base whose inflection entries are looked up
	 * @param prefix
	 *            the candidate prefix whose prefix records are looked up
	 */
	protected void analyzeBase(String base, String prefix) {
		int baseListSize = 0;
		try {
			baseListSize = Data.getInflections(base);
		} catch (Exception e) {
			System.out
					.println("XMLMorphAnalyzer:analyzeBase - Exception while getting inflections list for base = "
							+ base);
			e.printStackTrace();
		}
		for (int i = 0; i < baseListSize; i++) {
			try {
				outputInflectionRec = Data.analyzeInflectionList(i, word);
			} catch (Exception e1) {
				System.out
						.println("XMLMorphAnalyzer:analyzeBase - Exception while analyzeInflectionList for word="
								+ word);
				e1.printStackTrace();
			}
			// copy the attributes of the entry just analyzed into this
			// analyzer before the prefix records are validated against it
			baseQuantifierTypei = Data.getBaseQuantifierTypei();
			constructi = Data.getConstructi();
			baseDefinitnessi = Data.baseDefinitnessi;
			basePronounTypei = Data.basePronounTypei;
			binyani = Data.getBinyani();
			tensei = Data.getTensei();
			suffixFunctioni = Data.getSuffixFunctioni();
			outputPattern = Data.getOutputPattern();
			pos = Data.getPos();
			posi = Data.getPosi();
			// the prefix lookup is repeated per entry on purpose: Data holds
			// the lookup result as shared state that analyzePrefixList reads
			int prefixListSize = 0;
			try {
				prefixListSize = Data.getPrefixes(prefix);
			} catch (Exception e2) {
				System.out
						.println("XMLMorphAnalyzer:analyzeBase - Exception while getting prefixes list for word="
								+ word + " and prefix=" + prefix);
				e2.printStackTrace();
			}
			for (int j = 0; j < prefixListSize; j++) {
				// taken directly from the lookup result; no placeholder
				// instance is needed
				pr = Data.analyzePrefixList(j);
				if (!validateByRules()) {
					continue;
				}
				try {
					foundWord = true;

					// a base starting with w must have the w doubled when it
					// is accompanied by a prefix; a single w is colloquial
					// NOTE(review): the literal "coloquiall" is kept exactly
					// as it was - confirm the spelling consumers expect
					if (base.startsWith("ww"))
						outputInflectionRec.setScript("formal");
					else if (base.startsWith("w"))
						outputInflectionRec.setScript("coloquiall");
					TextOutput.buildXMLPrefixOutput(pr, outputPattern,
							outputInflectionRec, createXML, suffixFunctioni,
							baseDefinitnessi, -1, base);
				} catch (Exception e3) {
					System.out
							.println("XMLMorphAnalyzer:analyzeBase - Exception while buildXMLPrefixOutput for word="
									+ word + " and prefix=" + prefix);
					e3.printStackTrace();
				}
			}
		}
	}

	/**
	 * Walks a whole document by starting the element walk at its root
	 * element. Any exception raised on the way is reported on standard
	 * output and swallowed.
	 * 
	 * @param document
	 *            the parsed input document to walk
	 */
	public void treeWalk(Document document) {
		try {
			final Element root = document.getRootElement();
			treeWalk(root);
		} catch (Exception walkFailure) {
			System.out
					.println("XMLMorphAnalyzer:treeWalk(document) - Exception");
			walkFailure.printStackTrace();
		}
	}

	/**
	 * Recursively walks the child elements of <code>element</code> and
	 * mirrors the paragraph / sentence / token structure into the output
	 * document, analyzing every token on the way.
	 * <p>
	 * A token whose <code>surface</code> attribute starts with "prefix=" is
	 * handled by <code>handlePrefix</code>; every other token goes through
	 * <code>readInput()</code>. A token without a <code>surface</code>
	 * attribute is reported and skipped so that one malformed token does not
	 * abort the walk of the whole document.
	 * 
	 * @param element
	 *            the element whose children are walked
	 */
	public void treeWalk(Element element) {
		// both flags are local to this call, so they only track the direct
		// children of the element being walked
		boolean firstTimeSentence = true;
		boolean firstTimeParagraph = true;

		for (int i = 0, size = element.nodeCount(); i < size; i++) {
			Node node = element.node(i);
			if (!(node instanceof Element)) {
				continue;
			}
			Element child = (Element) node;
			String name = child.getName();
			if (name.equals("paragraph")) {
				if (firstTimeParagraph) {
					createXML.createParagraph();
					firstTimeParagraph = false;
				} else {
					// close what the previous paragraph left open
					createXML.finalizeSentence();
					createXML.finalizeParagraph();
					createXML.createParagraph();
				}
			} else if (name.equals("sentence")) {
				if (firstTimeSentence) {
					createXML.createSentence();
					firstTimeSentence = false;
				} else {
					createXML.finalizeSentence();
					createXML.createSentence();
				}
			} else if (name.equals("token")) {
				// attributeValue returns null when the attribute is missing
				String surface = child.attributeValue("surface");
				if (surface == null) {
					System.out
							.println("XMLMorphAnalyzer:treeWalk(element) - token without a surface attribute was skipped");
				} else {
					tokensCount++;
					hebWord = surface;
					if (hebWord.startsWith("prefix=")) {
						int equallIndex = hebWord.indexOf("=");
						String hebPrefix = hebWord.substring(equallIndex + 1);
						createXML.createToken(hebPrefix);
						handlePrefix(hebPrefix);
					} else {
						createXML.createToken(hebWord);
						try {
							readInput();
						} catch (IOException e1) {
							System.out
									.println("XMLMorphAnalyzer:treeWalk(element) - IOException while readInput");
							e1.printStackTrace();
						} catch (Exception e1) {
							System.out
									.println("XMLMorphAnalyzer:treeWalk(element) - Exception while readInput");
							e1.printStackTrace();
						}
					}
					createXML.finalizeToken();
				}
			}
			treeWalk(child);
		}
	}

	/**
	 * Parses the XML file named by <code>inputFile</code> and walks the
	 * resulting document. A parse failure is reported on standard output,
	 * together with the file name, and is not propagated.
	 */
	private void ReadXMLFile() {
		final SAXReader xmlReader = new SAXReader();
		final File xmlInput = new File(inputFile);
		try {
			treeWalk(xmlReader.read(xmlInput));
		} catch (DocumentException parseFailure) {
			System.out
					.println("XMLMorphAnalyzer:ReadXMLFile - An Errot occured while reading the input xml file");
			System.out.println("inputFile =" + inputFile);
			parseFailure.printStackTrace();
		}
	}

	/**
	 * File-based run: creates the output document for
	 * <code>outputFile</code>, analyzes the XML input file into it and prints
	 * the finished document.
	 */
	public void processXMLOutput() {
		// open the output document down to article level
		createXML = new CreateCorpusXML(outputFile);
		createXML.createXMLdOC();
		createXML.createArticle();

		// paragraphs, sentences and tokens are added while walking the input
		ReadXMLFile();

		// close whatever the walk left open, then write the result
		createXML.finalizeSentence();
		createXML.finalizeParagraph();
		createXML.printDoc();
	}

	/**
	 * Web interface variant: analyzes an already parsed (tokenized) document
	 * into a new output document. Unlike the no-argument variant it does not
	 * print the result; that is left to the caller.
	 * 
	 * @param document
	 *            the parsed input document
	 */
	public void processXMLOutput(Document document) {
		final Date startedAt = new Date();
		final DateFormat stampFormat = DateFormat.getDateTimeInstance(
				DateFormat.SHORT, DateFormat.SHORT);
		System.out.println("XMLAnalyzer: processXMLOutput Starts At"
				+ stampFormat.format(startedAt));

		createXML = new CreateCorpusXML(outputFile);
		try {
			createXML.createXMLdOC();
		} catch (Exception docFailure) {
			System.out
					.println("XMLMorphAnalyzer:processXMLOutput while createXMLdOC - Exception");
			docFailure.printStackTrace();
		}
		createXML.createArticle();

		try {
			treeWalk(document);
		} catch (Exception walkFailure) {
			System.out
					.println("XMLMorphAnalyzer:processXMLOutput while ReadXMLFile()- Exception");
			walkFailure.printStackTrace();
		}

		// close whatever the walk left open
		createXML.finalizeSentence();
		createXML.finalizeParagraph();
	}

	/**
	 * XSL web view: analyzes a single URL-encoded token and prints the
	 * resulting document to <code>pw</code> using the "output.xsl"
	 * stylesheet name.
	 * <p>
	 * NOTE(review): a "prefix=" token only has its marker stripped for the
	 * token element and is then analyzed through <code>readInput()</code>,
	 * whereas <code>treeWalk(Element)</code> routes such tokens to
	 * <code>handlePrefix</code> - confirm that this difference is intended.
	 * 
	 * @param pw
	 *            the writer the finished document is printed to
	 * @param inputSt
	 *            the token to analyze, URL-encoded as UTF-8
	 */
	public void analyzeStringInput(PrintWriter pw, String inputSt) {
		System.out.println("******************************");
		System.out.println("inputSt=" + inputSt);

		// an empty token is analyzed when decoding is not possible
		String decodedText = "";
		try {
			decodedText = URLDecoder.decode(inputSt, "UTF-8");
		} catch (UnsupportedEncodingException decodeFailure) {
			decodeFailure.printStackTrace();
		}

		Data.webFlag = true;
		Data.init();

		createXML = new CreateCorpusXML(outputFile);
		try {
			createXML.createXMLdOC();
		} catch (Exception docFailure) {
			System.out
					.println("XMLMorphAnalyzer:processXMLOutput while createXMLdOC - Exception");
			docFailure.printStackTrace();
		}
		createXML.createArticle();
		createXML.createParagraph();
		createXML.createSentence();

		tokensCount++;
		hebWord = decodedText;
		// the token element shows the text without the "prefix=" marker
		final String tokenText = hebWord.startsWith("prefix=") ? hebWord
				.substring(hebWord.indexOf("=") + 1) : hebWord;
		createXML.createToken(tokenText);

		try {
			readInput();
		} catch (Exception analysisFailure) {
			analysisFailure.printStackTrace();
		}

		createXML.finalizeToken();
		createXML.finalizeSentence();
		createXML.finalizeParagraph();
		createXML.printDoc(pw, "output.xsl");
	}

	/*
	 * UNIX interface - Files input/output
	 * 
	 * Loads the data files, analyzes the input file into the output file and
	 * terminates the JVM. The exit status is 0 only when the run completed;
	 * any failure that reaches this method ends the process with status 1 so
	 * that calling scripts can detect it.
	 * 
	 * @see lexicon.analyse.MorphAnalyzer#myAnalyzer()
	 */
	public void myAnalyzer() {
		// pessimistic default: only a run that reaches the end of the try
		// block is reported as successful
		int exitStatus = 1;
		try {
			long startTime = System.currentTimeMillis();
			Data.dinflectionsFile = dinflectionsFile;
			Data.dprefixesFile = dprefixesFile;
			Data.gimatriaFile = gimatriasFile;
			Data.webFlag = webFlag;
			Data.init();

			long afterLoadTime = printTimesHandling(startTime);

			// processXMLOutput() prints the finished document itself, so it
			// is not printed a second time here
			processXMLOutput();

			long elapsedTime = System.currentTimeMillis() - afterLoadTime;
			System.out.println("analyze elapsed time = " + elapsedTime + "ms");
			System.out.println("tokens count = " + tokensCount);

			exitStatus = 0;
		} catch (Exception e) {
			System.out
					.println("An error occured make sure you have tokenized the input file, if error still existes send the developer the input file");
			e.printStackTrace();
		} finally {
			System.exit(exitStatus);
		}
	}
	
	

	/*
	 * WEB Interface :
	 * http://yeda.cs.technion.ac.il:8088/XMLMorphologicalAnalyzer/XMLOutputAnalyzer.html
	 * tokenization + analyzing
	 * API for Meni's Adler Tagger
	 * 
	 * Analyzes an already parsed document and prints the result to pw.
	 * The web flag is derived from dinflectionsFile: it is true when no
	 * inflections file is given (null or empty) and false otherwise. Both
	 * MorphAnalyzer.webFlag and Data.webFlag receive that value, matching
	 * the other entry points, which keep the two flags equal.
	 */
	public void morphologicalAnalyzer(PrintWriter pw, int outputType,
			 Document document, String dinflectionsFile,
			String dprefixesFile, String gimatriasFile) {
		System.out
				.println("XMLAnalyzer: morphologicalAnalyzer Starting...........");
		MorphAnalyzer.outputType = outputType;
		Data.dinflectionsFile = dinflectionsFile;
		Data.dprefixesFile = dprefixesFile;
		Data.gimatriaFile = gimatriasFile;
		// this overload has no webFlag parameter, so the flag must be computed
		// here; assigning MorphAnalyzer.webFlag from "webFlag" would only copy
		// the field onto itself
		boolean noInflectionsFile = dinflectionsFile == null
				|| dinflectionsFile.equals("");
		MorphAnalyzer.webFlag = noInflectionsFile;
		Data.webFlag = noInflectionsFile;
		Data.init();
		/////////////////////////////////////////
		processXMLOutput(document);
		createXML.printDoc(pw);
	}

	/**
	 * Analyzes an already parsed document and prints the result to
	 * <code>pw</code> together with the given stylesheet name. The data file
	 * names are taken from this analyzer's own configuration.
	 * 
	 * @param pw
	 *            the writer the finished document is printed to
	 * @param outputType
	 *            stored in <code>MorphAnalyzer.outputType</code>
	 * @param webFlag
	 *            stored in both <code>MorphAnalyzer.webFlag</code> and
	 *            <code>Data.webFlag</code>
	 * @param document
	 *            the parsed input document
	 * @param xslFile
	 *            passed on to <code>printDoc</code>
	 */
	public void morphologicalAnalyzer(PrintWriter pw, int outputType,
			boolean webFlag, Document document, String xslFile) {
		System.out
				.println("XMLAnalyzer: morphologicalAnalyzer Starting...........");

		// analyzer-level settings
		MorphAnalyzer.outputType = outputType;
		MorphAnalyzer.webFlag = webFlag;

		// data-layer settings, then load
		Data.dinflectionsFile = dinflectionsFile;
		Data.dprefixesFile = dprefixesFile;
		Data.gimatriaFile = gimatriasFile;
		Data.webFlag = webFlag;
		Data.init();

		// analyze and print
		processXMLOutput(document);
		createXML.printDoc(pw, xslFile);
	}

	//	API for users to call from their application
	/**
	 * File-based entry point for client applications: analyzes
	 * <code>inputFile</code> into <code>outputFile</code> using the given
	 * data files.
	 * <p>
	 * The output type is fixed to 1 and the web flag to <code>false</code>.
	 * The finished document is printed exactly once, by
	 * <code>processXMLOutput()</code>.
	 * 
	 * @param inputFile
	 *            path of the XML input file
	 * @param outputFile
	 *            path the output document is created for
	 * @param dinflectionsFile
	 *            stored in <code>Data.dinflectionsFile</code>
	 * @param dprefixesFile
	 *            stored in <code>Data.dprefixesFile</code>
	 * @param gimatriasFile
	 *            stored in <code>Data.gimatriaFile</code>
	 */
	public void morphologicalAnalyzer(String inputFile, String outputFile,
			String dinflectionsFile, String dprefixesFile, String gimatriasFile) {
		MorphAnalyzer.inputFile = inputFile;
		System.out.println("inputFile= " + inputFile);
		MorphAnalyzer.outputFile = outputFile;
		System.out.println("outputFile= " + outputFile);
		outputType = 1;
		MorphAnalyzer.webFlag = false;
		Data.dinflectionsFile = dinflectionsFile;
		Data.dprefixesFile = dprefixesFile;
		Data.gimatriaFile = gimatriasFile;
		Data.webFlag = webFlag;
		Data.init();
		// processXMLOutput() ends by printing the document, so printing it
		// again here would only repeat the same output
		processXMLOutput();
	}

	/*
	 * (non-Javadoc)
	 * 
	 * Command-line entry point: creates an analyzer, selects output type 2,
	 * lets the analyzer read its settings from the arguments and runs it.
	 * 
	 * @see lexicon.analyse.MorphAnalyzer#main(java.lang.String[])
	 */
	public static void main(String[] args) {
		final XMLMorphAnalyzer analyzer = new XMLMorphAnalyzer();
		outputType = 2;
		analyzer.handleInputParameters(args);
		analyzer.myAnalyzer();
	}

	/*
	 * (non-Javadoc)
	 * 
	 * Emits a numeral analysis of type "gematria" carrying the given value;
	 * every other attribute of the analysis is passed as an empty string.
	 * 
	 * @see lexicon.analyse.MorphAnalyzer#buildGemetriaOutput(int)
	 */
	protected void buildGemetriaOutput(int value) {
		final String none = "";
		final String valueText = Integer.toString(value);
		createXML.createNumeralAnalysis(none, none, none, none, none, none,
				none, none, none, none, none, none, "gematria", valueText,
				none, none);
	}

}
