#include "parse.hpp"
#include <ctype.h>
#include <sstream>

// Constructs a parser over an externally owned rule list.
// Only the address of myrules is stored, so the vector must outlive the parser.
ChartParser::ChartParser(vector<PRule*> &myrules)
  : rules(&myrules) {
}

// Destructor: performs no cleanup.
// NOTE(review): the set<string> objects that computeFirst() allocates with
// new and stores in curfirst are not released here -- confirm whether the
// parser is expected to own them before adding deletes.
ChartParser::~ChartParser() {
}

void ChartParser::init() {
  computeNonterms();
  computeFirst();
}

void ChartParser::computeNonterms() {
    int i;

    for (i = 0; i < (*rules).size(); i++) {
      if (nonterms.find((*rules)[i]->lhs) == nonterms.end()) {
	//cout << "Adding to nonterms: " << rules[i]->lhs << endl;
	nonterms.insert((*rules)[i]->lhs);
      }
    }
}


void ChartParser::computeFirst() {
  //typedef set<string> stringset;
  //map<string, stringset*> curfirst;
  map<string, int> setcount;
  set<string> *tmpset, *bset;
  set<string>::iterator setItr, tmpItr, bItr;
  int i;
  bool updated;

  //cout << "Computing First" << endl;
  // (1) For every A, Cur-First[A] = { A }
  for (setItr = nonterms.begin(); setItr != nonterms.end(); ++setItr) {
      tmpset = new set<string>;
      curfirst[*setItr] = tmpset;
      tmpset->insert(*setItr);
      //cout << "Tmpset size " << tmpset->size() << endl;
  }

  // (2) For every grammar rule A --> B ...,
  //     Cur-First[A] = Union( Cur-First[A], { B })
  for (i = 0; i < (*rules).size(); i++) {
    tmpset = curfirst[(*rules)[i]->lhs];
    //cout << "Nonterm " << (*rules)[i]->lhs << " Tmpset size " << tmpset->size() << endl;
    if (tmpset->find((*rules)[i]->rhs[0]) == tmpset->end()) {
      if (nonterms.find((*rules)[i]->rhs[0]) != nonterms.end()) {
	tmpset->insert((*rules)[i]->rhs[0]);
      }
    }
    }

    // (3) Updated = T
    //     While Updated do
    //       Updated = F
    //       For every A do
    //         Old-First[A] = Cur-First[A]
    //         For each B in Old-First[A] do
    //           Cur-First[A] = Union( Cur-First[A], Cur-First[B])
    //         If Cur-First[A] != Old-First[A]
    //           then Updated = T
    updated = true;
    while (updated) {
	updated = false;
	setcount.clear();
	//cout << "Looking at each NT" << endl;
	for (setItr = nonterms.begin(); setItr != nonterms.end(); ++setItr) {
	    tmpset = curfirst[*setItr];
	    setcount[*setItr] = tmpset->size();
	    for (tmpItr = tmpset->begin(); tmpItr != tmpset->end(); ++tmpItr) {
	      //cout << "Looking at A curfirst for " << *tmpItr << endl;
		bset = curfirst[*tmpItr];
		for (bItr = bset->begin(); bItr != bset->end(); ++bItr) {
		  //cout << "Looking at B curfirst for " << *bItr << endl;
		    if (tmpset->find(*bItr) == tmpset->end()) {
			tmpset->insert(*bItr);
		    }
		}
	    }
	    if (setcount[*setItr] != tmpset->size()) {
		updated = true;
	    }
	}
    }

    // (4) For every A do First[A] = Cur-First


     //Display results
    if (debug) {
      for (setItr = nonterms.begin(); setItr != nonterms.end(); ++setItr) {
	cout << "Non-term " << *setItr << endl;
	tmpset = curfirst[*setItr];
	for (tmpItr = tmpset->begin(); tmpItr != tmpset->end(); ++tmpItr) {
	  cout << *tmpItr << endl;
	}
      }
    }
    
}


// Parses srcSent with an agenda-driven, bottom-up chart parser and returns
// the feature structures of every complete parse.
//
// The input is copied and upper-cased. Each character becomes a terminal
// constituent; spaces that directly follow a character are folded into that
// character's span. Every constituent taken from the agenda (the "key") may
// start new arcs for rules whose first right-hand-side symbol matches it and
// extends active arcs that end where the key starts. When an arc covers a
// rule's whole right-hand side, the rule's eb is run over the collected
// feature structures and, on success, the rule's left-hand side is added to
// the agenda as a new constituent.
//
// Parameters:
//   srcSent - NUL-terminated input; it is not modified. A null pointer is
//             treated as unparsable.
//
// Returns: fss[0] of each constituent of type "START" that spans the whole
//   input, one per line, or the string "no parses found".
//
// All constituents and arcs created during the parse are released before
// returning.
string ChartParser::parse(char* srcSent) {
    int sentence_length = 0;
    int wordindex = 0;
    int parseFound = 0;
    //int existingConstituent = -1;
    //int constIndex;
    int rhssize, rhsindex, arcindex;
    int i, j, m;
    //vector<string> sentwords;
    vector<int> parses;
    stringstream fsstream;
    string results;

    //char seps[]   = " ,;\t\n";
    //char *token;
    FSRegisters fsRegs;
    vector<Constituent*> constituents;
    Constituent* tmpconst;
    vector<Arc*> arcs;
    Arc* tmparc;
    bool fsduplicate = false;
    bool addarc;
    set<string> *firstset;
    int key;

    // Nothing can be parsed from a null sentence
    if (!srcSent) {
      return "no parses found";
    }

    // Work on a private copy so the caller's buffer is left untouched; a
    // string releases its storage on every exit path
    string inputSent(srcSent);

    // NOTE(review): this forces debug tracing on for every call and leaves
    // the member set afterwards -- it looks like a debugging leftover, but it
    // is kept because removing it changes what is written to cout.
    debug = true;

    // Change to all upper-case.
    // toupper() is only defined for values representable as unsigned char
    // (or EOF), so plain char must be converted first.
    for (string::size_type pos = 0; pos < inputSent.size(); pos++) {
      inputSent[pos] = static_cast<char>(toupper(static_cast<unsigned char>(inputSent[pos])));
    }

    // Tokenize and get word count
    /*
    if (debug) cout << "Tokenize start " << "\"" << inputSent << "\"" << "\n";
    token = strtok( inputSent, seps );
    sentence_length = 1;
    sentwords.push_back((string)token);

    while( token != NULL ) {
      token = strtok( NULL, seps );
      if (token != NULL) {
        sentence_length++;
        sentwords.push_back((string)token);
      }
    }
    */

    sentence_length = static_cast<int>(inputSent.size());
    const int numRules = static_cast<int>(rules->size());

    // Main Loop, go till all letters are visited
    key = 0;
    while (wordindex < sentence_length) {
      if (debug) cout << "\nword index " << wordindex << " word " << inputSent[wordindex] << "\n";

      // If agenda is empty, increment counter and add letter/glyph at wordindex to agenda

      if (key == static_cast<int>(constituents.size()) && wordindex < sentence_length) {
        // Add letter as its own constituent
        if (debug) cout << "Adding info for " << inputSent[wordindex] << " at " << wordindex << "\n";
        tmpconst = new Constituent;
        tmpconst->type = inputSent[wordindex];
        tmpconst->nt = false;
        tmpconst->start = wordindex;
        // Skip spaces: they are absorbed into the span of this letter
        while (wordindex+1 < sentence_length && inputSent[wordindex+1] == ' ') {
          wordindex++;
        }
        tmpconst->end = wordindex+1;
        tmpconst->status = tmpconst->AGENDA;
        constituents.push_back(tmpconst);
      } else {
        break;
      }


      // Pop keys from agenda until none are left
      while (key < static_cast<int>(constituents.size())) { // && !parseFound) {
        // Pick a key constituent from the agenda
        if (debug) cout << "\nNow selecting a key " << key << " type " << constituents[key]->type << " start " << constituents[key]->start
                        << " end " << constituents[key]->end << "\n";

        constituents[key]->status = constituents[key]->KEY;

        // Add rules starting with key to list of active arcs
        //if (debug) cout << "Number of rules is " << rules->size() << endl;
        for (i = 0; i < numRules; i++) {
          //if (debug) cout << "Looking at rule " << i << " with rhs[0] of " << (*rules)[i]->rhs[0] << " " << (*rules)[i]->nt[0] << " " << constituents[key]->nt << endl;
          if ((*rules)[i]->rhs[0] == constituents[key]->type &&
              (*rules)[i]->nt[0] == constituents[key]->nt) {

            // Add arc only if LHS is predicted

            addarc = false;
            if (constituents[key]->start == 0) {
              // operator[] yields a null pointer when the grammar has no
              // START rules; treat that as "nothing predicted"
              firstset = curfirst[string("START")];
              if (firstset && firstset->find((*rules)[i]->lhs) != firstset->end()) {
                addarc = true;
              }
            } else if (constituents[key]->nt == false) {
              addarc = true;
            } else {
              for (m = 0; m < static_cast<int>(arcs.size()); m++) {
                // Test ACTIVE first: a completed arc has its dot one past the
                // last right-hand-side symbol, so indexing the rule at
                // dot_position is only valid for active arcs
                if (arcs[m]->status == arcs[m]->ACTIVE &&
                    arcs[m]->end == constituents[key]->start &&
                    constituents[key]->nt == true &&
                    (*rules)[arcs[m]->rule]->nt[arcs[m]->dot_position] == true) {
                  if (debug) cout << "Getting firstset for rule " << arcs[m]->rule << " and type " << (*rules)[arcs[m]->rule]->rhs[arcs[m]->dot_position] << endl;
                  // A nonterminal that never appears as a left-hand side has
                  // no first set; skip it instead of dereferencing null
                  firstset = curfirst[(*rules)[arcs[m]->rule]->rhs[arcs[m]->dot_position]];
                  if (firstset && firstset->find((*rules)[i]->lhs) != firstset->end()) {
                    addarc = true;
                    break;
                  }
                }
              }
            }

            // Check if arc already in arcs

            for (arcindex = 0; arcindex < static_cast<int>(arcs.size()); arcindex++) {
              if (arcs[arcindex]->rule == i &&
                  arcs[arcindex]->start == constituents[key]->start &&
                  arcs[arcindex]->end == constituents[key]->start &&
                  arcs[arcindex]->dot_position == 0 &&
                  arcs[arcindex]->status == arcs[arcindex]->ACTIVE) {
                addarc = false;
                if (debug) cout << "Rejecting new arc " << arcs.size() << " from rule " << i << " with rhs[0] of " << (*rules)[i]->rhs[0] << " " << (*rules)[i]->nt[0] << " " << constituents[key]->nt << endl;
              }
            }

            if (addarc == false) continue;


            if (debug) cout << "Adding arc " << arcs.size() << " from rule " << i << " with rhs[0] of " << (*rules)[i]->rhs[0] << " " << (*rules)[i]->nt[0] << " " << constituents[key]->nt << endl;
            // A fresh arc is empty: it starts and ends at the key's start
            // and is extended by the loop below
            tmparc = new Arc;
            tmparc->rule = i;
            tmparc->start = constituents[key]->start;
            tmparc->end = constituents[key]->start;
            tmparc->dot_position = 0;
            tmparc->status = tmparc->ACTIVE;
            arcs.push_back(tmparc);
          }
        }

        // For each active arc, look for ones to extend with key
        // Add completed active arcs into agenda
        // (arcs grows while this loop runs; the size is re-read on purpose)
        for (j = 0; j < static_cast<int>(arcs.size()); j++) {
          //if (debug) cout << "Looking at arc j " << j << " of " << arcs.size() << "\n";

          if (arcs[j]->status == arcs[j]->ACTIVE &&
              (*rules)[arcs[j]->rule]->rhs[arcs[j]->dot_position] == constituents[key]->type &&
              (*rules)[arcs[j]->rule]->nt[arcs[j]->dot_position] == constituents[key]->nt &&
              arcs[j]->end == constituents[key]->start ) {

            if (debug) cout << "Checking if arc " << j << " completed: arcs dot+1 " << arcs[j]->dot_position+1 << " rule "
                            << arcs[j]->rule << " rhs_total " << (*rules)[arcs[j]->rule]->rhs_total << endl;

            if (arcs[j]->dot_position+1 == (*rules)[arcs[j]->rule]->rhs_total) {

              // Do feature unification ... add LHS if succeeds
              // Register 0 is the LHS; register n holds the n-th RHS symbol
              rhssize = (*rules)[arcs[j]->rule]->rhs_total;
              if (debug) cout << "rhs size is " << rhssize << endl;
              fsRegs.resize(rhssize+1);
              fsRegs[0].clear();
              for (rhsindex = 0; rhsindex < rhssize-1; rhsindex++) {
                fsRegs[rhsindex+1].clear();
                if (constituents[arcs[j]->indices[rhsindex]]->nt == true) {
                  if (debug) cout << "Adding in fs for rhsindex " << rhsindex << " and constituent #" << arcs[j]->indices[rhsindex] << " " <<
                             constituents[arcs[j]->indices[rhsindex]]->type << endl << constituents[arcs[j]->indices[rhsindex]]->fss[0]
                             << endl;
                  fsRegs[rhsindex+1] = constituents[arcs[j]->indices[rhsindex]]->fss[0];
                }
              }
              // The last RHS slot is filled by the key itself
              fsRegs[rhsindex+1].clear();
              if (debug) cout << "Setting key fs register " << endl;
              if (constituents[key]->nt == true) {
                if (debug) cout << "Adding in fs for rhsindex " << rhsindex << " and key constituent #" << key << " " <<
                           constituents[key]->type << endl << constituents[key]->fss[0] << endl;
                fsRegs[rhsindex+1] = constituents[key]->fss[0];
              }

              if (debug) {
                cout << "Running the rule " << arcs[j]->rule << endl;
                cout << "(<" << (*rules)[arcs[j]->rule]->lhs << "> <-- (";
                for (rhsindex = 0; rhsindex < rhssize; rhsindex++) {
                  if ((*rules)[arcs[j]->rule]->nt[rhsindex]) {
                    cout << "<" << (*rules)[arcs[j]->rule]->rhs[rhsindex] << ">";
                  } else {
                    cout << (*rules)[arcs[j]->rule]->rhs[rhsindex];
                  }
                  if (rhsindex+1 < rhssize) {
                    cout << " ";
                  } else {
                    cout << ")";
                  }
                }
                cout << endl << (*rules)[arcs[j]->rule]->eb << endl << ")" << endl;
              }
              if (!(*rules)[arcs[j]->rule]->eb.run(fsRegs)) {
                if (debug) cout << "Unification failed" << endl;
                continue;
              } else {
                if (debug) cout << "Unification succeeded" << endl << fsRegs[0] << endl;
              }


              // Add LHS of completed active arc into agenda
              if (debug) cout << "Completing an arc " << j << "\n";
              tmparc = new Arc;
              tmparc->dot_position = arcs[j]->dot_position+1;
              tmparc->start = arcs[j]->start;
              tmparc->end = constituents[key]->end;
              tmparc->status = arcs[j]->COMPLETED;
              tmparc->rule = arcs[j]->rule;
              tmparc->indices = arcs[j]->indices;
              if (debug) cout << "Adding arc " << arcs.size() << " of type " << (*rules)[arcs[j]->rule]->lhs << "\n";
              tmparc->indices.push_back(key);
              arcs.push_back(tmparc);

              //cout << "Constructing new constituent\n";
              // Check first if matching constituent already exists
              // If so, just add pointer to new arc, otherwise create new constituent
              /*
              for (constIndex = 0; constIndex < constituents.size(); constIndex++) {
                if (constituents[constIndex]->start == arcs[j]->start &&
                    constituents[constIndex]->end == constituents[key]->end &&
                    constituents[constIndex]->type == (*rules)[arcs[j]->rule]->lhs) {
                  break;
                }
              }


              if (constIndex < constituents.size()) {
                constituents[constIndex]->arcs.push_back(arcs.size()-1);
                } else { */
              if (debug) cout << "Adding constituent of type " << (*rules)[arcs[j]->rule]->lhs << " index " << constituents.size()<< "\n";
              tmpconst = new Constituent;
              tmpconst->type = (*rules)[arcs[j]->rule]->lhs;
              tmpconst->nt = true;
              tmpconst->start = arcs[j]->start;
              tmpconst->end = constituents[key]->end;
              tmpconst->arcs.push_back(arcs.size()-1);
              tmpconst->fss.push_back(fsRegs[0]);
              constituents.push_back(tmpconst);
              //}
              // Check if constituent already in agenda

              // Add lhs of newly completed rule onto agenda
              //cout << "Adding in new constituent\n";
              //cout << "Added in new constituent\n";
            } else {
              // Add previously unseen updated arcs back onto active arcs
              if (debug) cout << "Add previously unseen update from arc " << j << " and rule " << arcs[j]->rule << " onto active arcs\n";
              tmparc = new Arc;
              tmparc->rule = arcs[j]->rule;
              tmparc->status = arcs[j]->ACTIVE;
              tmparc->start = arcs[j]->start;
              tmparc->end = constituents[key]->end;
              tmparc->dot_position = arcs[j]->dot_position+1;
              tmparc->indices = arcs[j]->indices;
              tmparc->indices.push_back(key);

              // Add arc
              arcs.push_back(tmparc);
            }
          }
        }

        if (debug) cout << "Setting status\n";
        constituents[key]->status = constituents[key]->CHART;

        if (debug) cout << "KEY " << constituents[key]->type << " " << constituents[key]->start << " " << constituents[key]->end << " "
                        << sentence_length << "\n";

        // Check to see if a complete parse was found
        if (constituents[key]->type == "START" &&
            constituents[key]->start == 0 &&
            constituents[key]->end == sentence_length) {
          if (debug) cout << "***** Parse found! *****\n";
          parses.push_back(key);
          parseFound = 1;
        }

        key++;
      }

      wordindex++;
    }

    // If parse found
    if (parseFound == 1) {
      // Walk the parse?
      /*
      deque<int> arcwalker;
      int tmpindex, ruleindex;
      string srcnt, srcword;

      if (debug) cout << "\nLook at parses\n";
      for (i = 0; i < parses.size(); i++) {

        // Bread-first traversal
                cout << "\nBreadth-first traversal " << i << "\n";
        arcwalker.push_back(parses[i]);
        while (!arcwalker.empty()) {
          tmpindex = arcwalker[0];
          arcwalker.pop_front();
          if (constituents[tmpindex]->arcs.size() == 0) {
            cout << "Leaf " << constituents[tmpindex]->type << "\n";
            continue;
          }
          tmparc = arcs[constituents[tmpindex]->arcs[0]];
          cout << "popping " << constituents[tmpindex]->type << "\n";

          for (j = 0; j < tmparc->indices.size(); j++) {
            arcwalker.push_back(tmparc->indices[j]);
          }
          }

        // Depth-first traversal
        if (debug) cout << "\nDepth-first traversal of Source Syntax Tree " << i << "\n";
        deque<string> treespacing;
        treespacing.push_front("");
        string tmpspace;

        arcwalker.push_front(parses[i]);
        while (!arcwalker.empty()) {
          tmpindex = arcwalker[0];
          arcwalker.pop_front();
          if (constituents[tmpindex]->arcs.size() == 0) {
            cout << treespacing[0] << constituents[tmpindex]->type << "\n";
            treespacing.pop_front();
            continue;
          }
          tmparc = arcs[constituents[tmpindex]->arcs[0]];
          tmpspace = treespacing[0] + "   ";
          cout << treespacing[0] << constituents[tmpindex]->type << "\n";
          treespacing.pop_front();

          for (j = tmparc->indices.size()-1; j >= 0; j--) {
            arcwalker.push_front(tmparc->indices[j]);
            treespacing.push_front(tmpspace);
          }
        }

        debug = false;
        // Perform Transfer
        cout << "\nTransfer " << i << "\n";

        string targetsent;

        arcwalker.push_front(parses[i]);
        while (!arcwalker.empty()) {
          tmpindex = arcwalker[0];
          arcwalker.pop_front();

          // Leaf nodes

          if (constituents[tmpindex]->arcs.size() == 0) {
              // Do lexical transfer
              if (constituents[tmpindex]->status == constituents[tmpindex]->INSERT) {
                targetsent.append(constituents[tmpindex]->type + " ");
                continue;
              }

              srcnt = constituents[tmpindex]->type;
              srcword = sentwords[constituents[tmpindex]->start];
               if (debug) cout << "Doing lexical transfer from " << srcnt << " to " << srcword << " Status "
                               << constituents[tmpindex]->status << "\n";


              for (ruleindex = 0; ruleindex < rules->size(); ruleindex++) {
                  if (debug) cout << "LHS " << (*rules)[ruleindex]->lhs << " WORD " << (*rules)[ruleindex]->rhs[0] << "\n";
                if ((*rules)[ruleindex]->lhs == srcnt &&
                    (*rules)[ruleindex]->rhs[0] == srcword) {
                  //targetsent.append((*rules)[ruleindex]->tgt_rhs[0] + " "); // << "tgt word: ";
                  break;
                }
              }
              continue;
          }

          // Non-terminals
          tmparc = arcs[constituents[tmpindex]->arcs[0]];
          // Search for appropriate matching transfer rule

          //cout << "Transfer " << constituents[tmpindex]->type << "\n";

          //cout << "PRule # " << tmparc->rule << " of " << rules->size() << "\n";

          for (j = (*rules)[tmparc->rule]->tgt_rhs_total - 1; j>=0; j--) {
            if ((*rules)[tmparc->rule]->yxalign[j] != -1) {
              if (debug) cout << "Arc transfer\n";
              arcwalker.push_front(tmparc->indices[(*rules)[tmparc->rule]->yxalign[j]]);
            } else {
              // Insert new word
              if (debug) cout << "Insert word\n";
              tmpconst = new Constituent;
              tmpconst->nt = true;
              tmpconst->type = (*rules)[tmparc->rule]->tgt_rhs[j];  // lexicon[i]->pos[j];
              tmpconst->start = -1;
              tmpconst->end = -1;
              tmpconst->status = tmpconst->INSERT;
              constituents.push_back(tmpconst);
              arcwalker.push_front(constituents.size()-1);
            }

          }


        }
        targetsent[0] = toupper(targetsent[0]);
        targetsent[targetsent.size() -1] = '.';
        cout << targetsent << endl;

        }*/
      if (debug) cout << "Total parses " << parses.size() << endl;
      for (i = 0; i < static_cast<int>(parses.size()); i++) {
        fsduplicate = false;
        /*for (j = i+1; j < parses.size(); j++) {
          if (constituents[parses[i]]->fss[0] ==
              constituents[parses[j]]->fss[0]) {
            fsduplicate = true;
          }
          }*/
        if (!fsduplicate) {
          fsstream << constituents[parses[i]]->fss[0] << endl;
        }
      }
      results = fsstream.str();
    } else {
      results = "no parses found";
    }

    // The chart is local to this call and the result text has already been
    // copied out, so release every constituent and arc created above
    for (vector<Constituent*>::iterator c = constituents.begin(); c != constituents.end(); ++c) {
      delete *c;
    }
    for (vector<Arc*>::iterator a = arcs.begin(); a != arcs.end(); ++a) {
      delete *a;
    }

    return results;
}
