header {
// Project headers.
#include "grammar.hpp"
#include "gen.hpp"
// C library: atoi() is used by the parser actions, toupper() by the lexer actions.
#include <stdlib.h>
#include <ctype.h>
// Standard library: deque and string members of the parser, cout for debug traces.
// The standard <iostream> replaces the pre-standard <iostream.h>, which current
// compilers no longer provide.
#include <deque>
#include <string>
#include <iostream>

using namespace std;
using namespace Gen;
}

options { language="Cpp"; }


class GenKitParser extends Parser;
options {
    k = 8;              // tokens of lookahead available to the parser
    buildAST = false;   // no AST is built; the actions fill the Grammar directly
}


{
  // Grammar that receives the parsed rules; supplied through setGrammar().
  Grammar *g;

  // ---- Scratch state for the production being parsed ----
  string leftnt;          // text of the left-hand non-terminal
  Grammar::RHS rhs;       // right-hand side literals collected by rightside
  RuleBody *rulePtr;      // address of the rule returned by Grammar::addRule

  // ---- Scratch state for the equation being parsed ----
  Path lPath;             // path tokens collected while currentside is LEFTSIDE
  Path rPath;             // path tokens collected while currentside is RIGHTSIDE
  Path optPath;           // feature path inside a quoted feature structure
  EBlockCase *ebCasePtr;  // temporaries used while closing nested blocks
  EBlockEor *ebEorPtr;
  EBlockOr *ebOrPtr;
  EBlockMain *ebMainPtr;
  Value v;                // value built by the value rules and case keys

  // Blocks nest recursively: the block being filled sits on top of this
  // stack, receives the parsed equations and is popped once it is complete.
  deque<EBlockMain*> eblocks;

  // ---- Scratch state for the function rule ----
//  Slot sem,cat,id;
  EFArgs req;             // leading arguments of the function call
  EFArgs opt;             // arguments introduced by a KEY token
  FStruc keyFS;           // not referenced by any action visible in this file
  FStruc *fsPtr;          // feature structure currently being filled
  FSPath *fsPathPtr;      // register/path argument currently being filled
  Value *vPtr;            // value argument currently being filled
  bool hasid;             // not referenced by any action visible in this file

  // ---- Parser state ----
  int currentregister;    // not referenced by any action visible in this file
  int lregister;          // register number parsed on the left side
  int rregister;          // register number parsed on the right side
  // Side of the equation that path and pathvals are currently writing to.
  enum {LEFTSIDE, RIGHTSIDE} currentside;
  // Kind of right-hand side recognised by eqrightside.
  enum {ERRORTYPE, DEFTYPE, UNDEFTYPE, PATHTYPE, VALTYPE, REMOVETYPE, FUNCTIONTYPE, NUMBERTYPE, INTEGERTYPE, POSITIVETYPE} rightsidetype;
  // Operator recognised between the two sides of an equation.
  enum actiontypes {UNIFYACT, OVERWRITEACT, ASSIGNREMOVEACT, APPENDACT, POPACT, CONSTRAINACT, NEGATEACT} ;
  // Function selected by the function rule.
  enum {GETLEX, GETLEXFS, NEXTLEX} functionname;
  // Enables the trace output of the actions.  No initial value is assigned
  // in this section, so callers should invoke setDebug() before parsing.
  bool debug;


public:
  // Selects the grammar that the parsed rules are inserted into.
  void setGrammar(Grammar *newg) {
    g = newg;
  }

  // Switches the trace output of the actions on or off.
  void setDebug(bool mydebug) {
    debug = mydebug;
  }

}


// Top-level parsing rule, call this first.
// Reads one or more rules up to end of input.  The shared right-hand-side
// buffer is emptied before each rule, and the equation block that
// `production` pushed for the rule is popped again once the rule is done.
rules
    : (
        { rhs.clear(); }
        rule
        { eblocks.pop_back(); }
      )+
      {
        if (debug) {
            cout << "* The toy grammar:" << endl;
            cout << *g << endl << endl;
        }
      }
      EOF!
    ;

// An individual parsing or generation rule: a production, optionally
// followed by its equations, wrapped in one pair of parentheses.
//
// `equationsgroup` already accepts a single bare `equation`, so a separate
// `LPAREN production (equation)? RPAREN` alternative matches nothing new and
// only makes the two alternatives impossible to tell apart.
rule
    : LPAREN production (equationsgroup)? RPAREN
    ;

// A CFG rule from a non-terminal to one or more terminals and/or non-terminals.
//
// All four arrow tokens share the same shape, so the rule is left-factored:
// the arrow is chosen with a single token of lookahead and the rule is added
// to the grammar in one place.  NONSPACED and NONSPACEDPARSE pass `true` as
// the third argument of Grammar::addRule, SPACED and SPACEDPARSE pass `false`.
// The equation block of the new rule is pushed on the eblocks stack; `rules`
// pops it again after the whole rule has been read.
production { bool nonSpaced = false; }
    : leftside
      (   ( NONSPACED | NONSPACEDPARSE ) { nonSpaced = true;  }
      |   ( SPACED    | SPACEDPARSE    ) { nonSpaced = false; }
      )
      LPAREN rightside RPAREN
        {
            rulePtr = &g->addRule(Symbol(leftnt), rhs, nonSpaced);
            eblocks.push_back(&(rulePtr->eBlock));
        }
    ;

// The left side of the production rule, always a single non-terminal.
// Its text is remembered in leftnt for the action of `production`.
leftside
    : nt:NT
        {
            leftnt = nt->getText();
            if (debug) {
                cout << "\nNew rule " << leftnt << endl;
            }
        }
    ;


// The right side of the production rule: one or more non-terminals, tokens
// or wildcards.  Each element is appended to rhs as an RLiteral whose second
// argument is true for a non-terminal and false otherwise.
rightside
    : (   nt:NT
            {
                rhs.push_back(RLiteral(Symbol(nt->getText()), true));
                if (debug) cout << "pushback " << nt->getText() << "\n";
            }
      |   tok:TOKEN
            {
                rhs.push_back(RLiteral(Symbol(tok->getText()), false));
                if (debug) cout << "pushback " << tok->getText() << "\n";
            }
      |   wild:WILDCARD
            {
                rhs.push_back(RLiteral(symWildcard, false));
                if (debug) cout << "pushback " << wild->getText() << "\n";
            }
      )+
    ;

// A group of equations: either a parenthesised, possibly empty, list of
// equations or a single bare equation.
//
// `equations` is itself one-or-more equations, so it is made optional here
// rather than repeated: repeating it would nest one closure inside another
// and leave ANTLR unable to decide which loop the next equation belongs to.
// The accepted input is the same.
equationsgroup
    : LPAREN (equations)? RPAREN
    | equation
    ;

// One or more consecutive equations.  Referenced by equationsgroup for the
// contents of a parenthesised group.
equations
    : (equation)+
    ;


// Where most of the work of the ANTLR grammar occurs.
// Reads one equation and adds the resulting block to the EBlock on top of
// the eblocks stack.  Accepted forms:
//   ( lpath OP eqrightside )                 plain equation, OP is an operator token
//   ( OR   equationsgroup+ )                 alternatives collected in an EBlockOr
//   ( EOR  equationsgroup+ )                 alternatives collected in an EBlockEor
//   ( CASE path ( key equationsgroup )+ )    keyed alternatives in an EBlockCase
//   ( TEST path )                            parsed only, no block is generated
//
// On entry both paths are cleared and rightsidetype is reset, so every
// equation starts from a clean state.  The register numbers are only reset
// by lpath and rpath.
equation {
            // Only the plain-equation alternative reads actiontype, and it
            // always assigns it first; the initial value merely avoids an
            // indeterminate variable.
            enum actiontypes actiontype = UNIFYACT;
            rightsidetype = ERRORTYPE;
            rPath.clear();
            lPath.clear();
            if (debug) cout << "clearing paths\n";
            currentside = RIGHTSIDE;
         }
    : LPAREN lpath

      (   UNIFY        { actiontype = UNIFYACT;        if (debug) cout << "UNIFYing \n"; }
      |   OVERWRITE    { actiontype = OVERWRITEACT;    if (debug) cout << "OVERWRITINGing \n"; }
      |   ASSIGNREMOVE { actiontype = ASSIGNREMOVEACT; if (debug) cout << "ASSIGNing \n"; }
      |   APPEND       { actiontype = APPENDACT;       if (debug) cout << "APPENDing \n"; }
      |   POP          { actiontype = POPACT;          if (debug) cout << "POPing \n"; }
      |   CONSTRAIN    { actiontype = CONSTRAINACT;    if (debug) cout << "Constrain action\n"; }
      |   NEGATE       { actiontype = NEGATEACT;       if (debug) cout << "NEGATEing \n"; }
      )

      eqrightside RPAREN
        {
            if (debug) cout << "left register " << lregister << "\n";

            if (actiontype == UNIFYACT) {
                // Unification: the block depends on what stood on the right.
                // A FUNCTIONTYPE right-hand side has no branch here, so such
                // an equation adds no block.
                if (rightsidetype == DEFTYPE) {
                    if (debug) cout << "Unify Defined\n";
                    eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                            Symbol::DEFINED));
                } else if (rightsidetype == UNDEFTYPE) {
                    if (debug) cout << "Unify Undefined\n";
                    eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                            Symbol::UNDEFINED));
                } else if (rightsidetype == REMOVETYPE) {
                    if (debug) cout << "Unify Remove\n";
                    eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                            Symbol::REMOVE));
                } else if (rightsidetype == NUMBERTYPE) {
                    if (debug) cout << "Unify Number Check\n";
                    eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                            Symbol::NUMBER));
                } else if (rightsidetype == INTEGERTYPE) {
                    if (debug) cout << "Unify Integer Check\n";
                    eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                            Symbol::INTEGER));
                } else if (rightsidetype == POSITIVETYPE) {
                    if (debug) cout << "Unify Positive Check\n";
                    eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                            Symbol::POSITIVE));
                } else if (rightsidetype == PATHTYPE) {
                    if (debug) cout << "right register " << rregister << "\n";
                    /*if (lregister == rregister && lpath.compare(rpath) != Path::none) {
                        cerr << "Same xN on left and right side not allowed" << endl;
                        exit(1);
                    } */
                    if (debug) cout << "Unify Path\n";
                    eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                            rregister, rPath,
                            Symbol::OP_PSEUDO_UNIFY));
                } else if (rightsidetype == VALTYPE) {
                    if (debug) cout << "Unify Value\n";
                    eblocks.back()->addBlock(new EBlockMain(lregister, lPath, v,
                            Symbol::OP_PSEUDO_UNIFY));
                }

            } else if (actiontype == OVERWRITEACT) {
                // Assignment: accepts a path, a value or a function call.
                if (rightsidetype == PATHTYPE) {
                    if (debug) cout << "Overwrite Path\n";
                    if (debug) cout << "right register " << rregister << "\n";
                    eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                            rregister, rPath,
                            Symbol::OP_ASSIGN));
                } else if (rightsidetype == VALTYPE) {
                    if (debug) cout << "Overwrite Value\n";
                    eblocks.back()->addBlock(new EBlockMain(lregister, lPath, v,
                            Symbol::OP_ASSIGN));
                } else if (rightsidetype == FUNCTIONTYPE) {
                    if (debug) cout << "Overwrite Function\n";
                    if (functionname == GETLEX) {
                        if (debug) cout << "Get-lex sem cat options\n";
                        eblocks.back()->addBlock(new EBlockMain(lregister, lPath, symGetLex, req, opt,
                                Symbol::OP_ASSIGN));
                    } else if (functionname == GETLEXFS) {
                        if (debug) cout << "Get-lex-fs sem fs options\n";
                        eblocks.back()->addBlock(new EBlockMain(lregister, lPath, symGetLexFS, req, opt,
                                Symbol::OP_ASSIGN));
                    } else if (functionname == NEXTLEX) {
                        // Recognised, but no block is generated yet.
                        if (debug) cout << "next-lex (may need fixing)\n";
                        /*eblocks.back()->addBlock(new EBlockMain(lregister,lPath,Symbol("NextLex"),req,opt,
                                Symbol::OP_ASSIGN)); */
                    }
                }

            } else if (actiontype == ASSIGNREMOVEACT) {
                // NOTE(review): this branch and the POP and APPEND branches
                // use rregister and rPath whatever kind of right-hand side
                // was parsed; presumably only paths are expected -- confirm.
                if (debug) cout << "right register " << rregister << "\n";
                if (debug) cout << "Remove Assign\n";
                eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                        rregister, rPath,
                        Symbol::OP_REMOVE_ASSIGN));

            } else if (actiontype == POPACT) {
                if (debug) cout << "right register " << rregister << "\n";
                if (debug) cout << "Pop\n";
                eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                        rregister, rPath,
                        Symbol::OP_POP));

            } else if (actiontype == APPENDACT) {
                if (debug) cout << "right register " << rregister << "\n";
                if (debug) cout << "Append\n";
                eblocks.back()->addBlock(new EBlockMain(lregister, lPath,
                        rregister, rPath,
                        Symbol::OP_PUSH));

            } else if (actiontype == CONSTRAINACT) {
                // NOTE(review): v is used whatever kind of right-hand side
                // was parsed; presumably only values are expected -- confirm.
                if (debug) cout << "Constrain " << endl;
                eblocks.back()->addBlock(new EBlockMain(lregister, lPath, v, Symbol::OP_CONSTRAIN));
            }
            // NOTE(review): NEGATEACT has no branch, so an equation using the
            // NEGATE operator is parsed but adds no block.
        }


    // Add in an *OR* Equation Block: every alternative is parsed into a fresh
    // EBlockMain, which is then moved from the stack into the EBlockOr.
    | LPAREN OR
        { eblocks.push_back(new EBlockOr()); if (debug) cout << "Adding OR block\n"; }

      (   { eblocks.push_back(new EBlockMain()); }
          equationsgroup
          { ebMainPtr = eblocks.back(); eblocks.pop_back(); eblocks.back()->addBlock(ebMainPtr); }
      )+

      RPAREN
        { ebOrPtr = (EBlockOr*)eblocks.back(); eblocks.pop_back(); eblocks.back()->addBlock(ebOrPtr); }

    // Add in an *EOR* Equation Block, built the same way as the OR block.
    | LPAREN EOR
        { eblocks.push_back(new EBlockEor()); if (debug) cout << "Adding EOR block\n"; }

      (   { eblocks.push_back(new EBlockMain()); }
          equationsgroup
          { ebMainPtr = eblocks.back(); eblocks.pop_back(); eblocks.back()->addBlock(ebMainPtr); }
      )+

      RPAREN
        { ebEorPtr = (EBlockEor*)eblocks.back(); eblocks.pop_back(); eblocks.back()->addBlock(ebEorPtr); }

    // Add in a *CASE* Equation Block: a selector path followed by one or more
    // (key equations) branches.
    | LPAREN CASE
        { currentside = LEFTSIDE; }
      path
        {
            // With currentside set to LEFTSIDE, `path` stores the selector in
            // lregister and lPath, so those are handed to the case block.
            eblocks.push_back((EBlockMain*)new EBlockCase(lregister, lPath));
            if (debug) cout << "Adding CASE block\n";
        }

      (   { eblocks.push_back(new EBlockMain()); }
          LPAREN key:TOKEN equationsgroup RPAREN
          {
              ebMainPtr = eblocks.back(); eblocks.pop_back();
              v.clear(); v.insert(v.begin(), Symbol(key->getText()));
              v.compact();
              ebCasePtr = (EBlockCase*)eblocks.back();
              ebCasePtr->addCaseBlock(v, ebMainPtr);
          }
      )+

      RPAREN
        { ebCasePtr = (EBlockCase*)eblocks.back(); eblocks.pop_back(); eblocks.back()->addBlock(ebCasePtr); }

    // A *TEST* form is accepted, but no equation block is generated for it.
    | LPAREN TEST path RPAREN
    ;

// Right-hand side of a plain equation.  Each alternative records what it
// recognised in rightsidetype for the action of `equation`; the trace line
// is printed before the alternative is consumed.
eqrightside
    :   { if (debug) cout << "Rightside *defined*" << endl; }
        DEFINED
        { rightsidetype = DEFTYPE; }

    |   { if (debug) cout << "Rightside *undefined*" << endl; }
        UNDEFINED
        { rightsidetype = UNDEFTYPE; }

    |   { if (debug) cout << "Rightside *remove*" << endl; }
        REMOVE
        { rightsidetype = REMOVETYPE; }

    |   { if (debug) cout << "Rightside *number*" << endl; }
        NUMBER
        { rightsidetype = NUMBERTYPE; }

    |   { if (debug) cout << "Rightside *integer*" << endl; }
        INTEGER
        { rightsidetype = INTEGERTYPE; }

    |   { if (debug) cout << "Rightside *positive*" << endl; }
        POSITIVE
        { rightsidetype = POSITIVETYPE; }

    |   { if (debug) cout << "Before Rightside function" << endl; }
        function
        { rightsidetype = FUNCTIONTYPE; }

    |   { if (debug) cout << "Rightside value" << endl; }
        value
        { rightsidetype = VALTYPE; }

    |   { if (debug) cout << "Rightside path" << endl; }
        rpath
        { rightsidetype = PATHTYPE; }
    ;


// Left-hand path of an equation: selects the left side and resets lregister
// before `path` fills in the register and the path tokens.
lpath
    :   {
            if (debug) cout << "In lpath" << endl;
            currentside = LEFTSIDE;
            lregister = 0;
        }
        path
    ;

// Right-hand path of an equation: selects the right side and resets rregister
// before `path` fills in the register and the path tokens.
rpath
    :   {
            if (debug) cout << "In rpath" << endl;
            currentside = RIGHTSIDE;
            rregister = 0;
        }
        path
    ;
    

// A path is either a parenthesised register reference followed by path
// tokens, or a bare register reference.  The register number is stored in
// lregister or rregister, whichever side is current.
path
    : LPAREN ref:REFERENCE
        {
            if (debug) cout << "Setting register\n";
            ((currentside == LEFTSIDE) ? lregister : rregister) =
                atoi(ref->getText().c_str());
        }
      pathvals RPAREN
    | bare:REFERENCE
        {
            ((currentside == LEFTSIDE) ? lregister : rregister) =
                atoi(bare->getText().c_str());
        }
    ;

// One or more path tokens; each is appended to lPath or rPath, whichever
// side is current.
pathvals
    : ( seg:TOKEN
          {
            if (debug) cout << "Push back path val " << seg->getText() << "\n";
            ((currentside == LEFTSIDE) ? lPath : rPath).push_back(Symbol(seg->getText()));
          }
      )+
    ;

// A value on the right-hand side: a single token, which replaces the
// contents of v, or one of the parenthesised value forms.
value
    : //(QUOTE)?
      tok:TOKEN
        {
            v.clear();
            v.insert(v.begin(), Symbol(tok->getText()));
            v.compact();
            if (debug) cout << "Inserting value: " << tok->getText() << endl;
        }
    | disjunctivevalue
        { if (debug) cout << "Disjunctive value\n"; }
    | negativevalue
        { if (debug) cout << "Negative value\n"; }
    | multiplevalue
        { if (debug) cout << "Multiple value\n"; }
    ;


// Disjunctive value: ( OR member+ ).  v is rebuilt with symOr as its first
// entry; every member is inserted at the end() of that entry.  Non-terminal
// members get their angle brackets put back around the name.
disjunctivevalue
    {
        Value::Iterator orEntry;
        if (debug) cout << "In *OR* value" << endl;
    }
    : LPAREN OR
        {
            v.clear();
            orEntry = v.insert(v.begin(), symOr);
        }
      (   tok:TOKEN { v.insert(orEntry.end(), Symbol(tok->getText())); }
      |   nt:NT     { v.insert(orEntry.end(), Symbol("<" + nt->getText() + ">")); }
      )+
      RPAREN
        { v.compact(); }
    ;

// Negated value: ( NOT member+ ).  v is rebuilt with Symbol::NOT as its first
// entry; every member is inserted at the end() of that entry.  Non-terminal
// members get their angle brackets put back around the name.
negativevalue
    {
        Value::Iterator notEntry;
        if (debug) cout << "In *NOT* value" << endl;
    }
    : LPAREN NOT
        {
            v.clear();
            notEntry = v.insert(v.begin(), Symbol(Symbol::NOT));
        }
      (   tok:TOKEN { v.insert(notEntry.end(), Symbol(tok->getText())); }
      |   nt:NT     { v.insert(notEntry.end(), Symbol("<" + nt->getText() + ">")); }
      )+
      RPAREN
        { v.compact(); }
    ;

// Multiple value: ( MULTIPLE token+ ).
// NOTE(review): the tokens are matched but no action stores them, so v keeps
// whatever an earlier rule left in it while eqrightside still reports a
// value -- looks unfinished, confirm the intended behaviour.
multiplevalue {  if (debug) cout << "In *MULTIPLE* value" << endl; }
    : LPAREN MULTIPLE (TOKEN)+ RPAREN
    ;


// A function call on the right-hand side:
//   ( NAME sem-argument cat-argument ( KEY option-argument )* )
// NAME selects functionname.  The SEM and CAT arguments are appended to req
// and every KEY-introduced argument is appended to opt; both argument lists
// are cleared when the call starts.
function
    : LPAREN
        { req.clear(); opt.clear(); if (debug) cout << "In Function\n"; }

      funcname:TOKEN
        {
            if (funcname->getText() == "GET-LEX") {
                functionname = GETLEX;
            } else if (funcname->getText() == "GET-LEX-FS") {
                functionname = GETLEXFS;
            } else if (funcname->getText() == "NEXT-LEX") {
                functionname = NEXTLEX;
            } else {
                // functionname keeps the value left by an earlier call, so
                // at least report the name instead of staying silent.
                cerr << "Warning: unknown function name " << funcname->getText() << endl;
            }
        }

      // Add in SEM path or value
      (   LPAREN semref:REFERENCE
            {
                fsPathPtr = &req.newFSPath();
                if (debug) cout << "In SEM Path\n";
                fsPathPtr->fsIdx = atoi(semref->getText().c_str());
            }
          ( sempathtok:TOKEN { fsPathPtr->path.push_back(Symbol(sempathtok->getText())); } )+
          RPAREN
      |
          //QUOTE
          semtok:TOKEN
            {
                vPtr = &req.newValue();
                vPtr->insert(vPtr->begin(), Symbol(semtok->getText()));
                if (debug) cout << "In SEM Value\n";
            }
      )

      // Add in CAT path, feature structure or value
      (   LPAREN catref:REFERENCE
            {
                if (debug) cout << "In CAT Path\n";
                fsPathPtr = &req.newFSPath();
                fsPathPtr->fsIdx = atoi(catref->getText().c_str());
            }
          ( catpathtok:TOKEN { fsPathPtr->path.push_back(Symbol(catpathtok->getText())); } )+
          RPAREN
      |
          //QUOTE
          // Quoted feature structure: a list of (feature ... value) entries;
          // the last token of each entry is the value assigned at the path
          // formed by the tokens before it.
          QPAREN { if (debug) cout << "In CAT FS\n"; fsPtr = &req.newFS(); }
          (   LPAREN { optPath.clear(); }
              ( catfeature:TOKEN { optPath.push_back(Symbol(catfeature->getText())); } )+
              catval:TOKEN
                {
                    v.clear(); v.insert(v.begin(), Symbol(catval->getText()));
                    fsPtr->assign(optPath, v);
                }
              RPAREN
          )+
          RPAREN
      |
          //QUOTE
          cattok:TOKEN
            {
                if (debug) cout << "In CAT Value\n";
                vPtr = &req.newValue();
                vPtr->insert(vPtr->begin(), Symbol(cattok->getText()));
            }
      )

      // Add in options (if any)
      (   keyid:KEY
          (   // Feature structure
              QPAREN { fsPtr = &opt.newFS(keyid->getText()); if (debug) cout << "In OPTION FS\n"; }
              (   LPAREN { optPath.clear(); }
                  ( optfeature:TOKEN { optPath.push_back(Symbol(optfeature->getText())); } )+
                  optval:TOKEN
                    {
                        v.clear(); v.insert(v.begin(), Symbol(optval->getText()));
                        fsPtr->assign(optPath, v);
                    }
                  RPAREN
              )+
              RPAREN
          |
              // Path
              LPAREN keyref:REFERENCE
                {
                    if (debug) cout << "In OPTION Path\n";
                    fsPathPtr = &opt.newFSPath(Symbol(keyid->getText()));
                    fsPathPtr->fsIdx = atoi(keyref->getText().c_str());
                }
              ( keypathtok:TOKEN { fsPathPtr->path.push_back(Symbol(keypathtok->getText())); } )+
              RPAREN
          |
              // Token
              keytok:TOKEN
                {
                    vPtr = &opt.newValue(keyid->getText());
                    if (debug) cout << "In OPTION Value\n";
                    vPtr->insert(vPtr->begin(), Symbol(keytok->getText()));
                }
          )
      )*

      RPAREN
    ;


{
// Standard stream header; the pre-standard <iostream.h> is no longer
// provided by current compilers.
#include <iostream>
}

// Lexer for the rule files read by GenKitParser.
class GenKitLexer extends Lexer;
options {
    k=4;                              // characters of lookahead
    charVocabulary = '\3'..'\377';    // characters the lexer accepts
    caseSensitive=false;              // input is matched without regard to case
    caseSensitiveLiterals=false;      // literal lookup ignores case as well
    testLiterals=true;                // token text is checked against the literals table
}

{
   // Scratch variables shared by the NT, KEY and TOKEN actions, which use
   // them to upper-case the token text.
   string letter; 
   int i;
   // Only referenced by a commented-out trace line in the TOKEN rule.
   bool debug;
}


/* ignore comments */
// Single-line comment: from ';' to the end of the line.  The line terminator
// is optional so that a comment on the last line of a file without a
// trailing newline is still recognised; the line counter is advanced only
// when a terminator was actually consumed.
SL_COMMENT
	:	';'
		(~('\n'|'\r'))*
		(	('\n' | '\r' ('\n')?)
			{ newline(); }
		)?
		{ $setType(_token->SKIP); }
    ;


/* ignore comments */
// Multiple-line comments, delimited by an opening hash-bar pair and a closing
// bar-hash pair.  The token is skipped, and the line counter is advanced for
// every newline inside the comment.
ML_COMMENT
	:	"#|"
		(	/*	'\r' '\n' can be matched in one alternative or by matching
				'\r' in one iteration and '\n' in another.  I am trying to
				handle any flavor of newline that comes in, but the language
				that allows both "\r\n" and "\r" and "\n" to all be valid
				newline is ambiguous.  Consequently, the resulting grammar
				must be ambiguous.  I'm shutting this warning off.
			 */
			options {
				generateAmbigWarnings=false;
			}
		:
			// A bar belongs to the comment body only when the character
			// after it is not a hash; otherwise it starts the terminator.
			{ LA(2)!='#' }? '|'
		|	'\r' '\n'		{newline();}
		|	'\r'			{newline();}
		|	'\n'			{newline();}
		|	~('|'|'\n'|'\r')
		)*
		"|#"
		{$setType(_token->SKIP);}
	;


// Whitespace -- ignored.  Blanks, tabs and form feeds are dropped; any of
// the three newline conventions also advances the line counter.
WS
	:	(	' '
		|	'\t'
		|	'\f'
		|	(	"\r\n"	// DOS
			|	'\r'	// Macintosh
			|	'\n'	// Unix
			)
			{ newline(); }
		)
		{ $setType(_token->SKIP); }
	;


// Grouping tokens.  QPAREN opens a quoted feature structure in the function rule.
QPAREN : "'(" ;
LPAREN : "("   ;
RPAREN : ")" ;
// Production arrows.  The parser passes false to Grammar::addRule for the
// SPACED forms and true for the NONSPACED forms; the ...PARSE variants are
// treated the same as their plain counterparts.
SPACED : "==>" ;
NONSPACED : "-->" ;
SPACEDPARSE : "<==>" ;
NONSPACEDPARSE : "<-->" ;
// Non-terminal written between angle brackets.  The brackets are left out of
// the token text and the name is converted to upper case.
NT : "<"! (~('='|' '|'>'))+ ">"!                      
    {
        letter = getText();
        for (string::size_type pos = 0; pos < letter.length(); ++pos) {
            // toupper is only defined for values representable as unsigned
            // char, and the vocabulary reaches up to \377, so convert first.
            letter[pos] = toupper(static_cast<unsigned char>(letter[pos]));
        }
        setText(letter);
    }
 ;

// Wildcard element of a production right-hand side (stored as symWildcard).
WILDCARD : "%"                                 ;
// Equation operators; the equation rule maps each to the operation noted.
ASSIGNREMOVE : "=="  ;    // Symbol::OP_REMOVE_ASSIGN
UNIFY : "=" ;             // Symbol::OP_PSEUDO_UNIFY, or a test symbol such as Symbol::DEFINED
OVERWRITE : "<=" ;        // Symbol::OP_ASSIGN
POP : "<" ;               // Symbol::OP_POP
APPEND : ">" ;            // Symbol::OP_PUSH
CONSTRAIN : "=c" ;        // Symbol::OP_CONSTRAIN
NEGATE : "=n" ;           // recognised, but the equation rule generates no block for it

// Special right-hand sides of an equation (see eqrightside).
DEFINED : "*defined*" ;
UNDEFINED : "*undefined*"  ;
REMOVE : "*remove*" ;

// Tests: right-hand sides that check the kind of the left-hand value.
NUMBER:  "*number*";
INTEGER: "*integer*";
POSITIVE: "*positive*";

// Introducers of the block forms in the equation rule (OR, EOR, CASE, TEST)
// and of the parenthesised value forms (OR, NOT, MULTIPLE).
OR : "*or*" ;
EOR : "*eor*" ;
NOT : "*not*" ;
MULTIPLE : "*multiple*" ;
CASE : "*case*" ;
TEST : "*test*" ;

// Register reference: the letter x followed by digits.  The x is left out of
// the token text, so the parser converts the remaining digits with atoi.
REFERENCE : 'x'! ('0'..'9')+ ;

// Option keyword of a function call.  The text is converted to upper case
// before it is handed to the parser.
KEY : (":lex-id" | ":check" | ":ambiguity")
      {
          letter = getText();
          for (string::size_type pos = 0; pos < letter.length(); ++pos) {
              // toupper is only defined for values representable as unsigned
              // char, so convert before the call.
              letter[pos] = toupper(static_cast<unsigned char>(letter[pos]));
          }
          setText(letter);
      }
;

// General token: either a bare word, optionally preceded by a single quote
// that is left out of the text, or a double-quoted string whose quotes are
// left out of the text.  The text is converted to upper case.
TOKEN : (  ('\''!)? ~('\''|':'|'\n'|'\r'|'\t'|' '|'\f'|'('|')'|'<'|'>'|'='|';'|'#'|'|'|'%'|'"')(~('\n'|'\f'|'\r'|'\t'|' '|'"'|'('|')'|'<'|'>'|'='|';'|'#'|'|'|'%'))* 
        |
        '"'! (' '|~('"'))* '"'!
        ) 
        {
          letter = getText();
          for (string::size_type pos = 0; pos < letter.length(); ++pos) {
              // toupper is only defined for values representable as unsigned
              // char, and the vocabulary reaches up to \377, so convert first.
              letter[pos] = toupper(static_cast<unsigned char>(letter[pos]));
          }
          setText(letter);
        }

//    { if (debug) cout << "TOKEN: " << getText() << endl; } 
;
