// ======================================================================
// grammar.hpp - A unification-based grammar.
// 
// 081304: Benjamin Han <benhdj@cs.cmu.edu> Declared global arrowLex and
//         arrowNonLex for printing lexical and non-lexical grammar rules.
// 011403: Benjamin Han <benhdj@cs.cmu.edu> Revised using yet a new
//         implementation of the CFG module: now it's conditional compilation
//         for either analysis (UK_ANALYSIS) or generation (UK_GENERATION);
//         get rid of Dir and the related methods.
// 120601: Benjamin Han <benhdj@cs.cmu.edu> Major revision to use the new
//         cfg.hpp in Toolbox; changed Literal to RLiteral; namespace added.
// 092601: Benjamin Han <benhdj@cs.cmu.edu> Redesigned the grammar structure
//         to make it more friendly for parsing: now rules reverse-lookup
//         via RHS is possible; removed struct RHS (and the related 
//         structures) and added class Rule.
// 082801: Benjamin Han <benhdj@cs.cmu.edu> Use list<>::back() whenever 
//         possible.
// 081601: Benjamin Han <benhdj@cs.cmu.edu> Changed the string argument
//         in Grammar::setStart() and Grammar::Grammar() to a Symbol argument
//         - this way only symbol.* directly deals with strings, so the
//         future i18n is easier.
// 080501: Benjamin Han <benhdj@cs.cmu.edu> Changed RHS::lexical to 
//         RHS::charLevel.
// 071301: Benjamin Han <benhdj@cs.cmu.edu> Minor revisions after the 
//         implementation of extension functions.
// 062901: Benjamin Han <benhdj@cs.cmu.edu> Moved FSRegisters into equation.*.
// 061901: Benjamin Han <benhdj@cs.cmu.edu> From now on the difference between
//         a lexical and a non-lexical rule is that the former inserts spaces
//         between the RHS literals - both lexical and non-lexical rules can
//         have RHS NT or terminals; revised some comments; added
//         Literal::needFS().
// 061501: Benjamin Han <benhdj@cs.cmu.edu> Added Grammar::dir and all related
//         stuff for a real direction-independent grammar representation.
// 061401: Benjamin Han <benhdj@cs.cmu.edu> Added a default constructor
//         to RHS - make sure it initializes its EBlockMain to be a top-level
//         block; non-lexical rules can now have RHS terminals by introducing
//         Literals.
// 061201: Benjamin Han <benhdj@cs.cmu.edu> Moved from Generator to UKernel.
// 061101: Benjamin Han <benhdj@cs.cmu.edu> Now you MUST specify which NT
//         is the starting symbol.
// 052901: Benjamin Han <benhdj@cs.cmu.edu> Created.
// ======================================================================

//    Copyright (C) 2000-2004 Benjamin Han <benhdj@cs.cmu.edu>
//
//    This library is free software; you can redistribute it and/or
//    modify it under the terms of the GNU Lesser General Public
//    License as published by the Free Software Foundation; either
//    version 2.1 of the License, or (at your option) any later version.
//
//    This library is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//    Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public
//    License along with this library; if not, write to the Free Software
//    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#ifndef GRAMMAR_HPP
#define GRAMMAR_HPP

#ifdef UK_ANALYSIS
#include "cfgAnalysis.hpp"
#elif defined UK_GEN
#include "cfgGen.hpp"
#endif

#include "eBlock.hpp"
#include "lexicon.hpp"

namespace UKernel {

// Arrow strings used when printing grammar rules: arrowLex is for lexical
// rules and arrowNonLex for non-lexical ones.  These are declarations only;
// the definitions are not part of this header.
extern const std::string arrowLex;
extern const std::string arrowNonLex;

// A literal appearing on the right-hand side of a grammar rule: a symbol
// together with a flag telling whether that symbol is a non-terminal.
struct RLiteral {
  Symbol sym;  // the symbol this literal stands for
  bool nt;     // true iff this is a non-terminal

  // Default constructor.  'nt' is given a definite value (the same default
  // the two-argument constructor uses) so that needFS() and operator < never
  // read an indeterminate bool on a default-constructed literal.
  RLiteral ():nt(true) {}

  // Build a literal from a symbol; it is a non-terminal unless nt is false.
  RLiteral (const Symbol &sym, bool nt=true):sym(sym),nt(nt) {}

  // true iff this literal is a non-terminal or its symbol equals symWildcard
  // (presumably these are the literals that need an FS - TODO confirm)
  bool needFS () const { return (nt || sym==symWildcard); }

  // ordering of literals; defined inline later in this header
  bool operator < (const RLiteral &l) const;
};

// Stream output for a single RHS literal and for a whole rule RHS.
// Declarations only; the definitions are not part of this header.
std::ostream &operator << (std::ostream &os, const RLiteral &r);
std::ostream &operator << (std::ostream &os, const RHS<RLiteral> &rhs);

// The body of a rule: the per-rule data stored alongside the LHS/RHS kept by
// the CFG base class (RuleBody is the third template argument handed to it).
// WARNING: DON'T TRY TO COPY IT since EBlockMain has no support for that
// NOTE: charLevel has no initializer here; Grammar::addRule() assigns it
//       right after the rule has been created.
struct RuleBody {
  bool charLevel;     // true iff this is a character-level rule
  EBlockMain eBlock;  // equation block
};

// Main class: A unification-based grammar.
//
// The CFG machinery is inherited from one of two base classes picked at
// compile time: CFGAnalysis when UK_ANALYSIS is defined, otherwise CFGGen
// when UK_GEN is defined (UK_ANALYSIS wins if both are defined).  If neither
// macro is defined the base-class list is empty and the class does not
// compile.
class Grammar:

#if defined(UK_ANALYSIS)
public CFGAnalysis<Symbol,RLiteral,RuleBody>
#elif defined(UK_GEN)
public CFGGen<Symbol,RLiteral,RuleBody>
#endif

{
  // private shorthand for whichever base class was selected above
#if defined(UK_ANALYSIS)
  typedef CFGAnalysis<Symbol,RLiteral,RuleBody> _Parent;
#elif defined(UK_GEN)
  typedef CFGGen<Symbol,RLiteral,RuleBody> _Parent;
#endif

public:

  // iterator types re-exported from the base class
  typedef _Parent::Iterator Iterator;
  typedef _Parent::RuleIterator RuleIterator;

  // FS registers; addRule() enlarges them when a rule's RHS requires it
  FSRegisters fsRegs;
  // the lexicons handed to the constructor; held by reference, not owned
  Lexicons &lex;

private:

  // the starting symbol of the grammar
  Symbol start;

  // the printing operator needs access to the private parts
  friend std::ostream &operator << (std::ostream &os, Grammar &g);

public:

  // Both the lexicons and the starting symbol must be supplied.
  Grammar (Lexicons &lex, const Symbol &symStart)
    :lex(lex),
     start(symStart)
  {}

  // read / replace the starting symbol
  const Symbol &readStart () const {
    return start;
  }
  void setStart (const Symbol &symStart) {
    start=symStart;
  }

  // Add the rule lhs -> rhs and return its (modifiable) body; charLevel
  // marks the new rule as a character-level rule.
  RuleBody &addRule (const Symbol &lhs, const RHS &rhs, bool charLevel=false);
};

// Stream output for a whole grammar.  The same operator is declared a friend
// inside Grammar, and it takes the grammar by non-const reference.
// Declaration only; the definition is not part of this header.
std::ostream &operator << (std::ostream &os, Grammar &g);

// ============================ inline functions ============================

// Strict weak ordering on literals: compare the symbols first and, only when
// neither symbol is less than the other, put a terminal before a
// non-terminal.
//
// The tie-break on 'nt' must be applied only when the symbols are equivalent.
// Applying it unconditionally lets both a<b and b<a hold (e.g. a = terminal
// with the larger symbol, b = non-terminal with the smaller one), which
// breaks the requirements of ordered containers and sorting algorithms.
// Only Symbol's operator < is used.
inline bool RLiteral::operator < (const RLiteral &l) const {
  if (sym<l.sym) return true;
  if (l.sym<sym) return false;
  return (!nt && l.nt);
}

// Add the rule lhs -> rhs through the CFG base class, record whether it is a
// character-level rule, and return a reference to the new rule's body.
inline RuleBody &Grammar::addRule (const Symbol &lhs, const RHS &rhs,
                                   bool charLevel) {
  RuleBody &body=_Parent::addRule(lhs,rhs);
  body.charLevel=charLevel;

  // increase FS registers size if necessary: fsRegs.add() is called whenever
  // the current register count does not exceed the number of RHS literals
  FSRegisters::size_type rhsLen;
  rhsLen=rhs.size();
  if (fsRegs.size()<=rhsLen) {
    fsRegs.add(rhsLen);
  }

  return body;
}

};

#endif
