/*----------------------------------------------------------
FILE
	meaning.C

OVERVIEW

	Lexifier program code

LOG	<PRE>
	$Log: meaning.C,v $
	Revision 1.82  2005/10/19 15:00:17  svoboda
	Removed memtrays
	
	Revision 1.81  2005/06/28 19:47:37  svoboda
	Added token offsets to FS from lexifier
	
	Revision 1.80  2005/02/24 18:59:19  svoboda
	Several Windows portability changes s/string.H/kstring.H/g s/out/tout/
	
	Revision 1.79  2003/04/30 19:26:49  svoboda
	Now produces complete token sequences for S with rogue literal tag
	
	Revision 1.78  2003/01/13 18:04:07  svoboda
	Incorporated md5i's STD C++ compatability; now compilues under g++ 3.0
	
	Revision 1.71.2.3  2002/11/14 21:27:55  md5i
	Merge with trunk.
	
	Revision 1.77  2002/10/15 20:29:27  svoboda
	Adjustment to fix 507...kce-minus POSes not added to alternate-cats slot
	
	Revision 1.76  2002/10/14 17:20:39  svoboda
	Alternate-cats is now assigned to kce-minus terms
	
	Revision 1.75  2002/10/07 21:06:18  svoboda
	Fixed bug 488
	
	Revision 1.74  2002/09/27 16:47:11  svoboda
	Changed CTE to KCE for DMK & FS
	
	Revision 1.73  2002/04/16 13:40:16  svoboda
	Fixed 'hardened steel ball' regression - bug 393
	
	Revision 1.72  2002/04/10 16:21:25  svoboda
	Added diagnostics messages to Analyzer
	
	Revision 1.71.2.2  2002/06/18 20:28:33  md5i
	Merge in HEAD.
	
	Revision 1.71.2.1  2002/03/19 18:08:43  md5i
	Updates for STD C++.
	
	Revision 1.71  2002/02/11 23:11:14  svoboda
	Patrick_Statement_Lists must NOT be initialized at startup. AIX hates that
	
	Revision 1.70  2002/02/11 20:17:40  svoboda
	Oops, true is now a valid keyword...except on AIX
	
	Revision 1.69  2001/11/29 21:29:07  svoboda
	Added head-root slot, as per Bug 290
	
	Revision 1.68  2001/10/22 20:39:45  svoboda
	OK, overhauled unify_defs's initialization routines to be consistent
	
	Revision 1.67  2001/10/19 21:38:22  svoboda
	Bug 279 fix...don't unify idioms with words if no idioms
	
	Revision 1.66  2001/10/16 18:09:15  svoboda
	Bug 269 - unify_defs unifies class iff Idiom class not defined
	
	Revision 1.65  2001/10/15 19:04:38  svoboda
	unify-defs once again unifies semantic verb features (Bug 262)
	
	Revision 1.64  2001/07/05 16:14:33  svoboda
	Idiom-word inheritance no longer inherits scomp, xcomp, or apcomp (fix for Bug 156)
	
	Revision 1.63  2000/11/03 20:35:01  svoboda
	Adopted simpler output format for ambiguities under KCE. CTE outputs unchanged
	
	Revision 1.62  2000/10/26 18:30:59  svoboda
	Changed CTE to KCE for DMK slot and for disambig tags
	
	Revision 1.61  2000/09/19 13:36:54  svoboda
	Bugfix...final period may still also be part of an abbrev

	Revision 1.60  2000/09/05 14:12:22  svoboda
	Character entities may no longer be considered meaningless tokens

	Revision 1.59  2000/08/29 16:17:54  svoboda
	Bugfix, single punct chars may no longer be Meaningless_Tokens

	Revision 1.58  2000/04/14 12:57:25  svoboda
	Definition-unification now ignores abbreviations & acronyms

	Revision 1.57  2000/03/14 12:09:37  svoboda
	Analyzer now reports meaningless tokens

	Revision 1.56  1999/12/03 13:58:33  svoboda
	KANTOO now produces log output formatted as outlines

	Revision 1.55  1999/10/07 13:33:48  svoboda
	SMCS codes no longer included when unifying words & idioms

	Revision 1.54  1999/09/29 17:14:53  svoboda
	Bugfix...pass strings to morphonizer dequoted

	Revision 1.53  1999/09/28 14:30:54  svoboda
	Quick bugfix to hash wildcard feature

	Revision 1.52  1999/09/28 13:39:03  svoboda
	PR 5991 - added wildcard 'hash' symbols

	Revision 1.51  1999/07/27 14:03:39  svoboda
	Unify-definitions no longer prunes out idioms if their head has no matching POS

	Revision 1.50  1999/07/19 12:09:38  svoboda
	Oops, literals were still getting added

	Revision 1.49  1999/07/16 12:30:30  svoboda
	Eliminated numberizer/tagifier & fixed some memory leaks

	Revision 1.48  1999/07/08 18:59:53  svoboda
	Removed Numberizer and Tagifier...made obsolete by more powerful morphology

	Revision 1.47  1999/07/03 12:02:25  svoboda
	Moved is_int, etc. from Symbol_Frame to KString...where they belong

	Revision 1.46  1999/07/02 18:59:23  svoboda
	Symbol_Frames now replace Atomic_Frames, Boolean_Frames, and Integer_Frames

	Revision 1.45  1999/06/17 14:29:52  svoboda
	Symbols are now implemented with Frames, and like Frames

	Revision 1.44  1999/05/11 12:53:27  md5i
	Changes to allow symbols to be created staticly despite memtrays.

	Revision 1.43  1999/03/22 13:04:21  md5i
	Fencepost error fixed.

	Revision 1.42  1999/03/19 17:12:59  md5i
	Use all-heads to fix head-pos values in ?CTE attach tags.

	Revision 1.41  1999/03/16 15:21:34  md5i
	Memory leak fix.

	Revision 1.40  1999/03/15 16:11:25  md5i
	Memory fixes.

	Revision 1.39  1999/03/01 16:12:29  svoboda
	Added SMCS Codes

	Revision 1.38  1999/01/29 10:45:49  svoboda
	Made zero-numbering optional for units

	Revision 1.37  1998/12/16 18:03:03  svoboda
	Made fuzzy head-pos go both ways

	Revision 1.36  1998/12/10 18:01:44  svoboda
	Added disgusting fuzzy_head_pos hack in to Lexifier

	Revision 1.35  1998/11/23 09:39:23  md5i
	Merged Memory_Trays into main distribution.

	Revision 1.34  1998/11/12 17:29:11  svoboda
	Bugfixes: missing-bug, empty-literal-tags bug

	Revision 1.33  1998/11/06 17:41:43  svoboda
	DMK_Translator now points out invalid symbols in DMK

	Revision 1.32  1998/11/02 15:46:21  svoboda
	Syntaxifier now takes a graph of inputs, not a sequence

	Revision 1.31.2.1  1998/11/13 17:45:18  md5i
	Added memtrays.

	Revision 1.31  1998/10/27 17:42:47  svoboda
	OK got unify_defs working right now.

	Revision 1.30  1998/10/26 18:59:00  svoboda
	Slight bug in unify-defs; allow non-noun idioms to unify correctly

	Revision 1.29  1998/10/13 19:32:02  svoboda
	Some bugfixes wrt idiom/word unification

	Revision 1.28  1998/10/08 18:26:09  svoboda
	Oops...numbers were getting wrongly identified in phrases

	Revision 1.27  1998/10/08 14:23:28  svoboda
	Removed (cte -) from words when unifying with idioms

	Revision 1.26  1998/10/07 16:59:28  svoboda
	Fixed bug wrt idiom meanings/orthos/roots being truncated to head

	Revision 1.25  1998/10/06 16:39:23  svoboda
	Idioms now are constructed from symbols, not strings

	Revision 1.24  1998/09/30 13:57:25  svoboda
	Bugfixes in definition unification

 * Revision 1.23  98/09/23  15:15:28  svoboda
 * Fixed literals bug
 * 
	Revision 1.22  1998/09/21 15:08:36  svoboda
	Added numeric-value to lexifier for idioms

 * Revision 1.21  98/09/11  12:36:52  md5i
 * Symbols are now based on Memvector_KStrings.
 * 
	Revision 1.20  1998/09/04 14:38:32  svoboda
	Converted Lexifier to speak PATRICK

	Revision 1.19  1998/08/03 16:37:12  svoboda
	Fixed alternate-cats to ignore phrases and non-CTE words

	Revision 1.18  1998/07/27 15:24:14  svoboda
	Adjusted ED/ING to be first-class alternate-cats

	Revision 1.17  1998/07/22 16:06:34  svoboda
	Added alternate-cats ing and ed

	Revision 1.16  1998/06/24 15:44:22  svoboda
	Defs unify if typeids match, not cats

	Revision 1.15  1998/06/23 17:16:14  svoboda
	Added alternate-cats slot

	Revision 1.14  1998/06/22 18:26:34  svoboda
	The unify-defs procedure handles several more features now

	Revision 1.13  1998/06/19 14:03:59  svoboda
	Many bugfixes, incl. idiom inheritance, separate Defs, / & ' handling

	Revision 1.12  1998/06/02 11:59:17  svoboda
	Added install.data for language-related files to install.

	Revision 1.11  1998/05/27 13:33:10  svoboda
	Plugged memory leaks

	Revision 1.10  1998/02/26 13:20:01  md5i
	Shared Library name no longer hard codes KANTOO environ.

	Revision 1.9  1998/01/13 10:52:48  md5i
	Numbers are now Noun_Definitions, as opposed to Real Definitions.

	Revision 1.8  1997/11/26 12:51:00  svoboda
	Made partial DMK image optimization

 * Revision 1.7  97/09/09  16:16:42  md5i
 * More hacking to get things working under g++ with optimization.
 * 
	Revision 1.6  1997/08/22 16:08:44  svoboda
	Fixed bug of interpreting '.' as a number

	Revision 1.5  1997/08/01 15:59:38  svoboda
	Created Shared_Library class, extended Rule_Database to use it, improved logs

 * Revision 1.4  97/06/05  19:18:06  svoboda
 * Redesigned unification, adding paths/slots/typeless grammar
 * 
 * Revision 1.3  97/06/01  16:34:03  svoboda
 * Added symbol table class
 * 
 * Revision 1.2  97/05/20  16:28:19  svoboda
 * Added protocol class, made modules less interdependent
 * 
 * Revision 1.1  97/05/13  13:22:04  svoboda
 * Added Lexifier
 *
	</PRE>

AUTHOR
	David Svoboda

	Copyright (C) 1997
	Carnegie Mellon
	All Rights Reserved
----------------------------------------------------------*/

#include "dmk_database.H"
#include "deftypes.H"
#include "lexifier.H"
#include "morphonizer.H"
#include "morphsemizer.H"
#include "wordifier.H"
#include "idiomifier.H"
#include <ctype.h>
#include "capitalization.H"
#include "idiom_dictary.H"
#include "sequence_frame.H"
#include "slot_frame.H"
#include "patrick_statements.H"
#include "smcs.H"


// Slot names, category frames and PATRICK paths/statements used in this file.
// Names qualified with Lexifier:: define static members of Lexifier; the
// unqualified ones are plain namespace-scope constants.
const Symbol Lexifier::Root("root");			// %root slot, filled by add()
const Symbol Lexifier::Ortho("ortho");			// %ortho slot: original text span
const Symbol Lexifier::Head_Token("head-token");	// 1-based head token index (multi-token defs)
const Symbol Head_Root("head-root");			// lowercased root of the head token
const Symbol Lexifier::Attributes("attributes");
const Symbol Lexifier::Being("being");			// set to + for forms listed in To_Be_Forms
const Symbol Lexifier::Alt_Cats("alternate-cats");	// filled by add_alternate_cats()
const Symbol Lexifier::Hash_Value("hash");		// strings matched by hash wildcards
const Frame Lexifier::Number_Category("number");
const Frame Lexifier::Verb("v");
const Frame Lexifier::ED("ed");
const Frame Lexifier::ING("ing");
// Paths into a tag's attributes, used by adjust_head_pos().
const Patrick_Path Attributes_Head_Pos("%(attributes head-pos)");
const Patrick_Path Attributes_All_Heads("%(attributes all-heads)");
const Patrick_Path Attributes_Sel("%(attributes sel)");
// An idiom that defines this path makes unify_definitions() strip the class
// and complement slots from the word definition before unifying.
const Patrick_Path Snobbish("%(concepts class)");
// Part-of-speech used for a meaning when the definition has no category symbol.
const KString Lexifier::Special("SP");
const Concept_ID Numeric_Concept_ID("C", "DECIMAL-NUMBER");
// Test used by Lexifier::execute() to recognize literal tags.
const Patrick_Statement Is_Literal("(%(concepts contents) =c literal)");
// Slot set to + on tokens that received no definition at all.
const Symbol Meaningless("meaningless");
// Character offsets of a definition's first/last token in the original string.
const Symbol Token_Start("token-start");
const Symbol Token_End("token-end");


//////////
// Works out which input tokens a definition spans.
//
//   d           definition to inspect (its Idiom_Dictionary::Words and
//               Idiom_Dictionary::Head slots are read)
//   idiom_head  out: zero-based position of the head inside the span
//   input       the full token vector
//   token_head  index in input of the token that is the head
//
// The span is Words tokens long (1 when that slot is not a symbol). The head
// position comes from the Head slot, defaulting to the last token of the
// span. Returns the subvector of input that starts idiom_head tokens before
// token_head.
Subvector< Token> tokens(const Definition& d, Iterator& idiom_head, 
			 const Vector< Token>& input, Iterator token_head) {
  // How many tokens the definition covers.
  const Symbol_Frame* word_count
    = Symbol_Frame::down_cast( d.slot_value( Idiom_Dictionary::Words).value());
  Iterator span = 1;
  if (word_count) span = word_count->value().int_value();

  // Where the head sits inside that span.
  const Symbol_Frame* head_slot
    = Symbol_Frame::down_cast( d.slot_value( Idiom_Dictionary::Head).value());
  if (!head_slot) idiom_head = span - 1;
  else idiom_head = head_slot->value().int_value();

  const Iterator first_token = token_head - idiom_head;
  return Subvector< Token>( input, first_token, span);
}


//////////
// Builds the list of strings that add() treats as forms of the verb 'to be'
// (note that the list also contains "beings").
Vector< KString> to_be_forms() {
  const char* const forms[] = {
    "be", "am", "are", "is", "was", "were", "been", "being", "beings"
  };
  const unsigned form_count = sizeof( forms) / sizeof( forms[0]);

  Vector< KString> result;
  for (unsigned f = 0; f < form_count; f++)
    result += KString( forms[f]);
  return result;
}

// Lookup table consulted by add() to decide whether to set the being slot.
// Built once by calling to_be_forms() when this namespace-scope variable is
// initialized.
Vector< KString> To_Be_Forms = to_be_forms();


//////////
// Fills in the token-derived slots of a definition.
//
//   d           definition to decorate (modified in place)
//   tokens      the input tokens the definition covers
//   head_root   root form of the head token; when left at KString::Default
//               the head token's own text is used for head-root and no
//               meaning is added (the definition is treated as a literal)
//   head        zero-based position of the head token inside tokens
//   hash_value  strings matched by hash wildcards, if any
//
// Slots written: tokens, token-start, token-end, head-token (multi-token
// spans only), head-root, ortho (unless already set), root (unless already
// set), a meaning (unless already set or no head_root was given), being
// (when the root is one of To_Be_Forms) and hash (unless already set).
void add( Definition& d, const Subvector< Token>& tokens,
	  const KString& head_root = KString::Default, Iterator head = 0,
	  const Frame& hash_value = Undefined_Frame::Value) {
  // NOTE(review): head is used below as a zero-based index into the span, so
  // head == tokens.length() would address the token after it; this probably
  // wants '>' rather than '>='. Left unchanged pending confirmation.
  ASSERT( tokens.length() && (tokens.length() >= head));

  // Bind the underlying vector by reference instead of copying it, and look
  // up the span boundaries once; they are needed by several slots below.
  const Vector< Token>& all_tokens = tokens.original_vector();
  const Iterator first = tokens.start();
  const Iterator last = tokens.end() - 1;
  const Iterator char_start = all_tokens[ first].substring().start();
  const Iterator char_end = all_tokens[ last].substring().end();

  // Add %tokens (token numbers are 1-based)
  Frame token_frame;
  for (Iterator t = tokens.start(); t < tokens.end(); t++)
    token_frame.f_and( Frame( Symbol( KString_Format( t+1))));
  d.unify( Definition::Tokens, token_frame);

  // Add character offsets of the span in the original string
  d.unify( Token_Start, Frame( Symbol( KString_Format( char_start))));
  d.unify( Token_End, Frame( Symbol( KString_Format( char_end))));

  // Add %head-token
  if (tokens.length() > 1)
    d.unify( Lexifier::Head_Token, 
	     Frame( Symbol( KString_Format( head + first + 1))));

  // Add %head-root
  KString hr = (head_root != KString::Default) ? head_root :
    all_tokens[ head + first].substring().string();
  hr.capitalization( Capitalization::Lowercase);
  d.unify( Head_Root, Frame( Symbol( hr)));

  // Add %ortho unless already defined
  if (d.slot_value( Lexifier::Ortho)->undefined())
    d.unify( Lexifier::Ortho, Frame( Symbol(
	       Substring( all_tokens[0].substring().original_string(),
			  char_start, char_end - char_start).string())));

  // Add %root unless already defined
  KString root;
  const Symbol_Frame* rootf
    = Symbol_Frame::down_cast( d.slot_value( Lexifier::Root).value());
  if (rootf) root = rootf->value();
  else {
    // Space-separated token texts, with the head replaced by its root form.
    for (Iterator ti = 0; ti < tokens.length(); ti++) {
      if (ti) root += ' ';
      if (ti == head && head_root.size()) root += head_root;
      else root += all_tokens[ first + ti].substring().string();
    }
    root.capitalization( Capitalization::Lowercase);
    d.unify( Lexifier::Root, Frame( Symbol( root)));
  } 

  // Add %meaning unless already defined or literal
  if (d.slot_value( Definition::Meanings)->undefined() && head_root.size()) {
    Frame posf = d.slot_value( Definition::Category);
    const Symbol_Frame* possf = Symbol_Frame::down_cast( posf.value());
    d.meaning( Meaning( possf ? possf->value() : Lexifier::Special,
			root, first + head + 1));
  }

  // Add (being +) if necessary
  if (To_Be_Forms.find( root) != Invalid_Iterator)
    d.unify( Lexifier::Being, Symbol::Plus);

  // Add %hash-value
  if (!(hash_value->undefined()) && d.slot_value( Lexifier::Hash_Value)->undefined())
    d.unify( Lexifier::Hash_Value, hash_value);
}


/////////
// Unifies each idiom definition with the word definition of its head, if
// available.
//
//   idioms  idiom definitions found at the current position
//   words   word definition(s) for the head; cleaned up IN PLACE before
//           unification (see below)
//
// Returns one definition per idiom, in the same order; an idiom that cannot
// be unified is returned unchanged. Returns an empty vector, and leaves
// words untouched, when there are no idioms.
Vector< Definition> unify_definitions(const Set< Definition>& idioms,
				      Definition& words) {
  if (idioms.size() == 0) return Vector< Definition>();

  // Does any idiom define its own concept class? Stop at the first that does.
  bool snobbish = False;
  for (Iterator probe = 0; !snobbish && probe < idioms.size(); probe++) {
    Frame candidate = idioms[probe];
    if (!(Snobbish.evaluate( candidate)->undefined())) snobbish = True;
  }

  // Clean up the word definition: drop the slots that must not be inherited
  // by an idiom. The statement lists are function-local statics so that they
  // are built on first use rather than at program startup.
  static Patrick_Statement_List Prepare_Defs(
	     "((%abbreviation = *REMOVE*)"
	     " (%acronym = *REMOVE*)"
	     " (%(concepts id) = *REMOVE*)"
	     " (%(concepts smcs) = *REMOVE*)"
	     " (%(concepts smcs-class) = *REMOVE*)"
	     " (*OR* ((%pos = (*OR* unit n prop pron))"
	     "        (%pos <= (*OR* unit n prop pron)))"
	     "       ((%pos = (*NOT* (*OR* unit n prop pron))))))");
  Prepare_Defs.evaluate( words);
  static Patrick_Statement_List Remove_Class(
	     "((%(concepts class) = *REMOVE*)"
	     " (%(concepts apcomp) = *REMOVE*)"
	     " (%(concepts scomp) = *REMOVE*)"
	     " (%(concepts xcomp) = *REMOVE*)"
	     " (%(concepts valency) = *REMOVE*))");
  // Only when an idiom brings its own class is the word's class removed.
  if (snobbish) Remove_Class.evaluate( words);
  Patrick_Statement("(%kce = *REMOVE*)").evaluate( words);

  Vector< Definition> unified;
  for (Iterator n = 0; n < idioms.size(); n++) {
    const Set_Frame* alternatives = Set_Frame::down_cast( idioms[n].value());
    Frame merged;
    if (alternatives) {
      // Disjunctive idiom: unify each alternative separately, keeping the
      // plain alternative wherever unification with the word fails.
      for (Iterator a = 0; a < alternatives->values().size(); a++) {
	Frame attempt = alternatives->values()[a]->unify( words);
	if (attempt->fail()) merged.f_or( alternatives->values()[a]);
	else merged.f_or( attempt);
      }
    } else {
      merged = idioms[n]->unify( words);
    }
    if (merged->fail()) results_fallback: unified += idioms[n];
    else unified += Definition::down_cast( merged);
  }
  return unified;
}


//////////
// Add alternate-cats fields
//
// defs is the (possibly disjunctive) set of definitions found for one word;
// it is rewritten in place. Nothing happens unless at least part of defs
// satisfies (%kce = +) and defs is a set of several definitions.
//
// Each definition in the set gets an alternate-cats slot holding the
// categories of the KCE definitions other than its own. When some definition
// qualifies as an "ed" or "ing" verb form (see Verb_ED / Verb_ING), the plain
// verb category is excluded from that list and ed / ing is offered instead
// to the definitions that do not themselves qualify.
void add_alternate_cats( Frame& defs) {
  Frame kce_defs( defs);
  if (Patrick_Statement("(%kce = +)").evaluate( kce_defs)->fail()) return;
  const Set_Frame* sdefs = Set_Frame::down_cast( defs.value());
  if (!sdefs) return;
  Frame all_cats	// must use path, should be *OR*
    = Patrick_Path( Vector< Symbol>( 1, Definition::Category)).evaluate( kce_defs);

  // The statement lists never change, so build them once on first use (the
  // same function-local-static arrangement unify_definitions uses) instead
  // of re-parsing them on every call.
  Frame temp = defs;
  static Patrick_Statement_List Verb_ED(
				 "((%past =c +)"
				 " (%pred-passive = +)"
				 " (%(concepts class subject) =c agent))");
  bool ed = !(Verb_ED.evaluate( temp)->fail());
  temp = defs;
  static Patrick_Statement_List Verb_ING(
		  "((%past = -)"
		  " (%participle =c +)"
		  " (%(concepts valency) =c intransitive))");
  bool ing = !(Verb_ING.evaluate( temp)->fail());
  if (ed || ing) all_cats.unify( (Lexifier::Verb)->f_not());

  // Now assign the categories to each definition in the *OR*
  Frame new_defs;
  for (Iterator i = 0; i < sdefs->values().size(); i++) {
    Frame new_def = sdefs->values()[i];
    Frame this_cat = new_def.slot_value( Definition::Category);

    // Every category except this definition's own.
    Frame new_alt_cats( all_cats);
    if (new_alt_cats.unify( this_cat.f_not())->fail())
      new_alt_cats = Undefined_Frame::Value;

    // Evaluate on a scratch copy so new_def itself is not altered by the test.
    temp = new_def;
    if (ed && Verb_ED.evaluate( temp)->fail()) 
      new_alt_cats.f_or( Lexifier::ED);
    temp = new_def;
    if (ing && Verb_ING.evaluate( temp)->fail()) 
      new_alt_cats.f_or(Lexifier::ING);
    if (!new_alt_cats->undefined()) new_def.unify( Lexifier::Alt_Cats, 
						   new_alt_cats);
    new_defs.f_or( new_def);
  }
  defs = Definition::down_cast( new_defs);
}


// Recomputes a tag's head-pos attribute from its all-heads and sel attributes.
//
// This algorithm is specifically tailored to work with CAT's LE, which
// sometimes gets head-pos wrong (usually because the value is obsolete).
// It only does anything when the tag carries an all-heads attribute, which
// only the LE sets, so it is meant purely for CTE, not KCE. (We haven't yet
// needed structural tags in KCE anyway.)
//
// all-heads is read as a whitespace-separated list of value pairs and sel as
// the 1-based number of the pair to use; the first value of that pair, less
// one, is the character offset of the head. The token list is then walked,
// counting characters, until that offset is reached: tags and entities count
// as one character, except that "<?CTE" tags count as none. head-pos is set
// to the 1-based index of the token found there.
//
// Throws Data_Error when all-heads or sel is malformed, or when the offset
// does not fall on a token boundary.
void adjust_head_pos(Definition& tag, const Vector<Token>& tokens) {
  Frame heads_frame = Attributes_All_Heads.evaluate(tag);
  if (heads_frame->undefined()) return;
  const Symbol_Frame *heads_symbol = Symbol_Frame::down_cast(heads_frame.value());
  if (!heads_symbol) THROW(Data_Error, << "Invalid all-heads in tag:\n" << tag);

  Frame sel_frame = Attributes_Sel.evaluate(tag);
  if (sel_frame->undefined())
    THROW(Data_Error, << "Missing sel in tag:\n" << tag);
  const Symbol_Frame *sel_symbol = Symbol_Frame::down_cast(sel_frame.value());
  if (!sel_symbol) THROW(Data_Error, << "Invalid sel in tag:\n" << tag);
  int selval = sel_symbol->value().int_value();

  // Split all-heads into its pairs and pick out the selected one.
  KString heads_text = heads_symbol->value();
  heads_text.dequote();
  Vector<KString> fields = heads_text.split();
  if (fields.size() % 2)
    THROW(Data_Error, << "Attribute all-heads wrong size:\n" << tag);
  Iterator pair_count = fields.size() >> 1;
  if (selval > pair_count || selval < 1)
    THROW(Data_Error, << "Attribute sel is invalid:\n" << tag);
  int offset = fields[(selval - 1) * 2].int_value() - 1;

  // Walk the tokens until the running character count reaches the offset.
  int current_offset = tokens[0].substring().start();
  Iterator j = 0;
  while (current_offset < offset && j < tokens.size()) {
    if (tokens[j].type() == Token_Type::Tag) {
      // A tag counts as one character unless it is a <?CTE tag.
      bool counted = True;
      if (tokens[j].substring().length() > 5) {
	KString prefix(tokens[j].substring().string(), 0, 5);
	prefix.capitalization(Capitalization::Uppercase);
	if (!(prefix != KString("<?CTE"))) counted = False;
      }
      if (counted) current_offset++;
    } else if (tokens[j].type() == Token_Type::Entity) {
      current_offset++;
    } else {
      current_offset += tokens[j].substring().length();
    }
    // Account for whatever separates this token from the next one.
    if (j + 1 < tokens.size())
      current_offset += tokens[j + 1].substring().start() - 
			tokens[j].substring().end();
    j++;
  }
  if (current_offset != offset)
    THROW(Data_Error, << "Invalid all-heads value:\n" << tag);

  Patrick_Statement(Attributes_Head_Pos, "<=", 
		    Frame( Symbol( KString_Format(j + 1)))).evaluate(tag);
}


// Hash is the wildcard symbol try_hashing() substitutes for a single letter;
// Period is what Lexifier::execute() compares the final token against.
static Symbol Hash("^"), Period(".");


//////////
// Looks for numbers / single chars in input and replaces them by the hash
// wildcard.
//
//   input   the token's symbol
//   output  out: the wildcard form of input
//   hashes  out: the strings the wildcards stand for are appended here, in
//           order of appearance
//
// A token that is a single letter becomes the Hash symbol, and the letter is
// recorded. Otherwise every maximal run of digits is replaced by one wildcard
// character and the run is recorded; all other characters are copied as is.
// Returns True if output differs from input, i.e. if anything was replaced.
bool try_hashing( const Symbol& input, Symbol& output, Vector< KString>& hashes) {
  const KString& token = input.value();

  // if input is single char, use it.
  // (The <ctype.h> classifiers are only defined for values representable as
  // unsigned char, hence the casts.)
  if ((token.size() == 1) && isalpha( static_cast<unsigned char>( token[0]))) {
    output = Hash;
    hashes += token;
    return True;
  }

  // if input contains numbers, include them all
  bool number_flag = False;	// True while inside a run of digits
  KString new_hash;
  for (Iterator i = 0; i < token.size(); i++) {
    if (isdigit( static_cast<unsigned char>( token[i]))) {
      if (!number_flag) {
	// First digit of a run: start a new recorded string and emit the
	// wildcard character. Hash holds a single character, so it is at
	// index 0; index 1 is past the end of the string.
	number_flag = True;
	hashes += KString( 1, token[i]);
	new_hash += Hash.value()[0];
      } else {
	// Further digits extend the most recently recorded string.
	hashes[ hashes.size() - 1] += token[i];
      }
    } else {
      number_flag = False;
      new_hash += token[i];
    }
  }
  output = Symbol( new_hash);
  return (input != output);
}


// Empty vector used as the fill value when Lexifier::execute() sizes its
// per-token hash lists.
static Vector< KString> Default_Vector_KString;

//////////
// The execute function. Runs the lexifier over one token sequence and returns
// the definition frames produced for it.
//
// Steps, in order:
//  1. Build a Symbol for every token, plus a wildcard ("hashed") variant of
//     it and the strings those wildcards stand for (see try_hashing).
//  2. For every token, and every morpheme the morphonizer produces for it:
//     look the root up with the wordifier and run the result through the
//     morphsemizer; then look up idioms at that position, unify them with
//     the word definition, and run those through the morphsemizer as well.
//  3. Handle literal tags (definitions satisfying Is_Literal): the text
//     between an opening tag and the matching closing tag becomes a single
//     literal definition, and word/idiom results for tokens in between are
//     not added to the output.
//  4. Add a literal definition for a sentence-final period.
//  5. Give every token that is still uncovered a bare definition, marked
//     meaningless unless it is punctuation or an entity.
Vector< Frame> Lexifier::execute(const Vector< Token>& input) const {
  Vector< Frame> results;
  Iterator i;

  // This vector will indicate what tokens need literal meanings on them.
  // Every entry starts True and is cleared once the token is covered by a
  // word, idiom or literal definition.
  Vector< bool> literal_flags( input.size(), True);
  Frame literal_tag;	// becomes a tagname when a literal tag appears
  // Index of the token following the opening literal tag. Only assigned, and
  // only read, while literal_tag is defined.
  Iterator literal_token_start;

  // The various strings of every input token.
  Vector< Symbol> input_symbols;
  for (i = 0; i < input.size(); i++)
    input_symbols += Symbol( input[i].substring().string());

  // Construct hash info
  Vector< Symbol> hashed_symbols( input.size(), Symbol::Default);
  Vector< Vector< KString> > hashes( input.size(), Default_Vector_KString);
  bool hashable = False;
  // try_hashing is the left operand on purpose: it must run for every token,
  // even after hashable has become True.
  for (i = 0; i < input.size(); i++)
    hashable = try_hashing( input_symbols[i], hashed_symbols[i], hashes[i]) || hashable;

  // Now analyze tokens
  for (i = 0; i < input.size(); i++) {
    Vector< Frame> idiom_results;
    Frame word_results;
    // Create trivial morpheme, or nontrivial one if token is word.
    // The token text is passed to the morphonizer dequoted.
    KString is = input_symbols[i].value();
    is.dequote();
    Set< Morpheme> sm = morphonizer()->run( is);
    for (Iterator m = 0; m < sm.size(); m++) {
      Vector< Definition> idiom_defs;
      Definition word_defs, unifiable_word_defs;
      const KString& root = sm[m].root();
      word_defs = wordifier()->run( root);

      // Call Morphsemizer on words
      if (!(word_defs->undefined() || word_defs->fail())) {
	// Keep the raw word definition for unification with idioms below.
	unifiable_word_defs = word_defs;
	Description desc( word_defs, sm[m]);
	Definition final_defs = morphsemizer()->run( desc);
	// NOTE(review): head and words are computed below but not used again
	// in this branch.
	Iterator head;
	Subvector< Token> words;
	if (!(final_defs->undefined() || final_defs->fail()) ) {
	  // A disjunctive result is decorated alternative by alternative.
	  Set_Frame* sf = Set_Frame::down_cast((Proto_Frame&) (final_defs.mutable_value()));
	  const Definition& wr = sf ? Definition::down_cast( sf->values()[0]) : final_defs;
	  words = tokens( wr, head, input, i);
	  // A word definition always covers just the current token.
	  Subvector< Token> st( input, i, 1);
	  if (sf) for (Iterator sfi = 0; sfi < sf->values().size(); sfi++)
	    add( Definition::down_cast( sf->mutable_values()[sfi]), st, root);
	  else add( final_defs, st, root);
	  literal_flags[i] = False;
	  word_results.f_or( final_defs);
	}}

      // Now search for idioms.
      // The current token's symbol is temporarily replaced by its root for
      // the lookup and restored afterwards.
      Symbol old_idiom = input_symbols[i];
      input_symbols[i] = Symbol( root);
      Idiom idiom( input_symbols, i);
      idiom_defs += unify_definitions( idiomifier()->run( idiom), 
				       unifiable_word_defs);
      input_symbols[i] = old_idiom;
      // If sentence contains hashable elements, try using hashed_symbols
      if (hashable) {
	old_idiom = hashed_symbols[i];
	hashed_symbols[i] = Symbol( root);
	idiom = Idiom( hashed_symbols, i);
	idiom_defs += unify_definitions( idiomifier()->run( idiom), 
					 unifiable_word_defs);
	hashed_symbols[i] = old_idiom;
      }
      

      // Call Morphsemizer to validate/alter idiom definitions
      for (Iterator sdi = 0; sdi < idiom_defs.size(); sdi++) {
        Description desc = Description( idiom_defs[sdi], sm[m]);
	Definition altered_def = morphsemizer()->run( desc);
        // Idioms the morphsemizer rejects are dropped.
        if (altered_def->undefined() || altered_def->fail()) continue;

	// Figure out just how many tokens were used in the Definition.
	// For a disjunctive result the first alternative is consulted.
	Set_Frame* sf = (Set_Frame*) Set_Frame::down_cast( altered_def.value());
	const Definition& ad
	  = sf ? Definition::down_cast( sf->values()[0]) : altered_def;
	Iterator head;
	Subvector< Token> words = tokens( ad, head, input, i);
	
	// Figure out hash frames
	// (all wildcard strings recorded for the tokens the idiom spans)
	Frame hash_frame;
	for (Iterator wi = words.start(); wi < words.end(); wi++)
	  for (Iterator whi = 0; whi < hashes[wi].size(); whi++)
	    hash_frame.f_and( Frame( Symbol( hashes[wi][whi])));

	if (sf) {
	  for (Iterator sfi = 0; sfi < sf->values().size(); sfi++)
	    add( Definition::down_cast( sf->mutable_values()[sfi]), words, root, head, hash_frame);
	} else add( altered_def, words, root, head, hash_frame);
        smcs_checker()->validate( altered_def);
	idiom_results += altered_def;
        // Every token the idiom spans is now covered.
        for (Iterator li = words.start(); li < words.end(); li++)
          literal_flags[li] = False;
    }}

    // Several special things to do with tags
    // (the test runs on a copy so that word_results itself is not altered)
    Frame wr( word_results);
    if (!(Is_Literal.evaluate( wr)->fail())) {
      const Frame& root = word_results.slot_value( Lexifier::Root);
      if (literal_tag->undefined()) {	// start of literal sequence
	results += word_results;
	literal_tag = root;
	literal_token_start = i+1;
      } else if (literal_tag == root) {	// matching tag: end of literal sequence
	results += word_results;
	// don't add literal tokens if there are none
	if (literal_token_start < i) {
	  // One definition for everything between the two tags; its ortho is
	  // the original text from the end of the opening tag to the start of
	  // the closing tag.
	  Definition lit_def;
	  Iterator start = input[ literal_token_start-1].substring().end();
	  lit_def.unify( Lexifier::Ortho, Frame( Symbol(
		Substring( input[0].substring().original_string(),
			   start, input[i].substring().start() - start).string())));
	  add( lit_def, Subvector< Token>( input, literal_token_start,
					   i - literal_token_start));
	  results += lit_def;
	}
	literal_tag = Undefined_Frame::Value;
	for (Iterator li = literal_token_start; li < i; li++)
	  literal_flags[li] = False;
	continue;
      }
      // A literal tag with a different root inside an open literal falls
      // through and is treated like any other token in the literal.
    }

    // don't analyze, part of literal
    // (results for this token are only emitted while no literal tag is open)
    if (literal_tag->undefined()) {
      if (!(word_results->undefined() || word_results->fail())) {
	add_alternate_cats( word_results);
	smcs_checker()->validate( word_results);
	adjust_head_pos( Definition::down_cast( word_results), input);
	results += word_results;
      }
      results += idiom_results;
    } else if (i == input.size() - 1 && literal_token_start < i) {
      // In case of rogue literal, add one def comprising rest of sentence
      // NOTE(review): the span ends before the last token (index i); confirm
      // that excluding it is intended.
      Definition lit_def;
      Iterator start = input[ literal_token_start-1].substring().end();
      lit_def.unify( Lexifier::Ortho, Frame( Symbol(
	    Substring(input[0].substring().original_string(),
		      start, input[i].substring().start() - start).string())));
      add( lit_def, Subvector< Token>( input, literal_token_start,
				       i - literal_token_start));
      results += lit_def;
      literal_tag = Undefined_Frame::Value;
      for (Iterator li = literal_token_start; li < i; li++)
	literal_flags[li] = False;
    }
  }

  // Always create literal for final period. (If not done already)
  // NOTE(review): nothing here checks whether the period already received a
  // definition above; the literal is added unconditionally.
  if (input.size() && (input_symbols[input.size()-1] == Period)) {
    Definition literal_def;
    add( literal_def, Subvector< Token>( input, input.size()-1, 1));
    results += literal_def;
    literal_flags[ input.size() - 1] = False;
  }

  // Fill out Meaningless_Tokens. No single punctuation char may be meaningless.
  // Entities are likewise never marked meaningless.
  Iterator lfi;
  for (lfi = 0; lfi < input.size(); lfi++) {
    if (!literal_flags[lfi]) continue;
    Definition literal_def;
    add( literal_def, Subvector< Token>( input, lfi, 1));
    // (literal_flags[lfi] is necessarily True here because of the continue above)
    if (literal_flags[lfi] && (input[lfi].type() != Token_Type::Punctuation)
	&& (input[lfi].type() != Token_Type::Entity))
      literal_def.unify( Symbol( Meaningless), Symbol::Plus);
    results += literal_def;
  }

  return results;
}


