/*	LEXICON.C - functions to load a lexicon file, free lexicons, etc.
 ***************************************************************************
 *
 *	LEXICON *find_lexicon(name,lang)
 *	unsigned char *name;
 *	LANGUAGE *lang;
 *
 *	void free_lexicons(lang)
 *	LANGUAGE *lang;
 *
 *	int load_lexicons(lexiconfile,lang,comment)
 *	unsigned char *lexiconfile;
 *	LANGUAGE *lang;
 *	unsigned comment;
 *
 ***************************************************************************
 *	EDIT HISTORY
 *	18-Jul-89	BLDLEX.C written by Dave Smith
 *			MMEM.C written by Dave Smith
 *	18-Sep-89	Steve McConnel
 *			     - regularize some comments and includes
 *			     - rearrange, label static functions
 *			     - replace malloc() with myalloc()
 *			     - replace free() with myfree()
 *			     - cleaned up linked list removal
 *	19-Sep-89	SRMc - revised definition of getline()
 *			     - allow comments (or blank lines) at beginning
 *				of lexicon file
 *	20-Sep-89	SRMc - protect isspace() with isascii()
 *	21-Sep-89	SRMc - define MAXLEV locally
 *			     - use whiteSpc[] for strtok()
 *			     - replace strtok() with strtok8()
 *			     - change char to unsigned char for 8-bit safety
 *	23-Sep-89	SRMc - rename MORPH.H to KIMMO.H
 *	25-Sep-89	SRMc - finetune messages to screen
 *	26-Sep-89	SRMc - use STAMP's newstyle TRIE
 *			     - replace struct endlst with struct lex_item
 *			     - write add_lex_item()
 *			     - move findCClass() from recogniz.c
 *			     - replace free() with myfree() in freeLexes()
 *			     - write erase_lex_item()
 *	28-Sep-89	SRMc - make Lang global, remove "lang" arguments
 *	29-Sep-89	SRMc - revise definitions of Language and struct
 *				lex_item
 *	30-Sep-89	SRMc - finish changing lexicon storage
 *	 2-Oct-89	SRMc - consolidated lexicon storage was a mistake --
 *				back up to previous scheme
 *			     - rename Language to LANGUAGE
 *			     - replace typedefs ContClass and Alt with
 *				typedef ALTERNATION
 *			     - replace typedef Lexicon with typedef LEXICON
 *	 4-Oct-89	SRMc - rename BLDLEX.C to LOADLEXI.C
 *			     - rename buildLex() to load_lexicon()
 *			     - change all errors to NonFatal
 *			     - moved find_lexicon() from LOADLEXI.C to
 *				FINDLEXI.C
 *	10-Oct-89	SRMc - close lexicon file on error
 *			     - revamp error reporting
 *	12-Oct-89	SRMc - check for lexicon sections not listed as
 *				member of any alternation
 *	14-Oct-89	SRMc - merge LOADLEXI.C, FINDLEXI.C, and part of
 *				MMEM.C to form LEXICON.C
 *			     - rename freeLexes() to free_lexicons()
 *	16-Oct-89	SRMc - allow flexible quotation marking of lexicon
 *				glossary (feature) strings
 *	17-Oct-89	SRMc - clear Lang.initial_lex in free_lexicons()
 *	20-Oct-89	SRMc - eliminate global variables
 *			     - restore LANGUAGE *lang argument to
 *				find_lexicon(), free_lexicons(), and
 *				load_lexicon()
 *	21-Oct-89	SRMc - rename load_lexicon() to load_lexicons()
 *	23-Oct-89	SRMc - fiddle with free_lexicons() and
 *				new_alternation() to prevent error recovery
 *				bug like over in free_rules()
 *	24-Oct-89	SRMc - use Lang.boundary instead of hardwired "#"
 *				for end of word (no continuation) marker
 *				in a lexicon entry
 *			     - require ALTERNATION keyword for each
 *				definition of an alternation, allow such
 *				definitions to split across lines
 *			     - use END keyword only as an (optional) EOF
 *				marker
 *			     - define INCLUDE keyword for allowing lexicon
 *				to be split into several files
 *			     - replace getToken() with get_token()
 *			     - wrote add_alternation() and add_lexicon() to
 *				replace other static functions
 *			     - some delinting
 *	13-Dec-89	SRMc - add filename to report_error() argument list
 *	18-Dec-89	SRMc - edit error message 308
 *	 2-Jan-90	SRMc - add function prototypes, more delinting
 *	 3-Jan-90	SRMc - will we never run out of lint?
 *	24-Jan-90	SRMc - edit error messages
 *	26-Jan-90	SRMc - add automatic ".lex" extension for INCLUDE files
 *	19-Apr-90	EA   - #ifdef for THINK_C
 *	12-Jul-90	SRMc - #ifdef code to squeeze ALTERNATION and LEXICON
 *				structures in load_lexicons(), as suggested by
 *				Greg Lee (lee@uhccux.uhcc.hawaii.edu) for port
 *				to ULTRIX
 *			     - replace "void *" with "VOIDP", as also suggested
 *				by Greg Lee for port to ULTRIX
 *	14-Jul-90	SRMc - fix typo in #ifdef REALLOC_SAFE
 *	25-Feb-91	SRMc - fix bug in loading a lexicon with a large number
 *				of sections
 *			     - fix related bug in loading a lexicon with a
 *				large number of ALTERNATIONs
 *	26-Feb-91	SRMc - replace yesterday's fix altogether -- it was
 *				only 90% working
 *			     - change definitions of ALTERNATION and struct
 *				lex_item to store array indices rather than
 *				pointers into arrays (the arrays may be shifted
 *				around in memory during loading as they grow
 *				in size dynamically)
 *	19-Nov-91	SRMc - in erase_lex_item(), free the nodes themselves
 *	 5-Dec-91	SRMc - add prototypes for Think C 5.0
 *	30-Jan-92	SRMc - move function prototypes to pckimmo.h
 ***************************************************************************
 * Copyright 1989, 1992 by the Summer Institute of Linguistics, Inc.
 * All rights reserved.
 */
#include <stdio.h>
#include <ctype.h>
#include "pckimmo.h"

#ifdef BSD
#include <strings.h>
#else
#include <string.h>
#endif

#ifdef UNIX
#define DIRSEPCHAR '/'
#endif
#ifdef MSDOS
#define DIRSEPCHAR '\\'
#endif
#ifdef THINK_C
#define DIRSEPCHAR ':'
#endif

/*
 *  delimiter set handed to strtok8() when splitting an input line into tokens
 */
static unsigned char whiteSpc[] = " \t\n\v\f\r";  /* same chars as isspace() */
/*
 *  comment character for the file being loaded; set by load_lexicons() from
 *  its "comment" argument and passed to getline() by get_token()
 */
static unsigned char comment_char;
/*
 *  error messages
 *
 *  Each struct message is initialized with a message number and a
 *  printf-style format string.  The messages are passed by address to
 *  report_error(); the values for any %s or %c conversions are supplied as
 *  the trailing arguments of that call.
 */
static struct message Bad_lex_file =
    { 300, "Lexicon file could not be opened: %s" };
static struct message Empty_lexfile =
    { 301, "No data in lexicon file %s" };
static struct message No_alt_name =
    { 302, "Missing alternation name" };
static struct message Empty_alt =
    { 303, "Empty alternation definition: %s" };
static struct message Already_altern =
    { 304, "Adding to existing alternation %s" };
static struct message No_lexicon_sections =
    { 305, "No lexicon sections in lexicon file %s" };
static struct message No_lex_name =
    { 306, "Missing lexicon name" };
static struct message Unknown_lexicon =
    { 307, "Lexicon section %s is not listed as a member of any alternations"};
static struct message No_cont_class =
    { 308, "Expected continuation class or BOUNDARY symbol for %s" };
static struct message Bad_cont_class =
    { 309, "Invalid continuation class %s for %s" };
static struct message No_glossary =
    { 310, "Expected gloss element for %s" };
static struct message Bad_glossary =
    { 311, "Invalid gloss element %s for %s" };
static struct message Bad_form =
    { 312, "Form contains character not in alphabet: %c" };
static struct message No_init_lexicon =
    { 313, "INITIAL lexicon not found" };
static struct message No_nesting =
    { 314, "Cannot nest lexicon INCLUDE files" };
static struct message Miss_filename =
    { 315, "Missing INCLUDE filename" };
static struct message Bad_filename =
    { 316, "Lexicon INCLUDE file could not be opened: %s" };
static struct message Bad_keyword =
    { 317, "Invalid lexicon file keyword: %s" };

				/* name of the initial lexicon */
#define INITIAL_LEX "INITIAL"
#define MAXLEV 10000		/* maximum depth of trie (ie, infinite) */
/*
 *  macro to test a string for being a valid keyword in a lexicon file
 *
 *  The argument must be a (char *) expression without side effects: it is
 *  expanded once per keyword compared.  The comparison is case sensitive.
 */
#define is_keyword(token) \
( (strcmp(token,"ALTERNATION")==0) || (strcmp(token,"LEXICON")==0) ||\
  (strcmp(token,"INCLUDE")==0) || (strcmp(token,"END")==0) )

/*
 *  name of the file currently being read, passed to report_error() by the
 *  loading functions.  load_lexicons() points it either at its lexiconfile
 *  argument or at a mystrdup()ed INCLUDE file name, which load_lexicons()
 *  also frees.
 */
static char *filename = (char *)NULL;

/****************************************************************************
 * NAME
 *    find_lexicon
 * ARGUMENTS
 *    name - NUL-terminated name of the lexicon section wanted
 *    lang - LANGUAGE data structure whose lex_sections array is searched
 * DESCRIPTION
 *    Scan the first lang->num_lex_sections entries of lang->lex_sections
 *    for an entry whose lex_name is an exact (strcmp) match for name.
 * RETURN VALUE
 *    address of the matching LEXICON entry inside lang->lex_sections, or
 *    NULL if no entry matches.  The address is only good until the array
 *    is next reallocated (see new_lexicon()).
 */
LEXICON *find_lexicon(name,lang)
unsigned char *name;
LANGUAGE *lang;
{
register int idx;

for ( idx = 0 ; idx < lang->num_lex_sections ; ++idx )
    {
    LEXICON *candidate = &lang->lex_sections[idx];

    if (strcmp((char *)candidate->lex_name, (char *)name) != 0)
	continue;			/* different name, keep looking */
    return( candidate );
    }
return( (LEXICON *)NULL );		/* ran off the end without a match */
}

/****************************************************************************
 * NAME
 *    erase_lex_item
 * ARGUMENTS
 *    lexp - head of a linked list of lex_item nodes (may be NULL)
 * DESCRIPTION
 *    Walk the list, releasing each node's lex_string and lex_gloss (when
 *    they are non-NULL) and then the node itself.  Handed to erase_trie()
 *    by free_lexicons() as the routine that disposes of stored data.
 * RETURN VALUE
 *    none
 */
static void erase_lex_item(lexp)
struct lex_item *lexp;
{
register struct lex_item *next;

for ( ; lexp != (struct lex_item *)NULL ; lexp = next )
    {
    next = lexp->link;		/* remember the successor before freeing */
    if (lexp->lex_string != NULL)
	myfree(lexp->lex_string);
    if (lexp->lex_gloss != NULL)
	myfree(lexp->lex_gloss);
    myfree(lexp);
    }
}

/****************************************************************************
 * NAME
 *    free_lexicons
 * ARGUMENTS
 *    lang - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Free the Lexicon related information for the language: every
 *    ALTERNATION (name and array of lexicon indices), every LEXICON (name
 *    and TRIE storage, including the lex_item lists held in the TRIE), and
 *    the two arrays themselves.  On return lang->alterns, lang->lex_sections
 *    and lang->initial_lex are NULL and both counters are zero.
 * RETURN VALUE
 *    none
 */
void free_lexicons(lang)
LANGUAGE *lang;
{
int i;
register ALTERNATION *ap;
register LEXICON *lxp;
/*
 *  free the array of ALTERNATIONs
 */
if (lang->alterns)
    {
    for ( ap = lang->alterns, i = 0 ; i < lang->num_alterns ; ++i, ++ap )
	{
	if (ap->alt_name)
	    myfree(ap->alt_name);
	/*
	 *  alt_lexicons is still NULL for an ALTERNATION that was defined
	 *  without any members (see add_alternation()), so guard the free
	 *  the same way the other frees in this file are guarded
	 */
	if (ap->alt_lexicons)
	    myfree(ap->alt_lexicons);
	}
    myfree(lang->alterns);
    }
lang->alterns = (ALTERNATION *)NULL;
lang->num_alterns = 0;
/*
 *  free the array of LEXICONs
 */
if (lang->lex_sections)
    {
    for (lxp=lang->lex_sections, i=0 ; i < lang->num_lex_sections ; ++i, ++lxp)
	{
	if (lxp->lex_name)
	    myfree(lxp->lex_name);
	if (lxp->lex_storage != (TRIE *)NULL)
	    erase_trie(lxp->lex_storage, erase_lex_item);
	}
    myfree(lang->lex_sections);
    }
lang->lex_sections = (LEXICON *)NULL;
lang->initial_lex  = (LEXICON *)NULL;	/* pointed into lex_sections */
lang->num_lex_sections = 0;
}

/****************************************************************************
 * NAME
 *    add_lex_item
 * ARGUMENTS
 *    item - lex_item node to append to the list
 *    list - head of the existing list of lex_item nodes (NULL if empty)
 * DESCRIPTION
 *    Append a node to the tail of a singly linked list of lex_item nodes.
 *    The node's own link is cleared first, so it always ends the list.
 *    Handed to add_to_trie() by add_lexicon() as the routine that merges a
 *    new entry with whatever is already stored under the same key.
 * RETURN VALUE
 *    address of the head of the list (item itself if the list was empty)
 */
static struct lex_item *add_lex_item(item,list)
struct lex_item *item;
struct lex_item *list;
{
register struct lex_item **tailp;

item->link = (struct lex_item *)NULL;
/*
 *  advance to the first NULL link -- the head pointer itself when the
 *  list is empty, otherwise the link field of the last node
 */
tailp = &list;
while (*tailp != (struct lex_item *)NULL)
    tailp = &(*tailp)->link;
/*
 *  hang the new node there and hand back the (possibly new) head
 */
*tailp = item;
return( list );
}

/*
 *  numbers of entries currently allocated for dynamically sized arrays
 *
 *  Both are reset to zero by load_lexicons() and grow in steps of 50 as
 *  entries are added; the matching lang->num_... counter holds how many of
 *  the allocated entries are actually in use.
 */
static int size_alterns;	/* size for lang->alterns */
static int size_lex_sections;	/* size for lang->lex_sections */

/****************************************************************************
 * NAME
 *    find_alternation
 * ARGUMENTS
 *    name - NUL-terminated name of an alternation class
 *    lang - LANGUAGE data structure whose alterns array is searched
 * DESCRIPTION
 *    Scan the first lang->num_alterns entries of lang->alterns for an
 *    entry whose alt_name is an exact (strcmp) match for name.
 * RETURN VALUE
 *    address of the matching ALTERNATION inside lang->alterns, or NULL if
 *    no entry matches
 */
static ALTERNATION *find_alternation(name,lang)
unsigned char *name;
LANGUAGE *lang;
{
register int idx = 0;

while (idx < lang->num_alterns)
    {
    if (strcmp((char *)lang->alterns[idx].alt_name, (char *)name) == 0)
	return( &lang->alterns[idx] );	/* found it */
    ++idx;
    }
return( (ALTERNATION *)NULL );		/* no such alternation */
}

/****************************************************************************
 * NAME
 *    new_lexicon
 * ARGUMENTS
 *    name - pointer to name of a lexicon section
 *    lang - pointer to LANGUAGE data structure
 * DESCRIPTION
 *    Add this lexicon subsection to lang->lex_sections, unless a section
 *    of that name is already there.  The array is grown 50 entries at a
 *    time; the new entry gets its own copy of the name and empty storage.
 * RETURN VALUE
 *    pointer to the (new or already existing) LEXICON entry in
 *    lang->lex_sections.  Growing the array may move it, so the pointer
 *    must not be kept across a later call that adds another section.
 */
static LEXICON *new_lexicon(name,lang)
unsigned char *name;
LANGUAGE *lang;
{
register LEXICON *lxp;
/*
 *  make sure that this name isn't already used
 */
lxp = find_lexicon(name,lang);
if (lxp != (LEXICON *)NULL)
    return( lxp );		/* name is already there */
/*
 *  if necessary, grow the array of LEXICON structures
 */
if (lang->num_lex_sections >= size_lex_sections)
    {
    size_lex_sections += 50;
    lang->lex_sections = (LEXICON *)myrealloc(
				lang->lex_sections,
				size_lex_sections * sizeof(LEXICON) );
    }
/*
 *  store a copy of the name and initialize the storage
 */
lxp = &lang->lex_sections[lang->num_lex_sections];
lxp->lex_name = (unsigned char *)mystrdup((char *)name);
lxp->lex_storage = (TRIE *)NULL;
++lang->num_lex_sections;
return( lxp );
}

/****************************************************************************
 * NAME
 *    get_token
 * ARGUMENTS
 *    fp    - input FILE pointer
 *    linep - pointer to file line number; a value of zero marks the start
 *            of a file and discards any token state left from earlier calls
 * DESCRIPTION
 *    Get the next whitespace-delimited token from a lexicon file.  The
 *    rest of the current line is tried first (via strtok8()); when it is
 *    used up, further lines are fetched with getline() until one yields a
 *    token.  The position within the line is kept in a static variable
 *    between calls.
 * RETURN VALUE
 *    pointer to token found, or NULL if EOF
 */
static unsigned char *get_token(fp, linep)
FILE *fp;
int *linep;
{
static unsigned char *cur_tok = (unsigned char *)NULL;
unsigned char *line;
/*
 *  first try to continue with the line already in hand
 */
if (*linep == 0)
    cur_tok = (unsigned char *)NULL;	/* new file: forget the old line */
else if (cur_tok != (unsigned char *)NULL)
    cur_tok = strtok8(NULL,whiteSpc);
/*
 *  nothing left on that line, so pull in lines until one has a token
 */
for (;;)
    {
    if (cur_tok != (unsigned char *)NULL)
	return( cur_tok );
    line = getline(fp, linep, comment_char);
    if (line == (unsigned char *)NULL)
	{
	cur_tok = (unsigned char *)NULL;
	return( (unsigned char *)NULL );	/* signal EOF */
	}
    cur_tok = strtok8(line,whiteSpc);
    }
}

/****************************************************************************
 * NAME
 *    add_alternation
 * ARGUMENTS
 *    infp  - input FILE pointer
 *    linep - pointer to input file line number
 *    tokp  - address of pointer to the next token from the input file
 *    lang  - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Add an ALTERNATION to the LANGUAGE data structure.
 * RETURN VALUE
 *    zero if okay, -1 if error
 */
static int add_alternation(infp,linep,tokp,lang)
FILE *infp;
int *linep;
unsigned char **tokp;
LANGUAGE *lang;
{
int i;
unsigned char *tok;
ALTERNATION *ap;
LEXICON *lxp;
int size_alt_lex;	/* size for array of alt_lexicons field
			 *   in current entry of lang->alterns */
int num_alt_lex;	/* number of entries actually used in the
			 *   alt_lexicons field of current entry in
			 *   lang->alterns */
ALTERNATION *oldalt;
int oldsize;

tok = get_token(infp,linep);
if ((tok == (unsigned char *)NULL) || is_keyword((char *)tok))
    {
    report_error(FATAL, &No_alt_name, linep, filename);
    return( -1 );
    }
/*
 *  check whether this alternation name was already used
 */
ap = find_alternation(tok,lang);
if (ap != (ALTERNATION *)NULL)
    {
    report_error(NONFATAL, &Already_altern, (int *)NULL, filename, tok);
    for ( i = 0 ; ap->alt_lexicons[i] != -1 ; ++i )
	;
    size_alt_lex = i + 1;	/* size allocated (last one -1) */
    num_alt_lex  = i;		/* index of next one to add */
    }
else
    {
    /*
     *  if necessary, grow the array of ALTERNATION structures
     */
    if (lang->num_alterns >= size_alterns)
	{
	oldalt = lang->alterns;
	oldsize = size_alterns;
	size_alterns += 50;
	lang->alterns = (ALTERNATION *)myrealloc(
					lang->alterns,
					size_alterns * sizeof(ALTERNATION) );
	}
    /*
     *  add a new ALTERNATION
     */
    ap = &lang->alterns[lang->num_alterns];
    ++lang->num_alterns;				/* bump the counter */
							/* save the name */
    ap->alt_name = (unsigned char *)mystrdup((char *)tok);
    /*
     *  initialize memory storage for the lexicon section indexes
     */
    ap->alt_lexicons = (short *)NULL;
    size_alt_lex = 0;
    num_alt_lex  = 0;
    }
/*
 *  read lexicon section names until EOF or a keyword
 */
for (;;)
    {
    tok = get_token(infp,linep);
    if ((tok == (unsigned char *)NULL) || is_keyword((char *)tok))
	break;
    /*
     *  convert the lexicon section name into a LEXICON pointer
     *  if necessary, add it to the array of section names
     */
    lxp = find_lexicon(tok,lang);
    if (lxp == (LEXICON *)NULL)
	{
	lxp = new_lexicon(tok,lang);
	}
    /*
     *  store the LEXICON pointer, after growing the array if necessary
     */
    if (num_alt_lex >= size_alt_lex)
	{
	size_alt_lex += 50;
	ap->alt_lexicons = (short *)myrealloc(
					ap->alt_lexicons,
					size_alt_lex * sizeof(short) );
	}
    ap->alt_lexicons[num_alt_lex] = lxp - lang->lex_sections;
    ++num_alt_lex;
    ap->alt_lexicons[num_alt_lex] = -1;
    }
/*
 *  finished defining this ALTERNATION, so clean up and return
 */
if (ap->alt_lexicons == (short *)NULL)
    report_error(NONFATAL, &Empty_alt, linep, filename, ap->alt_name);
else
    {
    /*
     *  terminate the array of indices into lang->lex_sections
     */
    if (num_alt_lex >= size_alt_lex)
	{
	size_alt_lex = num_alt_lex + 1;
	ap->alt_lexicons = (short *)myrealloc(
					ap->alt_lexicons,
					size_alt_lex * sizeof(short) );
	}
    ap->alt_lexicons[num_alt_lex] = -1;
    ++num_alt_lex;
    /*
     *  squeeze the array to a perfect fit
     */
    if (num_alt_lex < size_alt_lex)
	ap->alt_lexicons = (short *)myrealloc(
					ap->alt_lexicons,
					num_alt_lex * sizeof(short) );
    }
*tokp = tok;
return( 0 );
}

/****************************************************************************
 * NAME
 *    add_lexicon
 * ARGUMENTS
 *    infp  - input FILE pointer
 *    linep - pointer to input file line number
 *    tokp  - address of pointer to the next token from the input file;
 *            on success it receives the token that ended the section
 *            (a keyword, or NULL at EOF)
 *    lang  - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Add a LEXICON to the LANGUAGE data structure.  The first token read
 *    is the section name; each entry after it occupies one input line and
 *    consists of a lexical form, a continuation (the name of an
 *    ALTERNATION or the BOUNDARY symbol), and a gloss enclosed between two
 *    occurrences of an arbitrary quote character.  Each entry is stored in
 *    the section's TRIE under its lexical form.
 * RETURN VALUE
 *    zero if okay, -1 if error
 */
static int add_lexicon(infp,linep,tokp,lang)
FILE *infp;
int *linep;
unsigned char **tokp;
LANGUAGE *lang;
{
unsigned char *tok, *tok2, *tok3;
unsigned char *p;
unsigned char quotechar;
LEXICON *lxp;
ALTERNATION *ap;
struct lex_item *lp;

tok = get_token(infp,linep);
if ((tok == (unsigned char *)NULL) || is_keyword((char *)tok))
    {
    report_error(FATAL, &No_lex_name, linep, filename);
    return( -1 );
    }
lxp = find_lexicon(tok,lang);
if (lxp == (LEXICON *)NULL)
    {
    /*
     *  sections are normally created when an ALTERNATION names them, so
     *  an unknown one (other than INITIAL) is in no alternation so far
     */
    if (strcmp((char *)tok,INITIAL_LEX) != 0)
	report_error(NONFATAL, &Unknown_lexicon, linep, filename, tok);
    lxp = new_lexicon(tok,lang);
    }
/*
 *  load lexicon entries until the next keyword or EOF
 */
for (;;)
    {
    tok = get_token(infp,linep);
    if ((tok == (unsigned char *)NULL) || is_keyword((char *)tok))
	break;
    /*
     *  validate the lexical form
     */
    for ( p = tok ; *p != NUL ; ++p )
	{
	if (	(strchr((char *)lang->alphabet,*p) == (char *)NULL) &&
		(*p != lang->null) )
	    {
	    report_error(FATAL, &Bad_form, linep, filename, *p);
	    return( -1 );
	    }
	}
    /*
     *  get and validate the continuation alternation
     *  (strtok8() is called directly rather than get_token(), so the
     *  continuation has to be on the same line as the lexical form)
     */
    tok2 = strtok8((unsigned char *)NULL,whiteSpc);
    if ((tok2 == (unsigned char *)NULL) || is_keyword((char *)tok2))
	{
	report_error(FATAL, &No_cont_class, linep, filename, tok);
	return( -1 );
	}
    ap = find_alternation(tok2,lang);
    if (    (ap == (ALTERNATION *)NULL) &&
	    !((tok2[0] == lang->boundary) && (tok2[1] == NUL)) )
	{
	report_error(FATAL, &Bad_cont_class, linep, filename, tok2, tok);
	return( -1 );
	}
    /*
     *  get the gloss string
     *  NOTE(review): a NULL delimiter argument presumably makes strtok8()
     *  return the rest of the line -- confirm against strtok8()
     */
    tok3 = strtok8((unsigned char *)NULL,NULL);
    if (tok3 != (unsigned char *)NULL)
	{
	while ((*tok3 != NUL) && isascii(*tok3) && isspace(*tok3))
	    ++tok3;
	}
    if ((tok3 == (unsigned char *)NULL) || (*tok3 == NUL))
	{
	report_error(FATAL, &No_glossary, linep, filename, tok);
	return( -1 );
	}
    /*
     *  the first nonblank character is the opening quote; the gloss runs
     *  up to the next occurrence of that same character
     */
    quotechar = *tok3++;
    p = (unsigned char *)strchr((char *)tok3, quotechar);
    if (p == (unsigned char *)NULL)
	{
	report_error(FATAL, &Bad_glossary, linep, filename, tok3-1, tok);
	return( -1 );
	}
    *p = NUL;
    /*
     *  add this entry to the lexicon TRIE storage
     */
    lp = (struct lex_item *)myalloc(sizeof(struct lex_item));
    lp->link       = (struct lex_item *)NULL;
    lp->lex_string = (unsigned char *)mystrdup((char *)tok);
    /*
     *  ap is NULL when the continuation is the BOUNDARY symbol, and a
     *  NULL pointer must not be used in pointer subtraction.
     *  NOTE(review): -1 is stored for "no continuation", matching the -1
     *  terminator used in alt_lexicons -- confirm that the code reading
     *  lex_continue tests for this value
     */
    if (ap != (ALTERNATION *)NULL)
	lp->lex_continue = ap - lang->alterns;
    else
	lp->lex_continue = -1;
    lp->lex_gloss = (unsigned char *)mystrdup((char *)tok3);
    lxp->lex_storage = add_to_trie(lxp->lex_storage, (char *)tok, lp,
				   (VOIDP(*)())add_lex_item, MAXLEV);
    }
/*
 *  finished reading this lexicon section
 */
*tokp = tok;
return( 0 );
}

/****************************************************************************
 * NAME
 *    load_lexicons
 * ARGUMENTS
 *    lexiconfile - name of the lexicon file
 *    lang        - pointer to a LANGUAGE data structure
 *    comment     - comment character for input file
 * DESCRIPTION
 *    Build the internal lexicon from the lexicon file.
 * RETURN VALUE
 *    0 if okay, -1 if error
 */
int load_lexicons(lexiconfile,lang,comment)
unsigned char *lexiconfile;
LANGUAGE *lang;
unsigned comment;
{
FILE *lex_fp, *include_fp, *infp;
unsigned char *tok;
int line_num, save_line_num;
unsigned char *p;
/*
 *  open the Lexicon file
 */
if ( (lex_fp = fopen((char *)lexiconfile, "r")) == (FILE *)NULL )
    {
    report_error(FATAL, &Bad_lex_file, (int *)NULL, (char *)NULL, lexiconfile);
    return(-1);
    }
fprintf(stderr, "Lexicon being loaded from %s\n", lexiconfile );
filename = (char *)lexiconfile;
/*
 *  initialize the arrays of ALTERNATIONs and LEXICONs
 */
lang->alterns = (ALTERNATION *)NULL;
lang->num_alterns = 0;
size_alterns = 0;
lang->lex_sections = (LEXICON *)NULL;
lang->num_lex_sections = 0;
size_lex_sections = 0;

comment_char = comment;
line_num = 0;
save_line_num = 0;
infp = lex_fp;
include_fp = (FILE *)NULL;

for ( tok = get_token(infp,&line_num) ; ; )
    {
    if (    (tok == (unsigned char *)NULL) ||		/* hard EOF */
	    (strcmp((char *)tok, "END") == 0) )		/* soft EOF marker */
	{
	if (infp == lex_fp)
	    break;			/* end of the lexicon file */
	else
	    {				/* end of an INCLUDEd lexicon file */
	    fclose(include_fp);
	    myfree(filename);
	    include_fp = (FILE *)NULL;
	    infp = lex_fp;
	    filename = (char *)lexiconfile;
	    line_num = save_line_num;
	    tok = get_token(infp,&line_num);
	    }
	}
    else if (strcmp((char *)tok, "ALTERNATION") == 0)
	{				/* define an ALTERNATION class */
	if (add_alternation(infp,&line_num,&tok,lang) == -1)
	    goto bad_lexiconfile;
	}
    else if (strcmp((char *)tok, "LEXICON") == 0)
	{
	if (add_lexicon(infp,&line_num,&tok,lang) == -1)
	    goto bad_lexiconfile;
	}
    else if (strcmp((char *)tok, "INCLUDE") == 0)
	{
	if (infp == include_fp)
	    {
	    report_error(FATAL, &No_nesting, &line_num, filename );
	    goto bad_lexiconfile;
	    }
	tok = get_token(infp,&line_num);
	if (tok == (unsigned char *)NULL)
	    {
	    report_error(FATAL, &Miss_filename, &line_num, filename );
	    goto bad_lexiconfile;
	    }
	tok = (unsigned char *)mystrdup((char *)tok);
#ifdef DIRSEPCHAR
	p = (unsigned char *)strrchr((char *)tok, DIRSEPCHAR);
	if (p != (unsigned char *)NULL)
	    ++p;
	else
#endif
	    p = tok;
	if (strchr((char *)p,'.') == (char *)NULL)
	    {
	    tok =(unsigned char *)myrealloc((char *)tok,
					    (unsigned)strlen((char *)tok)+5);
	    strcat((char *)tok,".lex");
	    }
	include_fp = fopen((char *)tok,"r");
	if (include_fp == (FILE *)NULL)
	    {
	    report_error(FATAL, &Bad_filename, &line_num, filename, tok );
	    goto bad_lexiconfile;
	    }
	save_line_num = line_num;
	infp = include_fp;
	filename = (char *)tok;
	line_num = 0;
	tok = get_token(infp,&line_num);	/* start the INCLUDE file */
	}
    else
	{
	report_error(FATAL, &Bad_keyword, &line_num, filename, tok);
	goto bad_lexiconfile;
	}
    }
if ((lang->num_alterns == 0) && (lang->num_lex_sections == 0))
    {
    report_error(FATAL, &Empty_lexfile, (int *)NULL, (char *)NULL,lexiconfile);
    goto bad_lexiconfile;
    }
if (lang->num_lex_sections == 0)
    {
    report_error(FATAL, &No_lexicon_sections, (int *)NULL, (char *)NULL,
								lexiconfile);
    goto bad_lexiconfile;
    }
	/*
	 *  This code causes problems on UNIX PC's (7300, 3b1).  Maybe some
	 *  realloc()'s can move a block if it's shorter, rather than leaving
	 *  it in place?  This would really foul up pointers into the data!
	 *  Well, since UNIX usually has more memory than PC's...
	 *
	 *  verson 1.0.5 has no pointers into these arrays stored in the data
	 *  -- it turned out to be a generally losing idea
	 */
#define REALLOC_SAFE
#ifdef REALLOC_SAFE
/*
 *  squeeze the array of ALTERNATION structures to a perfect fit
 */
if (lang->num_alterns < size_alterns)
    lang->alterns = (ALTERNATION *)myrealloc(
				lang->alterns,
				lang->num_alterns * sizeof(ALTERNATION));
/*
 *  squeeze the array of LEXICON structures to a perfect fit
 */
if (lang->num_lex_sections < size_lex_sections)
    {
    lang->lex_sections = (LEXICON *)myrealloc(
				lang->lex_sections,
				lang->num_lex_sections * sizeof(LEXICON));
    }
#endif
/*
 *  make sure we have an "initial" lexicon
 */
lang->initial_lex = find_lexicon((unsigned char *)INITIAL_LEX,lang);
if (lang->initial_lex == (LEXICON *)NULL)
    {
    report_error(FATAL, &No_init_lexicon, (int *)NULL, (char *)NULL);
    goto bad_lexiconfile;
    }
fclose(lex_fp);
return(0);

/****************************** ERROR RETURN *******************************/
bad_lexiconfile:

if (include_fp != (FILE *)NULL)
    {
    fclose(include_fp);
    myfree(filename);
    }
fclose(lex_fp);
free_lexicons(lang);
return( -1 );
}
