/*	RULES.C - functions to load a rules file, free rules, etc.
 ***************************************************************************
 *
 *	void free_rules(lang)
 *	LANGUAGE *lang;
 *
 *	void feasible_pairs(lang)
 *	LANGUAGE *lang;
 *
 *	int load_rules(rulefile, lang, comment)
 *	unsigned char *rulefile;
 *	LANGUAGE *lang;
 *	unsigned comment;
 *
 ***************************************************************************
 *	EDIT HISTORY
 *	30-May-88	GETALPH.C written by Dave Smith
 *	15-Jul-89	GETRULES.C written by Dave Smith
 *	18-Jul-89	MMEM.C written by Dave Smith
 *	21-Aug-88	GETPAIRS.C written by Dave Smith
 *	18-Sep-89	SRMc - regularize some comments and includes
 *			     - replace malloc() with myalloc()
 *			     - replace free() with myfree()
 *			     - cleaned up linked list removal
 *			     - replace (bogus) index() with strpos()
 *	19-Sep-89	SRMc - revised getline() to read arbitrarily long
 *				input, using internal static buffer to start
 *			     - redefined the subsets field of Language, added
 *				the numsubsets field
 *			     - revised memory allocation strategy for the
 *				automata field of Language
 *			     - redefined the name field of Rule
 *	20-Sep-89	SRMc - allow tab as well as space to separate tokens
 *			     - protect isspace() with isascii()
 *	21-Sep-89	SRMc - define MAXALPH locally
 *			     - define MAXFP locally
 *			     - rewrite getRulTok() to use strtok8()
 *			     - change char to unsigned char for 8-bit safety
 *			     - replace strspn() and strcspn() with strtok8()
 *	23-Sep-89	SRMc - rename MORPH.H to KIMMO.H
 *	25-Sep-89	SRMc - move MAXLINELEN definition to KIMMO.H
 *			     - finetune screen displays
 *			     - revise the specificity calculations in
 *				getRules(), write specificity()
 *			     - fix bug in getline() dynamic memory
 *			     - change getline()'s handling of comments
 *	26-Sep-89	SRMc - add rule_active field to Rule struct
 *			     - remove rulState field from Language struct
 *			     - replace free() with myfree() in freeRules()
 *	27-Sep-89	SRMc - replace typedef Rule with typedef RULE
 *			     - write find_subset()
 *	28-Sep-89	SRMc - make Lang global, remove "lang" arguments
 *	 2-Oct-89	SRMc - rename Language to LANGUAGE
 *			     - rename ResNode to RESULT
 *	 9-Oct-89	SRMc - clean up the error message reporting to use
 *				report_error(), and to not abort the program
 *			     - change filename from GETALPH.C to GETALPHA.C
 *			     - check return value of getAlphabet()
 *	10-Oct-89	SRMc - finish cleaning up error checking
 *	11-Oct-89	SRMc - revise storage of subsets to use SUBSET struct
 *	13-Oct-89	SRMc - add Lang.boundary field and BOUNDARY_CHAR type
 *	14-Oct-89	SRMc - move getline() to GETLINE.C
 *			     - merge GETALPHA.C, GETRULES.C, and part of
 *				MMEM.C to form LOADRULE.C
 *			     - rename freeRules() to free_rules()
 *	18-Oct-89	SRMc - fix bug in missing numbers of rows or columns
 *				altogether in a rule
 *	19-Oct-89	SRMc - use RULE keyword to start a rule rather than
 *				a double quote character (")
 *			     - use getRulTok() in loading automata as well
 *				as the alphabet information
 *			     - check that every rule covers every feasible
 *				pair
 *	20-Oct-89	SRMc - revise error messages
 *			     - rename file from LOADRULE.C to RULES.C
 *			     - merge GETPAIRS.C into RULES.C
 *				(40K source file -- so what?)
 *			     - rename getRules() to load_rules()
 *			     - merge getAlphabet() and getNBline() into
 *				load_rules()
 *			     - rename getFpairs to feasible_pairs()
 *			     - eliminate global variables, restore LANGUAGE *
 *				argument to free_rules(), load_rules(),
 *				feasible_pairs(), and find_subset()
 *	21-Oct-89	SRMc - comment character is parameter for getline(),
 *				and thus for load_rules()
 *	23-Oct-89	SRMc - fix bug in free_rules() triggered by aborting
 *				an invalid rules file
 *	24-Oct-89	SRMc - fix bug in calling strtok8(NULL,NULL)
 *			     - BOUNDARY character is now mandatory
 *			     - fix bugs in calls to report_error()
 *			     - rename rulesFp to rules_fp
 *			     - replace getRulTok() with get_token()
 *			     - rewrite load_rules() to use subfunctions, and
 *				to treat all keywords (except ALPHABET)
 *				equally
 *			     - some delinting
 *	13-Dec-89	SRMc - add filename to report_error() argument list
 *	18-Dec-89	SRMc - edit error message 230
 *	 2-Jan-90	SRMc - add function prototypes, more delinting
 *	 3-Jan-90	SRMc - will we never run out of lint?
 *	24-Jan-90	SRMc - edit error messages
 *	25-Jan-90	SRMc - add error check for duplicated subset name
 *	 6-Feb-90	SRMc - add error check for NULL:NULL column header
 *	19-Apr-90	EA   - #ifdef for THINK_C
 *	12-Jul-90	SRMc - replace "void *" with "VOIDP", as suggested
 *				by Greg Lee (lee@uhccux.uhcc.hawaii.edu) for
 *				port to ULTRIX
 *	26-Feb-91	SRMc - type cast argument to report_error() for the
 *				benefit of THINK C
 *	 5-Dec-91	SRMc - add prototypes for Think C 5.0
 *	30-Jan-92	SRMc - move function prototypes to pckimmo.h
 ***************************************************************************
 * Copyright 1989, 1992 by the Summer Institute of Linguistics, Inc.
 * All rights reserved.
 */
#include <stdio.h>
#include <ctype.h>
#include "pckimmo.h"

#ifdef BSD
#include <strings.h>
#else
#include <string.h>
#endif

/*
 *  P() wraps prototype argument lists so that they are kept by ANSI
 *  compilers (__STDC__ defined) and reduced to empty parentheses for
 *  pre-ANSI compilers.  It is undefined again after the prototypes.
 */
#ifdef __STDC__
#define P(s) s
#else
#define P(s) ()
#endif

/*
 *  prototypes for the functions private to this file
 *  (get_token() is not listed here; it is defined ahead of its first use)
 */
static unsigned char *find_subset P((unsigned char *name, LANGUAGE *lang) );
static void prune_pairs P((RULE *rulep, LANGUAGE *lang));
static int set_alphabet P((unsigned char **tokp, LANGUAGE *lang));
static int set_any P((LANGUAGE *lang));
static int set_null P((LANGUAGE *lang));
static int set_boundary P((LANGUAGE *lang));
static int add_subset P((unsigned char **tokp, LANGUAGE *lang));
static int add_rule P((LANGUAGE *lang));

/* standard library functions */
/* atoi() is declared by hand here, except when compiling with DJGPP */
#ifndef DJGPP
extern int atoi P((char *string));
#endif

#undef P

/* token separators handed to strtok8() by get_token() */
static unsigned char whiteSpc[] = " \t\n\v\f\r";  /* same chars as isspace() */

/*
 *  state shared by the rules file reading functions in this file
 */
static FILE *rules_fp;		/* input rules FILE pointer */
static char *filename;		/* input rules file name */
static int line_num;		/* input file line number; get_token() treats
				   0 as "at the beginning of the file" */
static unsigned char comment_char; /* comment character for input files */
static int size_subsets;	/* compared with lang->numsubsets before a
				   subset is added -- presumably the number
				   of SUBSET slots allocated; confirm */
static int size_rules;		/* presumably the matching allocation count
				   for the rules array -- confirm */
/*
 *  error messages
 *
 *  Each entry gives a message number and a printf-style format string.
 *  The entries are handed to report_error(); where a format contains
 *  conversions (%s, %c, %d), the callers pass the matching values as
 *  extra trailing arguments.
 */
/* 200-201: problems with the rules file itself */
static struct message Bad_rule_file =
    { 200, "Rules file could not be opened: %s" };
static struct message Unexpected_EOF =
    { 201, "Unexpected end of rules file: %s" };
/* 202-205: ALPHABET declaration */
static struct message Expected_ALPHABET =
    { 202, "Expected ALPHABET keyword" };
static struct message No_ALPHABET_value =
    { 203, "Alphabet contains no members" };
static struct message Too_many_alpha =
    { 204, "Too many characters in the alphabet" };
static struct message Char_in_alpha =
    { 205, "Character is already in the alphabet: %c" };
/* 206-210: NULL symbol declaration */
static struct message No_NULL_value =
    { 206, "No value given for NULL keyword" };
static struct message Bad_NULL_value =
    { 207, "Value given for NULL symbol was already declared as alphabetic: %c" };
static struct message Already_have_NULL =
    { 208, "The NULL symbol has already been defined" };
static struct message Same_NULL_ANY =
    { 209, "Value given for NULL symbol was already declared for ANY" };
static struct message Same_NULL_BOUND =
    { 210, "Value given for NULL symbol was already declared for BOUNDARY" };
/* 211-215: ANY symbol declaration */
static struct message No_ANY_value =
    { 211, "No value given for ANY keyword" };
static struct message Bad_ANY_value =
    { 212, "Value given for ANY symbol was already declared as alphabetic: %c" };
static struct message Already_have_ANY =
    { 213, "The ANY symbol has already been defined" };
static struct message Same_ANY_NULL =
    { 214, "Value given for ANY symbol was already declared for NULL" };
static struct message Same_ANY_BOUND =
    { 215, "Value given for ANY symbol was already declared for BOUNDARY" };
/* 216-220: BOUNDARY symbol declaration */
static struct message No_BOUND_value =
    { 216, "No value given for BOUNDARY keyword" };
static struct message Bad_BOUND_value =
    { 217, "Value given for BOUNDARY symbol was already declared as alphabetic: %c" };
static struct message Already_have_BOUND =
    { 218, "The BOUNDARY symbol has already been defined" };
static struct message Same_BOUND_NULL =
    { 219, "Value given for BOUNDARY symbol was already declared for NULL" };
static struct message Same_BOUND_ANY =
    { 220, "Value given for BOUNDARY symbol was already declared for ANY" };
/* 221-225: SUBSET declarations */
static struct message No_SUBSET_name =
    { 221, "Subset name not given" };
static struct message Bad_SUBSET_name =
    { 222, "Subset name %s is not unique" };
static struct message Empty_subset =
    { 223, "Subset %s contains no members" };
static struct message Bad_SUBSET_value =
    { 224, "Subset %s contains a nonalphabetic character: %c" };
static struct message Char_in_SUBSET =
    { 225, "Subset %s already contains %c" };
/* 226-229: unknown keyword, or a special symbol that was never defined */
static struct message Invalid_keyword =
    { 226, "Invalid keyword: %s" };
static struct message No_ANY_char =
    { 227, "ANY symbol not defined" };
static struct message No_NULL_char =
    { 228, "NULL symbol not defined" };
static struct message No_BOUNDARY_char =
    { 229, "BOUNDARY symbol not defined" };
/* 230-239: contents of a RULE (name, dimensions, state table, columns) */
static struct message Bad_rule_name =
    { 230, "Missing closing delimiter for the name of a rule: %s" };
static struct message Invalid_rows_num =
    { 231, "Invalid number of rows: %s" };
static struct message Invalid_cols_num =
    { 232, "Invalid number of columns: %s" };
static struct message Bad_state_num =
    { 233, "Invalid state number: %s" };
static struct message Bad_final_mark =
    { 234, "Expected final (:) or nonfinal (.) state indicator: %c" };
static struct message Bad_table_entry =
    { 235, "State table entry out of range: %s" };
static struct message Bad_lex_char =
    { 236, "Lexical character not in alphabet: %s" };
static struct message Bad_surf_char =
    { 237, "Surface character not in alphabet: %s" };
static struct message Bad_table_char =
    { 238,  "Nonnumeric character in state table: %c" };
static struct message Bad_column_head =
    { 239, "Rule number %d, column %d pairs a BOUNDARY symbol with something else: %s:%s" };
/* 240-242: feasible pair computation (feasible_pairs(), prune_pairs()) */
static struct message No_feasible =
    { 240, "No feasible pairs for this set of rules" };
static struct message Column_conflict =
    { 241, "RULE %d (%s) - %c:%c specified by both columns %d (%s:%s) and %d (%s:%s)" };
static struct message Miss_feasible =
    { 242, "RULE %d (%s) - %c:%c not specified by any column" };
/* 243: another column header error, numbered after the block above */
static struct message Bad_column_head2 =
    { 243, "Rule number %d, column %d pairs two NULL symbols: %s:%s" };

/*
 *  MAXALPH bounds the alphabet buffer in set_alphabet(); add_subset() also
 *  uses it to size its subset name and subset member buffers.
 */
#define MAXALPH	255	/* maximum number of alphabetic characters */
/*
 *  macro to check whether a token is a rules file keyword
 *
 *  The argument must be a (char *) string; it is evaluated once per
 *  comparison, so it must not have side effects.  The result is nonzero
 *  when the token is exactly one of ALPHABET, NULL, ANY, BOUNDARY, SUBSET,
 *  RULE or END (the comparison is case sensitive).
 */
#define is_keyword(token) \
( (strcmp(token,"ALPHABET")==0) || (strcmp(token,"NULL")==0) || \
  (strcmp(token,"ANY")==0) || (strcmp(token,"BOUNDARY")==0) || \
  (strcmp(token,"SUBSET")==0) || (strcmp(token,"RULE")==0) || \
  (strcmp(token,"END")==0) )

/****************************************************************************
 * NAME
 *    free_rules
 * ARGUMENTS
 *    lang - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Free the memory used for storing rules for the language.
 * RETURN VALUE
 *    none
 */
void free_rules(lang)
LANGUAGE *lang;
{
int i, j;
register RULE *rp;
register struct fsa_column *cp;
/*
 *  clear the stored alphabet information
 */
myfree(lang->alphabet);
lang->alphabet = (unsigned char *)NULL;
lang->null = NUL;
lang->any  = NUL;
lang->boundary = NUL;

if (lang->subsets)
    {
    for ( i = 0 ; i < lang->numsubsets ; ++i )
	{
	if (lang->subsets[i].name != (unsigned char *)NULL)
	    myfree(lang->subsets[i].name);
	if (lang->subsets[i].members != (unsigned char *)NULL)
	    myfree(lang->subsets[i].members);
	}
    myfree( lang->subsets );
    }
lang->subsets = (SUBSET *)NULL;
lang->numsubsets = 0;
/*
 *  clear the feasible pairs
 */
myfree(lang->lex_pair);
lang->lex_pair = (unsigned char *)NULL;
myfree(lang->surf_pair);
lang->surf_pair = (unsigned char *)NULL;
lang->num_pairs = 0;
/*
 *  clear the automata
 */
if (lang->automata)
    {
    for ( rp = lang->automata, i = 0 ; i < lang->num_rules ; ++i, ++rp )
	{
	myfree(rp->name);
	if (rp->columns)
	    {
	    for ( cp = rp->columns, j = 0 ; j < rp->num_cols ; ++j, ++cp )
		{
		if (cp->lex_name)
		    myfree( cp->lex_name );
		if (cp->lex_chars)
		    myfree( cp->lex_chars );
		if (cp->surf_name)
		    myfree( cp->surf_name );
		if (cp->surf_chars)
		    myfree( cp->surf_chars );
		if (cp->transitions)
		    myfree( cp->transitions );
		}
	    myfree( rp->columns );
	    }
	myfree(rp->final_states);
	}
    myfree(lang->automata);
    }
lang->automata = (RULE *)NULL;
lang->num_rules = 0;
}

/****************************************************************************
 * NAME
 *    find_subset
 * ARGUMENTS
 *    name - pointer to the name of the subset
 *    lang - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Look through the subsets stored for the language, in order, for one
 *    whose name is exactly equal to the given name.
 * RETURN VALUE
 *    pointer to the member string of the first subset with that name, or
 *    NULL if no subset has that name
 */
static unsigned char *find_subset(name,lang)
unsigned char *name;
LANGUAGE *lang;
{
int idx;

for ( idx = 0 ; idx < lang->numsubsets ; ++idx )
    {
    if (strcmp((char *)lang->subsets[idx].name, (char *)name) == 0)
	return( lang->subsets[idx].members );
    }
return( (unsigned char *)NULL );
}

/****************************************************************************
 * NAME
 *    prune_pairs
 * ARGUMENTS
 *    rulep - pointer to a RULE
 *    lang  - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    For each of the columns of a RULE, trim feasible pairs that are
 *    subsumed by another column of lower specificity.
 *    Also check for overlapping feasible pairs in columns of equal
 *    specificity.
 * RETURN VALUE
 *    none
 */
static void prune_pairs(rulep,lang)
RULE *rulep;
LANGUAGE *lang;
{
int i, j;
register struct fsa_column *cp, *xp;
int ck, cl, cnum, xk, xnum;

for ( cp = rulep->columns, i = 0 ; i < rulep->num_cols ; ++i, ++cp )
    {
    for ( xp = rulep->columns, j = 0 ; j < rulep->num_cols ; ++j, ++xp )
	{
	if (xp->precedence >= cp->precedence)
	    continue;
	/*
	 *  column xp has lower specificity than column cp, so if any
	 *    feasible pair in xp is also in cp, remove it from cp
	 */
	for ( xnum = strlen((char *)xp->lex_chars), xk = 0 ; xk < xnum ; ++xk )
	    {
	    for ( cnum=strlen((char *)cp->lex_chars), ck=0 ; ck < cnum ; ++ck )
		{
		if (	(xp->lex_chars[xk] == cp->lex_chars[ck]) &&
			(xp->surf_chars[xk] == cp->surf_chars[ck]) )
		    {
		    /*
		     *  remove cp->lex_chars[ck] and cp->surf_chars[ck]
		     */
		    for ( cl = ck ; cl < cnum ; ++cl)
		        {
			cp->lex_chars[cl]  = cp->lex_chars[cl+1];
			cp->surf_chars[cl] = cp->surf_chars[cl+1];
			}
		    --cnum;		/* fewer pairs to check now */
		    cp->lex_chars[cnum] = NUL;
		    cp->surf_chars[cnum] = NUL;
		    }
		}
	    }
	}
    }
/*
 *  complain if there's any remaining overlap
 */
for ( cp = rulep->columns, i = 0 ; i < rulep->num_cols ; ++i, ++cp )
    {
    for (xp = &rulep->columns[i+1], j = i+1 ; j < rulep->num_cols ; ++j, ++xp)
	{
	for ( xnum = strlen((char *)xp->lex_chars), xk = 0 ; xk < xnum ; ++xk )
	    {
	    for ( cnum=strlen((char *)cp->lex_chars), ck=0 ; ck < cnum ; ++ck )
		{
		if (	(xp->lex_chars[xk] == cp->lex_chars[ck]) &&
			(xp->surf_chars[xk] == cp->surf_chars[ck]) )
		    {
		    report_error(NONFATAL, &Column_conflict, (int *)NULL,
				 (char *)NULL,
				 (int)(rulep - lang->automata)+1,
				 rulep->name,
				 xp->lex_chars[xk], xp->surf_chars[xk],
				 i+1, cp->lex_name, cp->surf_name,
				 j+1, xp->lex_name, xp->surf_name );
		    }
		}
	    }
	}
    }
}

/****************************************************************************
 * NAME
 *    feasible_pairs
 * ARGUMENTS
 *    lang - pointer to LANGUAGE data structure
 * DESCRIPTION
 *    Compute the feasible pairs for the active set of rules.
 * RETURN VALUE
 *    none
 */
void feasible_pairs(lang)
LANGUAGE *lang;
{
RULE *rp;
struct fsa_column *cp;
int i, j, k, x;
unsigned char *lexp, *lexsub;
unsigned char *surfp, *surfsub;
int num_pairs, num_allocated;
unsigned char *check_pairs;

if ( lang->automata == (RULE *)NULL )
    return;
/*
 *  release any previously allocated space
 */
for ( rp = lang->automata, i = 0 ; i < lang->num_rules ; ++i, ++rp )
    {
    for ( cp = rp->columns, j = 0 ; j < rp->num_cols ; ++j, ++cp )
	{
	if (cp->lex_chars)
	    myfree(cp->lex_chars);
	if (cp->surf_chars)
	    myfree(cp->surf_chars);
	cp->precedence = 0;
	}
    }
/*
 *  accumulate all the feasible pairs across all of the active rules
 */
num_pairs = 0;
num_allocated = 0;
lexp  = (unsigned char *)NULL;
surfp = (unsigned char *)NULL;
for ( rp = lang->automata, i = 0 ; i < lang->num_rules ; ++i, ++rp )
    {
    if (!rp->rule_active)
	continue;
    for ( cp = rp->columns, j = 0 ; j < rp->num_cols ; ++j, ++cp )
	{
	if (	!((	(cp->lex_type == ALPHABET_CHAR) ||
			(cp->lex_type == BOUNDARY_CHAR) ||
			(cp->lex_type == NULL_CHAR) )
		    &&
		(	(cp->surf_type == ALPHABET_CHAR) ||
			(cp->surf_type == BOUNDARY_CHAR) ||
			(cp->surf_type == NULL_CHAR) ))  )
	    continue;			/* need both alphabetic or null */
	if (lexp == (unsigned char *)NULL)
	    {
	    /*
	     *  allocate space for the very first feasible pair
	     */
	    num_allocated = 100;
	    lexp  = (unsigned char *)myalloc( num_allocated+1 );
	    strcpy((char *)lexp, (char *)cp->lex_name);
	    surfp = (unsigned char *)myalloc( num_allocated+1 );
	    strcpy((char *)surfp, (char *)cp->surf_name);
	    num_pairs = 1;
	    continue;
	    }
	/*
	 *  check whether this feasible pair is already listed
	 */
	for ( k = 0 ; k < num_pairs ; ++k )
	    {
	    if ((lexp[k] == *cp->lex_name) && (surfp[k] == *cp->surf_name))
		break;			/* ==> k < num_pairs */
	    }
	if (k == num_pairs)
	    {
	    /*
	     *  add this feasible pair to the list
	     */
	    if (num_pairs == num_allocated)
		{
		num_allocated += 100;
		lexp  = (unsigned char *)myrealloc(lexp, num_allocated+1);
		surfp = (unsigned char *)myrealloc(surfp, num_allocated+1);
		}
	    strcat((char *)lexp,(char *)cp->lex_name);
	    strcat((char *)surfp,(char *)cp->surf_name);
	    ++num_pairs;
	    }
	}
    }
if (num_pairs == 0)
    {
    report_error(NONFATAL, &No_feasible, (int *)NULL, (char *)NULL);
    lang->num_pairs = 0;
    lang->lex_pair = (unsigned char *)NULL;
    lang->surf_pair = (unsigned char *)NULL;
    return;
    }
if (num_pairs != num_allocated)
    {
    lexp  = (unsigned char *)myrealloc(lexp, num_pairs+1);
    surfp = (unsigned char *)myrealloc(surfp, num_pairs+1);
    }
/*
 *  allocate array for verifying pairs
 */
check_pairs = (unsigned char *)myalloc(num_pairs);
/*
 *  now, fill in the feasible pairs and precedence for each column of
 *  each rule
 */
for ( rp = lang->automata, i = 0 ; i < lang->num_rules ; ++i, ++rp )
    {
    if (!rp->rule_active)
	continue;
    for ( cp = rp->columns, j = 0 ; j < rp->num_cols ; ++j, ++cp )
	{
	cp->lex_chars = (unsigned char *)myalloc(num_pairs+1);
	cp->surf_chars = (unsigned char *)myalloc(num_pairs+1);
	/*
	 *  *:* - any pair
	 */
	if ((cp->lex_type==ANY_CHAR) && (cp->surf_type==ANY_CHAR))
	    {
	    strcpy((char *)cp->lex_chars,(char *)lexp);
	    strcpy((char *)cp->surf_chars,(char *)surfp);
	    }
	/*
	 *  *:S - any pair with surface character in subset
	 */
	else if ((cp->lex_type==ANY_CHAR) && (cp->surf_type==SUBSET_CHAR))
	    {
	    surfsub = find_subset(cp->surf_name, lang);
	    for ( x = 0, k = 0 ; k < num_pairs ; ++k )
		{
		if (strchr((char *)surfsub,surfp[k]) != (char *)NULL)
		    {
		    cp->lex_chars[x] = lexp[k];
		    cp->surf_chars[x] = surfp[k];
		    ++x;
		    }
		}
	    cp->lex_chars[x] = '\0';
	    cp->surf_chars[x] = '\0';
	    }
	/*
	 *  *:c or *:0 - any pair with matching surface character
	 */
	else if (cp->lex_type==ANY_CHAR)
	    {
	    for ( x = 0, k = 0 ; k < num_pairs ; ++k )
		{
		if (surfp[k] == *cp->surf_name)
		    {
		    cp->lex_chars[x] = lexp[k];
		    cp->surf_chars[x] = surfp[k];
		    ++x;
		    }
		}
	    cp->lex_chars[x] = '\0';
	    cp->surf_chars[x] = '\0';
	    }
	/*
	 *  S:* - any pair with lexical character in subset
	 */
	else if ((cp->lex_type==SUBSET_CHAR) && (cp->surf_type==ANY_CHAR))
	    {
	    lexsub = find_subset(cp->lex_name, lang);
	    for ( x = 0, k = 0 ; k < num_pairs ; ++k )
		{
		if (strchr((char *)lexsub,lexp[k]) != (char *)NULL)
		    {
		    cp->lex_chars[x] = lexp[k];
		    cp->surf_chars[x] = surfp[k];
		    ++x;
		    }
		}
	    cp->lex_chars[x] = '\0';
	    cp->surf_chars[x] = '\0';
	    }
	/*
	 *  S:S - any pair with lexical character and surface character each
	 *		in appropriate subsets
	 */
	else if ((cp->lex_type==SUBSET_CHAR) && (cp->surf_type==SUBSET_CHAR))
	    {
	    lexsub  = find_subset(cp->lex_name, lang);
	    surfsub = find_subset(cp->surf_name, lang);
	    for ( x = 0, k = 0 ; k < num_pairs ; ++k )
		{
		if (	(strchr((char *)lexsub,lexp[k]) != (char *)NULL) &&
			(strchr((char *)surfsub,surfp[k]) != (char *)NULL) )
		    {
		    cp->lex_chars[x] = lexp[k];
		    cp->surf_chars[x] = surfp[k];
		    ++x;
		    }
		}
	    cp->lex_chars[x] = '\0';
	    cp->surf_chars[x] = '\0';
	    }
	/*
	 *  S:c or S:0 - any pair with lexical character in subset and
	 *		matching surface character
	 */
	else if (cp->lex_type==SUBSET_CHAR)
	    {
	    lexsub  = find_subset(cp->lex_name, lang);
	    for ( x = 0, k = 0 ; k < num_pairs ; ++k )
		{
		if (	(strchr((char *)lexsub,lexp[k]) != (char *)NULL) &&
			(surfp[k] == *cp->surf_name) )
		    {
		    cp->lex_chars[x] = lexp[k];
		    cp->surf_chars[x] = surfp[k];
		    ++x;
		    }
		}
	    cp->lex_chars[x] = '\0';
	    cp->surf_chars[x] = '\0';
	    }
	/*
	 *  c:* or 0:* - any pair with matching lexical character
	 */
	else if (cp->surf_type==ANY_CHAR)
	    {
	    for ( x = 0, k = 0 ; k < num_pairs ; ++k )
		{
		if (lexp[k] == *cp->lex_name)
		    {
		    cp->lex_chars[x] = lexp[k];
		    cp->surf_chars[x] = surfp[k];
		    ++x;
		    }
		}
	    cp->lex_chars[x] = '\0';
	    cp->surf_chars[x] = '\0';
	    }
	/*
	 *  c:S or 0:S - any pair with matching lexical character and
	 *		surface character in subset
	 */
	else if (cp->surf_type==SUBSET_CHAR)
	    {
	    surfsub  = find_subset(cp->surf_name, lang);
	    for ( x = 0, k = 0 ; k < num_pairs ; ++k )
		{
		if (	(lexp[k] == *cp->lex_name) &&
			(strchr((char *)surfsub,surfp[k]) != (char *)NULL) )
		    {
		    cp->lex_chars[x] = lexp[k];
		    cp->surf_chars[x] = surfp[k];
		    ++x;
		    }
		}
	    cp->lex_chars[x] = '\0';
	    cp->surf_chars[x] = '\0';
	    }
	/*
	 *  c:c  or c:0 or 0:c or 0:0 - only the matching pair
	 */
	else
	    {
	    cp->lex_chars[0] = *cp->lex_name;
	    cp->surf_chars[0] = *cp->surf_name;
	    cp->lex_chars[1] = '\0';
	    cp->surf_chars[1] = '\0';
	    }
	cp->precedence = strlen((char *)cp->lex_chars);
	if (cp->precedence < num_pairs)
	    {
	    cp->lex_chars = (unsigned char *)
				myrealloc(cp->lex_chars, cp->precedence +1);
	    cp->surf_chars = (unsigned char *)
				myrealloc(cp->surf_chars, cp->precedence +1);
	    }
	}
    /*
     * now, prune the pairs stored for each column
     */
    prune_pairs( rp, lang );
    /*
     *  verify that this rule spans all possible feasible pairs
     */
    memset((char *)check_pairs,NUL,num_pairs);
    for ( cp = rp->columns, j = 0 ; j < rp->num_cols ; ++j, ++cp )
	{
	for ( x = 0 ; x < strlen((char *)cp->lex_chars) ; ++x )
	    {
	    for ( k = 0 ; k < num_pairs ; ++k )
		{
		if (	(cp->lex_chars[x] == lexp[k]) &&
			(cp->surf_chars[x] == surfp[k]) )
		    {
		    check_pairs[k] = 1;
		    break;
		    }
		}
	    }
	}
    for ( k = 0 ; k < num_pairs ; ++k )
	{
	if (!check_pairs[k])
	    {
	    report_error(NONFATAL, &Miss_feasible, (int *)NULL, (char *)NULL,
				i+1, rp->name, lexp[k], surfp[k] );
	    }
	}
    }
myfree(check_pairs);
/*
 *  allocate storage and copy feasible pairs
 */
lang->num_pairs = num_pairs;
lang->lex_pair = lexp;
lang->surf_pair = surfp;
}

/****************************************************************************
 * NAME
 *    get_token
 * ARGUMENTS
 *    none
 * DESCRIPTION
 *    Get the next whitespace-delimited token from the rules file.  The
 *    tokens of the current line are handed out one per call through
 *    strtok8(); once the line is used up (or when line_num is 0, which
 *    marks the beginning of a file), further lines are read with getline()
 *    until one of them yields a token.
 *    The returned pointer comes from strtok8() on the buffer returned by
 *    getline(); presumably it is overwritten by later calls -- confirm.
 * RETURN VALUE
 *    pointer to the token string, or NULL if EOF
 */
static unsigned char *get_token()
{
static unsigned char *token = (unsigned char *)NULL;
unsigned char *line;

/*
 *  continue in the current line only if a file is being read and the
 *  previous call did hand out a token
 */
if ((line_num != 0) && (token != (unsigned char *)NULL))
    token = strtok8(NULL,whiteSpc);
else
    token = (unsigned char *)NULL;
/*
 *  current line exhausted: move on to the next line that has a token
 */
while (token == (unsigned char *)NULL)
    {
    line = getline(rules_fp, &line_num, comment_char);
    if (line == (unsigned char *)NULL)
	return( (unsigned char *)NULL );	/* signal EOF */
    token = strtok8(line,whiteSpc);
    }
return( token );
}

/****************************************************************************
 * NAME
 *    set_alphabet
 * ARGUMENTS
 *    tokp  - address of pointer to the next token
 *    lang  - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Read tokens from the rules file up to the next keyword and collect
 *    each of their characters as a member of the alphabet.  A repeated
 *    character, or a character beyond the first MAXALPH, is reported as a
 *    nonfatal error and left out.  The resulting string is duplicated
 *    into lang->alphabet, and the keyword that ended the list is handed
 *    back through tokp.
 * RETURN VALUE
 *    0 if successful, -1 if the file ends before a keyword is found or if
 *    the alphabet turns out to be empty
 */
static int set_alphabet(tokp,lang)
unsigned char **tokp;
LANGUAGE *lang;
{
unsigned char letters[MAXALPH+1];	/* always NUL terminated */
unsigned char *word;
unsigned char *chp;
int count;

memset((char *)letters,NUL,MAXALPH+1);
count = 0;
/*
 *  collect characters until a keyword (or the end of the file) turns up
 */
while ((word = get_token()) != (unsigned char *)NULL)
    {
    if (is_keyword((char *)word))
	break;
    for ( chp = word ; *chp != NUL ; ++chp )
	{
	if (strchr((char *)letters, *chp) != (char *)NULL)
	    {
	    report_error(NONFATAL, &Char_in_alpha, &line_num, filename, *chp );
	    continue;
	    }
	if (count >= MAXALPH)
	    {
	    report_error(NONFATAL, &Too_many_alpha, &line_num, filename);
	    continue;
	    }
	letters[count++] = *chp;
	}
    }
if (word == (unsigned char *)NULL)
    {
    report_error(FATAL, &Unexpected_EOF, &line_num, filename,
					"reading ALPHABET characters");
    return( -1 );
    }
if (letters[0] == NUL)
    {
    report_error(FATAL, &No_ALPHABET_value, &line_num, filename);
    return( -1 );
    }
/*
 *  keep a copy of the alphabet and pass the keyword back to the caller
 */
lang->alphabet = (unsigned char *)mystrdup((char *)letters);
*tokp = word;
return( 0 );
}

/****************************************************************************
 * NAME
 *    set_any
 * ARGUMENTS
 *    lang - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Read the token following the ANY keyword and store its first
 *    character as the "any" symbol of the language.  The symbol is
 *    rejected if the token is missing or is itself a keyword, if the
 *    character belongs to the alphabet, if an ANY symbol is already set,
 *    or if the character is already the NULL or BOUNDARY symbol.  Apart
 *    from the end-of-file case, errors are reported with the line number
 *    that was current before the token was read.
 * RETURN VALUE
 *    0 if successful, -1 if error
 */
static int set_any(lang)
LANGUAGE *lang;
{
unsigned char *tok;
int start_line;

start_line = line_num;
tok = get_token();
if (tok == (unsigned char *)NULL)
    report_error(FATAL, &Unexpected_EOF, &line_num, filename,
						"reading ANY symbol");
else if (is_keyword((char *)tok))	/* no value given for ANY */
    report_error(FATAL, &No_ANY_value, &start_line, filename );
else if (strchr((char *)lang->alphabet, *tok) != (char *)NULL)
    report_error(FATAL, &Bad_ANY_value, &start_line, filename, *tok );
else if (lang->any != NUL)
    report_error(FATAL, &Already_have_ANY, &start_line, filename );
else if ((*tok != NUL) && (*tok == lang->null))
    report_error(FATAL, &Same_ANY_NULL, &start_line, filename );
else if ((*tok != NUL) && (*tok == lang->boundary))
    report_error(FATAL, &Same_ANY_BOUND, &start_line, filename );
else
    {
    lang->any = *tok;
    return( 0 );
    }
return( -1 );			/* one of the errors above was reported */
}

/****************************************************************************
 * NAME
 *    set_null
 * ARGUMENTS
 *    lang - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Read the token following the NULL keyword and store its first
 *    character as the null symbol of the language.  The symbol is
 *    rejected if the token is missing or is itself a keyword, if the
 *    character belongs to the alphabet, if a NULL symbol is already set,
 *    or if the character is already the ANY or BOUNDARY symbol.  Apart
 *    from the end-of-file case, errors are reported with the line number
 *    that was current before the token was read.
 * RETURN VALUE
 *    0 if successful, -1 if error
 */
static int set_null(lang)
LANGUAGE *lang;
{
unsigned char *tok;
int start_line;

start_line = line_num;
tok = get_token();
if (tok == (unsigned char *)NULL)
    report_error(FATAL, &Unexpected_EOF, &line_num, filename,
						"reading NULL symbol");
else if (is_keyword((char *)tok))	/* no value given for NULL */
    report_error(FATAL, &No_NULL_value, &start_line, filename );
else if (strchr((char *)lang->alphabet, *tok) != (char *)NULL)
    report_error(FATAL, &Bad_NULL_value, &start_line, filename, *tok );
else if (lang->null != NUL)
    report_error(FATAL, &Already_have_NULL, &start_line, filename );
else if ((*tok != NUL) && (*tok == lang->any))
    report_error(FATAL, &Same_NULL_ANY, &start_line, filename );
else if ((*tok != NUL) && (*tok == lang->boundary))
    report_error(FATAL, &Same_NULL_BOUND, &start_line, filename );
else
    {
    lang->null = *tok;
    return( 0 );
    }
return( -1 );			/* one of the errors above was reported */
}

/****************************************************************************
 * NAME
 *    set_boundary
 * ARGUMENTS
 *    lang - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Read the token following the BOUNDARY keyword and store its first
 *    character as the boundary symbol of the language.  The symbol is
 *    rejected if the token is missing or is itself a keyword, if the
 *    character belongs to the alphabet, if a BOUNDARY symbol is already
 *    set, or if the character is already the NULL or ANY symbol.  Apart
 *    from the end-of-file case, errors are reported with the line number
 *    that was current before the token was read.
 * RETURN VALUE
 *    0 if successful, -1 if error
 */
static int set_boundary(lang)
LANGUAGE *lang;
{
unsigned char *tok;
int start_line;

start_line = line_num;
tok = get_token();
if (tok == (unsigned char *)NULL)
    report_error(FATAL, &Unexpected_EOF, &line_num, filename,
						"reading BOUNDARY symbol");
else if (is_keyword((char *)tok))	/* no value given for BOUNDARY */
    report_error(FATAL, &No_BOUND_value, &start_line, filename );
else if (strchr((char *)lang->alphabet, *tok) != (char *)NULL)
    report_error(FATAL, &Bad_BOUND_value, &start_line, filename, *tok );
else if (lang->boundary != NUL)
    report_error(FATAL, &Already_have_BOUND, &start_line, filename );
else if ((*tok != NUL) && (*tok == lang->null))
    report_error(FATAL, &Same_BOUND_NULL, &start_line, filename );
else if ((*tok != NUL) && (*tok == lang->any))
    report_error(FATAL, &Same_BOUND_ANY, &start_line, filename );
else
    {
    lang->boundary = *tok;
    return( 0 );
    }
return( -1 );			/* one of the errors above was reported */
}

/****************************************************************************
 * NAME
 *    add_subset
 * ARGUMENTS
 *    tokp - address of pointer to the following token
 *    lang - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Load one alphabet subset from the rules file, and add it to the LANGUAGE
 *    data.
 * RETURN VALUE
 *    0 if successful, -1 if error
 */
static int add_subset(tokp,lang)
unsigned char **tokp;
LANGUAGE *lang;
{
int i, kw_line;
unsigned char *tok;
unsigned char *p;
unsigned char sub_name[MAXALPH+1];
unsigned char sub_chars[MAXALPH+1];
/*
 *  first, get the name of the subset
 */
kw_line = line_num;
tok = get_token();
if (tok == (unsigned char *)NULL)
    {
    report_error(FATAL, &Unexpected_EOF, &line_num, filename,
							"reading SUBSET name");
    return( -1 );
    }
if (is_keyword((char *)tok))
    {
    report_error(FATAL, &No_SUBSET_name, &kw_line, filename );
    return( -1 );
    }
strncpy((char *)sub_name, (char *)tok, MAXALPH);
sub_name[MAXALPH] = NUL;
/*
 *  now, get the members of the subset
 */
memset((char *)sub_chars, NUL, MAXALPH+1);
for ( i = 0 ;;)
    {
    tok = get_token();
    if ((tok == (unsigned char *)NULL) || is_keyword((char *)tok))
	break;
    for ( p = tok ; *p != NUL ; ++p )
	{
	if (	(strchr((char *)lang->alphabet,*p) != (char *)NULL) ||
		(*p == lang->null))
	    {
	    if (strchr((char *)sub_chars,*p) == (char *)NULL)
		sub_chars[i++] = *p;
	    else
		report_error(NONFATAL, &Char_in_SUBSET, &kw_line, filename,
								sub_name, *p);
	    }
	else
	    {
	    report_error(FATAL, &Bad_SUBSET_value, &kw_line, filename,
								sub_name, *p);
	    return( -1 );
	    }
	}
    }
if (tok == (unsigned char *)NULL)
    {
    report_error(FATAL, &Unexpected_EOF, &line_num, filename,
						"reading SUBSET characters");
    return( -1 );
    }
if (sub_name[0] == NUL)
    {
    report_error(FATAL, &No_SUBSET_name, &kw_line, filename );
    return( -1 );
    }
if (    (sub_name[1] == NUL) &&
	    (	(strchr((char *)lang->alphabet,sub_name[0]) != (char *)NULL) ||
		(sub_name[0] == lang->null) ||
		(sub_name[0] == lang->boundary) ||
		(sub_name[0] == lang->any)  ) ||
	(find_subset(sub_name,lang) != (unsigned char *)NULL)  )
    {
    report_error(FATAL, &Bad_SUBSET_name, &kw_line, filename, sub_name );
    return( -1 );
    }
if (sub_chars[0] == NUL)
    {
    report_error(FATAL, &Empty_subset, &kw_line, filename, sub_name );
    return( -1 );
    }
/*
 *  if necessary, allocate space for this subset
 */
if (lang->numsubsets >= size_subsets)
    {
    size_subsets += 20;		/* grow the array as needed */
    if (lang->subsets == (SUBSET *)NULL)
	lang->subsets = (SUBSET *)myalloc(size_subsets * sizeof(SUBSET));
    else
	lang->subsets = (SUBSET *)myrealloc(lang->subsets,
					size_subsets * sizeof(SUBSET));
    }

lang->subsets[lang->numsubsets].name    = (unsigned char *)
						mystrdup((char *)sub_name);
lang->subsets[lang->numsubsets].members = (unsigned char *)
						mystrdup((char *)sub_chars);
++lang->numsubsets;

*tokp = tok;
return( 0 );
}

/****************************************************************************
 * NAME
 *    add_rule
 * ARGUMENTS
 *    lang - pointer to a LANGUAGE data structure
 * DESCRIPTION
 *    Load one rule from the rules file, and add it to the LANGUAGE data.
 * RETURN VALUE
 *    0 if successful, -1 if error
 */
static int add_rule(lang)
LANGUAGE *lang;
{
unsigned char *tok;
unsigned char *q;
RULE *rp;
struct fsa_column *cp;
int i, j, x;
/*
 *  make sure we have space to store this rule
 */
if (lang->num_rules >= size_rules)
    {
    size_rules += 20;
    if (lang->automata == (RULE *)NULL)
	lang->automata = (RULE *)myalloc(size_rules * sizeof(RULE));
    else
	lang->automata = (RULE *)myrealloc( lang->automata,
					    size_rules * sizeof(RULE) );
    }
rp = &(lang->automata[lang->num_rules]);
++lang->num_rules;
rp->rule_active = 1;		/* rules start out active */

/*****************************************************************
 *  get the rule name, which begins and end with a quote mark
 *  (we can't use get_token() here because of possible whitespace)
 */
tok = strtok8(NULL,NULL);	/* get rest of line following RULE keyword */
if (tok != (unsigned char *)NULL)
    {
    while ((*tok != NUL) && isascii(*tok) && isspace(*tok))
	++tok;
    }
if ((tok == (unsigned char *)NULL) || (*tok == NUL))
    {
    do { tok = getline(rules_fp, &line_num, comment_char); }
    while ((tok != (unsigned char *)NULL) && (*tok == NUL));
    }
if (tok == (unsigned char *)NULL)
    {
    report_error(FATAL, &Unexpected_EOF, &line_num, filename, "reading RULE");
    return( -1 );
    }
q = (unsigned char *)strchr((char *)tok+1, *tok);
if (q == (unsigned char *)NULL)
    report_error(NONFATAL, &Bad_rule_name, &line_num, filename, tok);
else
    *q++ = NUL;
rp->name = (unsigned char *)mystrdup((char *)tok+1);

/*****************************************************************
 *  get the dimensions of the state table
 *  (after call to strtok8() we can use get_token())
 */
tok = strtok8(q, whiteSpc);
if (tok == (unsigned char *)NULL)
    {
    tok = get_token();
    if (tok == (unsigned char *)NULL)
	{
	report_error(FATAL, &Unexpected_EOF, &line_num, filename,
							"reading RULE");
	return( -1 );
	}
    }
rp->num_rows = atoi((char *)tok);
if (rp->num_rows <= 0)
    {
    report_error(FATAL, &Invalid_rows_num, &line_num, filename, tok);
    return( -1 );
    }
tok = get_token();
if (tok == (unsigned char *)NULL)
    {
    report_error(FATAL, &Unexpected_EOF, &line_num, filename, "reading RULE");
    return( -1 );
    }
rp->num_cols = atoi((char *)tok);
if ( rp->num_cols <= 0 )
    {
    report_error(FATAL, &Invalid_cols_num, &line_num, filename, tok);
    return( -1 );
    }
/*
 *  allocate memory for the rest of the automaton
 */
rp->columns = (struct fsa_column *)
			myalloc(sizeof(struct fsa_column) * rp->num_cols);
for ( cp = rp->columns, i = 0 ; i < rp->num_cols ; ++i, ++cp )
    {
    cp->lex_name = (unsigned char *)NULL;
    cp->lex_type = 0;
    cp->lex_chars = (unsigned char *)NULL;
    cp->surf_name = (unsigned char *)NULL;
    cp->surf_type = 0;
    cp->surf_chars = (unsigned char *)NULL;
    cp->precedence = 0;
    cp->transitions = (short *)myalloc(sizeof(short) * rp->num_rows);
    }
rp->final_states = (unsigned char *)
				myalloc(sizeof(unsigned char) * rp->num_rows);

/*****************************************************************
 *  get the lexical characters
 */
for ( i = 0 ; i < rp->num_cols ; ++i )
    {
    tok = get_token();
    if (tok == (unsigned char *)NULL)
	{
	report_error(FATAL, &Unexpected_EOF, &line_num, filename,
							"reading RULE");
	return( -1 );
	}
    x = strlen((char *)tok);
    if ((x == 1) && (strchr((char *)lang->alphabet, *tok) != (char *)NULL))
	rp->columns[i].lex_type = ALPHABET_CHAR;
    else if ((x == 1) && (*tok == lang->any))
	rp->columns[i].lex_type = ANY_CHAR;
    else if ((x == 1) && (*tok == lang->null))
	rp->columns[i].lex_type = NULL_CHAR;
    else if ((x == 1) && (*tok == lang->boundary))
	rp->columns[i].lex_type = BOUNDARY_CHAR;
    else if (find_subset(tok,lang) != (unsigned char *)NULL)
	rp->columns[i].lex_type = SUBSET_CHAR;
    else
	{
	report_error(FATAL, &Bad_lex_char, &line_num, filename, tok);
	return( -1 );
	}
    rp->columns[i].lex_name = (unsigned char *)mystrdup((char *)tok);
    /*
     *  (rp->columns[i].lex_chars is filled in by feasible_pairs())
     */
    }

/*****************************************************************
 *  get the surface characters
 */
for ( i = 0 ; i < rp->num_cols ; ++i )
    {
    tok = get_token();
    if (tok == (unsigned char *)NULL)
	{
	report_error(FATAL, &Unexpected_EOF, &line_num, filename,
							"reading RULE");
	return( -1 );
	}
    x = strlen((char *)tok);
    if ((x == 1) && (strchr((char *)lang->alphabet, *tok) != (char *)NULL))
	rp->columns[i].surf_type = ALPHABET_CHAR;
    else if ((x == 1) && (*tok == lang->any))
	rp->columns[i].surf_type = ANY_CHAR;
    else if ((x == 1) && (*tok == lang->null))
	rp->columns[i].surf_type = NULL_CHAR;
    else if ((x == 1) && (*tok == lang->boundary))
	rp->columns[i].surf_type = BOUNDARY_CHAR;
    else if (find_subset(tok,lang) != (unsigned char *)NULL)
	rp->columns[i].surf_type = SUBSET_CHAR;
    else
	{
	report_error(FATAL, &Bad_surf_char, &line_num, filename, tok);
	return( -1 );
	}
    rp->columns[i].surf_name = (unsigned char *)mystrdup((char *)tok);
    /*
     *  (rp->columns[i].surf_chars is filled in by feasible_pairs())
     */
    }
/*
 *  check for valid use of the BOUNDARY character -- must be in pairs
 *  check for valid use of the NULL character -- must NOT be in pairs
 */
for ( i = 0 ; i < rp->num_cols ; ++i )
    {
    if (    (	(rp->columns[i].lex_type  == BOUNDARY_CHAR) &&
		(rp->columns[i].surf_type != BOUNDARY_CHAR) ) ||
	    (	(rp->columns[i].lex_type  != BOUNDARY_CHAR) &&
		(rp->columns[i].surf_type == BOUNDARY_CHAR) ) )
	{
	--line_num;
	report_error(FATAL, &Bad_column_head, &line_num, filename,
			lang->num_rules, i+1,
			rp->columns[i].lex_name, rp->columns[i].surf_name );
	return( -1 );
	}
    if (    (rp->columns[i].lex_type  == NULL_CHAR) &&
	    (rp->columns[i].surf_type == NULL_CHAR) )
	{
	--line_num;
	report_error(FATAL, &Bad_column_head2, &line_num, filename,
			lang->num_rules, i+1,
			rp->columns[i].lex_name, rp->columns[i].surf_name );
	return( -1 );
	}
    }

/*****************************************************************
 *  get the state table and final/non-final state indicators
 */
for ( i = 0 ; i < rp->num_rows ; ++i )
    {
    /*************************************************
     *  get and validate the state number
     */
    tok = get_token();
    if (tok == (unsigned char *)NULL)
	{
	report_error(FATAL, &Unexpected_EOF, &line_num, filename,
							"reading RULE");
	return( -1 );
	}
    if ( atoi((char *)tok) != i+1 )
	{
	report_error(FATAL, &Bad_state_num, &line_num, filename, tok);
	return( -1 );
	}
    /*************************************************
     *  get the final/non-final state indicator
     */
    while ( isascii(*tok) && isdigit(*tok) )
	++tok;
    if (*tok == NUL)
	{
	tok = get_token();
	if (tok == (unsigned char *)NULL)
	    {
	    report_error(FATAL, &Unexpected_EOF, &line_num, filename,
							"reading RULE");
	    return( -1 );
	    }
	}
    if ((*tok != ':') && (*tok != '.'))
	{
	report_error(FATAL, &Bad_final_mark, &line_num, filename, *tok);
	return( -1 );
	}
    rp->final_states[i] = (*tok++ == ':');

    /*************************************************
     *  get a row of new states for a given state
     */
    for ( j = 0 ; j < rp->num_cols ; ++j )
	{
	tok = get_token();
	if (tok == (unsigned char *)NULL)
	    {
	    report_error(FATAL, &Unexpected_EOF, &line_num, filename,
							"reading RULE");
	    return( -1 );
	    }
	if ( !(isascii(*tok) && isdigit(*tok)) )
	    {
	    report_error(FATAL, &Bad_table_char, &line_num, filename, *tok);
	    return( -1 );
	    }
	x = atoi((char *)tok);
	if ((x < 0) || (x > rp->num_rows))
	    {
	    report_error(FATAL, &Bad_table_entry, &line_num, filename, tok);
	    return( -1 );
	    }
	rp->columns[j].transitions[i] = x;
	}
    }
return( 0 );
}

/****************************************************************************
 * NAME
 *    load_rules
 * ARGUMENTS
 *    rulefile - name of a PC-KIMMO rules file
 *    lang     - pointer to a LANGUAGE data structure
 *    comment  - character indicating comment in an input file
 * DESCRIPTION
 *    Load the alphabet information and rules (automata) from a rules file.
 * RETURN VALUE
 *    0 if successful, -1 if error in loading rules from rulefile
 */
int load_rules(rulefile, lang, comment)
unsigned char *rulefile;
LANGUAGE *lang;
unsigned comment;
{
unsigned char *tok;
/*
 *  open the Rules file
 */
if ( (rules_fp = fopen((char *)rulefile, "r")) == (FILE *)NULL )
    {
    report_error(FATAL, &Bad_rule_file, (int *)NULL, (char *)NULL, rulefile);
    return( -1 );
    }
fprintf(stderr, "Rules being loaded from %s\n", rulefile);
filename = (char *)rulefile;
/*
 *  initialize the alphabet and arrays of SUBSETs and RULEs
 */
lang->alphabet = (unsigned char *)NULL;
lang->null = NUL;
lang->any = NUL;
lang->boundary = NUL;
lang->subsets = (SUBSET *)NULL;
lang->numsubsets = 0;
size_subsets = 0;
lang->automata = (RULE *)NULL;
lang->num_rules = 0;
size_rules = 0;

line_num = 0;
comment_char = comment;		/* save invariant argument */
/*
 *  read the ALPHABET:  it must come first
 */
tok = get_token();
if (tok == (unsigned char *)NULL)
    {
    report_error(FATAL, &Unexpected_EOF, &line_num,filename,"nothing in file");
    goto bad_rulefile;
    }
else if (strcmp((char *)tok,"ALPHABET") != 0)
    {
    report_error(FATAL, &Expected_ALPHABET, &line_num, filename);
    goto bad_rulefile;
    }
else if (set_alphabet(&tok,lang) == -1)
    goto bad_rulefile;
/*
 *  load the rest of rules file following the ALPHABET
 */
for (;;)
    {
    if ((tok == (unsigned char *)NULL) || (strcmp((char *)tok, "END") == 0))
	break;
    if (strcmp((char *)tok,"ANY") == 0)
	{				/* set the ANY character */
	if (set_any(lang) == -1)
	    goto bad_rulefile;
	tok = get_token();
	}
    else if (strcmp((char *)tok,"NULL") == 0)
	{				/* set the NULL character */
	if (set_null(lang) == -1)
	    goto bad_rulefile;
	tok = get_token();
	}
    else if (strcmp((char *)tok,"BOUNDARY") == 0)
	{				/* set the BOUNDARY character */
	if (set_boundary(lang) == -1)
	    goto bad_rulefile;
	tok = get_token();
	}
    else if (strcmp((char *)tok,"SUBSET") == 0)
	{				/* add an alphabet SUBSET */
	if (add_subset(&tok,lang) == -1)
	    goto bad_rulefile;
	}
    else if (strcmp((char *)tok,"RULE") == 0)
	{				/* add a RULE */
	if (add_rule(lang) == -1)
	    goto bad_rulefile;
	tok = get_token();
	}
    else
	{
	report_error(FATAL, &Invalid_keyword, &line_num, filename, tok);
	goto bad_rulefile;
	}
    }
/*
 *  check that the special characters are defined
 */
if (lang->any == NUL)
    report_error(NONFATAL, &No_ANY_char, (int *)NULL, (char *)NULL );
if (lang->null == NUL)
    report_error(NONFATAL, &No_NULL_char, (int *)NULL, (char *)NULL );
if (lang->boundary == NUL)
    {
    report_error(FATAL, &No_BOUNDARY_char, (int *)NULL, (char *)NULL );
    goto bad_rulefile;		/* need this for the lexicon file */
    }
/*
 *  squeeze the arrays of SUBSETs and RULEs to exact fits
 *  then calculate the feasible pairs, and return zero to indicate success
 */
if (lang->numsubsets < size_subsets)
    {
    lang->subsets = (SUBSET *)myrealloc( lang->subsets,
					 lang->numsubsets * sizeof(SUBSET) );
    }
if (lang->num_rules < size_rules)
    {
    lang->automata = (RULE *)myrealloc( lang->automata,
				        lang->num_rules * sizeof(RULE) );
    }
fclose(rules_fp);
feasible_pairs(lang);
return( 0 );

/****************************** ERROR RETURN *******************************/
bad_rulefile:

fclose(rules_fp);
free_rules(lang);
return( -1 );
}
