/*      ENVPAR.C - parse environment constraint expressions
 ***************************************************************************
 *
 *	void epar_error(fmt,arg)
 *	char *fmt;
 *	char *arg;
 *
 *	void show_badenv(envir)
 *	char *envir;
 *
 *      struct env_cond *env_parse(ismorph,find_cl)
 *	int ismorph;
 *      struct string_class *(*find_cl)();
 *
 ***************************************************************************
 *	EDIT HISTORY
 *	 9-May-88	Steve McConnel
 *	19-May-88	SRMc - add "~_" as a token
 *	20-May-88	SRMc - write ccenv_parse()
 *	 1-Jun-88	SRMc - add env_ortho
 *	28-Jul-88	SRMc - replace ssalloc() with malloc() and realloc()
 *      24-Aug-88       SRMc - regularize error messages
 *       7-Sep-88       SRMc - split ENVPAR.C into ENVPAR.C, AENVPA.C,
 *                              CENVPA.C, IENVPA.C, and MENVPA.C
 *       9-Sep-88       SRMc - parse ( <item> ) for E_OPTIONAL
 *      16-Sep-88       SRMc - tweak the error message to
 *                                              "Missing optional item"
 *      21-Oct-88       SRMc - reorganize the file header comments
 *      10-Nov-88       SRMc - replace free() with myfree()
 *      17-May-89       SRMc - revised definition of apply_cc() (for STAMP)
 *                           - use mystrdup()
 *      13-Jul-89       hab  - de-"lint" the source
 *      11-Jul-89       hab  - convert tabs to spaces in output
 *      26-Jul-89       hab  - merge with STAMP version
 *      27-Jul-89       hab  - add Copyright 1989
 * 1.0m 14-Mar-90 ALB Allow cat and cat class in morpheme environment
 * 1.1b 29-Jun-90 BK/ALB Fix for portability to MAC, add string.h
 *	17-Jan-91	SRMc - declare epar_error() as void
 *			     - declare show_badenv() as void
 *	 3-Jan-92	SRMc - change argument list for apply_cc()
 ***************************************************************************
 * Copyright 1988, 1992 by the Summer Institute of Linguistics, Inc.
 * All rights reserved.
 */
#include <stdio.h>
#include <ctype.h>
#ifdef BSD
#include <strings.h>
#else
#include <string.h>
#endif
#include "opaclib.h"
#include "class.h"
#include "envir.h"
#include "change.h"
#include "strlist.h"

/*
 *  P(s) wraps the parameter list of each prototype below: when __STDC__
 *  is defined the list is kept as written, otherwise it collapses to an
 *  empty "()" so the declarations stay acceptable to a pre-ANSI compiler.
 *  The macro is removed again (#undef) once the declarations are done.
 */
#ifdef __STDC__
#define P(s) s
#else
#define P(s) ()
#endif

/* categ.c -- category and category-class lookup by name */
extern int find_cat P((char *name ));
extern struct cat_class *find_ccl P((char *name ));

/* change.c -- apply a change list to a string */
extern char *apply_cc P((char *buf , struct change_list *cc ));

/* envlex.c -- the environment lexer: elex_get() returns the next token, */
/*             elex_string holds its text, elex_ptr is the scan position */
/*             that show_badenv() compares against                       */
int elex_get P((void ));
extern char *elex_ptr, elex_string[];

/* envpar.c -- the public functions defined in this file */
void epar_error P((char *fmt , char *arg ));
void show_badenv P((char *envir ));
struct env_cond *env_parse P((int ismorph , 
		struct string_class *(*find_cl)() ));

/* myallo.c -- allocation wrappers */
/* myalloc() is deliberately not declared here; presumably one of the */
/* project headers included above supplies it -- TODO confirm         */
/* extern char *myalloc P((unsigned size )); */
extern char *mystrdup P((char *str ));
extern void myfree P((char *s ));

#undef P

/*
 *  values for the isright argument of env_side()
 */
#define LEFT_SIDE  0            /* parsing the items before the '_' */
#define RIGHT_SIDE 1            /* parsing the items after the '_' */

/*
 *  pieces of every error message written by this file:
 *    enverrhead   - text written ahead of each message by epar_error()
 *                   and by env_parse()
 *    enverrtail   - printf() style format written after each epar_error()
 *                   message, with envparsetype as its one argument; only
 *                   used when both it and envparsetype are non-NULL
 *    envparsetype - set by env_parse() to "morpheme" or "string"
 */
char *enverrhead = NULL;                /* used by epar_error() */
char *enverrtail = NULL;
char *envparsetype = NULL;
/*
 *  orthography change list for allomorph string environment
 *  (handed to apply_cc() for each literal of a string environment)
 */
struct change_list *env_ortho = NULL;

/*************************************************************************
 * NAME
 *    epar_error
 * ARGUMENTS
 *    fmt - a printf() style format string using at most one conversion
 *    arg - string argument for fmt (callers pass "" when fmt needs none)
 * DESCRIPTION
 *    Print an error message regarding the parsing of environments.
 *    The message goes to stdout in up to three pieces:
 *      1. the enverrhead prefix (skipped while enverrhead is NULL),
 *      2. fmt formatted with arg,
 *      3. enverrtail formatted with envparsetype (only when both are
 *         non-NULL).
 *    No newline is added; any line ending must come from those pieces.
 * RETURN VALUE
 *    none
 */
void epar_error(fmt,arg)
char *fmt;
char *arg;
{
/*
 *  enverrhead is NULL until some caller sets it, and passing a NULL
 *  pointer for a %s conversion is undefined behavior
 */
if (enverrhead != (char *)NULL)
    printf( "%s", enverrhead );
printf( fmt, arg );
if ((enverrtail != (char *)NULL) && (envparsetype != (char *)NULL))
    printf( enverrtail, envparsetype );
}

/*************************************************************************
 * NAME
 *    show_badenv
 * ARGUMENTS
 *    envir - start of the environment text that was being parsed
 * DESCRIPTION
 *    Echo the environment on a new line, after eight blanks, with the
 *    text "<<ERROR DETECTED HERE>>" inserted immediately before the
 *    character that elex_ptr addresses.  When elex_ptr addresses the
 *    terminating NUL instead, the marker is written after the text and
 *    is followed by a newline.
 * RETURN VALUE
 *    none
 */
void show_badenv(envir)
char *envir;
{
register char *cursor;

printf("\n%8s"," ");            /* new line plus the leading blanks */

cursor = envir;
while (*cursor != NUL)
    {
    if (cursor == elex_ptr)     /* marker goes ahead of this character */
        printf("<<ERROR DETECTED HERE>>");
    putchar(*cursor);
    ++cursor;
    }

if (cursor == elex_ptr)         /* the error was found at the very end */
    printf("<<ERROR DETECTED HERE>>\n");
}

/*
 *  DOTS is the parameter list written inside the find_cl function
 *  pointer declaration of env_side(): "..." when THINK_C is defined,
 *  nothing at all otherwise.
 */
#ifdef THINK_C
#define DOTS ...
#else
#define DOTS
#endif

/*************************************************************************
 * NAME
 *    env_side
 * ARGUMENTS
 *    token   - first token of this side of the environment
 *    ismorph - nonzero if morpheme environment, zero if string environment
 *    find_cl - pointer to either find_scl() or find_mcl()
 *    isright - nonzero if right side, zero if left side
 *    flags   - pointer to ec_flags field for env_cond structure
 * DESCRIPTION
 *    Parse one side of an environment.  The list of env_item's is
 *    built such that the first element is closest to the '_' and the last
 *    element is farthest away.  The left side ends at '_' or "~_" (the
 *    latter also sets E_NOT in *flags); the right side ends at ENV_END
 *    or EOF.  Further tokens are fetched with elex_get().
 *
 *    Each item that is neither a class nor a boundary owns the storage
 *    of its ei_string: morpheme names are copied with mystrdup(), and
 *    the result of apply_cc() is treated as allocated storage as well
 *    (both are released with myfree() when parsing fails).  A boundary
 *    ('#') item has a NULL ei_string.
 * RETURN VALUE
 *    pointer to the list of env_item structures, or NULL if an error
 *    occurs (a side that produced no items also comes back as NULL)
 */
static struct env_item *env_side(token,ismorph,find_cl,isright,flags)
int token, ismorph;
struct string_class *(*find_cl)(DOTS);
int isright;
char *flags;
{
struct env_item *env, *env_tail;
register struct env_item *pe;
register int myflags;           /* what to put in ei_flags field */
int need_lit;                   /* have just seen a '[' */
int need_brack;                 /* have just seen '[' <CLASSNAME> */
int need_item;                  /* have just seen a '(' */
int need_paren;                 /* have just seen '(' <item> */
int need_end;                   /* if (isright), have just seen a '#' */

if (ismorph)
    ismorph = E_MORPHEME;       /* convert to bitflag value */

need_lit = FALSE, need_brack = FALSE, need_end = FALSE;
need_item = FALSE, need_paren = FALSE;
env = (struct env_item *)NULL, env_tail = (struct env_item *)NULL;

for ( myflags = ismorph ;; token = elex_get() )         /* do forever... */
    {
    /*
     *  check for absence of absolutely required items
     */
    if (need_lit && (token != ENV_LITERAL))
        {
        epar_error("Missing class name", "");    goto bad_side;
        }
    if (need_brack && (token != ENV_RBRACK))
        {
        epar_error("Missing ']'", "" );    goto bad_side;
        }
    /*
     *  ']' is accepted here because need_item stays set all the way
     *  through "( [ CLASSNAME ]" and is only cleared by that ']'
     */
    if (need_item && (token != ENV_LITERAL) && (token != ENV_NOT) &&
                        (token != ENV_LBRACK) && (token != ENV_RBRACK) )
        {
        epar_error("Missing optional item", "");    goto bad_side;
        }
    if (need_paren && (token != ENV_RPAREN))
        {
        epar_error("Missing ')'", "" );    goto bad_side;
        }
    if (need_end && (token != ENV_END) && (token != EOF))
        goto bad_bound;         /* nothing may follow a right side '#' */
    /*
     *  take action based on what you see
     */
    switch (token)
        {
        case EOF:
        case ENV_END:
            if (!isright)
                {
                epar_error("Missing '_'","");    goto bad_side;
                }
            /* a dangling '~' or '...' is reported, but the items */
            /* collected so far are still returned                */
            if (myflags & E_NOT)
                epar_error("Invalid '~'","");
            if (myflags & E_ELLIPSIS)
                epar_error("Invalid '...'","");
            return( env );              /* GOOD RETURN FOR RIGHT SIDE */

        case ENV_BOUND:
            if (isright)
                need_end = TRUE;
            else if (env != (struct env_item *)NULL)
                goto bad_bound;         /* left '#' must come first */
            goto new_item;              /* share code with ENV_LITERAL */

        case ENV_ELLIPSIS:
            if (isright)
                myflags |= E_ELLIPSIS;          /* right side */
            else if (env == (struct env_item *)NULL)
                epar_error("Invalid '...'", "");
            else
                env->ei_flags |= E_ELLIPSIS;    /* left side */
            break;

        case ENV_LBRACK:
            need_lit = TRUE;            /* need a class name following */
            myflags |= E_CLASS;
            break;

        case ENV_RBRACK:
            if (!need_brack)            /* want ']' only if needed */
                {
                epar_error("Unexpected ']' found", "");    goto bad_side;
                }
            need_brack = FALSE;         /* we don't need it anymore */
            if (need_item)
                {                                       /* finished item */
                need_item = FALSE;    need_paren = TRUE;
                }
            break;

        case ENV_MARK:
            if (isright)
                {
                epar_error("Second '_' found", "");    goto bad_side;
                }
            return( env );              /* GOOD RETURN FOR LEFT SIDE */

        case ENV_NOTMARK:
            if (isright)
                {
                epar_error("Second '_' found", "");    goto bad_side;
                }
            *flags |= E_NOT;            /* mark a negative environment */
            return( env );              /* GOOD RETURN FOR LEFT SIDE */

        case ENV_NOT:
            if (myflags & E_NOT)
                epar_error("Invalid '~'", "");
            myflags |= E_NOT;
            break;

        case ENV_LITERAL:
            if (need_lit)
                {
                need_lit = FALSE;    need_brack = TRUE;
                }
            else if (need_item)
                {                                       /* finished item */
                need_item = FALSE;    need_paren = TRUE;
                }
new_item:   pe = (struct env_item *)myalloc(sizeof(struct env_item));
            /* terminate the list explicitly instead of counting on */
            /* myalloc() to hand back zeroed storage                */
            pe->ei_link = (struct env_item *)NULL;
            if (token == ENV_BOUND)
                pe->ei_val.ei_string = (char *)NULL;    /* mark boundary */
            else if (myflags & E_CLASS)
                {
                if (ismorph)            /* get morpheme class */
                    {
                    pe->ei_val.ei_mcl = (struct morph_class *)
                                                (*find_cl)( elex_string );
                    if ( !pe->ei_val.ei_mcl )       /* If not morpheme */
                        {                           /*  try cat */
                        /* The following cast of int to pointer is ok */
                        /* Myflags shows that it is an int value */
                        pe->ei_val.ei_mcl = (struct morph_class *)
                                find_cat( elex_string );
                        if ( pe->ei_val.ei_mcl )    /* If cat */
                            myflags |= E_CAT;       /* Mark as cat */
                        }
                    if ( !pe->ei_val.ei_mcl )       /* If not cat */
                        {                           /*  try cat class */
                        pe->ei_val.ei_mcl = (struct morph_class *)
                                                find_ccl( elex_string );
                        if ( pe->ei_val.ei_mcl )    /* If cat class */
                            myflags |= E_CCL;       /* Mark as cat class */
                        }
                    if (pe->ei_val.ei_mcl == (struct morph_class *)NULL)
                        {
                        epar_error("Undefined morpheme class %s",
                                        elex_string );
                        /* pe is not on the list yet, so the cleanup */
                        /* at bad_side would never see it            */
                        myfree( (char *)pe );
                        goto bad_side;
                        }
                    }
                else                    /* get string class */
                    {
                    pe->ei_val.ei_scl = (*find_cl)( elex_string );
                    if (pe->ei_val.ei_scl == (struct string_class *)NULL)
                        {
                        epar_error("Undefined string class %s",elex_string );
                        myfree( (char *)pe );   /* not on the list yet */
                        goto bad_side;
                        }
                    }
                }
            else                /* literal string or morphname */
                {               /* apply orthochange if needed */
                /*
                 *  elex_string is the lexer's own buffer: it is rewritten
                 *  by the next elex_get() and must never reach myfree(),
                 *  so a morpheme name is stored as a private copy
                 */
                if (ismorph)
                    pe->ei_val.ei_string = mystrdup(elex_string);
                else
                    pe->ei_val.ei_string = apply_cc(elex_string, env_ortho);
                }
            pe->ei_flags = myflags;
            myflags = ismorph;          /* reset for the next item */
            if (isright)
                {
                if (env == (struct env_item *)NULL)
                    env = pe;           /* link at the tail of the list */
                else
                    env_tail->ei_link = pe;
                env_tail = pe;
                }
            else
                {
                pe->ei_link = env;      /* link at the head of the list */
                env = pe;
                }
            break;

        case ENV_LPAREN:
            need_item = TRUE;           /* need an item following */
            myflags |= E_OPTIONAL;
            break;

        case ENV_RPAREN:
            if (!need_paren)            /* want ')' only if needed */
                {
                epar_error("Unexpected ')' found", "");    goto bad_side;
                }
            need_paren = FALSE;         /* we don't need it anymore */
            break;

        default:
            epar_error("Invalid input '%s'", elex_string);    goto bad_side;

        } /* end switch (token) */
    } /* end for (;;) */

bad_bound:              /* a '#' somewhere other than the outer edge */
epar_error("Invalid '#'", "");

bad_side:               /* here only on parse error */
                        /* release any allocated space */
for ( pe = env ; pe != (struct env_item *)NULL ; pe = env )
    {
    env = pe->ei_link;
    /* class items point at shared class data; only literals own text */
    if (!(pe->ei_flags & E_CLASS) && (pe->ei_val.ei_string != (char *)NULL))
        myfree(pe->ei_val.ei_string );
    myfree( (char *)pe );
    }
return( (struct env_item *)NULL );      /* ERROR RETURN */
}

/*************************************************************************
 * NAME
 *    env_parse
 * ARGUMENTS
 *    ismorph - nonzero if morpheme environment, zero if string environment
 *    find_cl - pointer to either find_scl() or find_mcl()
 * DESCRIPTION
 *    Parse a single environment constraint, building the structure for the
 *    interpreter to run over later.
 * RETURN VALUE
 *    pointer to the env_cond structure, or NULL if an error occurs
 */
struct env_cond *env_parse(ismorph,find_cl)
int ismorph;
struct string_class *(*find_cl)();
{
register struct env_cond *ec;
register int token;
struct env_item *e, *ne;

envparsetype = (ismorph) ? "morpheme" : "string";
ec = (struct env_cond *)myalloc(sizeof(struct env_cond));
if (ismorph)
    ec->ec_flags = E_MORPHEME;
/*
 *  parse the left side of the environment (before the '_')
 */
token = elex_get();
if ((token == EOF) || (token == ENV_END))
    goto empty_env;			/* no environment after all */
else if (token == ENV_NOTMARK)
    ec->ec_flags |= E_NOT;		/* starts with "~_" */
else if (token != ENV_MARK)
    {
    ec->ec_left = env_side(token,ismorph,find_cl,LEFT_SIDE, &ec->ec_flags );
    if (ec->ec_left == (struct env_item *)NULL)
	goto bad_side;			/* parsing error occurred */
    }
/*
 *  parse the right side of the environment (after the '_')
 */
token = elex_get();
if ((token != EOF) && (token != ENV_END))
    {
    ec->ec_right = env_side(token,ismorph,find_cl,RIGHT_SIDE, &ec->ec_flags );
    if (ec->ec_right == (struct env_item *)NULL)
	goto bad_side;			/* parsing error occurred */
    }
else if (ec->ec_left == (struct env_item *)NULL)
    goto empty_env;		/* only a '_', nothing on either side */

return( ec );				/* successful return */

empty_env:				/* no environment */
printf("%sEmpty %s environment constraint", enverrhead, envparsetype );

bad_side:				/* need to clean up */
for ( e = ec->ec_left ; e != (struct env_item *)NULL ; e = ne )
    {
    ne = e->ei_link;		/* free anything built for the left */
    myfree( (char *)e );
    }
myfree( (char *)ec );		/* free this node */
return( (struct env_cond *)NULL );
}
