/*      SETTXT.C - process text input control file for AMPLE program
 ***************************************************************************
 *
 *	void set_text(recp)
 *	char *recp;
 *
 ***************************************************************************
 *	EDIT HISTORY
 *	25-May-88	Steve McConnel - replaces SALPHA.C, PRINOP.C, and
 *					  SETSTD.C
 *	27-May-88	SRMc - new version of TXTIN()
 *	30-May-88	SRMc - add handling of \format, \barchar, \barcodes,
 *				and \ambig fields
 *	 2-Jun-88	SRMc - move \dsc from acode_tab to text_codes
 *	28-Jul-88	SRMc - replace ssalloc() with malloc() and realloc()
 *	27-Aug-88	SRMc - add error checking and error messages
 *	14-Oct-88	SRMc - add \n after display of word-formation
 *				characters
 *	21-Oct-88	SRMc - reorganize the file header comments
 *	20-Jul-89	hab  - rename dict.h to ample.h
 *	26-Jul-89	hab  - rename load_scl to add_scl
 *                             replace struct strlist in defs.h with
 *                              STAMP's strlist.h
 *      27-Jul-89       hab  - add Copyright 1989
 *      28-Jul-89       hab  - allow for a \nocap option: do not do any
 *                              capitalization processing of input text
 *      01-Aug-89       ab/hab - Define myisspace(), myisdigit(), myispunct()
 *                               to handle 8-bit characters
 * 1.4d 24-Oct-89 ALB Add mystrchr to fix 8-bit wfc bug (removed in 1.5v)
 * 1.5s 16-Mar-90 ALB Add \noincap to prevent internal recapitalization
 * 1.5v  6-Apr-90 ALB Fix bug of all 8-bit looking like wfc chars
 *                      Remove nonalpha, use alpha for everything
 *	20-Dec-90	SRMc - replace zero() with bzero() or memset()
 *			     - declare set_text() as void
 *	27-Dec-90	SRMc - use parse_change() from STAMP's change.c
 *	 1-Mar-91	SRMc - remove #include "ample.h" -- not needed
 *			     - declare get_fields() as void
 *	11-Mar-91	SRMc - remove #define strchr -- it's in OPACLIB.H
 ***************************************************************************
 * Copyright 1988, 1991 by the Summer Institute of Linguistics, Inc.
 * All rights reserved.
 */
#include <stdio.h>
#include <ctype.h>
#ifdef BSD
#include <strings.h>
extern void bzero();
#else
#ifndef DJGPP
#define bzero(buf,size) memset(buf,0,size)
#endif
#endif
#ifdef SYS_V
#include <string.h>
#include <memory.h>
#endif
#ifdef MSDOS
#include <string.h>
#include <memory.h>
#endif
#ifdef THINK_C
#include <string.h>
#endif

#include "opaclib.h"
#include "class.h"
#include "envir.h"
#include "change.h"
#include "strlist.h"

/*
 *  Routines defined outside this file.  As used below: getwd() is called on
 *  a line and its return value is used as the start of the next word;
 *  skipwhite() returns a pointer past leading whitespace; parse_change()
 *  returns a change_list node or NULL; add_scl() takes a string class
 *  definition line.
 *  NOTE(review): descriptions are inferred from the call sites in this file;
 *  confirm against the defining modules.
 */
extern char *getwd(), *skipwhite();
extern struct change_list *parse_change();
extern void add_scl();

			/* word formation characters: a NUL-terminated string
			   allocated and filled in by set_text() */
extern char *alpha;
			/* list of format markers to include or exclude
			   (both reset to NULL at the start of set_text()) */
extern struct strlist *incl_stdfmt, *excl_stdfmt;
			/* input orthography change list (rebuilt by
			   set_text() in the order the changes appear) */
extern struct change_list *ortho_chg;

extern char forminit;   /* initial character of "format markers" */
extern char barchar;    /* initial character of "bar codes" */
extern char *barcodes;  /* possible second characters of "bar codes" */
extern char ambigchar;  /* character for marking ambiguities */
extern char decomp_char; /* morpheme decomposition separation character */
extern int  do_decap;   /* do de-capitalization processing flag */
extern int incap;       /* Flag for internal recapitalization */
/*
 *  Text used by set_text() when it lists the word-formation characters on
 *  stderr.  Defining SPANISH selects the Spanish wording; DELETE and CONTROL
 *  are the same in both variants.
 */
#ifdef SPANISH
#define USING_AS_ALPHA "Usando los siguientes como alfabeticos:"
#define SPACE "espacio"
#define DELETE "delete"
#define CONTROL "CTRL"
#else
#define USING_AS_ALPHA "Using the following as word-formation characters:"
#define SPACE "space"
#define DELETE "delete"
#define CONTROL "CTRL"
#endif

/***************************************************************************
 * NAME
 *    get_fields
 * ARGUMENTS
 *    cp    - pointer to the NUL-terminated line to scan
 *    strlp - address of the head pointer of the string list to extend
 * DESCRIPTION
 *    Step through the line one word at a time using getwd().  Every word
 *    that begins with a backslash (a standard format marker) is copied into
 *    freshly allocated storage and pushed onto the FRONT of *strlp, so the
 *    list ends up in reverse order of appearance.  Other words are skipped.
 * RETURN VALUE
 *    none
 */
static void get_fields(cp, strlp)
char *cp;
struct strlist **strlp;
{
char *next;                     /* start of the word after the current one */
char *text;                     /* private copy of the marker text */
struct strlist *node;           /* list element being built */

for (;;)
    {
    /*
     *  getwd() is always called before the end-of-line test, including
     *  the last time through when nothing is left on the line
     */
    next = getwd(cp);
    if (*cp == NUL)
        break;                  /* no more words on the line */

    if (*cp == '\\')
        {
        /* a format marker: copy it and link it in at the list head */
        node = (struct strlist *)myalloc( sizeof(struct strlist) );
        text = (char *)myalloc( (unsigned) strlen(cp) + 3 );
        strcpy( text, cp );
        node->stri  = text;
        node->slink = *strlp;
        *strlp = node;
        }

    cp = next;                  /* advance to the following word */
    }
} /* end get_fields */

/***************************************************************************
 * NAME
 *    set_text
 * ARGUMENTS
 *    recp - pointer to record loaded by set_record(), or NULL
 * DESCRIPTION
 *    Set the control variables for text input from the record pointed to by
 *    recp, or if recp is NULL, set the default values.
 *
 *    The record is read as a sequence of entries ending at an EOR byte.
 *    Each entry is a one-character field code, optional whitespace, and the
 *    NUL-terminated field text.  Codes handled:
 *        'A'  additional word-formation characters
 *        'K'  turn off internal recapitalization (incap)
 *        'N'  format markers to include      'X'  format markers to exclude
 *        'C'  orthography change             'S'  string class definition
 *        'a'  format marker character        'b'  bar character
 *        'c'  bar code characters            'd'  ambiguity marker
 *        'e'  decomposition separator        'f'  turn off decapitalization
 *    Entries with any other code are skipped.
 *
 *    Always reset: ortho_chg, incl_stdfmt, excl_stdfmt (to NULL) and alpha
 *    (newly allocated).  barcodes is assigned only if bar codes were given.
 *    The other globals are changed only when the matching field is present.
 *
 *    Problems in the record are reported on stdout, prefixed by
 *    "\nSETUP TEXT: ".  The final word-formation character list is shown
 *    on stderr.
 * RETURN VALUE
 *    none
 */
void set_text(recp)
char *recp;
{
register char *rp;              /* internal record pointer */
char *end;                      /* end of field, or start of next word */
char wordform[256];		/* pretend to support 8-bit characters */
register char *ap;              /* alphabetics pointer (next free slot) */
int k;
int code;
struct change_list *tail, *cc;
char bars[256];                 /* store barcode characters */
int nbars;                      /* number of characters stored in bars */
int bars_full;                  /* set once a bars overflow was reported */
static char errhead[] = "\nSETUP TEXT: ";
short seen_ambig, seen_barchar, seen_dsc, seen_format;

/*
 *  set the default alphabetic characters
 */
for ( ap = wordform, k = 1 ; k < 128 ; ++k )
    {
    if (isalpha(k))
        *ap++ = k;
    }
*ap = '\0';                     /* terminate the list */
/*
 *  more initialization
 */
ortho_chg = tail = (struct change_list *)NULL;
incl_stdfmt = excl_stdfmt = (struct strlist *)NULL;
bzero(bars,sizeof(bars));       /* all NUL, so bars is always terminated */
nbars = 0;
bars_full = 0;
seen_ambig = seen_barchar = seen_dsc = seen_format = 0;
/*
 *  get the information from the record
 */
for ( rp = recp ; (rp != (char *)NULL) && (*rp != EOR) ; )
    {
    code = *rp++;		/* grab the table code */
    rp = skipwhite(rp);         /* skip following whitespace */
    switch (code)
        {
        case 'A':		/* alphabetic (word formation) characters */
            if (*rp == NUL)
                printf("%sEmpty word formation character field", errhead );
            else
                {
                /*
                 *  Only characters not already present are appended, so
                 *  wordform holds at most the 255 distinct non-NUL byte
                 *  values plus the terminator and cannot overflow.
                 */
                while (*rp != NUL)
                    {
                    k = (*rp++) & 0377;
                    if (    !myisspace(k) &&
                            (strchr(wordform,k)==(char *)NULL) )
                        {
                        *ap++ = k;	/* add alphabetic character */
                        *ap = NUL;
                        }
                    }
                }
            break;

        case 'K':               /* No internal recapitalization */
            incap = FALSE;
            break;

        case 'N':               /* definition of include field */
            if (*rp == NUL)
                printf("%sEmpty include field", errhead );
            else if (excl_stdfmt != (struct strlist *)NULL)
                printf("%sIgnoring include field following an exclude field",
                                        errhead );
            else
                {
                /* remember the end of the field before it is scanned */
                end = rp + strlen(rp);
                get_fields(rp, &incl_stdfmt);
                rp = end;
                }
            break;

        case 'X':		/* definition of exclude field */
            if (*rp == NUL)
                printf("%sEmpty exclude field", errhead );
            else if (incl_stdfmt != (struct strlist *)NULL)
                printf("%sIgnoring exclude field following an include field",
                                        errhead );
            else
                {
                end = rp + strlen(rp);
                get_fields(rp, &excl_stdfmt);
                rp = end;
                }
            break;

        case 'C':		/* orthography change */
            end = rp + strlen(rp);
            cc = parse_change(rp);
            if (cc != (struct change_list *)NULL)
                {               /* link change to end of list */
                if (ortho_chg == (struct change_list *)NULL)
                    ortho_chg = cc;
                else
                    tail->cl_link = cc;
                tail = cc;
                }
            rp = end;
            break;

        case 'S':               /* string class definition */
            end = rp + strlen(rp);
            add_scl(rp);	/* load the string class definition */
            rp = end;
            break;

        case 'a':		/* format character (\format) */
            if (seen_format++)
                printf("%sFormat field already seen - ignoring this one",
                                        errhead );
            else
                forminit = *rp;         /* NUL => no format markers */
            break;

        case 'b':		/* bar character (\barchar) */
            if (seen_barchar++)
                printf(
                    "%sBar character field already seen - ignoring this one",
                                        errhead );
            else
                barchar = *rp;		/* NUL => no bar code formatting */
            break;

        case 'c':		/* bar codes (\barcodes) */
            if (*rp == NUL)
                printf("%sEmpty bar codes field", errhead );
            else
                {
                while (*rp != NUL)
                    {
                    end = getwd(rp);
                    /*
                     *  Append this word to bars, never writing past the
                     *  last usable slot; the final byte stays NUL.  (An
                     *  unbounded strcat() here could overrun the buffer
                     *  when the fields total more than 255 characters.)
                     */
                    for ( ; *rp != NUL ; ++rp )
                        {
                        if (nbars < (int)sizeof(bars) - 1)
                            bars[nbars++] = *rp;
                        else if (!bars_full)
                            {
                            bars_full = 1;	/* report only once */
                            printf(
                            "%sToo many bar code characters - ignoring the excess",
                                        errhead );
                            }
                        }
                    rp = end;
                    }
                }
            break;

        case 'd':		/* ambiguity marker character (\ambig) */
            if (*rp == NUL)
                printf("%sEmpty ambiguity marker field - using '%c'",
                                        errhead, ambigchar );
            if (seen_ambig++)
                printf("%sAmbiguity marker already seen - ignoring this one",
                                        errhead );
            else if (*rp != NUL)
                ambigchar = *rp;	/* have to have one defined! */
            break;

        case 'e':		/* morpheme decomposition separation char */
            if (*rp == NUL)
                printf("%sEmpty decomposition separator field - using '%c'",
                                        errhead, decomp_char );
            if (seen_dsc++)
                printf(
                 "%sDecomposition separator already seen - ignoring this one",
                                        errhead );
            else if (*rp != NUL)
                decomp_char = *rp;	/* have to have one defined! */
            break;

        case 'f':		/* do no capitalization processing */
            do_decap = FALSE;
            break;

        default:		/* unrecognized code: skip the entry */
            break;

        } /* end switch */
    while (*rp++ != NUL)	/* skip rest of this entry in the record */
        ;
    } /* end for */
/*
 *  show the alphabetic characters
 */
fflush(stdout);                 /* keep stdout messages ahead of stderr */
fprintf(stderr, "\n\t%s\n\t", USING_AS_ALPHA );
for ( ap = wordform ; *ap != NUL ; )
    {
    k = *ap++ & 0377;
    if (k < ' ')
        fprintf(stderr, "<%s/%c>", CONTROL, k | 0100 );
    else if (k == ' ')
        fprintf(stderr, "<%s>", SPACE);
    else if (k < 0177)
        fprintf(stderr, "%c", k );
    else if (k == 0177)
        fprintf(stderr, "<%s>", DELETE );
    else if (k < 0240)
        fprintf(stderr, "<0x80+%s/%c>", CONTROL, (k & 0177) | 0100 );
    else if (k == 0240)
        fprintf(stderr, "<0x80+%s>", SPACE);
    else if (k < 0377)
        fprintf(stderr, "<0x80+%c>", k & 0177 );
    else if (k == 0377)
        fprintf(stderr, "<0x80+%s>", DELETE);
    }
fprintf(stderr,"\n");
/*
 *  save the word formation characters in permanent storage
 */
alpha = myalloc( (unsigned)strlen(wordform)+1 );
strcpy(alpha,wordform);
/*
 *  set the bar codes if any were specified
 */
if (bars[0] != NUL)
    barcodes = strcpy(myalloc((unsigned)strlen(bars)+1),bars);
}
