/* TEXTIN.C - read the next word from a text file
 ***************************************************************************
 *
 *	void load_intx_ctl_file( fname )
 *	char *fname;
 *
 *	struct word_template *textin(infp)
 *	FILE *infp;
 *
 ***************************************************************************
 *	EDIT HISTORY
 *	25-JUL-82	D. Weber/Bob Kasper
 *	13-MAR-85	hab/djw
 *	24-Sep-85	SRMc - port CQAP to Unix and MSDOS
 *	05-JUN-86	hab
 *	 2-May-88	SRMc - fflush(stdout) before writing to stderr
 *	25-May-88	SRMc - use change_list struct for ortho_chg, make
 *                              nonalph and word_init char pointers, not
 *				arrays
 *	27-May-88	SRMc - total rewrite of fill_template(), merging in
 *				functionality of do_word()
 *                           - replace set_caps() with decapital()
 *	28-May-88	SRMc - restore (a more flexible) handling of
 *				Manuscripter "bar codes"
 *      30-May-88       SRMc - check for NUL forminit
 *	 1-Jun-88	SRMc - rename norm_word to this_word, and add
 *				last_word and next_word
 *	 2-Jun-88	SRMc - remove new_word[256] field from template
 *				structure
 *	 8-Jun-88	SRMc - handle a single word read in output report
 *	28-Jul-88	SRMc - replace ssalloc() with malloc() and realloc()
 *	21-Oct-88	SRMc - remove register from function parameter
 *				declarations
 *			     - reorganize the file header comments
 *	10-Nov-88	SRMc - replace free() with myfree()
 *	 4-Jan-89	SRMc - fix for Microsoft C
 *	 7-Mar-89	SRMc - add monitoring of progress by printing a dot
 *				as each word is read
 *	18-Mar-89	SRMc - allow something like "word (word) word" or
 *				"word--word--word", i.e., punctuation without
 *				surrounding whitespace
 *	26-Jul-89	hab  - replace struct strlist in defs.h with
 *				STAMP's strlist.h
 *	27-Jul-89	hab  - add extern void myfree()
 *			       move monitoring of progress to anal.c
 *      28-Jul-89       hab  - allow for a \nocap option: do not do any
 *                              capitalization processing of input text
 *      01-Aug-89       ab/hab - Define myisspace(), myisdigit(), myispunct()
 *                               to handle 8-bit characters
 *      07-Aug-89       ab/hab - rename myrealloc() to myshrink()
 * 1.4v  8-Nov-89 ALB Prevent INPUT: xx words message if log to screen
 * 1.5k 10-Jan-90 ALB Fix bugs in handling bar codes at ends of words
 * 1.5m 10-Jan-90 ALB Add code to output bitmap for upper case letters
 * 1.5p 28-Feb-90 ALB Fix bug of not accepting 8-bit char at front of wrd
 * 1.5s 16-Mar-90 ALB Add \noincap to prevent internal recapitalization
 * 1.5v  6-Apr-90 ALB Fix bug of all 8-bit looking like wfc chars
 *                      Remove nonalpha, use alpha for everything
 * 1.6a 21-Jun-90 BK  Fix up for THINKC on MAC
 *	20-Dec-90	SRMc - replace zero() with bzero() or memset()
 *			     - use strchr() and strrchr() throughout
 *			     - replace streq() with strcmp()
 *	28-Dec-90	SRMc - revise to use word_template structure defined
 *				for STAMP
 *			     - renamed TXTIN() to txtin()
 *			     - fix for STAMP's apply_cc()
 *	31-Dec-90	SRMc - *correctly* fix for STAMP's apply_cc()
 *	 5-Jan-91	SRMc - adjust for revised memory allocation with
 *				accounting
 *	28-Jan-91	SRMc - allow multiple consecutive barcodes to follow
 *				a word, as well as to precede a word
 *	 5-Feb-91	SRMc - rename primary function to textin(), change it
 *				to return (struct word_template *) rather
 *				than int, and NOT to fill lookahead and
 *				lookbehind buffers
 *			     - merge in settxt.c, rename set_text() to
 *				static setup_textin(), incorporate
 *				load_intx_ctl_file() as interface for
 *				outside world
 *	 2-Mar-91	SRMc - use old-style memory allocation (no accounting)
 *	11-Mar-91	SRMc - remove #define strchr -- it's in OPACLIB.H
 *	 3-Jan-92	SRMc - change argument list for apply_cc()
 ***************************************************************************
 *	EDIT HISTORY of SETTXT.C (merged in 5-Feb-91)
 *	25-May-88	Steve McConnel - replaces SALPHA.C, PRINOP.C, and
 *					  SETSTD.C
 *	27-May-88	SRMc - new version of TXTIN()
 *	30-May-88	SRMc - add handling of \format, \barchar, \barcodes,
 *				and \ambig fields
 *	 2-Jun-88	SRMc - move \dsc from acode_tab to text_codes
 *	28-Jul-88	SRMc - replace ssalloc() with malloc() and realloc()
 *	27-Aug-88	SRMc - add error checking and error messages
 *	14-Oct-88	SRMc - add \n after display of word-formation
 *				characters
 *	21-Oct-88	SRMc - reorganize the file header comments
 *	20-Jul-89	hab  - rename dict.h to ample.h
 *	26-Jul-89	hab  - rename load_scl to add_scl
 *                             replace struct strlist in defs.h with
 *                              STAMP's strlist.h
 *      27-Jul-89       hab  - add Copyright 1989
 *      28-Jul-89       hab  - allow for a \nocap option: do not do any
 *                              capitalization processing of input text
 *      01-Aug-89       ab/hab - Define myisspace(), myisdigit(), myispunct()
 *                               to handle 8-bit characters
 * 1.4d 24-Oct-89 ALB Add mystrchr to fix 8-bit wfc bug (removed in 1.5v)
 * 1.5s 16-Mar-90 ALB Add \noincap to prevent internal recapitalization
 * 1.5v  6-Apr-90 ALB Fix bug of all 8-bit looking like wfc chars
 *                      Remove nonalpha, use alpha for everything
 * 1.6e 02-Aug-90 hab Add lower-upper word formation character handling
 *	20-Dec-90	SRMc - replace zero() with bzero() or memset()
 *			     - declare set_text() as void
 *	27-Dec-90	SRMc - use parse_change() from STAMP's change.c
 ***************************************************************************
 * Copyright 1991, 1992 by the Summer Institute of Linguistics, Inc.
 * All rights reserved.
 */
#include <stdio.h>
#include <ctype.h>
#ifndef isascii
/*
 *  Fallback for libraries without isascii(): TRUE when x has no bits set
 *  above the low seven.  The argument is parenthesized so that an expression
 *  built with operators of lower precedence than '&' is tested as a whole.
 */
#define isascii(x) (!((x) & ~0177))
#endif
#ifdef BSD
#include <strings.h>
extern void bzero();
#else
#ifndef DJGPP
#define bzero(buf,size) memset(buf,0,size)
#endif
#endif
#ifdef SYS_V
#include <string.h>
#include <memory.h>
#endif
#ifdef MSDOS
#include <string.h>
#include <memory.h>
#endif
#ifdef THINK_C
#include <string.h>
#include <unix.h>
#endif

#include "opaclib.h"
#include "template.h"		/* new (5/89) word_template structure */
#include "class.h"		/* new (5/89) class structures */
#include "envir.h"		/* new (5/89) environment structures */
#include "change.h"		/* new (5/89) change structures */
#include "strlist.h"
#include "codetab.h"

#ifdef __STDC__
#define P(s) s
#else
#define P(s) ()
#endif

/* standard library functions */
extern int isatty P((int fd));

/* textin.c */
struct word_template *textin P((FILE *infp ));
void load_intx_ctl_file P((char *fname ));

/* change.c */
struct change_list *parse_change P((char *cp ));
struct env_cond *ccenv_parse P((char *line ));
char *apply_cc P((char *buf , struct change_list *cc ));

/* getwd.c */
char *skipwhite P((char *cp ));
char *getwd P((char *cp ));

/* luwfc.c */
void init_luwfc P((void ));
char *add_luwfc P((char *rp , char *ap ));
void set_luwfc P((void ));

/* strcla.c */
void init_scl P((void ));
void add_scl P((char *line ));
struct string_class *find_scl P((char *name ));
int scl_member P((char *string , struct string_class *class ));
int scl_lmatch P((char *string , struct string_class *class ));
int scl_rmatch P((char *string , struct string_class *class ));
void show_scl P((void ));

/* myalloc.c */
void myalloc_fail P((void ));
/* char *myalloc P((unsigned size )); */
char *mystrdup P((char *str ));
char *myrealloc P((char *s , unsigned size ));
char *myshrink P((char *s ));
void myfree P((char *s ));

/* myctype.c */
extern int myisalpha P((int c));
extern int myislower P((int c));
extern int myisupper P((int c));
extern int mytolower P((int c));
extern int mytoupper P((int c));

/* record.c */
void init_record P((FILE *infp , char *rec_mark , int comment_char ));
void free_record P((char *rp ));
char *get_record P((FILE *infp , char *rec_mark , int *rec_read , struct code_table *code_tab ));

/* ufopen.c */
extern FILE *ufopen P((char *filename, char *mode));

#undef P

extern int comment_char;	/* set by a command line option */

/*****************************************************************************
 *	VARIABLES SET BY THE TEXT INPUT CONTROL FILE (xxINTX.CTL)
 */
/*
 *  number of words read from the input file(s)
 */
extern int num_words;
/*
 *  list of format markers to include
 */
extern struct strlist *incl_stdfmt;
/*
 *  list of format markers to exclude
 */
extern struct strlist *excl_stdfmt;
/*
 *  alphabetic characters
 */
extern char *alpha;
/*
 *  output internal capitalization info
 */
extern int incap;
/*
 *  input orthography change table
 */
extern struct change_list *ortho_chg;
/*
 *  "format markers" begin with a special character (normally '\'), and
 *  continue until a whitespace character
 */
extern char forminit;
/*
 *  "bar codes" begin with a special character (normally '|'), and have
 *  one additional character immediately following
 */
extern char barchar;
extern char *barcodes;
/*
 *  do de-capitalization processing flag
 */
extern int do_decap;
/*
 *  ambiguity marker character -- must not be alphabetic
 *  (declared with an explicit int; the old "extern ambigchar;" form relied
 *  on implicit int, which C99 and later no longer accept)
 */
extern int ambigchar;
/*
 *  decomposition marker character -- must not be alphabetic
 *  (explicit int, same type as the former implicit-int declaration)
 */
extern int decomp_char;

/***************************************************************************
 * NAME
 *    addchar
 * ARGUMENTS
 *    ch    - character to add
 *    pp    - pointer to current location in buffer
 *    pend  - address of pointer to end of buffer (the last byte, which is
 *            reserved for the terminating NUL)
 *    pbuf  - address of pointer to allocated buffer
 *    psize - pointer to size of allocated buffer (zero while the text is
 *            still in the caller's own, non-allocated buffer)
 *    pword - address of pointer to buffer
 * DESCRIPTION
 *    Add a character to a buffer, allocating more space as needed.
 *    *pend, *pbuf, *psize, and *pword may be changed as a side-effect.
 *    When the buffer is full, its contents are copied with strcpy() into a
 *    larger block obtained from myalloc(); the old block is released with
 *    myfree() only if it had itself been allocated (*psize was nonzero).
 *    The buffer is always left NUL-terminated after the added character, so
 *    the later strlen()/strcpy() calls on it do not depend on myalloc()
 *    returning zero-filled memory.
 * RETURN VALUE
 *    pointer to current location in buffer (updated value of pp)
 */
static char *addchar(ch, pp, pend, pbuf, psize, pword)
int ch;
char *pp;
char **pend;
char **pbuf;
unsigned *psize;
char **pword;
{
unsigned oldsize;

if (pp >= *pend)
    {
    /*
     *  out of room: move the text to a block BUFSIZE bytes larger
     *  (the first allocation is twice the size of the caller's buffer)
     */
    oldsize = *psize;
    *psize = oldsize ? oldsize+BUFSIZE : 2*BUFSIZE;
    *pbuf = strcpy(myalloc(*psize), *pword);
    if (oldsize)
	myfree( *pword );	/* old block came from myalloc() */
    *pword = *pbuf;
    pp = *pbuf + strlen(*pbuf);
    *pend = *pbuf + *psize - 1;
    }
*pp++ = ch;
/*
 *  pp was below *pend before the store, so pp is now at most *pend and
 *  writing the terminator stays inside the buffer
 */
*pp = '\0';
return(pp);
}

/***************************************************************************
 * NAME
 *    decapital
 * ARGUMENTS
 *    wtp  - pointer to word_template structure
 *    size - size of memory chunk already allocated for wtp->orig_word
 *           (zero means wtp->orig_word is not in allocated memory yet)
 * DESCRIPTION
 *    Work out how the word in wtp->orig_word is capitalized, make the word
 *    all lowercase in place, and leave wtp->orig_word in allocated memory.
 *    If do_decap is FALSE the word is not examined or changed, and NOCAP
 *    is returned.
 *
 *    Capitalization of individual letters is recorded in a bit field in
 *    which 4 stands for the first alphabetic character, 8 for the second,
 *    and so on until the mask would turn negative, after which further
 *    letters are not recorded.
 * RETURN VALUE
 *    Capitalization flag for the word: ALLCAP if every alphabetic character
 *    is upper case; otherwise, if incap is set and the bit field is greater
 *    than 4, the bit field itself; otherwise INITCAP if the first alphabetic
 *    character is upper case; otherwise NOCAP.
 */
static int decapital(wtp, size)
struct word_template *wtp;
unsigned size;
{
char *text;
register char *cp;
short letters, capitals, result, capbits, thisbit;

text = wtp->orig_word;
result = NOCAP;				/* until shown otherwise */

if (do_decap)
    {
    /*
     *  one pass to count the letters and the capitals among them, and to
     *  build the bit field of capitalized positions
     */
    letters = 0;
    capitals = 0;
    capbits = 0;
    thisbit = 4;
    cp = text;
    while (*cp != NUL)
	{
	if (myisalpha(*cp))
	    {
	    ++letters;
	    if (myisupper(*cp))
		{
		++capitals;
		capbits |= thisbit;
		}
	    thisbit <<= 1;		/* one bit position per letter */
	    if ( thisbit < 0 )
		thisbit = 0;		/* ran out of bits */
	    }
	++cp;
	}
    /*
     *  classify the word
     */
    if ((letters > 0) && (capitals == letters))
	result = ALLCAP;
    else if ( incap && capbits > 4 )
	result = capbits;		/* more than just the first letter */
    else
	{
	cp = text;			/* look at the first letter only */
	while ((*cp != NUL) && !myisalpha(*cp))
	    ++cp;
	if (myisupper(*cp))
	    result = INITCAP;
	}
    /*
     *  fold the whole word to lower case
     */
    cp = text;
    while (*cp != NUL)
	{
	*cp = mytolower(*cp);
	++cp;
	}
    }
/*
 *  make sure wtp->orig_word ends up in allocated memory
 */
if (size == 0)
    wtp->orig_word = strcpy(myalloc((unsigned)strlen(text)+1),text);
else					/* release unused allocated space */
    wtp->orig_word = myshrink(wtp->orig_word /*,size*/ );
return( result );
}

/***************************************************************************
 * NAME
 *    in_strlist
 * ARGUMENTS
 *    p     - pointer to the string to look for
 *    strlp - pointer to the head of the strlist linked list (may be NULL)
 * DESCRIPTION
 *    Walk the list along the slink pointers, comparing p with each node's
 *    stri string by strcmp().
 * RETURN VALUE
 *    TRUE if some node holds a string equal to p, FALSE otherwise
 *    (including when the list is empty)
 */
static int in_strlist(p, strlp)
char *p;
struct strlist *strlp;
{
struct strlist *node;

node = strlp;
while (node != (struct strlist *)NULL)
    {
    if (strcmp(p, node->stri) == 0)
	return(TRUE);
    node = node->slink;
    }
return(FALSE);
}

/***************************************************************************
 * NAME
 *    fill_template
 * ARGUMENTS
 *    infp - pointer to input FILE
 * DESCRIPTION
 *    Parse input text, filling this_word for each word and its context.
 *    Each call consumes input in three phases:
 *      1. everything before the first word formation character (format
 *         markers, the text of unwanted fields, bar codes, punctuation,
 *         whitespace) is collected into this_word.format;
 *      2. the word itself is collected into this_word.orig_word and handed
 *         to decapital(), whose result is stored in this_word.capital;
 *      3. trailing non-alphabetic characters and bar codes, and then
 *         trailing whitespace, are collected into this_word.non_alpha.
 *    Each phase starts out in the local buffer[]; addchar() moves the text
 *    into allocated memory if it outgrows that buffer.  A field for which
 *    nothing was collected is left NULL.  this_word.word is set to NULL and
 *    not otherwise touched here.
 * RETURN VALUE
 *    pointer to statically allocated word_template structure, or NULL if EOF
 *    was reached before anything at all was collected
 */
static struct word_template *fill_template(infp)
FILE *infp;
{
register int nextc;
register char *p;
char *mybuf;
unsigned mysize;
char buffer[BUFSIZE];
static struct word_template this_word;
char *endbuf;
char *stdfmt;
int c;

/*
 *  start every call with an empty template
 */
this_word.format    = (char *)NULL;
this_word.non_alpha = (char *)NULL;
this_word.capital   = NOCAP;
this_word.orig_word = (char *)NULL;
this_word.word      = (char *)NULL;
/*
 *  check for end of file
 */
if (feof(infp))
    return((struct word_template *)NULL);
/*
 *  read until an alphabetic character or format marker
 *
 *  buffer is zeroed and endbuf points at its last byte, so the text stays
 *  NUL-terminated for as long as it lives in buffer; mysize stays zero until
 *  addchar() has had to allocate
 */
bzero(buffer,BUFSIZE);
mybuf = (char *)NULL;
mysize = 0;
for ( this_word.format = buffer, p = buffer, endbuf = buffer + BUFSIZE - 1 ;;)
    {
    if ((nextc = getc(infp)) == EOF)
	{
	if (p != buffer)
	    goto eof_format;	/* return what has been collected so far */
	else
	    return((struct word_template *)NULL);
	}
    if ((forminit != NUL) && (nextc == forminit))
	{
	/*
	 *  collect the standard format marker itself
	 *  (it ends at whitespace or at another forminit character)
	 */
	for (;;)
	    {
	    p = addchar(nextc,p, &endbuf, &mybuf, &mysize, &this_word.format);
	    if ((nextc = getc(infp)) == EOF)
		goto eof_format;
	    if (myisspace(nextc) || (nextc == forminit))
		break;
	    }
	/*
	 *  check the std fmt marker against the wanted/unwanted lists
	 *  (stdfmt is the text from the last forminit collected so far)
	 */
	stdfmt = strrchr(this_word.format,forminit);
	if (    ((incl_stdfmt != (struct strlist *)NULL) &&
			!in_strlist(stdfmt,incl_stdfmt) ) ||
		((excl_stdfmt != (struct strlist *)NULL) &&
			in_strlist(stdfmt,excl_stdfmt) ) )
	    {
	    /*
	     *  skip this field by reading until next std fmt marker
	     *  (the skipped text is still appended to this_word.format)
	     */
	    while (nextc != forminit)
		{
		p = addchar(nextc,p,&endbuf,&mybuf,&mysize,&this_word.format);
		if ((nextc = getc(infp)) == EOF)
		    {
		    /*
		     *  eof_format is also reached by goto from the places
		     *  above and below that hit EOF while collecting the
		     *  format field: finish the field and return it with
		     *  no word
		     */
eof_format:         if (this_word.format == buffer)
			this_word.format =              /* allocate space */
			    strcpy( myalloc((unsigned)strlen(buffer)+1),
						buffer);
		    else                /* release unused allocated space */
			this_word.format = myshrink(this_word.format /*,mysize*/ );
		    return( &this_word );	/* we do have something... */
		    }
		}
	    /* leave the next field's forminit for the next pass of the loop */
	    ungetc(nextc,infp);
	    }
	else
	    /*
	     *  wanted field: keep the character that ended the marker.
	     *  NOTE(review): if the marker was ended by another forminit
	     *  rather than by whitespace, that forminit is appended here and
	     *  the characters after it are not collected as a marker --
	     *  confirm this is intended.
	     */
	    p = addchar(nextc,p, &endbuf, &mybuf, &mysize, &this_word.format);

	continue;               /* skip right to beginning of for loop */
	}
    if ((barchar != NUL) && (nextc == barchar))
	{
	if ((nextc = getc(infp)) == EOF)
	    {
	    p = addchar(barchar,p,&endbuf,&mybuf,&mysize,&this_word.format);
	    goto eof_format;
	    }
	if (strchr(barcodes,nextc) != (char *)NULL)
	    {                   /* put the "barcode" into the format field */
	    p = addchar(barchar,p,&endbuf,&mybuf,&mysize,&this_word.format);
	    p = addchar(nextc, p,&endbuf,&mybuf,&mysize,&this_word.format);
	    continue;           /* skip right to beginning of for loop */
	    }
	else
	    {                   /* put the "barchar" into the format field */
	    p = addchar(barchar,p,&endbuf,&mybuf,&mysize,&this_word.format);
	    }                   /* next char is fair game for alphabetic */
	}
    /*
     *  NOTE(review): strchr() also matches the terminating NUL of alpha, so
     *  a NUL byte in the input counts as a word formation character here
     */
    if (strchr(alpha,nextc) != (char *)NULL)
	break;                          /* found something promising */
    else
	p = addchar(nextc, p, &endbuf, &mybuf, &mysize, &this_word.format);
    }
/*
 *  finish fixing standard format markers and leading punctuation
 */
if (p == buffer)
    this_word.format = (char *)NULL;
else
    {                           /* this_word.format has something in it */
    if (this_word.format == buffer)
	this_word.format =      /* allocate space */
		strcpy(myalloc((unsigned)strlen(buffer)+1),buffer);
    else
	this_word.format = myshrink(this_word.format /*,mysize*/ );
    }
/*
 *  now get the word (nextc is the first character)
 */
bzero(buffer,BUFSIZE);
mybuf = (char *)NULL;
mysize = 0;
for ( this_word.orig_word = buffer, p = buffer, endbuf = buffer+BUFSIZE-1 ;;)
    {
    /*
     *  collect the word itself
     */
    p = addchar(nextc, p, &endbuf, &mybuf, &mysize, &this_word.orig_word);
    if ((nextc = getc(infp)) == EOF)
	{
	/* the word runs up to EOF: there is no trailing material */
	this_word.capital = decapital(&this_word, mysize);
	return( &this_word );
	}
			/* Break if a bar code is found */
    if ((barchar != NUL) && (nextc == barchar))
	{
	/* peek one character ahead without consuming it */
	c = getc(infp);                         /* Get next char */
	ungetc(c,infp);                         /* Push it back */
	if (strchr(barcodes,c) != (char *)NULL)  /* If bar code */
	    break;
	}
			/* Break if a non word formation char is found */
    if (strchr(alpha,nextc) == (char *)NULL)
	break;
    }
/*
 *  mysize is nonzero only if addchar() moved the word to allocated memory
 */
this_word.capital = decapital(&this_word, mysize);
/*
 *  now we have the word, get trailing punctuation, bar codes, and whitespace
 *  (nextc is the character that ended the word)
 */
bzero(buffer,BUFSIZE);
mybuf = (char *)NULL;
mysize = 0;
for ( this_word.non_alpha = buffer, p = buffer, endbuf = buffer+BUFSIZE-1 ;;)
    {                           /* If barcode, collect it */
    if ((barchar != NUL) && (nextc == barchar))
	{
	/*
	 *  on EOF here nextc still holds barchar, so the ungetc() after the
	 *  loops pushes it back for the next call
	 */
	if ((c = getc(infp)) == EOF)	/* Get next char */
	    break;
	if (strchr(barcodes,c) != (char *)NULL)
	    {				/* If bar code, collect it */
	    p = addchar(nextc,p,&endbuf,&mybuf, &mysize, &this_word.non_alpha);
	    p = addchar(c,    p,&endbuf,&mybuf, &mysize, &this_word.non_alpha);
	    if ((nextc = getc(infp)) == EOF)
		break;
	    continue;			/* may have another bar code */
	    }
	else				/* Else (not bar code) */
	    ungetc(c,infp);		/* Push it back */
	}
    if (strchr(alpha,nextc) == (char *)NULL)
	{				/* If non-alphabetic, collect it */
	if (myisspace(nextc))
	    break;			/* whitespace is handled below */
	p = addchar(nextc, p, &endbuf, &mybuf, &mysize, &this_word.non_alpha);
	if ((nextc = getc(infp)) == EOF)
	    break;
	}
    else                        /* Else (alphabetic) break */
	break;
    }
				/* Collect trailing whitespace */
while (myisspace(nextc))
    {
    p = addchar(nextc, p, &endbuf, &mybuf, &mysize, &this_word.non_alpha);
    if ((nextc = getc(infp)) == EOF)
	break;
    }

if (nextc != EOF)               /* Unget next char for next time */
    ungetc(nextc,infp);

if (p == buffer)
    this_word.non_alpha = (char *)NULL;
else
    {                           /* this_word.non_alpha has something in it */
    if (this_word.non_alpha == buffer)
	this_word.non_alpha = mystrdup(buffer);		/* allocate space */
    else
	this_word.non_alpha = myshrink(this_word.non_alpha /*,mysize*/);
    }
return( &this_word );
} /* end fill_template() */

/***************************************************************************
 * NAME
 *    textin
 * ARGUMENTS
 *    infp - pointer to input FILE
 * DESCRIPTION
 *    Read a word of text into a word_template structure.  The next word and
 *    its surrounding material are obtained from fill_template() and copied
 *    into a structure newly allocated with myalloc().  If there is a word,
 *    the orthography changes in ortho_chg are applied to it by apply_cc(),
 *    the result is stored in the word field, and num_words is incremented.
 *    If there is no word (only leading material was found before EOF), the
 *    word field is NULL.
 *
 *    At end of input, if standard output is not a terminal, the number of
 *    words processed is reported on stderr.
 * RETURN VALUE
 *    pointer to dynamically allocated word_template structure, or NULL if EOF
 */
struct word_template *textin(infp)
FILE *infp;
{
register struct word_template *wtp, *w;

wtp = fill_template(infp);
if (wtp == (struct word_template *)NULL)
    {
    if (!isatty(fileno(stdout))) /* If log file, put word count on screen */
	fprintf(stderr, "\nINPUT: %d word%s processed.\n",
			num_words, (num_words==1) ? "" : "s" );
    return((struct word_template *)NULL);
    }

w = (struct word_template *)myalloc( sizeof(struct word_template) );
w->format    = wtp->format;
w->non_alpha = wtp->non_alpha;
w->capital   = wtp->capital;
w->orig_word = wtp->orig_word;
/*
 *  fill_template() leaves its word field NULL; copying it here keeps
 *  w->word defined even when there is no word to convert
 */
w->word      = wtp->word;
if (w->orig_word != (char *)NULL)
    {
    /*
     *  apply orthography change to the input word
     */
    w->word = apply_cc(w->orig_word, ortho_chg );
    ++num_words;
    }
return( w );
}

#ifdef SPANISH
#define USING_AS_ALPHA "Usando los siguientes como alfabeticos:"
#define SPACE "espacio"
#define DELETE "delete"
#define CONTROL "CTRL"
#else
#define USING_AS_ALPHA "Using the following as word-formation characters:"
#define SPACE "space"
#define DELETE "delete"
#define CONTROL "CTRL"
#endif

/***************************************************************************
 * NAME
 *    get_fields
 * ARGUMENTS
 *    cp    - pointer to line containing fields
 *    strlp - address of the pointer to the head of the string list
 * DESCRIPTION
 *    Step through the words on the line with getwd().  For every word that
 *    begins with a backslash, allocate a strlist node holding a copy of the
 *    word and push it onto the front of *strlp, so the list ends up in the
 *    reverse of the order on the line.  Other words are skipped.
 *    (Presumably getwd() NUL-terminates the word at cp and returns the start
 *    of the next word -- confirm in getwd.c.)
 * RETURN VALUE
 *    none
 */
static void get_fields(cp, strlp)
char *cp;
struct strlist **strlp;
{
char *next;
struct strlist *node;

next = getwd(cp);
while (*cp != NUL)
    {
    if (*cp == '\\')
	{
	/* a standard format marker: link a copy in at the head */
	node = (struct strlist *)myalloc( sizeof(struct strlist));
	node->stri = mystrdup( cp );
	node->slink = *strlp;
	*strlp = node;
	}
    cp = next;			/* on to the following word */
    next = getwd(cp);
    }
} /* end get_fields */

/***************************************************************************
 * NAME
 *    setup_textin
 * ARGUMENTS
 *    recp - pointer to record loaded by get_record(), or NULL
 * DESCRIPTION
 *    Set the control variables for text input from the record pointed to by
 *    recp, or if recp is NULL (or the record is empty), set the default
 *    values.
 *
 *    The record is a sequence of entries ending at EOR.  Each entry is a
 *    one-character code (see text_codes in load_intx_ctl_file()) followed
 *    by the NUL-terminated contents of the field.  Problems with a field
 *    are reported on standard output and the field is ignored.
 *
 *    The word formation characters start out as the ASCII letters, and are
 *    extended by \wfc and \luwfc fields.  The final set is listed on stderr
 *    and stored in alpha.  barcodes is replaced only if a \barcodes field
 *    supplied at least one character; at most sizeof(bars)-1 bar code
 *    characters are kept.
 * RETURN VALUE
 *    none
 */
static void setup_textin(recp)
char *recp;
{
register char *rp;              /* internal record pointer */
char *end;
char wordform[256];		/* pretend to support 8-bit characters */
register char *ap;              /* alphabetics pointer */
int k;
int code;
struct change_list *tail, *cc;
char bars[256];                 /* store barcode characters */
unsigned room;			/* space left in bars[] */
short bars_full;		/* overflow of bars[] already reported */
static char errhead[] = "\nSETUP TEXT: ";
short seen_ambig, seen_barchar, seen_dsc, seen_format;
/*
 *  initialize the lower-upper case lists.
 */
init_luwfc();
/*
 *  set the default alphabetic characters
 */
for ( ap = wordform, k = 1 ; k < 128 ; ++k )
    {
    if (isalpha(k))
	*ap++ = k;
    }
*ap = '\0';                     /* terminate the list */
/*
 *  more initialization
 */
ortho_chg = tail = (struct change_list *)NULL;
incl_stdfmt = excl_stdfmt = (struct strlist *)NULL;
bzero(bars,256);
bars_full = 0;
seen_ambig = seen_barchar = seen_dsc = seen_format = 0;
/*
 *  get the information from the record
 */
for ( rp = recp ; (rp != (char *)NULL) && (*rp != EOR) ; )
    {
    code = *rp++;		/* grab the table code */
    rp = skipwhite(rp);         /* skip following whitespace */
    switch (code)
	{
	case 'A':		/* alphabetic (word formation) characters */
	    if (*rp == NUL)
		printf("%sEmpty word formation character field", errhead );
	    else
		{
		while (*rp != NUL)
		    {
		    k = (*rp++) & 0377;
		    /* skipping characters already listed keeps wordform[]
		       from being overrun by this loop */
		    if (    !myisspace(k) &&
			    (strchr(wordform,k)==(char *)NULL) )
			{
			*ap++ = k;	/* add alphabetic character */
			*ap = NUL;
			}
		    }
		}
	    break;

	case 'K':               /* No internal recapitalization */
	    incap = FALSE;
	    break;

	case 'L':		/* lower-upper word formation characters */
	    ap = add_luwfc(rp, ap);
	    break;

	case 'N':               /* definition of include field */
	    if (*rp == NUL)
		printf("%sEmpty include field", errhead );
	    else if (excl_stdfmt != (struct strlist *)NULL)
		printf("%sIgnoring include field following an exclude field",
					errhead );
	    else
		{
		/* remember the end of the entry before the field is parsed */
		end = rp + strlen(rp);
		get_fields(rp, &incl_stdfmt);
		rp = end;
		}
	    break;

	case 'X':		/* definition of exclude field */
	    if (*rp == NUL)
		printf("%sEmpty exclude field", errhead );
	    else if (incl_stdfmt != (struct strlist *)NULL)
		printf("%sIgnoring exclude field following an include field",
					errhead );
	    else
		{
		end = rp + strlen(rp);
		get_fields(rp, &excl_stdfmt);
		rp = end;
		}
	    break;

	case 'C':		/* orthography change */
	    end = rp + strlen(rp);
	    cc = parse_change(rp);
	    if (cc != (struct change_list *)NULL)
		{               /* link change to end of list */
		if (ortho_chg == (struct change_list *)NULL)
		    ortho_chg = cc;
		else
		    tail->cl_link = cc;
		tail = cc;
		}
	    rp = end;
	    break;

	case 'S':               /* string class definition */
	    end = rp + strlen(rp);
	    add_scl(rp);	/* load the string class definition */
	    rp = end;
	    break;

	case 'a':		/* format character (\format) */
	    if (seen_format++)
		printf("%sFormat field already seen - ignoring this one",
					errhead );
	    else
		forminit = *rp;         /* NUL => no format markers */
	    break;

	case 'b':		/* bar character (\barchar) */
	    if (seen_barchar++)
		printf(
		    "%sBar character field already seen - ignoring this one",
					errhead );
	    else
		barchar = *rp;		/* NUL => no bar code formatting */
	    break;

	case 'c':		/* bar codes (\barcodes) */
	    if (*rp == NUL)
		printf("%sEmpty bar codes field", errhead );
	    else
		{
		while (*rp != NUL)
		    {
		    end = getwd(rp);
		    /*
		     *  keep adding characters, but never more than bars[]
		     *  can hold along with its terminating NUL
		     */
		    room = (unsigned)(sizeof(bars) - 1 - strlen(bars));
		    if (strlen(rp) > room)
			{
			if (!bars_full)
			    printf(
				"%sToo many bar codes - ignoring the excess",
					errhead );
			bars_full = 1;
			}
		    strncat(bars, rp, room);
		    rp = end;
		    }
		}
	    break;

	case 'd':		/* ambiguity marker character (\ambig) */
	    if (*rp == NUL)
		printf("%sEmpty ambiguity marker field - using '%c'",
					errhead, ambigchar );
	    if (seen_ambig++)
		printf("%sAmbiguity marker already seen - ignoring this one",
					errhead );
	    else if (*rp != NUL)
		ambigchar = *rp;	/* have to have one defined! */
	    break;

	case 'e':		/* morpheme decomposition separation char */
	    if (*rp == NUL)
		printf("%sEmpty decomposition separator field - using '%c'",
					errhead, decomp_char );
	    if (seen_dsc++)
		printf(
		 "%sDecomposition separator already seen - ignoring this one",
					errhead );
	    else if (*rp != NUL)
		decomp_char = *rp;	/* have to have one defined! */
	    break;

	case 'f':		/* do no capitalization processing */
	    do_decap = FALSE;
	    break;

	default:		/* unrecognized code: skip the entry */
	    break;

	} /* end switch */
    while (*rp++ != NUL)	/* skip rest of this entry in the record */
	;
    } /* end for */
/*
 *  show the alphabetic characters
 */
fflush(stdout);
fprintf(stderr, "\n\t%s\n\t", USING_AS_ALPHA );
for ( ap = wordform ; *ap != NUL ; )
    {
    k = *ap++ & 0377;
    if (k < ' ')
	fprintf(stderr, "<%s/%c>", CONTROL, k | 0100 );
    else if (k == ' ')
	fprintf(stderr, "<%s>", SPACE);
    else if (k < 0177)
	fprintf(stderr, "%c", k );
    else if (k == 0177)
	fprintf(stderr, "<%s>", DELETE );
#ifdef GENERIC_TERMINAL
    else if (k < 0240)
	fprintf(stderr, "<0x80+%s/%c>", CONTROL, (k & 0177) | 0100 );
    else if (k == 0240)
	fprintf(stderr, "<0x80+%s>", SPACE);
    else if (k < 0377)
	fprintf(stderr, "<0x80+%c>", k & 0177 );
    else if (k == 0377)
	fprintf(stderr, "<0x80+%s>", DELETE);
#else
    else
	fprintf(stderr, "%c", k );
#endif /* GENERIC_TERMINAL */
    }
fprintf(stderr,"\n");
/*
 *  save the word formation characters in alpha
 */
alpha = myalloc( (unsigned)strlen(wordform)+1 );
strcpy(alpha,wordform);
/*
 *  handle special lower/upper pairs
 */
set_luwfc();
/*
 *  set the bar codes if any were specified
 */
if (bars[0] != NUL)
    barcodes = strcpy(myalloc((unsigned)strlen(bars)+1),bars);
}

/*****************************************************************************
 * NAME
 *    load_intx_ctl_file
 * ARGUMENTS
 *    fname - filename of text input control file, or "-" for none
 * DESCRIPTION
 *    Load a text input control file into memory.  The file is opened with
 *    ufopen(), one record is read from it with get_record() using the
 *    text_codes table, and the record is handed to setup_textin() and then
 *    freed.  The special filename "-" skips the file and hands setup_textin()
 *    an empty record, which makes it set the default values.
 *
 *    NOTE(review): the value returned by ufopen() is used without a check;
 *    confirm in ufopen.c that it cannot return NULL.
 * RETURN VALUE
 *    none
 */
void load_intx_ctl_file( fname )
char *fname;
{
/*
 *  text input control code table: each field marker is followed by the
 *  one-character code that setup_textin() switches on
 */
static struct code_table text_codes =
    {
    "\\wfc\0A\0\\luwfc\0L\0\\noincap\0K\0\\incl\0N\0\\excl\0X\0\\ch\0C\0\
\\scl\0S\0\\format\0a\0\\barchar\0b\0\\barcodes\0c\0\\ambig\0d\0\\dsc\0e\0\
\\nocap\0f",
    13
    };
FILE *ctl_fp;
char *record;
int rec_read;
char no_record[2];

if (strcmp(fname,"-") != 0)
    {
    /*
     *  a real control file: read its record and set things up from it
     */
    ctl_fp = ufopen(fname, "r");
    init_record(ctl_fp, (char *)NULL, comment_char);
    record = get_record(ctl_fp, (char *)EOF, &rec_read, &text_codes);
    if (record != (char *)NULL)
	{
	setup_textin(record);	/* set values for wfc, scl, incl_stdfmt, etc. */
	free_record(record);
	}
    fclose(ctl_fp);
    return;
    }
/*
 *  special filename "-" means no text input control file
 */
no_record[0] = EOR;
no_record[1] = NUL;
setup_textin(no_record);
}
