/*      DTBIN.C - read the next record from an AMPLE output file
 ***************************************************************************
 *
 *      void init_dtbin(infp,ochg,ambig,decomp)
 *      FILE *infp;
 *      struct change_list *ochg;
 *      int ambig;
 *      int decomp;
 *
 *      struct word_template *dtbin()
 *
 *      void free_word( word )
 *      struct word_template *word;
 *
 ***************************************************************************
 *	EDIT HISTORY
 *	 5-MAR-82	D. Weber/Bob Kasper
 *	19-MAR-85	hab/djw
 *	23-Sep-85	SRMc
 *	31-Jul-86	hab
 *	30-Mar-88	hab - no ctrl-x
 *       2-May-88       SRMc - fflush(stdout) before writing to stderr
 *      23-May-88       SRMc - implement code_table structure
 *       2-Jun-88       SRMc - remove new_word[256] field from template
 *                              structure
 *      28-Jul-88       SRMc - replace ssalloc() with malloc() and realloc()
 *       6-Sep-88       SRMc - fix for \a being first field rather than \w
 *                           - replace '%' with ambigchar
 *                           - bunches of bug fixes
 *      28-Sep-88       SRMc - always allocate memory for this_word->orig_word
 *                           - use \w field contents if it exists
 *      21-Oct-88       SRMc - remove register from function parameter
 *                              declarations
 *                           - reorganize the file header comments
 *      24-Oct-88       SRMc - allow ambiguity count of 0 (zero) for failures
 *      26-Oct-88       SRMc - modify decode() for new encoded() in DTBOUT.C
 *      10-Nov-88       SRMc - replace free() with myfree()
 *      20-May-89       SRMc - revised for STAMP to remove external global
 *                              variables, and to include "template.h"
 *                           - added init_dtbin()
 *                           - debugged decode()
 *      13-Jul-89       hab  - de-"lint" the source
 * 1.0j  9-Mar-90 ALB Set up for sentence transfer of multiple words
 *                          Allocate this_word, make separate free_word()
 * 1.1b 29-Jun-90 BK/ALB Fix for portability to MAC, add string.h
 *	17-Jan-91	SRMc - add extern declarations for atoi() and
 *				free_record()
 *			     - add #ifdef BSD for <strings.h>
 *			     - change index() to strchr() throughout
 *			     - add ANSI-fied function prototypes
 *	19-Jan-91	SRMc - handle \cat and \fd fields output by AMPLE 1.6
 *	 7-Feb-91	SRMc - fill in ->orig_word only if \w field exists
 *			     - fill in ->word only from failure in \a field
 *			     - eliminate reconstruct_word() - no longer used
 *	11-Mar-91	SRMc - remove #define strchr -- it's in OPACLIB.H
 *	 3-Jan-92	SRMc - change argument list for apply_cc()
 ***************************************************************************
 * Copyright 1988, 1992 by the Summer Institute of Linguistics, Inc.
 * All rights reserved.
 */
#include <stdio.h>
#ifdef BSD
#include <strings.h>
#else
#include <string.h>
#endif
#include "opaclib.h"
#include "template.h"
#include "change.h"
#include "codetab.h"
#include "strlist.h"

/*
 *  P(s) wraps the argument list of each prototype below.  When __STDC__ is
 *  defined it expands to the full argument list; otherwise it expands to an
 *  empty "()" so that the same declarations are accepted by compilers that
 *  do not understand prototypes.
 */
#ifdef __STDC__
#define P(s) s
#else
#define P(s) ()
#endif

/* standard library functions (atoi is not redeclared when DJGPP is defined) */
#ifndef DJGPP
extern int atoi P((char *string));
#endif

/* change.c */
extern char *apply_cc P((char *buf , struct change_list *cc ));

/* myallo.c */
extern char *mystrdup P((char *str ));
extern void myfree P((char *s ));

/* getwd.c */
extern char *skipwhite P((char *cp ));

/* record.c */
extern void init_record P((FILE *infp , char *rec_mark , int comment_char ));
extern char *get_record P((FILE *infp , char *rec_mark , int *rec_read ,
			   struct code_table *code_tab ));
extern void free_record P((char *rp ));

/* strlist.c */
extern void free_strlist P((struct strlist *list ));

/* zapnl.c */
extern char *zapnl P((char *s ));

/* P() is only needed for the declarations above */
#undef P

/***************************************************************************
 *   Data internal to dtbin()
 */
/*
 *  code table, record marker, and record counter for feeding to get_record()
 *
 *  The code table string holds eight marker/code pairs, each element
 *  terminated by a NUL:
 *	\a -> A    \c -> C    \d -> D    \f -> F
 *	\n -> N    \w -> W    \cat -> T  \fd -> E
 *  The single-letter codes are the ones dtbin() switches on.  The second
 *  initializer (8) matches the number of pairs in the string.
 *
 *  rec_mark is handed to both init_record() and get_record().
 *  rec_read is passed by address to get_record() and reset to zero by
 *  init_dtbin().
 */
static struct code_table incode_tab =	/* code table for AMPLE output */
    { "\\a\0A\0\\c\0C\0\\d\0D\0\\f\0F\0\\n\0N\0\\w\0W\0\\cat\0T\0\\fd\0E", 8 };
static char *rec_mark = "\\a";
static int rec_read = 0;
/*
 *  word_template buffer for storing the information (address returned)
 *  It is set by dtbin() for each record and is also written by listin(),
 *  which stores the text of a failed analysis in its ->word member.
 */
static struct word_template *this_word;
/*
 *  parameters established by init_dtbin() and used throughout this code
 *  (ortho_chg and decomp_char are stored by init_dtbin() but are otherwise
 *  referenced only by reconstruct_word(), which is compiled out)
 */
static FILE *dtbinfp;			/* input FILE pointer */
static struct change_list *ortho_chg;	/* input orthography change table */
static char ambig_char;			/* marks ambiguities and failures */
static char decomp_char;		/* morpheme decomposition separator */

/***************************************************************************
 * NAME
 *    decode
 * ARGUMENTS
 *    string - pointer to a NUL-terminated string containing an encoded
 *             field; the string is rewritten in place
 * DESCRIPTION
 *    Restores the special graphic characters in an encoded field:
 *      - a backslash followed by b, f, n, r, or t becomes the matching
 *        control character (backspace, formfeed, newline, return, tab)
 *      - a backslash followed by any other character yields that character
 *      - a backslash that is the very last character is dropped
 *      - unescaped newline and tab characters are discarded
 * RETURN VALUE
 *    string itself, whose contents are now decoded (and never longer
 *    than they were)
 */
static char *decode( string )
char *string;
{
char *src = string;		/* next character to examine */
char *dst = string;		/* where the next decoded character goes */
int ch;

while ((ch = *src++) != NUL)
    {
    if ((ch == '\n') || (ch == '\t'))
	continue;			/* raw newlines and tabs are dropped */
    if (ch != '\\')
	{
	*dst++ = ch;			/* ordinary character: copy it */
	continue;
	}
    /*
     *  escape sequence: translate the character after the backslash
     */
    ch = *src++;
    if (ch == NUL)
	break;				/* string ended right after the '\' */
    switch (ch)
	{
	case 'b':	ch = '\b';	break;
	case 'f':	ch = '\f';	break;
	case 'n':	ch = '\n';	break;
	case 'r':	ch = '\r';	break;
	case 't':	ch = '\t';	break;
	default:			break;	/* any other char stands for itself */
	}
    *dst++ = ch;
    }
*dst = NUL;
return( string );
} /* end decode */

#if 0
/***************************************************************************
 * NAME
 *    reconstruct_word
 * ARGUMENTS
 *    decomp - pointer to decomposition string
 * DESCRIPTION
 *    Reconstruct the orthochanged word and original word from a
 *    decomposition string.
 *
 *    this_word->word receives a freshly allocated copy of decomp with
 *    every decomp_char and every '0' character removed.  If
 *    this_word->orig_word is still NULL, it is set to the result of
 *    apply_cc(decomp, ortho_chg) with every decomp_char removed.
 *
 *    NOTE: this function is compiled out by the enclosing #if 0; the edit
 *    history at the top of the file (7-Feb-91) records it as no longer
 *    used.
 * RETURN VALUE
 *    none
 */
static void reconstruct_word(decomp)
char *decomp;
{
register char *w, *d;
char ortho_buf[BUFSIZE];		/* buffer for orthochange */
					/* (never referenced in this function) */

d = decomp;
this_word->word = w = myalloc((unsigned)strlen(d)+1);
while (*d != NUL)
    {
    if ((*d != decomp_char) && (*d != '0'))	/* skip decomp and nulls */
	*w++ = *d++;
    else
	++d;
    }
*w = NUL;
/*
 *  reconstruct the original word
 */
if (this_word->orig_word == (char *)NULL)
    {
    this_word->orig_word = apply_cc(decomp, ortho_chg);
    /* squeeze the decomposition separators out of the result, in place */
    for ( w = d = this_word->orig_word ; *d != NUL ; ++d )
	{
	if (*d != decomp_char)
	    *w++ = *d;
	}
    }
/* terminates whichever string w was last used to build */
*w = NUL;
}
#endif

/***************************************************************************
 * NAME
 *    listin
 * ARGUMENTS
 *    code - 'A' for analysis field, 'D' for decomposition field, 'T' for
 *		category field, or 'E' for feature field
 *    rp   - pointer to contents of field (modified in place: separators
 *		are overwritten with NULs)
 * DESCRIPTION
 *    Build a strlist structure from one field of an analysis database
 *    record.  Writing <amb> for the ambiguity marker given to init_dtbin(),
 *    the field takes one of three forms:
 *
 *	text				single analysis: a one-element list
 *	<amb>n<amb>t1<amb>t2<amb>...	ambiguity (n > 1): one element per
 *					alternative
 *	<amb>0<amb>text<amb>		failure (count of 0 or 1): no list;
 *					for an 'A' field the text is saved
 *					in this_word->word unless that is
 *					already set
 *
 *    Each alternative is pushed onto the front of the list, so the list
 *    holds the alternatives in the reverse of their order in the field.
 *
 *    At most n alternatives are stored, and storing stops as soon as the
 *    field text runs out, so a count that is larger than the number of
 *    alternatives actually present cannot produce extra elements.
 * RETURN VALUE
 *    pointer to created strlist structure, or NULL for a failure
 */
static struct strlist *listin(code,rp)
int code;
char *rp;
{
struct strlist *list;
register struct strlist *slp;
register char *pos;
int n;

list = NULL;
if ((pos = strchr(rp,'\n')) != (char *)NULL)
    *pos = NUL;				/* remove any trailing newline */
if (*rp == ambig_char)
    {
    /* terminate ambiguity count */
    if ((pos = strchr(++rp, ambig_char)) != (char *)NULL)
	*pos++ = NUL;
    n = atoi(rp);
    if (pos != (char *)NULL)		/* advance past ambiguity count */
	rp = pos;
    if (n <= 1)				/* either %0% or %1% for failures */
	{
	if ((pos = strchr( rp, ambig_char)) != (char *)NULL)
	    *pos = NUL;
	if ((code == 'A') && (this_word->word == (char *)NULL))
	    this_word->word = mystrdup( rp );       /* save failure */
	}
    else
	{				/* ambiguity */
	while (n-- > 0)
	    {
	    if ((pos = strchr( rp, ambig_char)) != (char *)NULL)
		*pos++ = NUL;
	    slp = (struct strlist *)myalloc( sizeof(struct strlist));
	    slp->stri = strcpy(myalloc((unsigned)strlen(rp)+1), rp);
	    slp->slink = list;
	    list = slp;
	    /*
	     *  No further separator means this was the last piece of the
	     *  field.  Stop here even if the count claims more; otherwise
	     *  the same piece would be stored again for every remaining
	     *  count.
	     */
	    if (pos == (char *)NULL)
		break;
	    rp = pos;
	    }
	}
    }
else
    {				/* single analysis */
    list = (struct strlist *)myalloc( sizeof(struct strlist));
    list->stri = strcpy(myalloc((unsigned)strlen(rp)+1), rp);
    list->slink = NULL;
    }
return(list);
} /* end listin */

/***************************************************************************
 * NAME
 *    dtbin
 * ARGUMENTS
 *    none
 * DESCRIPTION
 *    Read one record from an analysis database file, storing the
 *    information in a newly allocated word_template struct.  The records
 *    in the analysis database file have these fields in this order:
 *                      \a   = analysis (ambiguities and failures marked)
 *    (optional)        \d   = morpheme decomposition (goes with \a)
 *    (optional)	\cat = final word category
 *    (optional)	\fd  = morpheme feature list (goes with \a)
 *    (optional)        \w   = original word
 *    (if needed)       \f   = preceding format marks
 *    (if needed)       \c   = capitalization
 *    (if needed)       \n   = trailing nonalphabetics (encoded)
 *
 *    Members for fields that do not occur in the record are left NULL
 *    (zero for ->capital).  init_dtbin() must have been called first.
 * RETURN VALUE
 *    pointer to the newly allocated word_template struct, which the caller
 *    releases with free_word(), or NULL when get_record() has no more
 *    records to return
 */
struct word_template *dtbin()
{
char *rp, *rp1;
char *recp;
int code;

/*
 *  read the next record, if there's any left to read
 *  (this is done before anything is allocated, so that nothing is left
 *  behind when there is no record)
 */
if ((recp=get_record(dtbinfp,rec_mark,&rec_read,&incode_tab))==(char *)NULL)
    return((struct word_template *)NULL);
/*
 *  allocate and clear a word structure
 */
this_word = structalloc( word_template );
this_word->word = (char *)NULL;
this_word->orig_word = (char *)NULL;
this_word->format = (char *)NULL;
this_word->non_alpha = (char *)NULL;
this_word->capital   = 0;
this_word->anlist = (struct strlist *)NULL;
this_word->dclist = (struct strlist *)NULL;
this_word->catlist = (struct strlist *)NULL;	/* only set by a \cat field */
this_word->fdlist = (struct strlist *)NULL;	/* only set by a \fd field */
this_word->new_words = (struct strlist *)NULL;
/*
 *  fill in fields according to codes in record
 */
rp = recp;
while (*rp != EOR)
    {
    code = *rp++;		/* field code is first char on line */
    rp = skipwhite(rp);		/* always followed by whitespace */
    rp1 = rp + strlen(rp);	/* point to the end of the field */
    zapnl(rp);			/* remove any trailing '\n's */
    switch (code)               /* fill in this_word according to code */
	{
	case 'A':		/* analysis */
		this_word->anlist = listin(code,rp);
		break;
	case 'D':		/* morpheme decomposition */
		this_word->dclist = listin(code,rp);
		break;
	case 'W':		/* word */
		if (this_word->orig_word != (char *)NULL)
		    myfree(this_word->orig_word);
		this_word->orig_word = mystrdup( rp );
		break;
	case 'C':		/* capitalization */
		this_word->capital = atoi(rp);
		break;
	case 'F':		/* format */
		this_word->format = mystrdup( decode( rp ) );
		break;
	case 'N':		/* non_alphabetics */
		this_word->non_alpha = mystrdup( decode( rp ) );
		break;
	case 'T':		/* category */
		this_word->catlist = listin(code,rp);
		break;
	case 'E':		/* feature list */
		this_word->fdlist = listin(code,rp);
		break;
	default:		/* any other code is ignored */
		break;
	} /* end switch */
    rp = rp1;			/* pass over rest of line */
    if (*rp == NUL)
	++rp;			/* step over the NUL that ends the field */
    } /* end of record */

free_record(recp);              /* release the space */

return( this_word );
}

/***************************************************************************
 * NAME
 *    init_dtbin
 * ARGUMENTS
 *    infp   - input FILE pointer that dtbin() will read records from
 *    ochg   - orthography change to apply to incoming stuff
 *    ambig  - ambiguity marker for \a and \d fields
 *    decomp - decomposition character for \d fields
 * DESCRIPTION
 *    Initialize for subsequent calls to dtbin().  This must be called
 *    once per input file, so that init_record() can be called.
 * RETURN VALUE
 *    none
 */
void init_dtbin(infp,ochg,ambig,decomp)
FILE *infp;
struct change_list *ochg;
int ambig;
int decomp;
{
dtbinfp = infp;			/* save for future use by dtbin */
ortho_chg = ochg;
ambig_char = ambig;
decomp_char = decomp;

rec_read = 0;			/* we'll be ready for first record */

init_record(dtbinfp, rec_mark, NUL);	/* prepare get_record() */
}


/***************************************************************************
 * NAME
 *    free_word
 * ARGUMENTS
 *    word - word_template structure to free (may be NULL, in which case
 *           nothing is done)
 * DESCRIPTION
 *    Free everything in a word_template structure, and the structure.
 *    This covers every member that dtbin() fills in with allocated data:
 *    the four strings and the analysis, decomposition, category, feature
 *    and new_words lists.
 * RETURN VALUE
 *    None
 */
void free_word( word )
struct word_template *word;
{

if ( word == (struct word_template *)NULL )
    return;			/* e.g. the NULL that dtbin() returns at the end */
if ( word->word )
    myfree( word->word );
if ( word->orig_word )
    myfree( word->orig_word );
if ( word->format )
    myfree( word->format );
if ( word->non_alpha )
    myfree( word->non_alpha );
if ( word->anlist )
    free_strlist( word->anlist );
if ( word->dclist )
    free_strlist( word->dclist );
if ( word->catlist )
    free_strlist( word->catlist );
if ( word->fdlist )
    free_strlist( word->fdlist );
if ( word->new_words )
    free_strlist( word->new_words );
myfree( (char *)word );
}
