/*      RECORD.C - read records in standard format database format
 ***************************************************************************
 *
 *	void init_record(infp, rec_mark, comment_char)
 *	FILE *infp;
 *	char *rec_mark;
 *	char comment_char;
 *
 *	char *get_record(infp, rec_mark, rec_read, code_tab)
 *	FILE *infp;
 *	char *rec_mark;
 *	int *rec_read;
 *	struct code_table *code_tab;
 *
 *	void free_record(rp)
 *	char *rp;
 *
 *	static char rec_comment_char;	comment character, set by init_record()
 ***************************************************************************
 *	EDIT HISTORY
 *	15-MAR-83	D. Weber/Bob Kasper
 *       3-MAY-85       br/hab/djw
 *	26-Sep-85	SRMc
 *	30-Oct-86	hab - tab processing bug
 *	24-Mar-88	hab - remove CTRL-X as EOR
 *	19-Apr-88	hab - fix bug in ! processing
 *	 2-May-88	SRMc - add end-of-line comment processing using global
 *				rec_comment_char
 *	23-May-88	SRMc - implement code_table structure
 *	30-May-88	SRMc - move declaration of rec_comment_char to
 *				anroot.c
 *	 3-Jun-88	SRMc - reorder tests in delete_null() to always call
 *				find_code()
 *	14-Jun-88	SRMc - use <ctype.h> macros
 *			     - revise eor_chk() to use ftell()/fseek() rather
 *				than multiple ungetc()'s
 *			     - pass '\n's through in records
 *	28-Jul-88	SRMc - replace ssalloc() with malloc() and realloc()
 *	18-Oct-88	SRMc - prevent dereferencing NULL pointer in eor_chk()
 *	20-Oct-88	SRMc - remove register from function parameter
 *				declarations
 *			     - reorganize the file header comments
 *                           - use explicit lookahead buffer instead of using
 *                              fseek() in eor_chk()
 *                              [fix for braindead Aztec C function]
 *                              this wreaks havoc everywhere, since now a
 *                              standard format database file must be handled
 *                              in its entirety by get_record() and consume()
 *                              before processing another such file
 *                           - consume() always called with EOR, so remove
 *                              that as a parameter
 *                           - rename recfree() to free_record, don't allow
 *                              free(NULL) any more
 *                           - add init_record() as global function, make
 *                              consume() private
 *      10-Nov-88       SRMc - replace free() with myfree()
 *      20-May-89       SRMc - change #include's for shared use by STAMP
 *                           - revised init_record() to include comment_char
 *      13-Jul-89       hab  - de-"lint" the source
 *      27-Jul-89       hab  - add Copyright 1989
 *      01-Aug-89       ab/hab - Define myisspace(), myisdigit(), myispunct()
 *                               to handle 8-bit characters
 * 1.6a 21-Jun-90 BK  Fix up for THINKC on MAC
 * 1.1b 29-Jun-90 BK/ALB Fix for portability to MAC, add string.h
 * 1.1c 29-Jun-90 ALB Fix bug of 8-bit fail in envir, add myisspace
 * 1.6c 20-Jul-90 ALB Fixes to andata.h and record.c from Steve McC
 *	17-Jan-91	SRMc - make free_record() explicitly void
 *			     - make static consume() explicitly void
 *			     - use bzero() or memset() for zero()
 *			     - use strcmp() instead of streq()
 *			     - add ANSI-fied extern function declarations
 *	29-Jan-91	SRMc - merged in AMPLE 1.6f sources
 ***************************************************************************
 * Copyright 1988, 1991 by the Summer Institute of Linguistics, Inc.
 * All rights reserved.
 */
#include <stdio.h>
#include <ctype.h>
#ifndef isascii
#define isascii(x) (!(x & ~0177))
#endif
#ifdef BSD
#include <strings.h>
extern void bzero();
#define zero bzero
#else
#include <string.h>
#ifndef THINK_C
#include <memory.h>
#endif
#define zero(b,n) memset(b,0,n)
#endif

#include "opaclib.h"		/* common constants */
#include "codetab.h"		/* code_table structure */

#ifdef __STDC__
#define P(s) s
#else
#define P(s) ()
#endif

/* record.c */
void init_record P((FILE *infp , char *rec_mark , int comment_char ));
void free_record P((char *rp ));
char *get_record P((FILE *infp , char *rec_mark , int *rec_read , struct code_table *code_tab ));

/* myallo.c */
/* char *myalloc P((unsigned size )); */
void myfree P((char *s ));

#undef P

/*
 * Character that starts an end-of-line comment in an input record.
 * init_record() overwrites it with its comment_char argument, and
 * get_record() discards input from this character up to the next newline.
 */
static char rec_comment_char = '|';	/* comment marker for input records */

/* Constants shared by the routines in this file. */
#define FIELDTERM '\\'		/* begins field markers */
#define MARKERSIZE 80		/* max size for field marker */
#define RECSIZE 400		/* size of memory to allocate in trying
				 * to load a record */

/*
 * Fixed buffer in which get_record() starts building every record.
 * get_record() returns this buffer itself when the record fits, and
 * free_record() deliberately never frees it.
 */
char recspace[RECSIZE];

/*
 * eof_just_found - set TRUE by consume() when it runs into end of file, and
 *                  by get_record() when it hands back the final record or
 *                  hits end of file inside a comment; set FALSE whenever
 *                  get_record() stores a character.  At end of file
 *                  get_record() uses it to choose between returning a last
 *                  record and returning NULL.
 * eor_just_found - set TRUE by eor_chk() when it recognizes a record marker;
 *                  while it is TRUE get_record() skips the marker test at
 *                  the next field boundary and then clears it.
 */
static int eof_just_found = FALSE;
static int eor_just_found = FALSE;
/* tracks how much of the current record buffer get_record() may fill */
static int rsize;
/*
 * Pushed-back input: lookahead points at the next unread character held in
 * lookbuf, or is NULL when nothing is pending.  eor_chk() fills lookbuf and
 * rgetc() drains it before reading from the file again.
 */
static char *lookahead;
static char lookbuf[MARKERSIZE];

/***************************************************************************
 * NAME
 *    copy
 * ARGUMENTS
 *    out - destination buffer
 *    in  - source buffer
 *    n   - how many bytes to transfer
 * DESCRIPTION
 *    Transfer the first n bytes of in to out, one byte at a time, in
 *    ascending address order.
 * RETURN VALUE
 *    address of the byte just after the last one written (out + n)
 */
static char *copy( out, in, n )
char *out, *in;
int n;
{
char *dst = out;
char *src = in;

for ( ; n != 0 ; --n )
    *dst++ = *src++;		/* move one byte, advance both cursors */

return( dst );
}

/***************************************************************************
 * NAME
 *    find_code
 * ARGUMENTS
 *    lp   - pointer to a line of the dictionary record; the line starts
 *           with the field marker, minus its leading FIELDTERM
 *    ctab - pointer to table of field codes.  ctab->ct_table is walked as
 *           ctab->ct_size consecutive pairs of NUL-terminated strings:
 *           a field marker followed by its substitution string.
 * DESCRIPTION
 *    Replace the field marker at the start of the line with its one char
 *    internal form, looking it up in ctab.  The marker characters in the
 *    line are always overwritten with spaces; when the marker is found, the
 *    first character of the line is then set to the first character of the
 *    substitution string.
 *    At most MARKERSIZE-2 marker characters are examined, so that the
 *    rebuilt marker (FIELDTERM + characters + NUL) always fits in the local
 *    buffer; a longer marker is truncated for the lookup.
 * RETURN VALUE
 *    the one char code, or FALSE if the marker is not in the table.
 */
static int find_code(lp, ctab)
char *lp;
struct code_table *ctab;
{
   register char *match, *cp, *rp;
   char *subs, fcode[MARKERSIZE];
   int ncodes;
   int code;

   /* field code in fcode, and replace original code with spaces */
   subs = "";
   cp = fcode;
   *cp++ = FIELDTERM;		/* insert backslash that was lost earlier */
   /* stop one short of the end so the terminating NUL below stays inside
    * fcode[] */
   for (rp = lp;
	(*rp!=NUL) && !myisspace(*rp) && (cp<fcode+MARKERSIZE-1);
        *rp++ = ' ')
      *cp++ = *rp;
   *cp = '\0';
   /* look up fcode in ctab */
   for (match = ctab->ct_table, ncodes = ctab->ct_size ; ncodes; ncodes--) {
      subs = match + strlen(match) + 1;	/* substitution follows marker */
      if (strcmp(match, fcode)==0)
	 break;
      match = subs + strlen(subs) + 1;	/* step to the next pair */
      }

   if (ncodes)  /* code found */
      /* replace first char of line by new code */
      code = *lp = *subs;
   else code = FALSE;
   return(code);

} /* end find_code */

/***************************************************************************
 * NAME
 *    delete_null
 * ARGUMENTS
 *    lp       - pointer to a NUL-terminated record line
 *    count    - pointer to the running byte count for the record; reduced
 *		 by the length of the line (plus its NUL) when the line is
 *		 dropped
 *    code_tab - pointer to code table
 * DESCRIPTION
 *    Decide whether a record line is kept.  The line is dropped when
 *      - it is empty or starts with white space (no field code at all),
 *      - find_code() does not find its field code in code_tab,
 *      - find_code() turned its first character into '!', or
 *      - the field code is immediately followed by the end of the line.
 *    The tests are made in that order and stop at the first one that
 *    applies, so find_code() (which rewrites the start of the line) is only
 *    called for lines that do begin with a field code.
 * RETURN VALUE
 *    lp if the line is to be dropped (so the caller overwrites it),
 *    pointer just past the line's terminating NUL otherwise.
 */
static char *delete_null(lp, count, code_tab)
char *lp;
int *count;
struct code_table *code_tab;
{
char *after_code;

/* find the first character following the field code */
after_code = lp;
while ((*after_code != NUL) && !myisspace(*after_code))
    ++after_code;

/* keep the line only if every reason for dropping it is absent */
if ((after_code != lp) &&
    find_code(lp, code_tab) &&
    (*lp != '!') &&
    (*after_code != NUL))
    return(lp + strlen(lp) + 1);

/* dropped: give the bytes of this line back to the caller's count */
*count -= strlen(lp) + 1;
return(lp);

} /* end delete_null */

/***************************************************************************
 * NAME
 *    rgetc
 * ARGUMENTS
 *    infp     - pointer to the input file stream
 * DESCRIPTION
 *    Fetch the next character of record input.  Characters pushed back
 *    into the lookahead buffer are delivered first; once that buffer is
 *    empty it is switched off and input comes from the file.
 * RETURN VALUE
 *    next character (0..255) from the lookahead buffer, or whatever
 *    getc(infp) returns (which may be EOF)
 */
static int rgetc(infp)
FILE *infp;
{
/* pending pushed-back character: hand it out, masked to 8 bits */
if ((lookahead != (char *)NULL) && (*lookahead != NUL))
    return( *lookahead++ & 0377 );

/* nothing (left) in the lookahead buffer: fall back to the file */
lookahead = NULL;
return( getc(infp) );
}

/***************************************************************************
 * NAME
 *    eor_chk
 * ARGUMENTS
 *    c        - current character (already read by the caller)
 *    infp     - pointer to the input file stream
 *    rec_mark - pointer to the record marker string, whose first character
 *               is taken to be FIELDTERM.  The special value (char *)EOF
 *               means that only end of file ends a record.  NULL means
 *               there is no marker to look for.
 * DESCRIPTION
 *    Compare the input to the record marker in order to find the end of the
 *    record.  A marker is recognized when c is FIELDTERM, the following
 *    input matches the rest of rec_mark, and the character after that is
 *    white space or end of file.
 *    Everything read while checking is pushed back through the lookahead
 *    buffer so that rgetc() delivers it again: on a match the whole marker
 *    including the FIELDTERM, otherwise only the characters after the
 *    FIELDTERM (the caller still holds the FIELDTERM itself in c).
 *    eor_just_found is set TRUE whenever EOR is returned.
 *    A marker too long to be held in the MARKERSIZE pushback buffer is
 *    treated as not matching.
 * RETURN VALUE
 *    EOR if the end of record has been found, c otherwise
 */
static int eor_chk(c, infp, rec_mark)
int c;
FILE *infp;
char *rec_mark;
{
register char *cp;
register int ch;
register char *p;
char buffer[MARKERSIZE];

if (rec_mark == (char *)NULL)
    return(c);			/* no marker: nothing can end the record */

if (rec_mark == (char *)EOF)
    {				/* check for end of file */
    if (c == EOF)
	{
	c = EOR;		/* return End Of Record code */
	eor_just_found = TRUE;
	}
    }
else if (c == FIELDTERM)
    {
    /*
     *  since we have a field code, check to see if it's the one wanted
     *  read as much as needed to verify the field code
     */
    p = buffer;
    *p++ = c;
    cp = rec_mark + 1;		/* skip the FIELDTERM */
    /*
     *  the bound leaves room in buffer[] for the character following the
     *  marker and for the terminating NUL; it is tested before rgetc() so
     *  that no character is consumed without being saved
     */
    while (	(*cp != NUL) &&
		(p < &buffer[MARKERSIZE-2]) &&
		((ch = rgetc(infp)) != EOF) )
	{
	*p++ = ch;
	if ((*cp & 0377) != ch)	/* compare as 8-bit values, like rgetc() */
	    break;
	++cp;			/* read as long as field code matches */
	}
    *p = NUL;
    if (*cp == NUL)
	{
	/* whole marker matched: it must be followed by space or EOF */
	ch = rgetc(infp);
	if (ch != EOF)
	    {
	    *p++ = ch;
	    *p = NUL;
	    }
	if ((ch == EOF) || myisspace(ch))
	    {			/* found a record marker */
	    c = EOR;		/* return End Of Record code */
	    eor_just_found = TRUE;
	    p = buffer;		/* include the FIELDTERM */
	    }
	else
	    p = &buffer[1];	/* ignore the FIELDTERM */
	}
    else
	p = &buffer[1];		/* ignore the FIELDTERM */
    /*
     *  restore the previous input file location
     */
    if (*p)
	lookahead = strcpy(lookbuf, p);
    else
	lookahead = (char *)NULL;
    }
return(c);

} /* end eor_chk */

/***************************************************************************
 * NAME
 *    consume
 * ARGUMENTS
 *    infp     - pointer to input FILE
 *    rec_mark - pointer to record marker string
 * DESCRIPTION
 *    Read and discard input until eor_chk() reports a record marker at the
 *    start of a line, or until end of file.  The position before the first
 *    character read counts as the start of a line.  eof_just_found is set
 *    TRUE when the scan stops on end of file.
 * RETURN VALUE
 *    none
 */
static void consume(infp, rec_mark)
FILE *infp;
char *rec_mark;
{
int ch;
int prev = '\n';		/* pretend a newline came before the input */

while ((ch = rgetc(infp)) != EOF)
    {
    if ((ch == FIELDTERM) && (prev == '\n'))
	{
	/* a field marker at the start of a line: is it the record marker? */
	ch = eor_chk(ch, infp, rec_mark);
	if (ch == EOR)
	    break;
	}
    prev = ch;
    }

if (ch == EOF)
    eof_just_found = TRUE;

} /* end consume */

/***************************************************************************
 * NAME
 *    init_record
 * ARGUMENTS
 *    infp         - pointer to input FILE
 *    rec_mark     - pointer to the record marker string, or NULL to leave
 *                   the input position untouched
 *    comment_char - character that begins an end-of-line comment in the
 *                   records read afterwards
 * DESCRIPTION
 *    Prepare for a series of get_record() calls on infp: record the comment
 *    character, discard any pending lookahead, and (when rec_mark is not
 *    NULL) skip input up to the first record marker.
 * RETURN VALUE
 *    none
 */
void init_record(infp, rec_mark, comment_char)
FILE *infp;
char *rec_mark;
char comment_char;
{
rec_comment_char = comment_char;	/* establish comment marker */
lookahead = (char *)NULL;		/* new file, no lookahead possible */

if (rec_mark == (char *)NULL)
    return;				/* nothing to skip */

consume(infp, rec_mark);		/* skip to the initial record marker */
}

/***************************************************************************
 * NAME
 *    free_record
 * ARGUMENTS
 *    rp - pointer to record buffer, as returned by get_record()
 * DESCRIPTION
 *    Release the space held by a record.  A NULL pointer and the built-in
 *    buffer recspace are both left alone; anything else is handed to
 *    myfree().
 * RETURN VALUE
 *    none
 */
void free_record(rp)
char *rp;
{
if (rp == (char *)NULL)
    return;			/* nothing to release */
if (rp == recspace)
    return;			/* don't free basic space */

myfree( rp );
}

/***************************************************************************
 * NAME
 *    get_record
 * ARGUMENTS
 *    infp     - pointer to input FILE
 *    rec_mark - pointer to the record marker string
 *    rec_read - pointer to the number of records read, including "do not
 *		  load" ones; incremented once for every record consumed
 *    code_tab - pointer to field code table
 * DESCRIPTION
 *    Read data until EOR (or EOF).
 *    The record is built as a sequence of NUL-terminated field lines.  A
 *    line ends where a FIELDTERM follows a newline (or at end of file); each
 *    finished line is passed to delete_null(), which either keeps it or
 *    has it overwritten by the next line.  Text from rec_comment_char to
 *    the end of the line is dropped (the newline itself is kept).  A record
 *    containing a field whose code maps to '!' is skipped entirely and the
 *    next record is read instead.
 *    The record is assembled in recspace; if it does not fit, it is moved
 *    to a buffer from myalloc() that grows in steps of RECSIZE.  One byte
 *    of the buffer is always held in reserve so that the closing EOR can
 *    be stored even when the data fills the buffer exactly.
 * RETURN VALUE
 *    pointer to buffer containing record, terminated by an EOR character
 *    (release it with free_record()), or NULL when end of file is reached
 *    with no further record to return.
 */
char *get_record(infp, rec_mark, rec_read, code_tab)
FILE *infp;
char *rec_mark;
int *rec_read;
struct code_table *code_tab;
{
register char *cp, *lp, *op;	/* cp: next free byte; lp: start of the
				 * current line; op: lp before delete_null */
register int c, lastc;
char *recbuf, *nrec;
int count, get_more;
unsigned lin_len;

new_record:
				/* re-init record space */
zero( recspace, RECSIZE );
lp = cp = recbuf = recspace;
rsize = RECSIZE - 1;		/* usable bytes; the last byte of the
				 * buffer is reserved for the final EOR */
count = 0;
c = NUL;
lastc = '\n';
get_more = TRUE;

for (;;)
    {
				/* while there is room in the buffer,
				 * get a character that is neither EOF
				 * nor the record marker */
				/* if it is a FIELDTERM, check for rec mark */
    while ((count++ < rsize) && get_more )
	{
	if ( ( ((c = rgetc(infp)) == FIELDTERM) && (lastc == '\n')) || 
                (c == EOF))
	    {
	    /* end of a field line: terminate it and decide its fate */
	    *cp++ = '\0';
	    lp = cp = delete_null( op = lp, &count, code_tab );
	    if ((op == lp) && (*lp == '!'))
		{			/* do not load this record */
                if ((c = eor_chk(c, infp, rec_mark)) != EOR)
		    consume( infp, rec_mark);
		*cp = EOR;	/* free record; may be large already */
		free_record(recbuf);
                (*rec_read)++;		/* have read another record */
		goto new_record;
		}
            if (c == EOF)
		get_more = FALSE;
            else if (!eor_just_found && (c = eor_chk(c,infp,rec_mark)) == EOR)
                get_more = FALSE;
            else
                eor_just_found = FALSE;
	    }
	else if (c == rec_comment_char)
            {					/* eliminate comments */
	    while ( ((c = rgetc(infp)) != EOF) && (c != '\n') )
		;
	    if (c == '\n')
		goto storeit;
	    --count;			/* nothing was stored this time */
	    eof_just_found = TRUE;
	    }
	else
	    {
storeit:    lastc = c;
		*cp++ = c;
	    eof_just_found = FALSE;
	    }
	}
    if (c == EOR)
	{		/* successful termination */
	*cp = c;	/* lands in the reserved byte at worst */
        (*rec_read)++;		/* have read another record */
	return(recbuf);
	}
    else if (c == EOF)
	{
        if (!eof_just_found)
           {			/* process the last record */
  	   *cp = EOR;		/* lands in the reserved byte at worst */
           (*rec_read)++;		/* have read another record */
           eof_just_found = TRUE;
	   return(recbuf);
           }
        else
           {		/* flush record */
  	   free_record(recbuf);
	   return(NULL);
           }
	}
    else
	{		/* record is too big, get more space */
			/* +1 keeps the reserved terminator byte */
	nrec = myalloc( (unsigned) rsize+RECSIZE+1 );
	lin_len = (unsigned) (cp - lp);
	cp = copy( nrec, recbuf, rsize );
	lp = cp - lin_len;	/* same line start, in the new buffer */
	if (recbuf != recspace)
	    myfree( recbuf );
	recbuf = nrec;
	rsize += RECSIZE;
	--count;		/* undo the count++ of the failed loop test */
	}
    }

} /* end get_record */
