/* readfile.c -- Copyright 1992 Liam R. E. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* $Id: readfile.c,v 1.6 92/07/05 12:50:06 lee Exp $
 * readfile - read files or lines into memory.
 */

#include <stdio.h>
#include <ctype.h>

#include "globals.h"
#include "emalloc.h"

extern int AsciiTrace;

#include <stdio.h>
#include <ctype.h>	/* isspace() etc */
#include <malloc.h>	/* This declares malloc(), realloc() etc... */

#include "globals.h"
#include "error.h"
#include "emalloc.h"

#include "readfile.h"

/** File Reading Routines:
 **
 **  long ReadFile(int Severity; char *Name, *What; char ***Lines; int Flags)
 **	Reads lines from the named file (Name) and puts them in an array
 **	that it allocates with malloc().  Returns a pointer to the
 **	start of the array of pointers to lines (in *Lines), and
 **	the number of lines read.  If the file can't be opened, or it runs
 **	out of memory, ReadFile() calls Error() with the given Severity,
 **	and with an error message constructed out of What, which should
 **	be a short (e.g. 3-word) description of the purpose of the file.
 **	The Flags can currently include
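 **		UF_IGNBLANKS  to throw away blank lines,
 **		UF_IGNSPACES  to discard leading and trailing spaces,
 **		UF_IGNHASH    to discard a comment that starts a line (a #
 **			      seen before any character of the line has
 **			      been kept, through to the end of that line),
 **		UF_IGNALLHASH to discard a comment anywhere in a line (any
 **			      unescaped #, through to the end of that line),
 **		UF_ESCAPEOK  to accept \# and \\ as # and \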
 **
 **	util.h defines UF_NORMAL as all four of the above.
 **
 **
 **	The variant
 **  long fReadFile(int S; FILE *f; char *Name, *What; char ***Lines; int Flags)
 **	is identical except that it takes an already-opened FILE pointer and
 **	reads up to EOF.
 **  
 ** int fReadLine(FILE *f, char **Linep; int Flags)
 **	This reads a single line into memory, making room with malloc()
 **	and setting *Linep to point to the new storage.  It returns -1
 **	when it detects the end of input (EOF).
 **	If malloc() or realloc returns 0 (indicating that no more memory
 **	is available), *Linep is set to 0 and the number of characters
 **	read is returned.
 **	The Flags argument is the same as for ReadFile:
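 **		UF_IGNBLANKS  to throw away blank lines,
 **		UF_IGNSPACES  to discard leading and trailing spaces,
 **		UF_IGNHASH    to discard a comment that starts a line (a #
 **			      seen before any character of the line has
 **			      been kept, through to the end of that line),
 **		UF_IGNALLHASH to discard a comment anywhere in a line (any
 **			      unescaped #, through to the end of that line),
 **		UF_ESCAPEOK  to accept \# and \\ as # and \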
 **
 **/

/* ReadFile -- open the file called Name, read all of its lines with
 * fReadFile(), and close it again.
 *
 * Severity, Name and What are handed on unchanged to fEopen() and
 * fReadFile(); What is also used in the warning issued here if the
 * close fails.  Flags is passed straight through to fReadFile().
 *
 * Returns whatever fReadFile() returned, or -1 if fEopen() gave back a
 * null FILE pointer (in which case *Lines is zeroed, provided Lines
 * itself is not null).
 */
long
ReadFile(Severity, Name, What, Lines, Flags)
    int Severity;	/* to pass to Error() if necessary */
    char *Name;		/* the filename to read */
    char *What;		/* short description of the file, for messages */
    char ***Lines;	/* where to leave the array of lines */
    int Flags;		/* UF_* flags, handed to fReadFile() */
{
    extern FILE *fEopen();
    FILE *Input = fEopen(Severity, Name, What, "r");
    long Count;

    /* No file: leave the caller with a null array and give up. */
    if (!Input) {
	if (Lines != (char ***) 0) *Lines = (char **) 0;
	return -1L;
    }

    Count = fReadFile(Severity, Input, Name, What, Lines, Flags);

    if (fclose(Input) != EOF) {
	return Count;
    }

    /* A failed close is unlikely (an NFS problem is the most probable
     * cause), but saying so could save somebody a lot of frustration.
     * The lines already read are still returned.
     */
    Error(E_WARN|E_SYS, "Couldn't close \"%s\" (%s)", Name, What);
    return Count;
}

/* fReadFile -- read every remaining line of the open stream f into a
 * freshly allocated array of freshly allocated strings.
 *
 * Lines are fetched one at a time with fReadLine(f, &line, Flags) until
 * it returns a negative value.  On success *Lines points at the array
 * and the number of lines stored is returned.
 *
 * If memory runs out -- for the array itself, or inside fReadLine(),
 * which signals this by returning a non-negative value with a null
 * line pointer -- everything read so far is freed, *Lines is set to
 * null, Error() is called with Severity|E_MEMORY, and -1 is returned
 * (assuming Error() returns at all for the given Severity).
 *
 * A null Lines pointer yields -1 with nothing read.
 *
 * The caller owns the returned array and each string in it.
 */
long
fReadFile(Severity, f, Name, What, Lines, Flags)
    int Severity;	/* to pass to Error() if necessary */
    FILE *f;		/* already-open stream to read up to EOF */
    char *Name;		/* the filename, used only in messages */
    char *What;		/* short description, used only in messages */
    char ***Lines;	/* where to leave the array of lines */
    int Flags;		/* UF_* flags, handed to fReadLine() */
{
    long NumberOfLines = 0L;
    long LinesAllocated = 20L;
    char **Array;
    char **Bigger;
    char *Linep;
    long i;

    if (!Lines) {
	/* nowhere to put the result */
	return -1L;
    }

    Array = (char **) malloc((size_t) LinesAllocated * sizeof(char *));
    if (!Array) {
	goto NoMemory;
    }

    while (fReadLine(f, &Linep, Flags) >= 0) {
	if (!Linep) {
	    /* fReadLine() could not get memory for this line */
	    goto NoMemory;
	}

	if (NumberOfLines >= LinesAllocated) {
	    LinesAllocated += 50;
	    /* Keep the old pointer until we know realloc() worked,
	     * otherwise a failure would lose the whole array.
	     */
	    Bigger = (char **) realloc((char *) Array,
				(size_t) LinesAllocated * sizeof(char *));
	    if (!Bigger) {
		free(Linep);
		goto NoMemory;
	    }
	    Array = Bigger;
	}

	Array[NumberOfLines] = Linep;
	++NumberOfLines;
    }

    *Lines = Array;
    return NumberOfLines;

NoMemory:
    /* Release everything before calling Error(), in case it does not
     * return for this Severity.
     */
    if (Array) {
	for (i = 0L; i < NumberOfLines; i++) {
	    free(Array[i]);
	}
	free((char *) Array);
    }
    *Lines = (char **) 0;
    Error(Severity|E_MEMORY, "Lines from \"%s\" (%s)", Name, What);
    return -1L;
}

/* fReadLine -- read one line from f into newly allocated storage.
 *
 * On return *Linep points at the NUL-terminated line (without its
 * newline) and the line's length is returned; the caller must free()
 * the storage.  -1 is returned, with *Linep null, at end of input.
 * If memory runs out, *Linep is null and a non-negative count of
 * characters read is returned, so callers must test *Linep as well as
 * the return value.
 *
 * Flags:
 *	UF_ESCAPEOK	a backslash makes the next character ordinary
 *			(the backslash itself is dropped); backslash
 *			followed by newline joins two lines.
 *	UF_IGNALLHASH	an unescaped # discards the rest of the line.
 *	UF_IGNHASH	the same, but only for a # met before anything
 *			has been kept from the line.
 *	UF_IGNSPACES	leading and trailing white space is dropped.
 *	UF_IGNBLANKS	lines that end up empty are skipped; without it
 *			they are returned as empty strings.
 *
 * One trailing carriage return is always removed, since these are
 * usually the unwanted result of file transfers.
 *
 * A running average of line lengths is kept in static storage and used
 * as the initial buffer size, so this function is not reentrant.
 */
int
fReadLine(f, Linep, Flags)
    FILE *f;
    char **Linep;
    int Flags;
{
    static unsigned long LinesRead = 0L;
    static unsigned long TotalSoFar = 0L; /* static, or the average is always 0 */
    char *Buf;		/* the line being built, or null if nothing kept yet */
    char *Bigger;
    size_t Capacity;	/* bytes allocated at Buf */
    size_t Used;	/* characters kept so far, excluding the \0 */
    int c;

    for (;;) {	/* once round for each line read, including skipped ones */
	Buf = (char *) 0;
	Capacity = 0;
	Used = 0;
	*Linep = (char *) 0;

	while ((c = getc(f)) != EOF && c != '\n') {
	    /* Look for things to skip before allocating any space: */

	    if (c == '\\' && (Flags & UF_ESCAPEOK)) {
		if ((c = getc(f)) == EOF) break;
		if (c == '\n') continue; /* join lines together */
		/* otherwise keep c whatever it is, even # or a space */
	    } else if (c == '#' && ((Flags & UF_IGNALLHASH) ||
				    ((Flags & UF_IGNHASH) && Used == 0))) {
		while ((c = getc(f)) != EOF && c != '\n') {
		    /*NULLBODY*/ ;
		}
		if (c == EOF) break;
		/* put the newline back so that the loop test sees it */
		(void) ungetc(c, f);
		continue;
	    } else if (Used == 0 && (Flags & UF_IGNSPACES) && isspace(c)) {
		continue; /* ignore leading blanks */
	    }

	    /* Make sure there is room for c and a trailing \0: */
	    if (Used + 2 > Capacity) {
		if (Capacity == 0) {
		    Capacity = (size_t)
			    (TotalSoFar / (LinesRead ? LinesRead : 1L));
		    if (Capacity < 2) {
			/* it seems silly allocating less than this...
			 * and start the average afresh.
			 */
			Capacity = 10;
			LinesRead = 0L;
			TotalSoFar = 0L;
		    }
		    Bigger = (char *) malloc(Capacity);
		} else {
		    Capacity += (Capacity < 20) ? 10 : Capacity / 2;
		    Bigger = (char *) realloc(Buf, Capacity);
		}

		if (!Bigger) {
		    /* Out of memory: don't leak what we had.  *Linep is
		     * already null, which is how the caller finds out.
		     */
		    if (Buf) free(Buf);
		    return (int) Used + 1;
		}
		Buf = Bigger;
	    }
	    Buf[Used++] = c;
	    Buf[Used] = '\0';  /* p a r a n o i a */
	}

	if (!Buf) {
	    /* Nothing at all was kept from this line. */
	    if (c == EOF) {
		return -1;
	    }
	    if (Flags & UF_IGNBLANKS) {
		continue;
	    }
	    /* The caller wants blank lines, so hand back an empty string;
	     * returning -1 here would look like end of input.
	     */
	    Buf = (char *) malloc(1);
	    if (Buf) {
		Buf[0] = '\0';
	    }
	    *Linep = Buf;
	    ++LinesRead;
	    return 0;
	}

	/* ASSERT: Used > 0, since Buf is only allocated to store into */

	/* Discard a trailing \r */
	if (Buf[Used - 1] == '\r') {
	    Buf[--Used] = '\0';
	}

	if (Flags & UF_IGNSPACES) {
	    /* Delete trailing spaces, but leave a space that directly
	     * follows a stored backslash alone when escapes are enabled.
	     * The cast keeps isspace() away from negative char values.
	     */
	    while (Used > 0 && isspace((unsigned char) Buf[Used - 1])) {
		if (Used >= 2 && Buf[Used - 2] == '\\' &&
						(Flags & UF_ESCAPEOK)) {
		    break;
		}
		Buf[--Used] = '\0';
	    }
	}

	TotalSoFar += Used;
	++LinesRead;
	    /* Note: we include blank lines in the average */

	if (Used == 0) {
	    if (c == EOF) {
		free(Buf);
		return -1;
	    }
	    if (Flags & UF_IGNBLANKS) {
		free(Buf);
		continue;
	    }
	    /* otherwise fall through and return the empty line */
	}

	if (Used + 2 < Capacity) {
	    /* Give back the excess.  One spare byte beyond the \0 is
	     * tolerated to reduce the number of realloc() calls.  If the
	     * shrink fails the larger buffer is still valid, so keep it.
	     */
	    Bigger = (char *) realloc(Buf, Used + 1);
	    if (Bigger) {
		Buf = Bigger;
	    }
	}

	*Linep = Buf;
	return (int) Used;
    }
}
