/* DefaultFilter.c -- Copyright 1992 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* $Id: DefaultFilter.c,v 1.1 92/08/24 00:38:06 lee Exp $
 */

/* Filter for unidentified (e.g. ASCII) files
 *
 * Detects Shar and uuencoded matter.
 * See FilterMain and wordrules.h for more info.
 *
 */

#ifdef SYSV
 extern int _filbuf(), _flsbuf(); /* for lint! */
#endif

#include "globals.h"
#include "error.h"

#include <stdio.h>
#include <malloc.h>
#include <ctype.h>
#include "wordrules.h"

#include "emalloc.h"

#ifndef STREQ
/* STREQ(a, b): non-zero when the strings a and b are equal.
 * The first characters are compared inline, so strcmp() is only called
 * when they already agree.  Both arguments are evaluated more than once,
 * so they must not have side effects.
 */
# define STREQ(a, b) ((*(a) == *(b)) && strcmp((a), (b)) == 0)
#endif

/** C Library functions that need to be declared: **/
#ifndef tolower
 /* only declared when tolower is not already provided as a macro */
 extern int tolower();
#endif
extern int strcmp();
/** Functions in this file that need to be declared **/
INLINE static int GetChar();
static char *GetLine();
static void PutLine();
/* Values for the second argument of PutLine():
 * PUTMODE_PRINT writes the words of the line as they are;
 * PUTMODE_IGNORE writes 'q'/'x' placeholders in place of word characters.
 */
#define PUTMODE_IGNORE	1
#define PUTMODE_PRINT	0

void Body();
/** **/

/* progname is defined in some other file; nothing visible here uses it. */
extern char *progname;
void Filter();

/* Header names that IsWanted() recognises at the start of a line.
 *
 * IsWanted() gives up at the first entry whose initial character is
 * greater than the initial character of the line, so entries must stay
 * in ascending order of their first character.  The order of entries
 * that share a first character does not affect that early exit.
 * The list ends with a null pointer.
 */
char *KeepThese[] = { /* these must be sorted on the first character */
    "From",
    "Keywords",
    "Organisation",
    "Organization",
    "Summary",
    "Subject",
    0
};

/* FirstWord -- test whether Line begins with the complete word Word.
 *
 * Line: the NUL-terminated text to examine.
 * Word: the NUL-terminated word to look for.
 *
 * Returns 1 if the first strlen(Word) characters of Line are exactly
 * Word (case-sensitive) and the character after them is the end of the
 * string, white space or punctuation.  Returns 0 otherwise.
 */
static INLINE int
FirstWord(Line, Word)
    char *Line;
    char *Word;
{
    int n = strlen(Word);
    int Next;

    if (strncmp(Line, Word, n) != 0) {
	return 0;
    }

    /* The <ctype.h> functions are only defined for EOF and values that
     * fit in an unsigned char; a plain char may be negative for 8-bit
     * input, so convert before classifying.
     */
    Next = (unsigned char) Line[n];
    return (Next == '\0' || isspace(Next) || ispunct(Next));
}

/* IsWanted -- does Line begin with one of the words listed in KeepThese?
 *
 * Line: the NUL-terminated text to examine.
 *
 * Returns 1 if FirstWord() matches Line against an entry of KeepThese,
 * and 0 otherwise.  The match is case-sensitive.
 *
 * KeepThese is sorted on the first character of each entry, so the scan
 * stops as soon as an entry starts with a character greater than the
 * first character of Line.
 *
 * An earlier version also computed a lower-cased copy of Line[0] that
 * was never used; it has been removed.  Computing it passed a plain
 * (possibly negative) char to isupper().
 */
int
IsWanted(Line)
    char *Line;
{
    char **pp;

    for (pp = KeepThese; *pp && **pp; pp++) {
	if (**pp > *Line) return 0; /* gone too far */
	else if (FirstWord(Line, *pp)) return 1;
    }
    return 0;
}

/* Filter -- entry point of this filter.
 *
 * InputFile: the stream to read.
 * Name: passed on to Body() unchanged.
 *
 * All of the work is done by Body().
 */
void
Filter(InputFile, Name)
    FILE *InputFile;
    char *Name;
{
    Body(InputFile, Name);
    return;
}

/* Non-zero while the scan is inside a word.  PutLine() sets and clears
 * it; GetChar() reads it to decide whether an apostrophe is kept.
 */
int InWord = 0;

/* GetChar -- read one character from fd, filtering apostrophes.
 *
 * An apostrophe is returned as itself only when InWord is set and the
 * character that follows it is a letter; otherwise a space is returned
 * in its place.  The character read after the apostrophe is held and
 * handed back by the next call.
 *
 * Returns whatever getc() returns (including EOF) for everything else.
 * A held value of 0 cannot be told apart from "nothing held".
 */
INLINE static int
GetChar(fd)
    FILE *fd;
{
    static int Held = 0; /* character read ahead after a quote, or 0 */
    int ch;

    /* Anything left over from the previous call goes out first. */
    if (Held != 0) {
	ch = Held;
	Held = 0;
	return ch;
    }

    ch = getc(fd);
    if (ch != '\'') {
	return ch;
    }

    /* Only return a single quote if it is surrounded by letters */
    Held = getc(fd);
    return (InWord && isalpha(Held)) ? '\'' : ' ';
}

/* GetLine -- read the next line of InputFile, however long it is.
 *
 * Returns a pointer to a NUL-terminated buffer holding the line, with
 * its trailing newline if it had one.  Returns NULL when end of file is
 * reached before any character has been read.
 *
 * The buffer is static: it is reused (and may move) on the next call,
 * so the caller must not keep the pointer or free it.
 */
static char *
GetLine(InputFile)
    FILE *InputFile;
{
    static char *Line = NULL;
    static unsigned long Length = 0L;
    unsigned long Used = 0; /* characters stored so far */
    int ch;

    /* The buffer is allocated on the first call only. */
    if (Length == 0) {
	Length = 1024;
	Line = emalloc(Length);
    }

    for (;;) {
	ch = getc(InputFile);
	if (ch == EOF) {
	    break;
	}
	/* keep room for this character and the trailing \0 */
	if (Used + 1 >= Length) {
	    Length += 128;
	    Line = erealloc(Line, Length);
	}
	Line[Used++] = ch;
	if (ch == '\n') {
	    Line[Used] = '\0';
	    return Line;
	}
    }

    /* End of file: nothing read at all means there are no more lines. */
    if (Used == 0) {
	return (char *) NULL;
    }

    /* The last line had no newline; make sure the \0 fits. */
    if (Used + 1 >= Length) {
	Length += 2;
	Line = erealloc(Line, Length);
    }
    Line[Used] = '\0';
    return Line;
}

/* Bit flags kept in Body()'s LineState. */
#define LS_NORMAL	00
#define LS_UUENCODE	01
#define LS_SHAR		02 /* can be combined with UUENCODE */

/* The prefix character removed from each line inside a shar
 * here-document (usually an X), or 0 when none is in effect.
 * Body() sets it when a shar starts and clears it when the shar ends.
 */
char SharCharacter = 0;

/* PutLine -- write one line to standard output, word by word.
 *
 * Line:   the NUL-terminated line to write.
 * Ignore: PUTMODE_PRINT to write word characters as they are, or
 *         PUTMODE_IGNORE to write 'q' for the first character of each
 *         word and 'x' for the rest.  Any other value is reported
 *         through Error() as a fatal bug.
 *
 * Exactly one character is written for each character of Line.
 * Newlines are copied; characters that are not part of a word become
 * spaces; a run that starts with a digit is written as 'q' followed by
 * 'x's up to the next white space, in both modes.
 *
 * The global InWord is reset on entry and updated as the line is read.
 */
static void
PutLine(Line, Ignore)
    char *Line;
    int Ignore;
{
    register char *p;

    InWord = 0;

    switch (Ignore) {
    case PUTMODE_PRINT:
	Ignore = 0;
	break;
    case PUTMODE_IGNORE:
	Ignore = 1;
	break;
    default:
	/* NOTE(review): presumably Error() does not return for E_FATAL */
	Error(E_FATAL|E_BUG, "PutLine(\"%8.8s...\", %d not in {%d,%d})",
		Line, Ignore, PUTMODE_IGNORE, PUTMODE_PRINT
	);
    }

    for (p = Line; *p; p++) {
	if (*p == '\n') {
	    InWord = 0;
	    putchar(*p);
	} else if (InWord) {
	    if (WithinWord(*p)) {
		if (WithinWord(p[1])) {
		    putchar(Ignore ? 'x' : *p);
		} else if (EndsWord(*p)) {
		    /* last character of the word */
		    putchar(Ignore ? 'x' : *p);
		    InWord = 0;
		} else {
		    /* may appear inside a word but not end one */
		    putchar(' ');
		}
	    } else {
		putchar(' ');
		InWord = 0;
	    }
	} else {
	    if (StartsWord(*p)) {
		InWord = 1;
		putchar(Ignore ? 'q' : *p);
	    } else if (isdigit((unsigned char) *p)) {
		/* The <ctype.h> functions need an unsigned char value;
		 * a plain char may be negative for 8-bit input.
		 */
		putchar('q');
		while (*++p  && !isspace((unsigned char) *p)) {
		    putchar('x');
		}
		--p; /* gone too far */
	    } else {
		putchar(' ');
	    }
	}
    }
}

/* Body -- copy InputFile to standard output through PutLine(), blanking
 * out quoting prefixes, shar prefix characters and uuencoded data.
 *
 * InputFile: the stream to read, one line at a time, with GetLine().
 * Name:      not used here.
 *
 * For each line:
 *  - if no shar prefix character is active and the line starts with
 *    '>' or '|', the leading run of '>', '|' and white space is replaced
 *    by spaces;
 *  - for about 30 lines after a line starting with ':', '#' or "---",
 *    a line whose first word is "sed" or "cat" and which contains "<<"
 *    starts a shar here-document.  The word after "<<" is remembered as
 *    the terminator, and the X of a "s/^X//" pattern, if there is one,
 *    becomes SharCharacter;
 *  - inside a here-document a leading SharCharacter is replaced by a
 *    space, and a line equal to the terminator ends the here-document;
 *  - "begin <octal mode> <name>" starts uuencoded data, which is written
 *    with PUTMODE_IGNORE until a line whose first word is "end".
 */
void
Body(InputFile, Name)
    FILE *InputFile;
    char *Name;
{
    register char *p;
    char *Line;
    int LineState = 0;
    int CheckForShar = 0; /* seen a line starting w/ "---" "#" or ":" recently */
    char *EOFStr = 0; /* here-document terminator, owned by this function */

    while ((p = Line = GetLine(InputFile)) != (char *) NULL) {

	/* When material is quoted with > or |, ignore the quoting prefix.
	 * (The ctype functions need an unsigned char value, here and
	 * below: a plain char may be negative for 8-bit input.)
	 */
	if (!SharCharacter && (*p == '>' || *p == '|')) {
	    while (*p == '>' || *p == '|' || isspace((unsigned char) *p)) {
		putchar(' ');
		Line = ++p;
	    }
	}

	if (CheckForShar) {
	    ++CheckForShar;
	    if (FirstWord(Line, "sed") || FirstWord(Line, "cat")) {
		register char *q;

		for (q = Line; *q; q++) {
		    if (*q == '<' && q[1] == '<') {
			int HasQuotes = 0;

			LineState |= LS_SHAR;
			CheckForShar = 0;
			q++; q++; /* skip the << */
			while (isspace((unsigned char) *q)) q++;

			/* cat > file << 'word', but there are several
			 * alternate forms.  We ignore <<- because it's not
			 * portable enough for a shar.
			 * A \ can be used instead of a quote, but in this
			 * case there can be no space in the word.
			 */

			if (*q == '\'' || *q == '"') {
			    HasQuotes = (*q);
			    q++;
			} else if (*q == '\\') {
			    q++;
			}
			p = q;
			while (*q && *q != '\n') {
			    if (!HasQuotes && isspace((unsigned char) *q)) break;
			    else if (*q == HasQuotes) break;
			    q++;
			}
			EOFStr = emalloc(q - p + 1);
			(void) strncpy(EOFStr, p, q - p);
			EOFStr[q - p] = '\0';

			/* determine the shar character, usually an X;
			 * we look for something like "s/^X//"
			 */
			for (q = Line; *q; q++) {
			    if (*q == 's' &&
					(q[1] == '/' ||
					    ispunct((unsigned char) q[1])) &&
					q[2] == '^' &&
					q[3] && !isspace((unsigned char) q[3]) &&
					q[4] == q[1] &&
					q[5] == q[1]) {
				SharCharacter = q[3];
				q[3] = ' '; /* don't index it! */
				break;
			    }
			} /* for */
			break;
		    } /* if << */
		} /* for q = Line... */
	    } /* if FirstWord is sed or cat */

	    if (CheckForShar > 30) {
		/* No << on the line, so not the start of a shar */
		CheckForShar = 0;
	    }
	}
	/* end of check for shar */

	p = Line;

	if (LineState & LS_SHAR) {
	    if (EOFStr) {
		int n = strlen(EOFStr);
		if (strncmp(Line, EOFStr, n) == 0) {
		    if (!Line[n] || Line[n] == '\n') {
			/* the terminator line: this here-document is over */
			LineState &= ~LS_SHAR;
			CheckForShar = 1;
			efree(EOFStr);
			EOFStr = (char *) NULL;
			SharCharacter = 0;
		    }
		}
	    }
	    if (SharCharacter && *p == SharCharacter) {
		*p = ' ';
		putchar(' ');
		Line = ++p;
	    }
	} else {
	    if (Line[0] == ':' || Line[0] == '#' ||
			(Line[0] == '-' && Line[1] == '-' && Line[2] == '-')) {
		/* check for --- rather than "--" as .signature starts
		 * with "-- ", except people who add a signature by hand
		 * might forget the space.
		 * The three characters tested must be consecutive: each
		 * one is only looked at when the one before it is a '-',
		 * so the test never reads past the end of a short line.
		 */
		CheckForShar = 1;
	    }
	}

	if (LineState & LS_UUENCODE) { /* check for "end" and index that */
	    if (*p == 'e' && p[1] == 'n' && p[2] == 'd' &&
		    (!p[3] || isspace((unsigned char) p[3]))) {
		LineState &= ~LS_UUENCODE;
		/* fall through */
	    }
	}
	
	/* Now we've determined whether we're in a shar or not,
	 * and also whether we are in uuencoded drivel.
	 * Furthermore, if we are in a shar, we have determined that
	 * the current line is not the last of the current file within the
	 * shar archive, and have removed the first character if appropriate.
	 */

	if (LineState & LS_UUENCODE) {
	    PutLine(Line, PUTMODE_IGNORE);
	} else {
	    /* look for "begin mode filename" */
	    register char *q;

	    /* first, print the line */
	    PutLine(Line, PUTMODE_PRINT);

	    /* now, look for the start of uuencoded material */
	    if (FirstWord(Line, "begin")) {
		q = &Line[5]; /* skip over the "begin" */
		if (isspace((unsigned char) *q)) {
		    q++;
		    if (isdigit((unsigned char) *q)) {
			/* the mode is octal, so 8 and 9 end the run */
			while (isdigit((unsigned char) *q) &&
				*q != '8' && *q != '9') {
			    q++;
			}
			if (*q == ' ' && *++q) {
			    /* found it! */
			    LineState |= LS_UUENCODE;
			}
		    }
		}
	    }
	} /* else !LS_UUENCODE */
    } /* while GetLine */

    /* The input may end in the middle of a here-document, in which case
     * the terminator string is still allocated: release it.
     */
    if (EOFStr) {
	efree(EOFStr);
	EOFStr = (char *) NULL;
    }

    return;
}
