#include <stdio.h>
#include <ctype.h>

/* don't make anything static if we're debugging... */
#if !defined(STATIC)
#if defined(DEBUG)
#define STATIC
#else
#define STATIC	static
#endif
#endif

#include "char-types.h"
#include "lisp.h"
#include "y.tab.h"

extern char *xmalloc();

STATIC int classify_token();
STATIC void errmsg();
STATIC int input();
STATIC void insert_char();
STATIC int read_string();
STATIC int read_token();
STATIC void uninput();


int current_line = 1;
int current_column = 0;

/* main must initialize before calling yyparse */
extern FILE *input_stream;

/*
 * as we read each character,
 * we insert it into the char_buffer array
 * and its attributes into the char_attr array
 *
 * if the buffer fills up, we malloc a larger buffer,
 * copy the current buffer into the new one, and then
 * free the old buffer...
 */
STATIC char *char_buffer = NULL;
STATIC int *char_attr = NULL;
STATIC int buf_size = 0;
STATIC int buf_ndx = 0;

/*
 * We return the token ERROR if we detect an error
 * in the input stream...
 */
int
yylex()
{
    int cur_char;
    int ret_code;
    lispobj *obj;
    int base = 10;

    if (Qsexp_read)
	return (0);		/* pseudo EOF */

 get_next_char:
    cur_char = input();

    if ((cur_char != EOF) && !isascii(cur_char)) {
	errmsg("non-ascii char in input file", cur_char);
	return (ERROR);
    }

    switch (char_types[cur_char + 1].type) {
    case T_EOF:
	return (0);		/* FIXME -- eof indicator */

    case T_ILLEGAL:
	errmsg("Illegal char in input file", cur_char);
	return (ERROR);

    case T_WHITESPACE:
	goto get_next_char;

    case T_MACRO_TERM:
    case T_MACRO_NONTERM:
	switch (cur_char) {
	case '"':		/* read string */
	    /*
	     * we're reading a string
	     * all characters are themselves
	     * read until we come across an unescaped "
	     */
	    if (read_string() == READ_ERROR)
		return (ERROR);

	    obj = (lispobj *)xmalloc(sizeof (lispobj));
	    obj->lo_type = LispObj_String;
	    obj->lo_string = xmalloc(buf_ndx + 1);
	    bcopy(char_buffer, obj->lo_string, buf_ndx);
	    obj->lo_symbol[buf_ndx] = '\0';
	    yylval.bu_lispobj = obj;
	    return (STRING);

#if defined(DO_SHARP)
	case '#':		/* sharp sign */
	    switch (cur_char = input()) {
#if 0
	    case '\'':		/* (function ...)  */
		return (SHARP_QUOTE);
#endif

	    case '|':		/* balanced comment */
		/*
		 * read balanced comment
		 * remembering that they nest and
		 * that escapes are still honored
		 */
		return (ERROR);	/* for now... */

#if defined(RECURSIVE)
	    case '+':		/* read-time conditional */
	    case '-':		/* read-time conditional */
		/*
		 * these require a recursive parser;
		 * one is available with bison:
		 * plan of attack:
		 * (0) specify %pure_parser when building bison parser
		 * (1) call yyparse to read the next s-exp
		 * (2) determine truth value of the s-exp read in (1)
		 * (3) if (true and -) or (false and +) swallow next
		 * s-exp by calling yyparse again
		 * (4) goto get_next_char to do the parsing of the
		 * following token
		 * (5) argument passing is different
		 */
		if (yyparse())
		    return (ERROR); /* yyparse detected an error */

		value = Feval_feature_sexp(Qsexp_read);
		Ffree(Qsexp_read);

		if (((cur_char == '-') && (value != Qnil)) ||
		    ((cur_char == '+') && (value == Qnil))) {
		    /* We should swallow one more s-exp... */
		    if (yyparse())
			/* Query: should be ignore errors?  If so, then how? */
			return (ERROR);
		    /*
		     * Query: should we free it in the error case (above)?
		     *
		     * Or, should we let the user do it?  It's probably
		     * useful to the user in trying to track down just
		     * where the error is in the input file...
		     */
		    Ffree(Qsexp_read);
		}

		goto get_next_char;
#endif /* RECURSIVE */

	    case 'b':		/* binary */
	    case 'B':		/* binary */
	    case 'o':		/* octal */
	    case 'O':		/* octal */
	    case 'x':		/* hexadecimal */
	    case 'X':		/* hexadecimal */
		/* set the number base (used by classify_token) */
		switch (cur_char) {
		case 'b':	/* binary */
		case 'B':	/* binary */
		    base = 2;
		    break;

		case 'o':	/* octal */
		case 'O':	/* octal */
		    base = 8;
		    break;

		case 'x':	/* hexadecimal */
		case 'X':	/* hexadecimal */
		    base = 16;
		    break;
		}

		if ((cur_char = input()) == EOF)
		    return (ERROR);

		if (char_types[cur_char + 1].type != T_CONSTITUENT) {
		    errmsg("illegal char type at start of number", cur_char);
		    return (ERROR);
		}
		if (!(char_types[cur_char + 1].attributes & A_DIGIT) &&
		    !(char_types[cur_char + 1].attributes & A_SIGN)) {
		    errmsg("non-digit/sign char at start of number", cur_char);
		    return (ERROR);
		}
		ret_code = read_token(cur_char,
				      char_types[cur_char + 1].attributes,
				      NO_MULTI_ESCAPE);

		return (verify_number(base));
#if 0
		errmsg("Only decimal numbers are supported", cur_char);
		return (ERROR);
#endif
	    case EOF:		/* end of file */
		errmsg("EOF encountered immediately after # escape", cur_char);
		return (ERROR);

	    default:
		errmsg("unhandled # escape encountered\n", cur_char);
		return (ERROR);
	    }
	    break;
#endif /* DO_SHARP */

	case '\'':		/* single quote */
	    return ('\'');

	case '(':		/* open paren */
	    return ('(');

	case ')':		/* close paren */
	    return (')');

	case ',':		/* comma */
	    errmsg("input file contains a comma", cur_char);
	    return (ERROR);

	case ';':		/* semicolon, comment -- until end of line */
	    /*
	     * dispose of the rest of the line
	     * and then go back to top of loop
	     * for another character
	     */
	    while (((cur_char = input()) != '\n') &&
		   (cur_char != EOF))
		;
	    if (cur_char == EOF)
		return (0);	/* eof indicator */

	    goto get_next_char;

	case '`':		/* backqote */
	    errmsg("input file contains backquote", cur_char);
	    return (ERROR);

	default:
	    errmsg("unexpected macro character, aborting parse...\n",
		    cur_char);
	    return (ERROR);
	}
	break;
		    
    case T_SINGLE_ESCAPE:	/* presumed to be \ */
	/*
	 * add the character to the token buffer,
	 * marking it as alphabetic...
	 */
	if ((cur_char = input()) == EOF)
	    return (ERROR);

	ret_code = read_token(cur_char, A_ALPHABETIC, NO_MULTI_ESCAPE);
	break;

    case T_MULTIPLE_ESCAPE:	/* presumed to be | */
	/*
	 * add the characters to the token buffer,
	 * marking each one as alphabetic...
	 */
	ret_code = read_token(NO_CHAR, A_ALPHABETIC, MULTI_ESCAPE);
	break;

    case T_CONSTITUENT:
	/*
	 * add the character to the token buffer,
	 * marking it as whatever type it happens to be...
	 */

	/*
	 * converting to upper-case is a *LOSE*
	 * but, being incompatible with common-lisp
	 * on this issue is an even bigger lose!
	 * Why couldn't they be reasonable and make
	 * it case-sensitive?  or at least *lower*-case
	 * *sigh*
	 */
	if (islower(cur_char))
	    cur_char = toupper(cur_char);

	ret_code = read_token(cur_char, char_types[cur_char + 1].attributes,
			      NO_MULTI_ESCAPE);
	break;

    default:
	errmsg("illegal character type... aborting\n", cur_char);
	abort();
    }
    if (ret_code == READ_ERROR)
	return (ERROR);
    else
	return (classify_token());
}

/*
 * we pretend to handle four cases:
 * cur_char == `a character' or `no character'
 * combined with
 * multi_escape == `0' or `non-0'
 *
 * actually, we currently only handle two cases:
 * cur_char == `a character' with multi_escape == `0' and
 * cur_char == `no character' with multi_escape == `non-0'.
 *
 * we should extend this to handle the other two cases; or
 * modify the arguments so that it's clear we only handle
 * the two cases mentioned above
 *
 * we have 3 tasks:
 * (1) read a token
 * (2) classify it
 * (3) store its value into yylval and return success or failure
 */
STATIC int
read_token(cur_char, cur_attr, multi_escape)
    int cur_char;		/* what is the current char? */
    int cur_attr;		/* attributes of cur_char */
    int multi_escape;		/* boolean: are we in a multi-escape seq */
{
    buf_ndx = 0;

    if (cur_char != NO_CHAR) {
	/* we have a character, insert it into the array... */
	insert_char(cur_char, cur_attr);
    }

    /*
     * read one character at a time,
     * inserting them into the buffer as they are read
     */
    if (multi_escape) {
	/*
	 * an odd number of multiple escape characters
	 * have been encountered
	 */
    odd_escape_count:
	while ((cur_char = input()) != EOF) {
	    switch (char_types[cur_char + 1].type) {
	    case T_EOF:
	    case T_ILLEGAL:
		return (READ_ERROR);

	    case T_CONSTITUENT:
	    case T_WHITESPACE:
	    case T_MACRO_TERM:
	    case T_MACRO_NONTERM:
		insert_char(cur_char, A_ALPHABETIC);
		break;

	    case T_SINGLE_ESCAPE:
		if ((cur_char = input()) == EOF)
		    return (READ_ERROR);
		insert_char(cur_char, A_ALPHABETIC);
		break;

	    case T_MULTIPLE_ESCAPE:
		goto even_escape_count;
	    }
	}
	return (READ_ERROR);
    } else {
	/*
	 * an even number of multiple escape characters
	 * have been encountered
	 */
    even_escape_count:
	while ((cur_char = input()) != EOF) {
	    switch (char_types[cur_char + 1].type) {
	    case T_EOF:
	    case T_WHITESPACE:
		return (READ_SUCCESS);

	    case T_ILLEGAL:
		return (READ_ERROR);

	    case T_CONSTITUENT:
		/*
		 * converting to upper-case is a *LOSE*
		 * but, being incompatible with common-lisp
		 * on this issue is an even bigger lose!
		 * Why couldn't they be reasonable and make
		 * it case-sensitive?  or at least *lower*-case
		 * *sigh*
		 */
		if (islower(cur_char))
		    cur_char = toupper(cur_char);
		/* and now drop through...*/

	    case T_MACRO_NONTERM:
		insert_char(cur_char, char_types[cur_char + 1].attributes);
		break;

	    case T_MACRO_TERM:
		uninput(cur_char);
		return (READ_SUCCESS);

	    case T_SINGLE_ESCAPE:
		if ((cur_char = input()) == EOF)
		    return (READ_ERROR);

		insert_char(cur_char, A_ALPHABETIC);
		break;

	    case T_MULTIPLE_ESCAPE:
		goto odd_escape_count;
	    }
	}
	return (READ_SUCCESS);
    }
}

STATIC int
read_string()
{
    int cur_char;

    buf_ndx = 0;
 next_string_char:
    while (((cur_char = input()) != '"') &&
	   (cur_char != '\\') && (cur_char != EOF))
	insert_char(cur_char, A_ALPHABETIC);

    if (cur_char == '"') {
	/* we've got the string */
	return (READ_SUCCESS);
    } else if (cur_char == '\\') {
	if ((cur_char = input()) == EOF) {
	    errmsg("EOF encountered while reading a string\n", cur_char);
	    return (READ_ERROR);
	}
	insert_char(cur_char, A_ALPHABETIC);
	goto next_string_char;
    } else if (cur_char == EOF) {
	errmsg("EOF encountered while reading a string\n", cur_char);
	return (READ_ERROR);
    }
}

STATIC void
insert_char(cur_char, cur_attr)
    int cur_char;
    int cur_attr;
{
    /* first, is there room for the new character? */
    if (buf_ndx >= buf_size) {
	/* reallocate the buffer */
	int new_buf_size = buf_size ? 2 * buf_size : 1024;
	char *new_buffer = xmalloc(new_buf_size);
	int *new_attrs = (int *)xmalloc(new_buf_size * sizeof (int));

	if (buf_size) {
	    bcopy(char_buffer, new_buffer, buf_size);
	    bcopy(char_attr, new_attrs, buf_size * sizeof (int));
	    free(char_buffer);
	    free(char_attr);
	}
	char_buffer = new_buffer;
	char_attr = new_attrs;
	buf_size = new_buf_size;
    }

    char_buffer[buf_ndx] = cur_char;
    char_attr[buf_ndx] = cur_attr;
    buf_ndx++;
}

/*
 * attempt to classify the atom
 *
 * there are three possibilities:
 * string, symbol, number
 *
 * string is handled elsewhere (read_string)
 * so we only worry about symbol versus number
 */
STATIC int
classify_token()
{
    int ndx;
    lispobj *obj;

    /* 
     * is it a number?
     * (we only support base 10 numbers here -- other bases go through
     * verify_number)
     */
    for (ndx = 0; ndx < buf_ndx; ndx++) {
	if (!((char_attr[ndx] & A_DIGIT) && isdigit(char_buffer[ndx])) &&
	    !(char_attr[ndx] & A_SIGN))
	    break;
	if ((char_attr[ndx] & A_SIGN) && (ndx != 0))
	    return (ERROR);
    }


    if (ndx < buf_ndx) {	/* it has non-digits */
	if ((buf_ndx == 1) && (char_attr[0] & A_DOT))
	    return ('.');

	if ((buf_ndx == 3) && !strncmp(char_buffer, "NIL", 3)) {
	    obj = Qnil;		/* nil is weird -- both a symbol and a list */
	} else {
	    obj = (lispobj *)xmalloc(sizeof (lispobj));
	    obj->lo_type = LispObj_Symbol;
	    obj->lo_symbol = xmalloc(buf_ndx + 1);
	    bcopy(char_buffer, obj->lo_symbol, buf_ndx);
	    obj->lo_symbol[buf_ndx] = '\0';
	}
	yylval.bu_lispobj = obj;
	return (SYMBOL);	/* we broke out early, its a symbol */
    } else {			/* it's a (base 10) number!! */
	char_buffer[buf_ndx] = '\0';

	obj = (lispobj *)xmalloc(sizeof (lispobj));
	obj->lo_type = LispObj_Number;
	obj->lo_number = atoi(char_buffer);
	yylval.bu_lispobj = obj;

	return (NUMBER);
    }
}

#if defined(DO_SHARP)
/*
 * verify that the number in the token buffer is a valid number
 * in base `base'.  If it's not, return an error; if it's valid,
 * return number *AND* create a lisp object and store it
 */
STATIC int
verify_number(base)
    int base;			/* number base -- 2, 8, or 16 */
{
    int ndx;
    lispobj *obj;
    int sign = 1;
    int digit;
    int number;

    for (ndx = 0; ndx < buf_ndx; ndx++) {
	if (!(char_attr[ndx] & A_DIGIT) && !(char_attr[ndx] & A_SIGN)) {
	    errmsg("non-digit/sign char in number", char_buffer[ndx]);
	    return (ERROR);
	}
	if ((char_attr[ndx] & A_SIGN) && (ndx != 0)) {
	    errmsg("sign char not at start of number", char_buffer[ndx]);
	    return (ERROR);
	}
    }

    if (char_attr[0] & A_PLUS_SIGN)
	ndx = 1;
    else if (char_attr[0] & A_MINUS_SIGN) {
	sign = -1;
	ndx = 1;
    } else
	ndx = 0;

    number = 0;
    for (; ndx < buf_ndx; ndx++) {
	digit = char_buffer[ndx];

	if (isdigit(digit))
	    digit = digit - '0';
	else if (islower(digit))
	    digit = digit - 'a' + 10;
	else if (isupper(digit))
	    digit = digit - 'A' + 10;
	else {
	    errmsg("invalid char has digit attribute... aborting",
		   char_buffer[ndx]);
	    abort();
	}

	if (digit >= base) {
	    errmsg("number has illegal digit for current base",
		   char_buffer[ndx]);
	    return (ERROR);
	}
	number = number * base + digit;
    }
    obj = (lispobj *)xmalloc(sizeof (lispobj));
    obj->lo_type = LispObj_Number;
    obj->lo_number = sign * number;
    yylval.bu_lispobj = obj;

    return (NUMBER);
}
#endif /* DO_SHARP */

STATIC int
input()
{
    int cur_char = getc(input_stream);

    if (cur_char == '\n') {
	current_column = 0;
	current_line++;
    } else if (cur_char != EOF)
	current_column++;

    return (cur_char);
}

STATIC void
uninput(cur_char)
    int cur_char;
{
    ungetc(cur_char, input_stream);

    if (cur_char != '\n') {
	current_column--;
    } else {
	current_line--;
	current_column = -1;
    }
}

yyerror(string)
    char *string;
{
    fprintf(stderr, "%s, at or near line %d, col %d\n",
	    string, current_line, current_column);
}

STATIC void
errmsg(string, cur_char)
    char *string;
    int cur_char;
{
    fprintf(stderr, "%s, at or near line %d, col %d, char 0%o (octal)\n",
	    string, current_line, current_column, cur_char);
}
