/*
 *	lex.c
 */

# include	"kalypso.h"
# include	"lex.h"

/*
 * classTable -- lexical class of every character code.
 *
 * Indexed by character code; each entry is the OR of the class flags
 * (IGNORE, WHITE, PRINTABLE, DIGIT, ...) that apply to that character.
 * lexc () copies the selected entry into charClass, and lex () decides
 * what to do with a character purely by testing those bits.
 *
 * Within this file lexc () masks every input character with 0177 before
 * indexing, so only the first 128 entries are reached from there; the
 * upper 128 entries are all IGNORE.
 *
 * Both '#' and ';' carry COMMENT; '(' '[' '{' carry BRA and ')' ']' '}'
 * carry KET; 'E' and 'e' carry EXP in addition to PRINTABLE.
 */
long	classTable[256] = {
	IGNORE,		/* ^@ */
	IGNORE,		/* ^A */
	IGNORE,		/* ^B */
	IGNORE,		/* ^C */
	IGNORE,		/* ^D */
	IGNORE,		/* ^E */
	IGNORE,		/* ^F */
	IGNORE,		/* ^G */
	IGNORE,		/* ^H */
	WHITE,		/* ^I */
	WHITE,		/* ^J */
	WHITE,		/* ^K */
	WHITE,		/* ^L */
	WHITE,		/* ^M */
	IGNORE,		/* ^N */
	IGNORE,		/* ^O */
	IGNORE,		/* ^P */
	IGNORE,		/* ^Q */
	IGNORE,		/* ^R */
	IGNORE,		/* ^S */
	IGNORE,		/* ^T */
	IGNORE,		/* ^U */
	IGNORE,		/* ^V */
	IGNORE,		/* ^W */
	IGNORE,		/* ^X */
	IGNORE,		/* ^Y */
	IGNORE,		/* ^Z */
	IGNORE,		/* ^[ */
	IGNORE,		/* ^\ */
	IGNORE,		/* ^] */
	IGNORE,		/* ^^ */
	IGNORE,		/* ^_ */
	PRINTABLE|WHITE,	/* space */
 	PRINTABLE,		/* ! */
 	PRINTABLE|STRINGC,	/* " */
 	PRINTABLE|COMMENT,	/* # */
 	PRINTABLE,		/* $ */
 	PRINTABLE,		/* % */
 	PRINTABLE,		/* & */
 	PRINTABLE|QUOTEC,	/* ' */
 	PRINTABLE|BRA,		/* ( */
 	PRINTABLE|KET,		/* ) */
 	PRINTABLE,		/* * */
 	PRINTABLE|SIGN,		/* + */
 	PRINTABLE,		/* , */
 	PRINTABLE|SIGN,		/* - */
 	PRINTABLE|DOT,		/* . */
 	PRINTABLE,		/* / */
 	PRINTABLE|DIGIT,	/* 0 */
 	PRINTABLE|DIGIT,	/* 1 */
 	PRINTABLE|DIGIT,	/* 2 */
 	PRINTABLE|DIGIT,	/* 3 */
 	PRINTABLE|DIGIT,	/* 4 */
 	PRINTABLE|DIGIT,	/* 5 */
 	PRINTABLE|DIGIT,	/* 6 */
 	PRINTABLE|DIGIT,	/* 7 */
 	PRINTABLE|DIGIT,	/* 8 */
 	PRINTABLE|DIGIT,	/* 9 */
 	PRINTABLE,		/* : */
 	PRINTABLE|COMMENT,	/* ; */
 	PRINTABLE,		/* < */
 	PRINTABLE,		/* = */
 	PRINTABLE,		/* > */
 	PRINTABLE,		/* ? */
  	PRINTABLE,		/*  @ */
	PRINTABLE,		/*  A */
	PRINTABLE,		/*  B */
	PRINTABLE,		/*  C */
	PRINTABLE,		/*  D */
	PRINTABLE|EXP,		/*  E */
	PRINTABLE,		/*  F */
	PRINTABLE,		/*  G */
	PRINTABLE,		/*  H */
	PRINTABLE,		/*  I */
	PRINTABLE,		/*  J */
	PRINTABLE,		/*  K */
	PRINTABLE,		/*  L */
	PRINTABLE,		/*  M */
	PRINTABLE,		/*  N */
	PRINTABLE,		/*  O */
	PRINTABLE,		/*  P */
	PRINTABLE,		/*  Q */
	PRINTABLE,		/*  R */
	PRINTABLE,		/*  S */
	PRINTABLE,		/*  T */
	PRINTABLE,		/*  U */
	PRINTABLE,		/*  V */
	PRINTABLE,		/*  W */
	PRINTABLE,		/*  X */
	PRINTABLE,		/*  Y */
	PRINTABLE,		/*  Z */
	PRINTABLE|BRA,		/*  [ */
	PRINTABLE|BACKSLASH,	/*  \ */
	PRINTABLE|KET,		/*  ] */
	PRINTABLE,		/*  ^ */
	PRINTABLE,		/*  _ */
  	PRINTABLE,		/*  ` */
	PRINTABLE,		/*  a */
	PRINTABLE,		/*  b */
	PRINTABLE,		/*  c */
	PRINTABLE,		/*  d */
	PRINTABLE|EXP,		/*  e */
	PRINTABLE,		/*  f */
	PRINTABLE,		/*  g */
	PRINTABLE,		/*  h */
	PRINTABLE,		/*  i */
	PRINTABLE,		/*  j */
	PRINTABLE,		/*  k */
	PRINTABLE,		/*  l */
	PRINTABLE,		/*  m */
	PRINTABLE,		/*  n */
	PRINTABLE,		/*  o */
	PRINTABLE,		/*  p */
	PRINTABLE,		/*  q */
	PRINTABLE,		/*  r */
	PRINTABLE,		/*  s */
	PRINTABLE,		/*  t */
	PRINTABLE,		/*  u */
	PRINTABLE,		/*  v */
	PRINTABLE,		/*  w */
	PRINTABLE,		/*  x */
	PRINTABLE,		/*  y */
	PRINTABLE,		/*  z */
	PRINTABLE|BRA,		/*  { */
	PRINTABLE|VBAR,		/*  | */
	PRINTABLE|KET,		/*  } */
	PRINTABLE|TWIDDLE,	/*  ~ */
	IGNORE,			/*  ^? */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
	IGNORE,			/* \200 + */
};

/* Initial token buffer; used until a token outgrows it (see adjustToken) */
static character	staticToken[64];
/* Buffer holding the text of the token being scanned; starts at staticToken */
character		*lexToken = staticToken;
/*
 * Capacity of lexToken.  It is computed with sizeof and handed to
 * malloc/realloc as a byte count, yet it is compared against the element
 * count lexTokenLen -- presumably `character' is a one-byte type;
 * TODO confirm against kalypso.h.
 */
int			lexTokenSize = sizeof (staticToken);
/* Number of characters stored so far in lexToken */
int			lexTokenLen;

/* Class bits of the character most recently produced by lexc () */
long	charClass;
/*
 * Pre-ANSI declarations of the library routines used in this file.
 * NOTE(review): these differ from the declarations in <string.h> and
 * <stdlib.h>; they would clash if a header ever pulled those in.
 */
extern char	*strcpy();
extern char	*malloc (), *realloc ();

static
adjustToken ()
{
	lexTokenSize *= 2;
	if (lexTokenSize < 0)
		return error ("lex: input token too large\n");
	if (lexToken == staticToken) {
		lexToken = (character *) malloc ((unsigned) lexTokenSize);
		bcopy ((char *) staticToken, (char *) lexToken, sizeof (staticToken));
	} else {
		lexToken = (character *) realloc ((char *) lexToken, (unsigned) lexTokenSize);
	}
	if (!lexToken) {
		lexToken = staticToken;
		lexTokenSize = sizeof (staticToken);
		error ("lex: input token too large--out of memory\n");
		return 0;
	}
	return 1;
}

/*
 * checkToken -- make sure there is room before appending to the token.
 *
 * Grows the buffer through adjustToken () once fewer than three free
 * slots remain: a single addToken may store two characters and
 * endToken stores one more.  Expands to a complete `if' statement,
 * trailing semicolon included, and executes `return nil' in the
 * CALLING function when the buffer cannot be grown; it is therefore
 * written at call sites without a semicolon and must not be followed
 * by `else'.
 */
#define checkToken	if (lexTokenLen > lexTokenSize - 3 && !adjustToken())\
				return nil;

/*
 * addTokenC -- append one character to the token, with no bounds check.
 * The argument is not parenthesized in the expansion.
 */
#define addTokenC(c)	(lexToken[lexTokenLen++] = c)

/*
 * addToken -- append character c to the token.  When special (c) is
 * true two characters are stored instead: STRQUOTE followed by
 * quoteChar (c).  (special, quoteChar and STRQUOTE are defined outside
 * this file.)  The argument is evaluated more than once, so it must be
 * free of side effects.
 */
#define addToken(c)	(special(c) ? (\
				addTokenC(STRQUOTE), \
				addTokenC(quoteChar(c)) \
			) : \
				addTokenC(c) \
			)
/*
 * endToken -- terminate the token with a '\0'.  Like any addTokenC it
 * advances lexTokenLen, so the count then includes the terminator.
 */
#define endToken()	addTokenC((character) '\0')

/*
 * lex -- scan the next token from f.
 *
 * The text of the token is left in lexToken, terminated by '\0', and
 * the kind of token is returned:
 *
 *	OP OC OS CP CS CC	( { [ ) ] }
 *	QUOTE			the ' character
 *	NUM			~x (text is itoa (code of x, 10)), or a
 *				name that qualifies as a number without
 *				any FLOATONLY character
 *	FLOAT			a name that qualifies as a number and
 *				has a FLOATONLY character
 *	NAME			any other run of name characters, or the
 *				text between two | characters
 *	STRING			the text between two " characters
 *	END			end of input
 *
 * White space and comments (from a COMMENT character to end of line)
 * are skipped.  nil is returned as soon as `jumping' is found set after
 * a read, and when the token buffer cannot be grown (see checkToken).
 *
 * Characters are obtained through lexc (), which also leaves the class
 * of the character in charClass; all decisions below test charClass.
 */
int
lex (f)
FILE		*f;
{
	int	c;

	lexTokenLen = 0;
	for (;;) {
		c = lexc (f);
		if (jumping)
			return nil;
		if (charClass & (BRA|KET)) {
			/* every bracket character is a token by itself */
			addToken (c);
			endToken ();
			switch (c) {
			case '(':
				return OP;
			case '{':
				return OC;
			case '[':
				return OS;
			case ')':
				return CP;
			case ']':
				return CS;
			case '}':
				return CC;
			}
		} else if (charClass & QUOTEC) {
			addToken (c);
			endToken ();
			return QUOTE;
		} else if (charClass & TWIDDLE) {
			extern char	*itoa ();

			/*
			 * ~x: the token text is the code of the character
			 * following the twiddle, as formatted by itoa.
			 */
			c = lexc (f);
			if (jumping)
				return nil;
			(void) strcpy (lexToken, itoa (c, 10));
			lexTokenLen = strlen (lexToken);
			endToken ();
			return NUM;
		} else if (charClass & WHITE) {
			;	/* skip white space */
		} else if (charClass & COMMENT) {
			/*
			 * Discard the rest of the line.  The raw reader is
			 * used here, so backslashes get no special treatment
			 * inside a comment.
			 */
			while ((c = iFgetchar (f)) != '\n') {
				if (jumping)
					return nil;
				if (c == EOF) {
					endToken ();
					return END;
				}
			}
		} else if (charClass & ENDOFFILE) {
			(void) strcpy (lexToken, "<end of file>");
			lexTokenLen = strlen (lexToken);
			endToken ();
			return END;
		} else if (charClass & VBAR) {
			/* |...|: everything up to the closing bar is a name */
			for (;;) {
				c = lexc (f);
				if (jumping)
					return nil;
				if (charClass & ENDOFFILE) {
					/*
					 * End of input ends the name, just
					 * as it ends a string below; reading
					 * on would only meet end of file
					 * again and grow the token without
					 * bound.
					 */
					clearerr (f);
					endToken ();
					return NAME;
				}
				if (charClass & VBAR) {
					/*
					 * Closing bar: one more character is
					 * read and, unless it is end of
					 * file, pushed straight back.
					 * NOTE(review): the purpose of this
					 * look-ahead is not evident from
					 * this file.
					 */
					c = lexc (f);
					if (jumping)
						return nil;
					if (charClass & ENDOFFILE)
						clearerr (f);
					else
						unlexc (f, c);
					endToken ();
					return NAME;
				}
				checkToken
				addToken (c);
			}
		} else if (charClass & STRINGC) {
			/* "...": closing quote or end of input ends it */
			for (;;) {
				c = lexc (f);
				if (jumping)
					return nil;
				if (charClass & (STRINGC|ENDOFFILE)) {
					if (charClass & ENDOFFILE)
						clearerr (f);
					endToken ();
					return STRING;
				}
				checkToken
				addToken (c);
			}
		} else if (charClass & PRINTABLE) {
			int	isnum;		/* still looks like a number */
			int	isfloat;	/* saw a FLOATONLY character */
			int	hasdigit;	/* saw at least one DIGIT */

			/*
			 * Name or number.  Characters are collected until
			 * one with a NOTNAME class bit turns up; the token
			 * is a number only if every character passed the
			 * tests below and at least one was a digit.
			 */
			isnum = 1;
			isfloat = 0;
			hasdigit = 0;
			for (;;) {
				if (!(charClass & FLOATC)) {
					isnum = 0;
				} else {
					/* may not start a number */
					if (lexTokenLen == 0 &&
					    (charClass & NOTFIRSTINNUM))
					{
						isnum = 0;
					}
					/* may only start a number */
					if (lexTokenLen != 0 &&
					    (charClass & FIRSTINNUM))
					{
						isnum = 0;
					}
					if (charClass & FLOATONLY)
						isfloat = 1;
					if (charClass & DIGIT)
						hasdigit = 1;
				}
				checkToken
				addToken (c);
				c = lexc (f);
				if (jumping)
					return nil;
				if (charClass & (NOTNAME)) {
					/* terminator: leave it for the next call */
					if (charClass & ENDOFFILE)
						clearerr (f);
					unlexc (f, c);
					endToken ();
					if (isnum && hasdigit) {
						if (isfloat)
							return FLOAT;
						else
							return NUM;
					}
					return NAME;
				}
			}
		}
	}
}

static int parseQuoted ();

/*
 * lexc -- fetch the next significant character from f.
 *
 * Reads with iFgetchar (), strips each character to seven bits and
 * looks its class up in classTable; characters whose class has the
 * IGNORE bit are skipped.  A character with the BACKSLASH bit is
 * replaced by the result of parseQuoted () and classed as PRINTABLE.
 *
 * The class of the character returned is left in charClass.  At end of
 * file charClass is ENDOFFILE and the value returned is 0 (or EOF when
 * the end of file was met by parseQuoted ()).  nil is returned at once
 * if `jumping' is set after the read.
 */
int
lexc (f)
FILE		*f;
{
	int	ch;

	for (;;) {
		ch = iFgetchar (f);
		if (jumping)
			return nil;
		if (ch == EOF) {
			charClass = ENDOFFILE;
			ch = 0;
		} else {
			ch &= 0177;
			charClass = classTable [ch];
			if (charClass & BACKSLASH) {
				ch = parseQuoted (f);
				charClass = (ch == EOF) ? ENDOFFILE : PRINTABLE;
			}
		}
		if (!(charClass & IGNORE))
			break;
	}
	return ch;
}

/*
 * parseQuoted -- decode the character(s) following a backslash.
 *
 * Reads from f with iFgetchar (), stripping each character to seven
 * bits, and returns:
 *
 *	the control character for n, f, b, r, v and t;
 *	the value of an octal number when the first character is an
 *	    octal digit -- further octal digits are accumulated and the
 *	    first character that is not one is pushed back;
 *	the character itself in every other case;
 *	EOF when end of file is met, even in the middle of an octal
 *	    number (the digits read so far are then dropped);
 *	nil as soon as `jumping' is found set after a read.
 *
 * NOTE(review): up to three digits are accepted AFTER the first one,
 * i.e. four in total, and the result is not reduced to the character
 * range.  Confirm whether four digits are intended before relying on
 * it.
 */
static int
parseQuoted (f)
FILE		*f;
{
	int	ch;
	int	value;
	int	extra;

	ch = iFgetchar (f);
	if (jumping)
		return nil;
	if (ch == EOF)
		return EOF;
	ch &= 0177;

	if ('0' <= ch && ch <= '7') {
		value = ch - '0';
		for (extra = 0; extra < 3; extra++) {
			ch = iFgetchar (f);
			if (jumping)
				return nil;
			if (ch == EOF)
				return EOF;
			ch &= 0177;
			if (ch < '0' || ch > '7') {
				/* not part of the number: give it back */
				(void) iFungetchar (f, ch);
				break;
			}
			value = value * 8 + (ch - '0');
		}
		return value;
	}

	switch (ch) {
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case 'v':
		return '\v';
	}
	/* anything else stands for itself */
	return ch;
}

/*
 * unlexc -- hand character c back to stream f through iFungetchar ().
 * The result of iFungetchar () is discarded and no value is returned.
 */
int
unlexc (f, c)
FILE		*f;
int		c;
{
	(void) iFungetchar (f, c);
}
