/*- -*- Mode: C++ -*-							 -*/
/*- Copyright (C) 1992 Institute for New Generation Computer Technology. -*/
/*- For distribution and other terms, please refer to the COPYRIGHT file.    -*/
/*- (Read COPYRIGHT for detailed information.)                           -*/
/*-                                                                      -*/
/*-		    Author: Shinji Yanagida (yanagida@nsis.cl.nec.co.jp) -*/
/*-		    Author: Toshio Tange (t-tange@nsis.cl.nec.co.jp)	 -*/

// lexical analysis program

#include <stdio.h>
#include <stream.h>
#include <ctype.h>
#include "info.h"
#include "rgline.h"
#include "parse.h"
#include "lyerror.h"
#include "loader/mem.h"

// Routines and data defined in other translation units.  The descriptions
// below state only what this file relies on.

// Returns the token id for the pseudo-instruction named by pseudo (including
// its leading '.'), or E_UNDF when the name is not recognised.
extern int pseudo_instruction (char *pseudo);
// Return the register number for a register name, or E_UNDF when the name is
// not recognised.  yylex tries genreg_to_int first, then reg_to_int.
extern int genreg_to_int (char *regs);
extern int reg_to_int (char *regs);
// Returns the instruction id for insts, or E_UNDF; on success *type receives
// the token type that yylex returns to the parser.
extern int instruction (const char *insts, int *type);
// Reports a lexical error: an EL_* error code, the current line number and
// the offending text.  The first parameter was previously spelled `errno`,
// which stops compiling as soon as <errno.h> is visible, because errno may
// be a macro.
extern void lex_error (int errnum, int line, const char *msg);
// Input stream that every routine in this file reads from.
extern istream *ain;

// Hand-written prototypes for the C library routines used by this file.
// NOTE(review): these duplicate declarations normally supplied by <string.h>
// and <stdlib.h>; strlen in particular is declared there as returning size_t,
// so this block may conflict if those headers are ever included -- confirm
// against the target toolchain.
extern	"C" {
    int	    strlen (const char *s);
    int	    strcmp (const char *s1, const char *s2);
    char   *strcpy (char *s1, const char *s2);
    double  atof (const char *s);
}
// Kanji-in (KI) / kanji-out (KO) handling for quoted text.
//
// SI, SO and EC are the control characters that collect_quoted() treats
// specially: SI (value 0x0E) switches it into its HANKAKU state, SO (value
// 0x0F) switches back, and EC (ESC, octal 033) starts an escape sequence.
// NOTE(review): 0x0E/0x0F are named SO/SI respectively in ASCII, i.e. the
// opposite of the names used here -- confirm this is intentional.
#define SI	('N'&0x1f)
#define SO	('O'&0x1f)
#define EC	'\033'

// Classify the characters that follow ESC:
//   ESC '$' ... ends with 'B' or '@'       -> kanji-in  (enter ZENKAKU)
//   ESC '(' ... ends with 'B', 'J' or 'H'  -> kanji-out (back to NOKANJI)
// These presumably correspond to the ISO-2022-JP (JIS) designation
// sequences -- TODO confirm.
#define iskifirst(p)  ((p) == '$')
#define iskilast(p)   ((p) == 'B' || (p) == '@')
#define iskofirst(p)  ((p) == '(')
#define iskolast(p)   ((p) == 'B' || (p) == 'J' || (p)	== 'H')
// The lone ')' below presumably balances the '(' literal above for editor
// parenthesis matching.
//)

// Case-label helpers.  Each macro expands to a run of character case labels
// WITHOUT the leading `case` keyword and WITHOUT the trailing ':', so that it
// can be written as `case UPPER_CASE:` inside a switch statement.

// 'A' .. 'Z'
#define UPPER_CASE \
     'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':\
case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':\
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':\
case 'V': case 'W': case 'X': case 'Y': case 'Z'

// 'a' .. 'z'
#define LOWER_CASE \
     'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':\
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':\
case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':\
case 'v': case 'w': case 'x': case 'y': case 'z'

// '1' .. '9' ('0' is handled separately because it introduces the
// octal/hex/float/double notations in yylex)
#define DIGIT_1_9 \
     '1': case '2': case '3': case '4': case '5': case '6': case '7':\
case '8': case '9'

// Named character constants used by the lexer.

static const char Colon = ':';
static const char DQuote = '\"';// double quote (delimits strings)
static const char Minus = '-';
static const char NewLine = '\n';
static const char Period = '.';
static const char Plus = '+';
static const char Slash = '/';
static const char SQuote = '\'';// single quote (delimits quoted labels)
static const char Star = '*';
// Markers that may follow a leading '0' in a numeric literal (see yylex):
static const char DBLMKL = 'd'; // double float, lower case
static const char DBLMKU = 'D'; // double float, upper case
static const char EXPMKL = 'e'; // exponent marker, lower case
static const char EXPMKU = 'E'; // exponent marker, upper case
static const char FLTMKL = 'f'; // single float, lower case
static const char FLTMKU = 'F'; // single float, upper case
static const char HEXMKL = 'x'; // hexadecimal, lower case
static const char HEXMKU = 'X'; // hexadecimal, upper case

// Is_true(t): non-zero when the NUL-terminated string t is exactly "true".
// The argument is evaluated several times, so it must have no side effects.
#define Is_true(t) (   (t)[0] == 't' \
		    && (t)[1] == 'r' \
		    && (t)[2] == 'u' \
		    && (t)[3] == 'e' \
		    && (t)[4] == '\0' )

// Is_false(t): non-zero when the NUL-terminated string t is exactly "false".
// The argument is evaluated several times, so it must have no side effects.
#define Is_false(t) (  (t)[0] == 'f' \
		    && (t)[1] == 'a' \
		    && (t)[2] == 'l' \
		    && (t)[3] == 's' \
		    && (t)[4] == 'e' \
		    && (t)[5] == '\0' )

// Token text buffer.  yylex() points this at a local array at the start of
// every call, so the pointer is only meaningful while yylex() is running.
static char *text;

// Current input line counter; yylex() increments it when it consumes a
// newline and passes it to lex_error() when reporting errors.
int	line;

// character / string to number conversion functions

static inline int
ctoi (char ascii)
// ascii:	digit character
//
// Convert a single digit character to its numeric value.  Any character
// outside [0-9a-fA-F] converts to 0.
{
    int value = 0;

    if ('0' <= ascii && ascii <= '9')
        value = ascii - '0';
    else if ('a' <= ascii && ascii <= 'f')
        value = 10 + (ascii - 'a');
    else if ('A' <= ascii && ascii <= 'F')
        value = 10 + (ascii - 'A');

    return value;
}

int
str_to_int (char *text, int sign, int radix)
// text:	NUL-terminated digit string
// sign:	1 for a positive result, -1 for a negative result
// radix:	numeric base of the digits
//
// Convert the digit string text to an integer in base radix and multiply
// the result by sign.  Each character is mapped with ctoi(), so characters
// that are not hexadecimal digits contribute 0.
{
    int value = 0;

    for (char *cursor = text; *cursor != '\0'; cursor++)
        value = value * radix + ctoi (*cursor);

    return sign * value;
}

static float
str_to_f (char *s)
// s:	NUL-terminated numeric string
//
// Convert the string s to a single-precision floating point number.
{
    double wide = atof (s);

    return (float) wide;
}

static double
str_to_d (char *s)
// s:	NUL-terminated numeric string
//
// Convert the string s to a double-precision floating point number.
{
    double value = atof (s);

    return value;
}

// functions that collect character sequences from the input stream

static char *
collect_alnum (char *text)
// s:	ʸ
//
// ʸӲ¤ӤޤȤ롥
{
    char    c;
    register char *s = text;
    register istream *in = ain;

    while (in->get (c)) {
	if (isalnum (c) || c == '_')
	    *s++ = c;
	else
	    break;
    }
    *s = '\0';
    in->putback (c);
    return s;
}

static inline int
isodigit (char c)
// Return 1 if c is an octal digit ('0'..'7'), otherwise 0.
// Declared static, like ctoi(), because it is a helper private to this file.
{
    return (c >= '0' && c <= '7');
}

static inline int
ishdigit (char c)
// Return 1 if c is a hexadecimal digit ([0-9a-fA-F]), otherwise 0.
// Declared static, like ctoi(), because it is a helper private to this file.
{
    return ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')
            || (c >= 'A' && c <= 'F'));
}

static char *
collect_digit (char *s, int radix)
// s:	ʸ
// radix:	
//
// ɽ롢¤ӤޤȤ롥
{
    char    c;
    if (radix == 10) {		// ʿ
	while (ain->get (c)) {
	    if (isdigit (c))
		*s++ = c;
	    else
		break;
	}
    }
    else if (radix == 8) {	// ʿ
	while (ain->get (c)) {
	    if (isodigit (c))
		*s++ = c;
	    else
		break;
	}
    }
    else if (radix == 16) {	// ʿ
	while (ain->get (c)) {
	    if (ishdigit (c))
		*s++ = c;
	    else
		break;
	}
    }
    *s = '\0';
    ain->putback (c);
    return s;
}

static char
collect_hexchar ()
// Read exactly two hexadecimal digits from ain (the part of a "\xhh" escape
// after the 'x') and return the character they encode.
//
// Returns '\0' after reporting an error when a digit is missing (EL_XDIG)
// or when the digits encode the value 0 (EL_DIG0).
{
    char    buf[4];
    int     i;                  // used after the loop, so declared outside it

    for (i = 0; i < 2; i++) {
        if (!ain->get (buf[i]))
            buf[i] = '\0';      // end of input: treat as a missing digit
        if (!ishdigit (buf[i])) {
            buf[i + 1] = '\0';
            lex_error (EL_XDIG, line, buf);
            return '\0';
        }
    }
    buf[i] = '\0';

    int value = str_to_int (buf, 1, 16);
    if (value == 0) {
        // A NUL cannot be stored in the NUL-terminated token text.
        lex_error (EL_DIG0, line, buf);
        return '\0';
    }
    return (char) value;
}

static char
collect_octchar (char c)
// c:	first octal digit of the escape (already read by the caller)
//
// Read up to two further octal digits from ain (a "\o", "\oo" or "\ooo"
// escape) and return the character they encode.  A character that ends the
// escape early is pushed back so that the caller sees it; previously it was
// silently dropped, which e.g. swallowed the closing quote of '\7'.
//
// Returns '\0' after reporting EL_DIG0 when the digits encode the value 0.
{
    char    buf[4];
    int     i;                  // used after the loop, so declared outside it

    buf[0] = c;
    for (i = 1; i < 3; i++) {
        if (!ain->get (buf[i]))
            break;              // end of input
        if (!isodigit (buf[i])) {
            ain->putback (buf[i]);
            break;
        }
    }
    buf[i] = '\0';

    int value = str_to_int (buf, 1, 8);
    if (value == 0) {
        // A NUL cannot be stored in the NUL-terminated token text.
        lex_error (EL_DIG0, line, buf);
        return '\0';
    }
    return (char) value;
}

// Scanner state used by collect_quoted() while reading a quoted literal.
typedef enum {
    NOKANJI,			// ordinary text: backslash escapes and the closing quote are recognised
    ZENKAKU,			// after a kanji-in sequence: characters are consumed two at a time
    HANKAKU,			// between SI and SO: characters are copied verbatim
    ESC,			// the previous character was EC (escape)
    KIF,			// inside a kanji-in sequence (EC followed by '$')
    KOF				// inside a kanji-out sequence (EC followed by '(')
}	CodeType;

static char *
collect_quoted (char *s, char q)
// s:	destination buffer (the caller guarantees it is large enough)
// q:	quote character that delimits the literal ('\'' or '\"'); the
//	opening quote has already been consumed by the caller
//
// Collect the body of a quoted literal from ain into s, NUL-terminate it and
// return a pointer to the terminating NUL.
//
// In ordinary text (state NOKANJI) the following are recognised:
//   \n  \t  \\  \'		the usual escapes
//   \xhh			two hexadecimal digits (collect_hexchar)
//   \o  \oo  \ooo		octal digits (collect_octchar)
//   \<any other char>		that character itself (this covers \")
//   a doubled quote (qq)	one literal quote character
//   a single quote (q)		end of the literal
// Escape (EC) and shift (SI/SO) sequences are copied into s unchanged and
// only switch the scanner state; see CodeType.
//
// On a malformed sequence the input is skipped up to the next quote, ';' or
// newline and the text collected so far is returned.  End of input ends the
// literal in the same way; every read is checked so that an unterminated
// literal can no longer loop forever.
{
    char    c;
    CodeType type = NOKANJI;
    CodeType before = NOKANJI;	// state to return to after an unknown escape

    while (ain->get (c)) {
        switch (c) {
        case EC:			// Escape Code
            *s++ = c;
            before = type;
            type = ESC;
            break;
        case SI:
            if (type != NOKANJI)
                goto err_l;
            *s++ = c;
            type = HANKAKU;
            break;
        case SO:
            if (type != HANKAKU)
                goto err_l;
            *s++ = c;
            type = NOKANJI;
            break;
        default:
            switch (type) {
            case ESC:
                *s++ = c;
                if (iskifirst (c))
                    type = KIF;
                else if (iskofirst (c))
                    type = KOF;
                else
                    type = before;
                break;
            case KIF:
                *s++ = c;
                if (iskilast (c))
                    type = ZENKAKU;
                break;
            case KOF:
                *s++ = c;
                if (iskolast (c))
                    type = NOKANJI;
                break;
            case NOKANJI:
                if (c == '\\') {
                    if (!ain->get (c))
                        goto eof_l;
                    switch (c) {
                    case 'n':
                        *s++ = '\n';
                        break;
                    case 't':
                        *s++ = '\t';
                        break;
                    case '\\':
                        *s++ = '\\';
                        break;
                    case '\'':
                        *s++ = '\'';
                        break;
                    case 'x':
                        c = collect_hexchar ();
                        if (c == '\0')
                            goto err_l;
                        *s++ = c;
                        break;
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                        c = collect_octchar (c);
                        if (c == '\0')
                            goto err_l;
                        *s++ = c;
                        break;
                    default:
                        *s++ = c;
                    }
                }
                else if (c == q) {
                    // A quote ends the literal unless it is doubled.
                    if (!ain->get (c))
                        goto eof_l;	// closing quote was the last character
                    if (c != q) {
                        *s = '\0';
                        ain->putback (c);
                        return s;
                    }
                    *s++ = c;
                }
                else {
                    *s++ = c;
                }
                break;
            case ZENKAKU:
                // Two-byte character: copy the first byte, validate the second.
                *s++ = c;
                if (!ain->get (c))
                    goto eof_l;
                if ((c >= 0x7F) || (c <= ' '))
                    goto err_l;
                *s++ = c;
                break;
            case HANKAKU:
                *s++ = c;
                break;
            default:
                goto err_l;
            }
            break;
        }
    }

eof_l:
    // End of input inside the literal: return what has been collected.
    *s = '\0';
    return s;

err_l:
    // Resynchronise at the next quote, ';' or newline (or end of input).
    while (c != q && c != ';' && c != '\n') {
        if (!ain->get (c))
            break;
    }
    *s = '\0';
    return s;
}

static char *
collect_float (char *s)
// s:	destination buffer (the caller guarantees it is large enough)
//
// Collect the text of a floating point number from ain into s and
// NUL-terminate it.  The accepted shape is
//	[-] digits [ . digits [ (e|E) [+|-] digits ] ]
// i.e. an exponent is only recognised after a decimal point.  The exponent
// marker is always stored as lower-case 'e'.  The first character that does
// not belong to the number is pushed back onto the stream.
// Returns a pointer to the terminating NUL.
//
// Every read is checked: at end of input the number simply ends, instead of
// re-examining a stale character (which could loop forever on a trailing
// digit) and pushing it back.
{
    char    c;
    int     more = ain->get (c) ? 1 : 0;	// 0 once the input is exhausted

    if (more && c == Minus) {		// -
        *s++ = c;
        more = ain->get (c) ? 1 : 0;
    }
    while (more && isdigit ((unsigned char) c)) {	// 123
        *s++ = c;
        more = ain->get (c) ? 1 : 0;
    }
    if (more && c == Period) {		// 123.
        *s++ = c;
        more = ain->get (c) ? 1 : 0;
        while (more && isdigit ((unsigned char) c)) {	// 123.456
            *s++ = c;
            more = ain->get (c) ? 1 : 0;
        }
        if (more && (c == EXPMKL || c == EXPMKU)) {	// 123.456e
            *s++ = EXPMKL;
            more = ain->get (c) ? 1 : 0;
            if (more && (c == Minus || c == Plus)) {	// 123.456e+
                *s++ = c;
                more = ain->get (c) ? 1 : 0;
            }
            while (more && isdigit ((unsigned char) c)) {	// 123.456e+78
                *s++ = c;
                more = ain->get (c) ? 1 : 0;
            }
        }
    }
    *s = '\0';
    if (more)
        ain->putback (c);
    return s;
}

static int
collect_plus_minus (char first)
// first:	'+' or '-' (the sign already consumed by the caller)
//
// Read the rest of a run of '+' and '-' characters from ain and encode the
// whole run as a bit pattern: '+' = 1, '-' = 0, with first in bit 0 and each
// following sign in the next higher bit.  The character that ends the run is
// pushed back onto the stream.
//
// Signs beyond the width of unsigned int are consumed but not recorded,
// rather than shifting by an out-of-range amount.  The loop ends cleanly at
// end of input.
{
    const unsigned width = (unsigned) (sizeof (unsigned) * 8);
    unsigned bits = (first == Plus) ? 1u : 0u;
    unsigned pos = 1;
    char    c;

    while (ain->get (c)) {
        if (c != Minus && c != Plus) {
            ain->putback (c);
            break;
        }
        if (pos < width) {
            if (c == Plus)
                bits |= (1u << pos);
            pos++;
        }
    }
    return (int) bits;
}

// lexical analysis function
int
yylex ()
//
// Lexical analyzer; called by yyparse().
//
// Reads the next token from ain and returns its token code, storing the
// token value (if any) in yylval.  Returns 0 at end of input and E_ERR after
// reporting a lexical error through lex_error().
//
// Recognised input:
//   blanks, tabs, newlines		skipped (newlines bump `line`)
//   % ... end of line			comment
//   /* ... */				comment
//   ; , ! :				returned as themselves
//   ::					LABEL "::"
//   .name				pseudo-instruction
//   `true  `false			BTRUE / BFALSE
//   #name  #'quoted name'		CLASS_NAME
//   Name (upper-case initial)		REGS
//   name (lower-case initial)		instruction (only as the first such
//					word of a statement) or LABEL
//   'quoted'				LABEL
//   "quoted"				STRINGP
//   run of + and -			PML (see collect_plus_minus)
//   0ooo  0xhh  0f...  0d...		OCT / HEX / FLOAT / DOUBLE
//   [-]digits				DEC (a leading '-' selects decimal only)
//
// Differences from the previous version: every newline inside a /* */
// comment is counted (not only those directly after a '*'); characters are
// no longer compared against EOF; the text passed to lex_error() for
// EL_UDCH and EL_ICEX is always initialised and NUL-terminated; and reads
// are checked before their result is used or pushed back.
{
    // TRUE while the next lower-case word may still be an instruction
    // mnemonic; reset by ';'.
    static Boolean parse_in_instruction_p = TRUE;
    char    c, nc, prev;
    char    buffer[BUFSIZ];
    text = buffer;
    char   *p = text;
    char   *np;
    int     instid, type;
    int     got;			// non-zero when the last read succeeded
    int     sign = 1;			// number sign ... sign == 1 -> plus

begin:
    if (!ain->get (c))
        return 0;			// EOF

    switch (c) {
    case ' ':
    case '\t':
        goto begin;			// Space or Tab

    case '\n':				// Newline
        line++;
        goto begin;

    case '%':				// comment %...\n
        while (ain->get (c)) {
            if (c == NewLine) {
                line++;
                break;
            }
        }
        goto begin;

    case '/':				// comment /*... */
        if (!ain->get (nc))
            break;			// lone '/' at end of input
        if (nc != Star) {
            ain->putback (nc);
            break;			// lone '/': reported below
        }
        prev = '\0';
        while (ain->get (c)) {
            if (prev == Star && c == Slash)
                break;
            if (c == NewLine)
                line++;
            prev = c;
        }
        goto begin;

    case ';':				// semi colon (';')
        parse_in_instruction_p = TRUE;
        return ';';

    case ',':				// comma (',')
        return ',';

    case '!':				// ('!')
        return c;

    case ':':				// (':') or ("::")
        if (ain->get (nc)) {
            if (nc == Colon) {
                yylval.name = memorysave ("::");
                return LABEL;
            }
            ain->putback (nc);
        }
        return ':';

    case '.':				// pseudo-instruction
        *p++ = '.';
        collect_alnum (p);
        if ((instid = pseudo_instruction (text)) != E_UNDF) {
            parse_in_instruction_p = FALSE;
            return instid;
        }
        lex_error (EL_IPSI, line, text);
        return E_ERR;

    case '`':				// boolean
        *p++ = '`';
        collect_alnum (p);
        if (Is_true (text + 1))
            return BTRUE;
        if (Is_false (text + 1))
            return BFALSE;
        lex_error (EL_IBEX, line, text);
        return E_ERR;

    case '#':				// class name
        *p++ = '#';
        *p = '\0';			// keep text valid for the error paths
        if (!ain->get (c)) {
            lex_error (EL_ICEX, line, text);
            return E_ERR;
        }
        if (islower ((unsigned char) c)) {	// #class_name
            *p++ = c;
            collect_alnum (p);
            // skip '#'
            yylval.name = memorysave (text + 1);
            return CLASS_NAME;
        }
        if (c == '\'') {		// #'class_name'
            np = collect_quoted (p, SQuote);
            if (np == 0) {
                lex_error (EL_LBOF, line, "class name");
                return E_ERR;
            }
            // skip '#'
            yylval.name = memorysave (text + 1);
            return CLASS_NAME;
        }
        lex_error (EL_ICEX, line, text);
        return E_ERR;

    case UPPER_CASE:			// register
        *p++ = c;
        collect_alnum (p);
        if ((yylval.number = genreg_to_int (text)) != E_UNDF)
            return REGS;		// argument register

        if ((yylval.number = reg_to_int (text)) != E_UNDF)
            return REGS;		// special register

        lex_error (EL_IREX, line, text);
        return E_ERR;

    case LOWER_CASE:			// instruction or label
        *p++ = c;
        collect_alnum (p);
        if (parse_in_instruction_p) {
            if ((instid = instruction (text, &type)) != E_UNDF) {
                parse_in_instruction_p = FALSE;
                yylval.number = instid;
                return type;
            }
        }
        yylval.name = memorysave (text);
        return LABEL;

    case '\'':				// quoted label
        np = collect_quoted (p, SQuote);
        if (np == 0) {
            lex_error (EL_LBOF, line, "label");
            return E_ERR;
        }
        yylval.name = memorysave (text);
        return LABEL;

    case '"':				// string
        np = collect_quoted (p, DQuote);
        if (np == 0) {
            lex_error (EL_LBOF, line, "string");
            return E_ERR;
        }
        yylval.name = memorysave (text);
        return STRINGP;

    case '+':				// +-+-+-+-+-+-
        yylval.number = collect_plus_minus (Plus);
        return PML;

    case '-':
        got = ain->get (c) ? 1 : 0;
        if (!got || !isdigit ((unsigned char) c)) {	// -+-+-+-+-+-+
            if (got)
                ain->putback (c);
            yylval.number = collect_plus_minus (Minus);
            return PML;
        }
        sign = -1;			// minus number; c is its first digit
        /* no break */

    case '0':				// special format number
        if (c == '0') {
            got = ain->get (c) ? 1 : 0;
            if (got && sign != -1) {
                if (isodigit (c)) {	// octal
                    *p++ = c;
                    collect_digit (p, 8);
                    yylval.number = str_to_int (text, sign, 8);
                    return OCT;
                }
                if (c == HEXMKL || c == HEXMKU) {	// hexadecimal
                    collect_digit (p, 16);
                    yylval.number = str_to_int (text, sign, 16);
                    return HEX;
                }
                if (c == FLTMKL || c == FLTMKU) {	// float
                    collect_float (p);
                    yylval.fval = str_to_f (text);
                    return FLOAT;
                }
                if (c == DBLMKL || c == DBLMKU) {	// double
                    collect_float (p);
                    yylval.dval = str_to_d (text);
                    return DOUBLE;
                }
            }
            // plain zero
            if (got)
                ain->putback (c);
            yylval.number = 0;
            return DEC;
        }
        /* no break */

    case DIGIT_1_9:			// number
        *p++ = c;
        collect_digit (p, 10);
        yylval.number = str_to_int (text, sign, 10);
        return DEC;

    default:
        break;
    }

    // Unrecognised character: report it (c still holds the offender).
    text[0] = c;
    text[1] = '\0';
    lex_error (EL_UDCH, line, text);	// error

    return E_ERR;
}

/*-----------------
* Local Variables:
* c-argdecl-indent:4
* c-indent-level:4
* c-label-offset:-4
* c-continued-statement-offset:4
* End:
*/
