%{
/****************************************************************
Copyright (C) The University of Melbourne 1993
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name of The University of Melbourne 
or any of its entities not be used in advertising or publicity
pertaining to distribution of the software without specific,
written prior permission.

THE UNIVERSITY OF MELBOURNE DISCLAIMS ALL WARRANTIES WITH REGARD
TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL THE UNIVERSITY
OF MELBOURNE OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.

AUTHORS : Jason Lee (jasonl@cs.mu.oz.au)
	  Andrew Davison (ad@cs.mu.oz.au)

COMMENTS : This file contains the definitions used by lex to create
	   a lexical function for bebop. It also contains the way
	   we can print out nice error messages.
*******************************************************************/

/* Include all the header files needed */
#include "bebop.h"

/* Include yacc's list of tokens */
#include "y.tab.h"

/* Extern declarations */
extern char endflag;	/* defined outside this file; set to FALSE by init_lex() */

/* Prototypes (forward declarations) of routines that are not defined in
   this file.  The rules below hand the matched text (and its length) to
   pred_insert, var_lookup, var_insert and const_insert and pass the
   result on to the parser through yylval.
*/
void yyerror(const char *mesg);
FAPTR pred_insert(const char *name, int length);
VARPTR var_lookup(const char *s);
VARPTR var_insert(const char *var, int length);
CONSTPTR const_insert(const char *str, int length, int lineno, const char *error_line, int error_len);

/* The error output below, and the nice style in which errors are
   displayed to the user, is based on what Fergus Henderson (fjh) did.
   Thanks Fergus for allowing me to use your idea.
*/

int nerrors;		/* Number of errors so far */
char *char_line;	/* ptr to the input line being scanned */
char *prev_line;	/* buffer of previous line */
int line_size;		/* allocated size of char_line; store_char() doubles it when full */
int column;		/* offset into line of next char */

/* These variables are a communication channel with the parser
   regarding the renaming of variables in the dbs section,
   and regarding when ground and ever should be considered tokens
   in a when declaration instead of just atoms.
   All of them are set to FALSE (or 0) by init_lex().
*/
char var_dbs;		/* non-zero: variables not starting with an underscore get a dollar and dbs_counter appended */
int dbs_counter;	/* number appended to renamed variables while var_dbs is set */
char ground_when;	/* non-zero: ever and ground are returned as TOK_EVER / TOK_GROUND, otherwise as atoms */
char var_noinp;		/* non-zero: i and o are returned as atoms, otherwise as TOK_INPUT / TOK_OUTPUT */
char no_hash;		/* non-zero: a trailing hash on a variable is reported as an error */

/* These variables are for communication with errors */
int bec_col;		/* set to column - LEN_BEC - 1 each time the becomes keyword is scanned */
int iam_col;		/* set to column - LEN_IAM - 1 each time the i_am keyword is scanned */

/* Re-define input() so as to grab a line at a time and produce nice
   error output.. fjh style.

   Original input() :
   # define input() (((yytchar=yysptr>yysbuf?U(*--yysptr):getc(yyin))==10?(yylineno ++,yytchar):yytchar)==EOF?0:yytchar)

   The replacement differs from the original only in calling
   get_next_char() where the original called getc(yyin).  As before,
   pushed-back characters (yysptr > yysbuf) are returned first, yylineno
   is incremented when a newline (character 10) is read, and 0 is
   yielded at end of input.  The macro relies on yytchar, yysptr, yysbuf
   and U, none of which are declared in this file.
   NOTE(review): presumably these come from the scanner skeleton of the
   lex implementation in use -- confirm before switching lex versions.
*/

static char get_next_char(void);

#undef input
#define input() (((yytchar=yysptr>yysbuf?U(*--yysptr):get_next_char())==10?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)

/* These are for comments, double and single quoted strings.
   The actions read ahead with input() and keep the character just read
   in c2 and the one before it in c1 (the percent-comment action uses
   only c1).  A value of 0 means input() reported end of input.
*/
static char c1, c2;

/* These are just for general use; the variable rule uses them as
   indices into yytext.
*/
static int i, j;


%}

/* Regular definitions to make life easy.
   valid is the (possibly empty) tail of an identifier: letters, digits,
   dollar and underscore.  Atoms start with a lower case letter (atom1)
   or a dollar (atom2); variables start with an upper case letter (var1)
   or an underscore (var2) and may end in a single hash.
   dqstring, sqstring, comment1 and comment2 match only the opening
   delimiter; the actions attached to them consume the rest by hand.
   letter is not referenced by any rule in this file.
*/
line		[\n]
under		[_]
doll		[\$]
whitesp		[ \t]+
upper		[A-Z]
lower		[a-z]
letter		[A-Za-z]
digit		[0-9]
alphanum	[A-Za-z0-9]
valid		[A-Za-z0-9\$_]*
atom1		{lower}{valid}
atom2		{doll}{valid}
var1		{upper}{valid}#?
var2		{under}{valid}#?
dqstring	\"
sqstring	\'
comment1	\%
comment2	"/*"
%%
{line}				{/* Newlines carry no token: skip them.
				    The keyword and operator rules that
				    follow are listed ahead of the atom
				    rule so that lex picks them whenever
				    both match exactly the same text; a
				    longer identifier still becomes an atom.
				 */;}

{whitesp}			{/* blanks and tabs: no action, skip */;}

"invisible"			{ return(TOK_INVISIBLE);}

"initial"			{ return(TOK_INITIAL);}

"clauses"			{ return(TOK_CLAUSES);}

"pnu"				{ return(TOK_PNU);}

"nu"				{ return(TOK_NU);}

"dbs"				{ return(TOK_DBS);}

"end"				{ return(TOK_END);}

"i"				{ if (!var_noinp)
				  {
					/* var_noinp is clear: keyword */
					return(TOK_INPUT);
				  } 
				  else
				  {
					/* var_noinp is set: plain atom */
					yylval.pred = pred_insert(yytext, yyleng);
					return(TOK_ATOM);
				  }
				}

"o"				{ if (!var_noinp)
				  {
					/* var_noinp is clear: keyword */
					return(TOK_OUTPUT);
				  }
				  else
				  {
					/* var_noinp is set: plain atom */
					yylval.pred = pred_insert(yytext, yyleng);
					return(TOK_ATOM);
				  }
				}

"$any"				{ return(TOK_ANY);}

"becomes"			{ bec_col = column - LEN_BEC - 1;
				  /* bec_col is kept for the error output.
				     NOTE(review): LEN_BEC is defined
				     elsewhere, presumably the length of
				     the keyword -- confirm.
				   */
				  return(TOK_BECOMES);
				}

"i_am"				{ iam_col = column - LEN_IAM - 1;
				  /* iam_col is kept for the error output.
				     NOTE(review): LEN_IAM is defined
				     elsewhere, presumably the length of
				     the keyword -- confirm.
				   */
				  return(TOK_IAM);
				}

"class"				{ return(TOK_CLASS);}

"if"				{ return(TOK_IF); }

"then"				{ return(TOK_THEN); }

"else"				{ return(TOK_ELSE); }

"lazyDet"			{ return(TOK_LAZYDET); }

"eagerDet"			{ return(TOK_EAGERDET); }

"when"				{ return(TOK_WHEN); }

"ever"				{ if (ground_when)
				  {
				    /* the parser asked for the keyword */
				    return(TOK_EVER);
				  }
				  else
				  {
				     /* otherwise an ordinary atom */
				     yylval.pred = pred_insert(yytext,yyleng);
				     return(TOK_ATOM);
				  }
				}

"ground"			{ if (ground_when)
				  {
				     /* the parser asked for the keyword */
				     return(TOK_GROUND);
				  }
				  else
				  {
				     /* otherwise an ordinary atom */
				     yylval.pred = pred_insert(yytext,yyleng);
				     return(TOK_ATOM);
				  }
				}

"and"				{ return(TOK_AND); }

"or"				{ return(TOK_OR); }

"\\+"				{ /* a backslash followed by a plus */ return(TOK_NOT); }

"is"				{ return(TOK_IS); }

"?-"				{ return(TOK_PROLOG); }

":-"				{ return(TOK_RULE); }

"<="				{ return(TOK_DEFINE);}

"=>"				{ return(TOK_DO);}

"::"				{ return(TOK_MESSAGE);}

"->"				{ return(TOK_NIF); }

"~="				{ return(TOK_NOT_EQ); }

"=.."				{ return(TOK_F_ARG); }

"="				{ /* longer operators that start with = win by longest match */ return(TOK_TEQ); }

"\\="				{ return(TOK_NOT_UNIF); }

"=="				{ return(TOK_ARE_IDENT); }

"\\=="				{ return(TOK_NOT_IDENT); }

"@<"				{ return(TOK_TLT); }

"@>"				{ return(TOK_TGT); }

"@=<"				{ return(TOK_LTE); }

"@>="				{ return(TOK_GTE); }

"mod"				{ return(TOK_MOD); }

"\+"				{ /* a lone plus; the backslash only quotes it */ return(TOK_PLUS); }

"-"				{ return(TOK_MINUS); }

"*"				{ return(TOK_MULT); }

"/"				{ return(TOK_DIV); }

"//"				{ return(TOK_IDIV); }

"**"				{ return(TOK_POW); }

"/\\"				{ /* slash then backslash */ return(TOK_BAND); }

"\\/"				{ /* backslash then slash */ return(TOK_BOR); }

"^"				{ return(TOK_BEOR); }

"<<"				{ return(TOK_LSHIFT); }

">>"				{ return(TOK_RSHIFT); }

"\\"				{ /* a lone backslash */ return(TOK_COMP); }

"<"				{ return(TOK_ALT); }

"=<"				{ return(TOK_LTEQ); }

">"				{ return(TOK_AGT); }

">="				{ return(TOK_GTEQ); }

"=:="				{ return(TOK_AEQ); }

"=\\="				{ return(TOK_NEQ); }

{atom1}		 |

{atom2}				{/* This matches an atom, which can either
				    begin with a lower case letter or a
				    dollar sign followed by any valid symbol.
				    Words that are exactly a keyword never
				    get here because the keyword rules are
				    listed first.  The matched text is
				    handed to pred_insert and the result is
				    passed to the parser in yylval.pred.
				 */
				   yylval.pred = pred_insert(yytext,yyleng);
				   return(TOK_ATOM);
				}

{var1}		|

{var2}				{/* This matches a variable, which can either
				    begin with an upper case letter or an
				    underscore followed by any valid symbol.
				    The name may end in a single hash; that
				    form is only accepted when no_hash is
				    clear and the plain name is already a
				    known variable whose mode is not UNKNOWN.
				    The result is passed in yylval.var and
				    the token is always TOK_VAR.
				 */
				   /* Step back so that yyleng indexes the
				      last character of the match.
				    */
				   if (yytext[--yyleng] == HASH)
				   {
					if (no_hash)
					{
						/* Hash not allowed here: report it,
						   then fall out of this if and carry
						   on below with the text unchanged
						   (hash included).
						 */
						yyerror("");
						fprintf(stderr,"\nCan not use variant of the becomes '#' in this place.\n");
					}
					else
					{
						/* Overwrite the hash for a moment so
						   the plain name can be looked up.
						 */
						yytext[yyleng] = EOS;
						yylval.var = var_lookup(yytext);
						if (!yylval.var || yylval.var->mode == UNKNOWN)
						{
							yyerror("");
							fprintf(stderr,"\nVariable must be an invisible/visible variable in order to use '#'\n");
							/* Put the hash back and return
							   whatever var_lookup gave.
							   NOTE(review): yylval.var may
							   be a null pointer here --
							   confirm the parser copes.
							 */
							yytext[yyleng++] = HASH;
							return(TOK_VAR);
						}
						else
						{
							/* Put the hash back, then copy
							   the name to just after it so
							   that the text starting at the
							   hash reads hash-then-name.
							   After the loop i is the length
							   of the name, so &yytext[i] is
							   the hash, and yyleng ends up
							   as name length plus one.
							   NOTE(review): this writes past
							   the original match without a
							   bounds check on yytext.
							 */
							yytext[yyleng++] = HASH;
							for(i = 0, j = yyleng--; i < yyleng; i++, j++)
							{
								yytext[j] = yytext[i];
							}
							yytext[j] = EOS;
							++yyleng;
							yylval.var = var_insert(&yytext[i], yyleng);
							return(TOK_VAR);
						}
					}
				   }
				   /* Undo the decrement made in the test above */
				   ++yyleng;
				   if (var_dbs && yytext[0] != '_')
				   {
					/* Renaming requested by the parser: append
					   a dollar and dbs_counter to the name.
					   NOTE(review): itoa is not defined in this
					   file; presumably it writes the digits of
					   its third argument into yytext starting
					   at the given offset.  yyleng is not
					   advanced past those digits here --
					   confirm var_insert does not need it.
					 */
					yytext[yyleng++] = '$';
					itoa(yytext, yyleng, dbs_counter);
					/* Disabled earlier code that appended
					   the digits by hand:
					do
					{
						yytext[yyleng++] = i %10 + '0';
					}while ((i /= 10) > 0);
					yytext[yyleng] = EOS;
					*/
				   }
				   yylval.var = var_insert(yytext, yyleng);
				   return(TOK_VAR); 
				}


[+-]?{digit}+						|

[+-]?{alphanum}'{digit}+				|

[+-]?0'[A-Za-z0-9+\-\*/~<=>`:.?@#&]			|

[+-]?0'\\[bcdefnrstv]					|

[+-]?0'\\([0-8]{1,3})					|

[+-]?0'\\~[A-Z@\[\]~_]		{/* This matches any valid representation of
				    an integer as outlined in the NU-Prolog
				    manual.  Each alternative takes an
				    optional sign.  In order they are: plain
				    decimal digits; one alphanumeric, a quote
				    and digits; zero-quote and one character;
				    zero-quote, backslash and a letter escape;
				    zero-quote, backslash and one to three
				    digits; zero-quote, backslash, tilde and
				    one further character.
				    The text is not converted here; it is
				    handed to const_insert together with the
				    line number and the current source line.
				    NOTE(review): the digit escape accepts
				    the digit 8; if the escape is meant to be
				    octal the class should stop at 7 --
				    confirm against the manual.
				 */
				   yylval.constant = const_insert(yytext, yyleng, yylineno, char_line, line_size);
				   return(TOK_CONSTANT);
				}


[+-]?({digit}+\.{digit}+)([eE][-+]?{digit}+)?	|

[+-]?{digit}+[eE][-+]?{digit}*	{/* This matches any valid representation of
				    a floating-point number as outlined in
				    the NU-Prolog manual.  The first form
				    needs digits on both sides of the point
				    and takes an optional exponent; the
				    second form has no point and needs the
				    exponent marker.
				    The text is not converted here; it is
				    handed to const_insert together with the
				    line number and the current source line.
				    NOTE(review): the second form allows an
				    exponent marker followed by no digits at
				    all -- confirm that this is intended.
				 */
				   yylval.constant = const_insert(yytext, yyleng, yylineno, char_line, line_size);
				   return(TOK_CONSTANT);
				}

{comment1}			{/* This matches a comment of style one
				    which is a % sign, anything after it up
				    till end of line is a comment.
				    Characters are read and thrown away
				    until the end of line has been consumed
				    or input() yields 0 (end of input).
				    No token is returned.
				 */
				   while((c1 = input()) != EOL && c1);
				}

{comment2}			{/* This matches a comment of style two
				    which is like the C way of comments
				    except it cannot nest.
				    c1 is the previous character and c2 the
				    current one; the scan stops once they
				    form the closing star-slash pair or once
				    input() yields 0 (end of input).
				    No token is returned.
				 */
				  c1 = FALSE;
                                  c2 = input();

                                  for(;;)
                                  {
                                        if (!c2)
                                                break;
                                        if(c1 == '*' && c2 == '/')
                                                break;
                                        c1 = c2;
                                        c2 = input();
                                  }
                                  if(!c2)
				  {
					/* Input ran out inside the comment.
					   The current line is emptied before
					   the error is reported.
					   NOTE(review): presumably so that no
					   source line is echoed -- confirm.
					 */
                                  	char_line[0] = EOS;
					yyerror("");
					fprintf(stderr, "\t   no end of comment\n\n");
				  }
				}

{dqstring}			{/* This matches a double quoted string
				    which may contain an escaped double
				    quote within it which is treated as
				    part of the string, or two adjacent
				    quotes which are left for Prolog.
				    The pattern matches the opening quote
				    only; the rest is read with input() and
				    appended to yytext by hand.  c1 is the
				    previous character, c2 the current one.
				    The token is TOK_CONSTANT even when the
				    string turns out to be unterminated.
				    NOTE(review): nothing checks yyleng
				    against the capacity of yytext, so a
				    very long string can overrun it.
				 */
				  c1 = FALSE;
                                  c2 = input();

                                  for(;;)
                                  {
                                        /* 0 means end of input */
                                        if(!c2)
                                                break;
                                        /* A quote closes the string unless
                                           the character before it was a
                                           backslash or another quote.
                                           NOTE(review): only one character
                                           is looked back at, so a quote that
                                           follows an escaped quote or an
                                           escaped backslash is not taken as
                                           the end -- confirm this is wanted.
                                         */
                                        if(c1 != '\\'&&c1 != '\"' && c2 == '\"')
                                        {
                                                yytext[yyleng++] = c2;
                                                break;
                                        }
                                        /* A string may not run over the end
                                           of the line; the newline is not
                                           stored.
                                         */
                                        if(c2 == EOL)
                                        {
                                                yyerror("");
                                                fprintf(stderr, "\t   string is not terminated\n\n");
                                                break;
                                        }
                                        c1 = c2;
                                        yytext[yyleng++] = c2;
                                        c2 = input();
                                 }
                                 /* yyleng counts the terminator as well */
                                 yytext[yyleng++] = EOS;
                                 if (!c2)
				 {
					/* Input ran out inside the string;
					   the current line is emptied before
					   the error is reported.
					 */
				 	char_line[0] = EOS;
					yyerror("");
					fprintf(stderr, "\t   string is not terminated\n\n");
				 }
				 yylval.constant = const_insert(yytext, yyleng, yylineno, char_line, line_size);
                                 return(TOK_CONSTANT);
                                }

{sqstring}                      {/* This matches a single quoted string
				    which may contain an escaped single
				    quote within it which is treated as
				    part of the string, or two adjacent
				    quotes which are left for Prolog.
				    The pattern matches the opening quote
				    only; the rest is read with input() and
				    appended to yytext by hand.  c1 is the
				    previous character, c2 the current one.
				    The text goes to pred_insert and the
				    token is TOK_SSTRING even when the
				    string turns out to be unterminated.
				    NOTE(review): nothing checks yyleng
				    against the capacity of yytext, so a
				    very long string can overrun it.
				 */
				  c1 = FALSE;
                                  c2 = input();

                                  for(;;)
                                  {
                                        /* 0 means end of input */
                                        if(!c2)
                                                break;
                                        /* A quote closes the string unless
                                           the character before it was a
                                           backslash or another quote.
                                           NOTE(review): only one character
                                           is looked back at, so a quote that
                                           follows an escaped quote or an
                                           escaped backslash is not taken as
                                           the end -- confirm this is wanted.
                                         */
                                        if(c1 != '\\'&&c1 != '\'' && c2 == '\'')
                                        {
                                                yytext[yyleng++] = c2;
						break;
                                        }
                                        /* A string may not run over the end
                                           of the line; the newline is not
                                           stored.
                                         */
                                        if(c2 == EOL)
                                        {
                                                yyerror("");
                                                fprintf(stderr, "\t   string is not terminated\n\n");
                                                break;
                                        }
                                        c1 = c2;
                                        yytext[yyleng++] = c2;
                                        c2 = input();
                                  }
                                  /* yyleng counts the terminator as well */
                                  yytext[yyleng++] = EOS;
                                  if (!c2)
				  {
					/* Input ran out inside the string;
					   the current line is emptied before
					   the error is reported.
					 */
					char_line[0] = EOS;
					yyerror("");
					fprintf(stderr, "\t   string is not terminated\n\n");
				  }
				  yylval.pred= pred_insert(yytext, yyleng);
                                  return(TOK_SSTRING);
                                }

.				{/* Any other single character (the dot does
				    not match a newline) is returned as its
				    own token code for the parser to handle.
				 */
				  return(yytext[0]);
				}

%%

/* The functions below read the input one line at a time; keeping the
   whole current line available is part of how errors are displayed
   neatly to the user.
*/

/* Initialize variables in lex and error.
   May be called more than once: the line buffer is allocated on the first
   call only, and on later calls the saved previous line is released.
   After the call the current line is empty, the error count is 0, the
   line number is 1 and every flag shared with the parser is FALSE.
*/
void init_lex(void)
{
	static char initialised = FALSE;	/* TRUE once char_line has been allocated */

	if (!initialised)
	{
		/* First call: nothing to release yet, just create the buffer */
		initialised = TRUE;
		char_line = NEWN(char, MAX);
	}
	else
	{
		/* Later calls: drop the line remembered from the last run */
		free(prev_line);
	}
	prev_line = NULL;

	/* Line buffer state: an empty line, read position at its start */
	line_size = MAX;
	column = 0;
	char_line[0] = EOS;

	/* Scanner and error bookkeeping */
	yylineno = 1;
	nerrors = 0;
	endflag = FALSE;

	/* Flags and counter shared with the parser */
	dbs_counter = 0;
	var_dbs = FALSE;
	var_noinp = FALSE;
	ground_when = FALSE;
	no_hash = FALSE;
}
	

/* Append the character c to the line buffer char_line at position column
   and advance column by one.  When the buffer is already full its size
   (line_size) is doubled with RSIZE before the character is written.
*/
static void store_char(char c)
{
	int slot = column;	/* where the character is going to be written */

	if (slot >= line_size)
	{
		/* No room left: grow the buffer to twice its size */
		line_size = line_size * 2;
		char_line = RSIZE(char_line, char, line_size);
	}

	char_line[slot] = c;
	column = slot + 1;
}

/* Return the next input character.
   Characters are served from char_line; once that line has been used up
   (the terminating EOS is reached) the old line is moved to prev_line and
   a whole new line is read from yyin, with tabs expanded to blanks, so
   that the complete line containing the current character is always
   available for error messages.

   Returns EOF when no more input is left.  A last line that does not end
   in a newline is terminated with one and delivered like any other line
   instead of being thrown away.

   NOTE(review): the return type stays char because the forward
   declaration and the input() macro expect it; where char is unsigned the
   returned EOF cannot compare equal to EOF in input(), and where it is
   signed an input byte of value 255 is indistinguishable from EOF.
   Changing the return type to int (together with the declaration) would
   remove both problems.
*/
static char get_next_char(void)
{
	int c;		/* int, not char: fgetc() reports EOF outside the char range */
	int len;

	/* If processed current line get the next */
	if (char_line[column] == EOS)
	{
		free(prev_line);
		prev_line = char_line;
		char_line = NEWN(char, MAX);
		line_size = MAX;
		column = 0;
		char_line[column] = EOS;

		while ((c = fgetc(yyin)) != EOF && c != EOL)
		{
			/* Expand tabs to spaces, looks better */
			if (c == TAB)
			{
				len = TABWIDTH - (column % TABWIDTH);
				while (len-- > 0)
				{
					store_char(BLANK);
				}
			}
			else
			{
				store_char((char) c);
			}
		}

		if (c == EOF && column == 0)
		{
			/* Nothing at all was read: the input is exhausted.
			   char_line is left empty, so a further call comes
			   back here and reports EOF again.
			 */
			return EOF;
		}

		/* Terminate the line.  This is also done for a final line
		   that ended at EOF without a newline, so that its text is
		   not lost and the buffer is never left unterminated.
		 */
		store_char(EOL);
		store_char(EOS);
		column = 0;
	}
	return char_line[column++];
}

