%{
/*	This file is part of the software similarity tester SIM.
	Written by Dick Grune, Vrije Universiteit, Amsterdam.
	$Header: pascallang.l,v 2.2 91/09/17 21:33:04 dick Exp $
*/

/*
	PASCAL language front end for the similarity tester.
	Author:	Maarten van der Meulen <maarten@cs.vu.nl>
*/

#include	"cport.h"
#include	"private.h"
#include	"token.h"
#include	"idf.h"
#include	"stream.h"
#include	"lex.h"

/* Language-dependent Code */

extern char options[];
		
/* Data for module idf */

CONST PRIVATE struct idf ppcmd[] = {
	"define",	META('d'),
	"else",		META('e'),
	"endif",	META('E'),
	"if",		META('i'),
	"ifdef",	META('I'),
	"ifndef",	META('x'),
	"include",	MTCT('I'),
	"line",		META('l'),
	"undef",	META('u')
};

CONST PRIVATE struct idf reserved[] = {
	"and",		NORM('&'),
	"array",	NORM('A'),
	"begin",	NORM('{'),
	"case",		NORM('c'),
	"const",	NORM('C'),
	"div",		NORM('/'),
	"do",		NORM('D'),
	"downto",	NORM('d'),
	"else",		NORM('e'),
	"end",		NORM('}'),
	"extern",	CTRL('E'),
	"file",		NORM('F'),
	"for",		NORM('f'),
	"function",	NORM('p'),	/* Equal to procedure */
	"goto",		NORM('g'),
	"if",		NORM('i'),
	"in",		NORM('I'),
	"label",	NORM('l'),
	"mod",		NORM('%'),
	"nil",		NORM('n'),
	"not",		NORM('!'),
	"of",		SKIP,
	"or",		NORM('|'),
	"packed",	NORM('P'),
	"procedure",	NORM('p'),
	"program",	SKIP,
	"record",	NORM('r'),
	"repeat",	NORM('R'),
	"set",		NORM('s'),
	"then",		SKIP,
	"to",		NORM('t'),
	"type",		NORM('T'),
	"until",	NORM('u'),
	"var",		NORM('v'),
	"while",	NORM('w'),
	"with",		NORM('W')
};

/* Special treatment of identifiers */

PRIVATE
lower_case(str)
	char *str;
{
	/*	Turns upper case into lower case, since Pascal does not
		distinguish between them.
	*/
	register char *s;
	
	for (s = str; *s; s++) {
		if ('A' <= *s && *s <= 'Z') {
			*s += (-'A' + 'a');
		}
	}
}

PRIVATE TOKEN
idf2token(hashing)
	int hashing;
{
	register TOKEN tk;

	lower_case(yytext);
	tk = idf_in_list(yytext, reserved, IDF);
	if (TOKEN_EQ(tk, IDF) && hashing) {
		/* return a one-token hash code */
		tk = idf_hashed(yytext);
	}
	return tk;
}

/* Token sets for module algollike */
CONST TOKEN Headers[] = {
	IDF,		/* identifier */
	NORM('{'),	/* also begin */
	NORM('('),
	NORM('['),
	NORM('A'),	/* array */
	NORM('c'),	/* case */
	NORM('C'),	/* const */
	NORM('/'),	/* div */
	CTRL('E'),	/* extern */
	NORM('F'),	/* file */
	NORM('f'),	/* for */
	NORM('g'),	/* goto */
	NORM('i'),	/* if */
	NORM('l'),	/* label */
	NORM('P'),	/* packed */
	NORM('p'),	/* procedure/function */
	NORM('r'),	/* record */
	NORM('R'),	/* repeat */
	NORM('s'),	/* set */
	NORM('T'),	/* type */
	NORM('v'),	/* var */
	NORM('w'),	/* while */
	NORM('W'),	/* with */
	NOTOKEN
};
CONST TOKEN Trailers[] = {
	NORM(')'),
	NORM('}'),
	NORM(';'),
	NOTOKEN
};
CONST TOKEN Openers[] = {
	NORM('{'),
	NORM('('),
	NORM('['),
	NOTOKEN
};
CONST TOKEN Closers[] = {
	NORM('}'),
	NORM(')'),
	NORM(']'),
	NOTOKEN
};

/* Language-INdependent Code */

yystart() {
	BEGIN INITIAL;
}

PRIVATE int
yywrap() {
	return 1;
}

%}

%Start	Comment

Layout		([ \t])

AnyQuoted	(\\.)
StrChar		([^'\n\\]|{AnyQuoted})
ChrChar		([^'\n\\]|{AnyQuoted})

StartComment	("{"|"(*")
EndComment	("}"|"*)")
SafeComChar	([^*}\n])
UnsafeComChar	("*")

Digit		([0-9])
Idf		([A-Za-z][A-Za-z0-9_]*)

%%

{StartComment}	{			/* See clang.l */
		BEGIN Comment;
	}

<Comment>{SafeComChar}+	{		/* safe comment chunk */
	}

<Comment>{UnsafeComChar}	{	/* unsafe char, read one by one */
	}

<Comment>"\n"		{		/* to break up long comments */
		return_eol();
	}

<Comment>{EndComment}	{		/* end-of-comment */
		BEGIN INITIAL;
	}

\'{StrChar}*\'	{			/* strings */
		return_ch('"');
	}

\'{ChrChar}+\'	{			/* characters */
		return_ch('\'');
	}

#{Layout}*include.*	{		/* skip #include line */
	}

#{Layout}*{Idf}	{			/* a preprocessor line */
		register char *n = yytext+1;

		/* skip layout in front of preprocessor identifier */
		while (*n == ' ' || *n == '\t') {
			n++;
		}
		return_tk(idf_in_list(n, ppcmd, NORM('#')));
	}

{Digit}+	{			/* numeral, passed as an identifier */
		return_tk(IDF);
	}

{Idf}/"("	{			/* identifier in front of ( */
		register TOKEN tk;

		tk = idf2token(options['F']);
		if (!TOKEN_EQ(tk, SKIP)) return_tk(tk);
	}

{Idf}	{				/* identifier */
		register TOKEN tk;
		
		tk = idf2token(0 /* no hashing */);
		if (!TOKEN_EQ(tk, SKIP)) return_tk(tk);
	}

\;	{				/* semicolon, conditionally ignored */
		if (options['f']) return_ch(yytext[0]);
	}

{Layout}	{			/* ignore layout */
	}

\n	{				/* count newlines */
		return_eol();
	}

.	{				/* copy other text */
		if (!(yytext[0]&0200)) return_ch(yytext[0]);
	}

%%

