(* ctt.lex by Jonathan Baccash *)
type svalue = Tokens.svalue
type pos = int
type ('a, 'b) token = ('a, 'b) Tokens.token
type lexresult = (svalue, pos) token
type arg = {sm : SourceMap.sourcemap}

fun err(p1,p2) = ErrorMsg.error p1 p2

val bString = ref false
val sString = ref ""

(* handling EOF *)
fun eof {sm} = (if !bString then err(1, "EOF found before end of string")
                else (); Tokens.EOF(1,1))

fun newLine sm p = SourceMap.newline sm p

val parseInt = valOf o Int.fromString
val drop = List.drop

fun parseTemp s =
     let val v = implode (drop (explode s, 1))
     in parseInt( v ) end

fun parseBits s =
valOf (IntInf.fromString (implode (List.take (explode s, (String.size s)-4))))

fun mkRealNum s = ((case (StringCvt.scanString Real.scan s)
		    of SOME r => r
		    | _ => (0.0))
		   handle OverFlow => (0.0))

exception eHash
structure T = Tokens
structure H = HashTable
val lOneKWds = [("additive", T.ADDITIVE), ("address", T.ADDRESS),
                ("after", T.AFTER), ("and", T.AND),
                ("another", T.ANOTHER),
                ("are", T.ARE), ("array", T.ARRAY), ("as", T.AS),
                ("assigning", T.ASSIGNING), ("at", T.AT),
                ("automatic", T.AUTOMATIC), ("be", T.BE),
                ("before", T.BEFORE), ("beginning", T.BEGINNING),
                ("bits", T.BITS), ("bitwise", T.BITWISE),
                ("block", T.BLOCK), ("both", T.BOTH),
                ("by", T.BY), ("called", T.CALLED), ("calling", T.CALLING),
                ("case", T.CASE), ("cast", T.CAST), ("character", T.CHARACTER),
                ("code", T.CODE), ("complement", T.COMPLEMENT),
                ("completion", T.COMPLETION), ("computing", T.COMPUTING),
                ("constant", T.CONSTANT), ("contents", T.CONTENTS),
                ("decrementing", T.DECREMENTING), ("default", T.DEFAULT),
                ("defined", T.DEFINED), ("divided", T.DIVIDED),
                ("does", T.DOES), ("each", T.EACH), ("either", T.EITHER),
                ("enclosing", T.ENCLOSING), ("end", T.END),
                ("ends", T.ENDS), ("enumeration", T.ENUMERATION),
                ("equal", T.EQUAL), ("equals", T.EQUALS),
                ("exclusive", T.EXCLUSIVE), ("execution", T.EXECUTION),
                ("exists", T.EXISTS), ("external", T.EXTERNAL),
                ("extralong", T.EXTRALONG), ("follows", T.FOLLOWS),
                ("for", T.FOR), ("fractional", T.FRACTIONAL),
                ("from", T.FROM), ("function", T.FUNCTION),
                ("greater", T.GREATER), ("if", T.IF), ("in", T.IN),
                ("incrementing", T.INCREMENTING), ("initial", T.INITIAL),
                ("integer", T.INTEGER), ("inverse", T.INVERSE),
                ("is", T.IS), ("iteration", T.ITERATION),
                ("left", T.LEFT), ("length", T.LENGTH),
                ("less", T.LESS), ("list", T.LIST), ("long", T.LONG),
                ("loop", T.LOOP), ("minus", T.MINUS), ("modulus", T.MODULUS),
                ("name", T.NAME), ("nearest", T.NEAREST), ("no", T.NO),
                ("nonzero", T.NONZERO), ("not", T.NOT),
                ("nothing", T.NOTHING), ("of", T.OF), ("one", T.ONE),
                ("only", T.ONLY), ("operation", T.OPERATION),
                ("or", T.OR), ("otherwise", T.OTHERWISE),
                ("passing", T.PASSING),
                ("plus", T.PLUS), ("point", T.POINT),
                ("pointed", T.POINTED), ("pointer", T.POINTER),
                ("prior", T.PRIOR), ("provided", T.PROVIDED),
                ("register", T.REGISTER), ("result", T.RESULT),
                ("returning", T.RETURNING), ("right", T.RIGHT),
                ("saturate", T.SATURATE), ("shifted", T.SHIFTED),
                ("short", T.SHORT), ("signed", T.SIGNED),
                ("size", T.SIZE), ("start", T.START),
                ("starting", T.STARTING), ("static", T.STATIC),
                ("string", T.STRING), ("structure", T.STRUCTURE),
                ("such", T.SUCH), ("than", T.THAN), ("the", T.THE),
                ("times", T.TIMES), ("type", T.TYPE), ("union", T.UNION),
                ("unknown", T.UNKNOWN), ("unnamed", T.UNNAMED),
                ("unsigned", T.UNSIGNED), ("upon", T.UPON),
                ("variable", T.VARIABLE), ("void", T.VOID),
                ("volatile", T.VOLATILE), ("where", T.WHERE),
                ("which", T.WHICH), ("whose", T.WHOSE),
                ("with", T.WITH), ("zero", T.ZERO)]

val hOneKWds : (string, (pos*pos) -> (svalue, pos) token) H.hash_table =
     H.mkTable (HashString.hashString, op =) (128 (* sizeHint *), eHash)
val _ = map (H.insert hOneKWds) lOneKWds
(* Return an alpha string's token if it's in our table.
   Otherwise, it's just an id. *)
fun alphaToken (s,l,r) =
     case H.find hOneKWds s
     of NONE   => T.ID (s,l,r)
      | SOME f => f(l,r)

%%
%header (functor CttLexFun (structure Tokens: Ctt_TOKENS));
%arg ({sm});
%s COMMENT;
%s STRING;
%s STRINGESC;
realnum = (([0-9]+(\.[0-9]+)?)|(\.[0-9]+))([eE][+-]?[0-9]+)?[lL]?;
onespace = [\ \t\b\n];
spc  = {onespace}*;
spc2 = {onespace}+;
floatpn = floating{spc2}point{spc2}number;
id = [a-zA-Z_][a-zA-Z0-9_]*;
%%

<STRING>\\                    => (YYBEGIN STRINGESC;
                                  sString := !sString ^ yytext;
                                  continue());
<STRING>\"                    => (YYBEGIN INITIAL; bString := false;
                                  Tokens.QUOTEID (!sString, yypos,
                                                  yypos+size yytext));
<STRING>.                     => (sString := !sString ^ yytext; continue());

<STRINGESC>.                  => (YYBEGIN STRING; sString := !sString ^ yytext;
                                  continue());

<INITIAL>\;                   => (Tokens.SEMIC(yypos,yypos+1));
<INITIAL>\.                   => (Tokens.DOT(yypos, yypos+1));
<INITIAL>,                    => (Tokens.COMMA(yypos, yypos+1));
<INITIAL>,{spc}and            => (Tokens.COMAND(yypos, yypos+size yytext));
<INITIAL>,{spc}or             => (Tokens.COMOR(yypos, yypos+size yytext));
<INITIAL>,{spc}starting      => (Tokens.COMSTARTING(yypos, yypos+size yytext));
<INITIAL>,{spc}where          => (Tokens.COMWHERE(yypos, yypos+size yytext));
<INITIAL>[Aa]"fter that,"     => (continue());
<INITIAL>an?                  => (Tokens.ARTCL (yypos, yypos+size yytext));
<INITIAL>and{spc2}otherwise => (Tokens.ANDOTHERWISE(yypos, yypos+size yytext));
<INITIAL>arguments?           => (Tokens.ARGUMENT(yypos, yypos+size yytext));
<INITIAL>[Aa]ssign            => (Tokens.ASSIGN(yypos, yypos+6));
<INITIAL>[Aa]ssume            => (Tokens.ASSUME (yypos, yypos+6));
<INITIAL>at{spc2}index        => (Tokens.ATINDEX(yypos, yypos+size yytext));
<INITIAL>[Bb]reak             => (Tokens.BREAK(yypos,yypos+5));
<INITIAL>[Cc]all              => (Tokens.CALL(yypos, yypos+4));
<INITIAL>case-block           => (Tokens.CASEBLOCK(yypos,yypos+10));
<INITIAL>[Cc]ompute           => (Tokens.COMPUTE(yypos, yypos+7));
<INITIAL>[Cc]ontinue          => (Tokens.CONTINUE(yypos,yypos+8));
<INITIAL>[Cc]"ontinuing on, we next" => (continue());
<INITIAL>[Dd]ecrement         => (Tokens.DECREMENT(yypos, yypos+9));
<INITIAL>[Dd]o                => (Tokens.DO(yypos, yypos+2));
<INITIAL>double-precision     => (Tokens.DOUBLEP(yypos, yypos+16));
<INITIAL>elements?            => (Tokens.ELEMENT(yypos, yypos+7));
<INITIAL>[Ee]valuate          => (Tokens.EVALUATE(yypos, yypos+8));
<INITIAL>[Ff]irst             => (Tokens.FIRST(yypos, yypos+5));
<INITIAL>{floatpn}            => (Tokens.FLOATPN(yypos, yypos+size yytext));
<INITIAL>[Gg]o                => (Tokens.GO(yypos,yypos+2));
<INITIAL>[Ii]ncrement         => (Tokens.INCREMENT(yypos, yypos+9));
<INITIAL>[Ii]t                => (Tokens.IT     (yypos, yypos+2));
<INITIAL>[Ll]abel             => (Tokens.LABEL(yypos,yypos+5));
<INITIAL>[Ll]et               => (Tokens.LET    (yypos, yypos+3));
<INITIAL>members?             => (Tokens.MEMBER(yypos, yypos+6));
<INITIAL>[Nn]ext,             => (continue());
<INITIAL>[Pp]ass              => (Tokens.PASS(yypos, yypos+4));
<INITIAL>[Pp]erform           => (Tokens.PERFORM(yypos, yypos+7));
<INITIAL>[Rr]epeatedly        => (Tokens.REPEATEDLY(yypos,yypos+10));
<INITIAL>[Rr]eturn            => (Tokens.RETURN(yypos,yypos+6));
<INITIAL>[Ss]et               => (Tokens.SET(yypos, yypos+3));
<INITIAL>[Tt]hen              => (continue());
<INITIAL>[Tt]his              => (Tokens.THIS(yypos, yypos+4));
<INITIAL>[Tt]o                => (Tokens.TO     (yypos, yypos+2));
<INITIAL>values?              => (Tokens.VALUE  (yypos, yypos+size yytext));
<INITIAL>[a-zA-Z]*            => (alphaToken (yytext,yypos,yypos+size yytext));
<INITIAL>a[0-9]+(_[0-9]+)*    => (Tokens.ELEMID(yypos, yypos+size yytext));
<INITIAL>e[0-9]+              => (Tokens.TEMPEXP(parseTemp yytext,
                                                 yypos, yypos+size yytext));
<INITIAL>[0-9]+-bit           => (Tokens.NBITS(parseBits yytext, yypos,
                                               yypos+size yytext));
<INITIAL>-?[0-9]+             => (Tokens.INT(yytext,yypos,yypos+size yytext));
<INITIAL>\"                   => (YYBEGIN STRING; sString := "";
                                  bString := true; continue());
<INITIAL>{id}                 => (Tokens.ID (yytext,yypos,yypos+size yytext));
<INITIAL>{realnum}            => (Tokens.REAL(mkRealNum yytext, yypos,
				              yypos+size yytext));
<INITIAL>\n{spc}\n            => (newLine sm yypos; newLine sm (yypos+1);
                                  Tokens.EOP(yypos, yypos+2));
<INITIAL>{onespace}           => (continue());
<INITIAL>.                    => (err (yypos, "illegal character " ^ yytext);
                                  continue ());
