(* -*- sml-lex -*- *)

open LexUtil.Tokens

(* position tracking *)

val lineno = ref 1
val lastnlpos = ref 0

fun inclineno (yypos) = (lineno := !lineno + 1 ; lastnlpos := yypos)

fun posn (yypos) = (yypos - !lastnlpos, !lineno)

fun reset_posn () = (lineno := 1; lastnlpos := 0)

(* Required components *)

(* types svalue and token defined in opened LexUtil.Tokens *)
type pos = Location.pos
type lexresult = (svalue, pos) token

fun eof _ = (reset_posn (); EOF (Location.dummypos, Location.dummypos))

(* User components *)


val nesting_level = ref 0

fun error (msg, yypos, yytext) = LexUtil.signal_error (msg, (yypos-2 - !lastnlpos, !lineno))

fun token0 (tokfn, yypos, yytext) =
    tokfn ((yypos - 2 - !lastnlpos, !lineno), (yypos - 2 + size yytext - !lastnlpos, !lineno))
fun token1 (tokfn, value, yypos, yytext) =
    tokfn (value, (yypos - 2 - !lastnlpos, !lineno), (yypos - 2 + size yytext - !lastnlpos, !lineno))


(* Lexer starting states
   ---------------------
   INITIAL: regular
   COMMENT: skipping delimited comment

   (This comment really ought to be at the %s declaration
   below. I wish ML-Lex supported comments.)

*)

%%

%header ( functor PcfLexFun(structure LexUtil : LEX_UTIL) );

%s COMMENT;

whitechar       = [\ \t] ;
whitespace      = {whitechar}+ ;

newline         = \n\013 | \n ;

nonnl           = [^\013\n] ;

noncomment      = [^\013\n*()]* ;

letter          = [A-Za-z] ;
alphanumeric    = [A-Za-z0-9_'] ;
ident           = {letter}{alphanumeric}* ;
digit           = [0-9] ;
hexdigit        = [A-Fa-f0-9] ;
octdigit        = [0-7] ;
bit             = [01] ;

%%

<INITIAL>\n                          => ( inclineno (yypos) ; continue ());
<INITIAL>{whitespace}                => ( continue () );

<INITIAL>"("                         => ( token0 (LPAREN, yypos, yytext) );
<INITIAL>")"                         => ( token0 (RPAREN, yypos, yytext) );
<INITIAL>true                        => ( token0 (TRUE, yypos, yytext) );
<INITIAL>false                       => ( token0 (FALSE, yypos, yytext) );
<INITIAL>0                           => ( token0 (ZERO, yypos, yytext) );
<INITIAL>succ                        => ( token0 (SUCC, yypos, yytext) );
<INITIAL>pred                        => ( token0 (PRED, yypos, yytext) );
<INITIAL>iszero                      => ( token0 (ISZERO, yypos, yytext) );
<INITIAL>"if"                        => ( token0 (IF, yypos, yytext) );
<INITIAL>"then"                      => ( token0 (THEN, yypos, yytext) );
<INITIAL>"else"                      => ( token0 (ELSE, yypos, yytext) );
<INITIAL>L                           => ( token0 (LAMBDA, yypos, yytext) );
<INITIAL>"."                         => ( token0 (DOT, yypos, yytext) );
<INITIAL>:                           => ( token0 (COLON, yypos, yytext) );
<INITIAL>fix                         => ( (*token0 (FIX, yypos, yytext)*) continue () );
<INITIAL>"let"                       => ( token0 (LET, yypos, yytext) );
<INITIAL>"val"                       => ( token0 (VAL, yypos, yytext) );
<INITIAL>"in"                        => ( token0 (IN, yypos, yytext) );
<INITIAL>"="                         => ( token0 (EQUAL, yypos, yytext) );
<INITIAL>"end"                       => ( token0 (END, yypos, yytext) );
<INITIAL>roll                        => ( token0 (ROLL, yypos, yytext) );
<INITIAL>unroll                      => ( token0 (UNROLL, yypos, yytext) );
<INITIAL>inl                         => ( (*token0 (INL, yypos, yytext)*) continue () );
<INITIAL>inr                         => ( (*token0 (INR, yypos, yytext)*) continue () );
<INITIAL>"case"                      => ( (*token0 (CASE, yypos, yytext)*) continue () );
<INITIAL>"<"                         => ( (*token0 (LANGLE, yypos, yytext)*) continue () );
<INITIAL>">"                         => ( (*token0 (RANGLE, yypos, yytext)*) continue () );
<INITIAL>","                         => ( (*token0 (COMMA, yypos, yytext)*) continue () );
<INITIAL>[12]                        => ( (*token1 (DIGIT, LexUtil.string2digit(posn(yypos), yytext), yypos, yytext) *) continue () );
<INITIAL>bool                        => ( token0 (BOOL, yypos, yytext) );
<INITIAL>nat                         => ( token0 (NAT, yypos, yytext) );
<INITIAL>unit                        => ( token0 (UNIT, yypos, yytext) );
<INITIAL>"rec"                       => ( token0 (REC, yypos, yytext) );
<INITIAL>"*"                         => ( (* token0 (TIMES, yypos, yytext) *) continue ());
<INITIAL>"+"                         => ( (* token0 (PLUS, yypos, yytext) *) continue () );
<INITIAL>"->"                        => ( token0 (ARROW, yypos, yytext) );
<INITIAL>{ident}                     => ( token1 (IDENT, yytext, yypos, yytext) );

<INITIAL>"(*"                        => ( nesting_level := 1 ; YYBEGIN COMMENT; continue () );


<COMMENT>\n                          => ( inclineno (yypos) ; continue ());
<COMMENT>{whitespace}                => ( continue () );
<COMMENT>{noncomment}                    => ( continue () );
<COMMENT>"*"                         => ( continue ()) ;
<COMMENT>"("                         => ( continue ()) ;
<COMMENT>")"                         => ( continue ()) ;
<COMMENT>"(*"                        => ( nesting_level := !nesting_level + 1; continue () );
<COMMENT>"*)"                        => ( nesting_level := !nesting_level - 1;
					  if !nesting_level = 0 then (YYBEGIN INITIAL ; continue ())
					  else continue () );
