(* Lexer *)
(* 15-411 *)
(* by Roland Flury *)
(* @version $Id: lexer.mll,v 1.4 2003/09/17 18:54:07 rflury Exp $ *)

{
 open Grammar               (* open the grammar for the tokens *)
 exception EXIT    (* This error is raised to exit the program *)

let lstart = Lexing.lexeme_start
let lend = Lexing.lexeme_end
let get = Lexing.lexeme
    
(* Comment handling *)
    
let commentLevel = ref 0
let commentPos = ref 0
let linCom = ref false
    
let enterComment yypos =
  commentLevel := !commentLevel + 1;
  commentPos := yypos
      
let linComStart yypos = 
  linCom := true;
  commentPos := yypos
      
let isLinCom () = !linCom
let linComEnd () = linCom := false
    
let exitComment () =
   commentLevel := !commentLevel - 1;
   !commentLevel = 0
    
let isCom () =
  !commentLevel > 0

let eof () =
  if (!commentLevel > 0) then
    begin
      Errormsg.error (!commentPos,!commentPos) "Unterminated comment";
      raise EXIT
    end
  else
    ();
  EOF

let fileOffset = ref 0 (* support cpp *)

let extractFileName s =
  let su = String.sub s 1 (String.length s -1) in
  let pos = String.index su '"' in
  String.sub su 0 pos
    
}

let alpha  = ['a'-'z' 'A'-'Z' '_']
let digit  = ['0'-'9']
let id     = alpha(digit|alpha)*
let nl     = ('\r' | '\n' | "\r\n")
let fname = ('"')[^'\r' '\n']*('"')[^'\r' '\n']* nl

rule token = parse

| eof                      { eof() }
| [' ''\t']+               { token lexbuf } (* ignore white space *)
| nl                       { Errormsg.startNewline (lstart lexbuf);
                             if isCom() then (
                               token lexbuf
                             )  else (
                               token lexbuf 
			     ) 
                           }
(* Parentesis *)
| '{'                      { LBRACE }
| '}'                      { RBRACE }
| '('                      { LPAREN }
| ')'                      { RPAREN }
| '['                      { LBRACKET }
| ']'                      { RBRACKET }
(* Operators *)
| '+'			   { PLUS }
| '*'			   { TIMES } (* also dereference of pointer *)
| '-'			   { MINUS } (* unary and binary *)
| '/'		           { DIVIDE }
| '%'		           { MOD }
| "*+"                     { PPLUS }
| "*-"                     { PMINUS }
| '|'                      { BITOR }
| '&'                      { BITAND } (* also for address_of *)
| '^'                      { BITXOR }
| "<<"                     { SHIFTLEFT }
| ">>"                     { SHIFTRIGHT }
  (* Logic-level *)
|  "&&"                    { LOGICAND }
|  "||"                    { LOGICOR }
|  "^^"                    { LOGICXOR }
|  '!'                     { LOGICNOT } 
(* unary operators *)
| '~'                      { BITNOT }
(* Comparison *)
| "=="                     { LOGICEQ }
| "!="                     { LOGICNOTEQ }
| '<'                      { LOGICLESS }
| "<="                     { LOGICLESSEQ }
| '>'                      { LOGICMORE }
| ">="                     { LOGICMOREEQ }
| "*=="                    { PLOGICEQ }
| "*!="                    { PLOGICNOTEQ }
(* Assignments *)
| '='		           { ASSIGN } 
| "+="			   { PLUSASSIGN }
| "*="			   { TIMESASSIGN }
| "-="			   { MINUSASSIGN }
| "/="		           { DIVIDEASSIGN }
| "%="		           { MODASSIGN }
(* Separators *)
| ','                      { COMMA }
| ';'		           { SEMICOL }
| "->"                     { RARROW }
| '.'                      { DOT }
| ':'                      { COLON }   
(* Types *)
| "int"                    { INTEGER }
| "bool"                   { BOOL }
(* Keywords *)
| "var"                    { VAR }
| "struct"                 { STRUCT }
| "alloc"                  { ALLOC }
| "offset"                 { OFFSET }
| "size"                   { SIZE }
| "if"                     { IF }
| "else"                   { ELSE }
| "for"                    { FOR }
| "while"                  { WHILE }
| "continue"               { CONTINUE }
| "break"                  { BREAK }
| "return"                 { RETURN }
| "true"                   { TRUE }
| "false"                  { FALSE }
| "NULL"                   { NULL }
| "void"                   { VOID }
| "foreign"                { FOREIGN }
(* chars & numbers *)
| digit+                   { INT( Int32.of_string (get lexbuf) ) }
| id                       { ID( get lexbuf ) }
(* Comments *)
|  "//"                    { linComStart (lstart lexbuf);
			     comment lexbuf 
			   }
|  "/*"                    { enterComment (lstart lexbuf);
			     comment lexbuf 
			   }
|  "*/"                    { Errormsg.error (lstart lexbuf,
					     lend lexbuf - 1)
			       "Unbalanced comment"; 
			     raise EXIT; 
			     token lexbuf 
			   }
| "#"                      { cpptoken lexbuf }
(* default (error) *)
|  _                       { Errormsg.error (lstart lexbuf,
					     lend lexbuf - 1)
			       ("illegal character '"
				^ get lexbuf
				^ "'"); 
			     raise EXIT;
			     token lexbuf 
			   }

and comment = parse 
|  "/*"
    {
     if not (isLinCom ()) then (
       enterComment (lstart lexbuf)
     ) else ();
     comment lexbuf }
|  "*/"
    {
     if not (isLinCom ()) && exitComment () then (
       token lexbuf
      ) else (
       comment lexbuf )
   }
|  nl
    {
     if isLinCom () then (
       linComEnd ()
      ) else ();
     Errormsg.startNewline (lstart lexbuf);
     if isCom() then (
       comment lexbuf
      )  else (
       token lexbuf )}
|  eof
    { eof() }
|  _
    { comment lexbuf }

and cpptoken = parse
| digit+          
    { fileOffset := (Pervasives.int_of_string (get lexbuf));
      cpptoken lexbuf 
    }
| fname           
    { Errormsg.startNewFile 
	(lstart lexbuf) 
	(extractFileName (get lexbuf)) 
	!fileOffset;
      token lexbuf 
    }
| nl { token lexbuf }
| [' ''\t']+               { cpptoken lexbuf } (* ignore white space *)
| eof { eof() } 
| _  { Errormsg.error (lstart lexbuf, lend lexbuf - 1) 
	 ("Illegal CPP instruction: " ^ (get lexbuf)); raise EXIT }


