(****************************************************************************)
(*              The Calculus of Inductive Constructions                     *)
(*                                                                          *)
(*                            Projet Coq                                    *)
(*                                                                          *)
(*                  INRIA                        ENS                        *)
(*           Rocquencourt                        Lyon                       *)
(*                                                                          *)
(*                              Coq V5.8                                    *)
(*                            Jan 1st 1993                                  *)
(****************************************************************************)
(*                              lexer.ml                                    *)
(****************************************************************************)

#open "std";;

let (add_in_buff, get_buff, fget_buff) =
  let buff = nref(create_string 80) in (
  (* add_in_buff *) (
    fun i x ->
      let len = string_length buff.v in
      if i >= len then (buff.v <- buff.v ^ (create_string len); ());
      set_nth_char buff.v i x;
      succ i
  ),
  (* get_buff *) (
    fun len ->
      sub_string buff.v 0 len
  ),
  (* fget_buff *) (
    fun len ->
      set_nth_char buff.v len `\000`;
      buff.v
  )
)
;;

let rec spec_char = function
  [< '`n` >] -> `\n` | [< '`t` >] -> `\t`
| [< '`b` >] -> `\b` | [< '`r` >] -> `\r`
| [< '`0`..`9` as c; (spec1 (int_of_char c-int_of_char `0`)) v >] ->
    char_of_int v
| [< 'x >] -> x

and spec1 v = function
  [< '`0`..`9` as c; s >] -> spec1 (10*v+int_of_char c-int_of_char `0`) s
| [< >] -> v
;;

let rec string len = function
  [< '`"` >] -> len
| [< '`\\`;
     (function [< '`\n` >] -> len
           | [< spec_char c >] -> add_in_buff len c) len;
     s >] -> string len s
| [< 'x; s >] -> string (add_in_buff len x) s

and skip_comm = function
  [< '`*`; s >] -> skip1 s
| [< '`(`; skip2 _; s >] -> skip_comm s
| [< '`"`; (string 0) _; s >] -> skip_comm s
| [< '_; s >] -> skip_comm s

and skip1 = function
  [< '`)` >] -> ()
| [< s >] -> skip_comm s

and skip2 = function
  [< '`*`; s >] -> skip_comm s
| [< >] -> ()
;;

let rec number len = function
  [< '`0`..`9` as d; s >] -> number (add_in_buff len d) s
(*
| [< '`.`; (float (add_in_buff len `.`)) f >] -> FLOAT f
| [< '`e` | `E`; (exp (add_in_buff len `e`)) f >] -> FLOAT f
*)
| [< >] -> Tint(int_of_string(fget_buff len))

(*
and float len = function
  [< '`0`..`9` as d; s >] -> float (add_in_buff len d) s
| [< '`e` | `E`; (exp (add_in_buff len `e`)) f >] -> f
| [< >] -> float_of_string(fget_buff len)

and exp len = function
  [< '`+` | `-` as c; (exp2 (add_in_buff len c)) f >] -> f
| [< (exp2 len) f >] -> f

and exp2 len = function
  [< '`0`..`9` as d; s >] -> exp2 (add_in_buff len d) s
| [< >] -> float_of_string(fget_buff len)
*)
;;

let char = function [< '`\\`; spec_char c >] -> c | [< 'x >] -> x
;;

let next_token keyw = next_token_loop
where rec next_token_loop = function
  [< '`a`..`z` | `A`..`Z` as c; (ident keyw (add_in_buff 0 c)) i >] -> i
| [< '`0`..`9` as d; (number (add_in_buff 0 d)) n >] -> n
| [< '` ` | `\n` | `\t`; s >] -> next_token_loop s
| [< '`"`; (string 0) len >] -> Tstring (get_buff len)
| [< '`(`; s >] -> lparen s
| [< '`|` >] -> Tbar
| [< '`:`; (function [< '`=` >] -> Tcolonequal | [< >] -> Tcolon) t >] -> t
| [< '`.` >] -> Tdot
| [< '`=`; (function [< '`=` >] -> Tequalequal | [< >] -> Tequal) t >] -> t
| [< '`>` >] -> Tgreater
| [< '`[` ; (function [< '`{` >] -> Tlbracketlbrace | [< >] -> Tlbracket) t >] -> t
| [< '`<`; (function [< '`-` ; (function [< '`>` >] -> Tlessminusgreater
                                       | [< >] -> Tlessminus) x >] -> x
                   | [< >] -> Tless) t >] -> t
| [< '`-`; (function [< '`>` >] -> Tminusgreater
                 | [< >] -> Tminus) t >] -> t
| [< '`;`; (function [< '`;` >] -> Tsemisemi | [< >] -> Tsemi) t >] -> t
| [< '`~` >] -> Ttilde
| [< '`&` >] -> Tampersand
| [< '`,` >] -> Tcomma
| [< '`{` >] -> Tlbrace
| [< '`+` >] -> Tplus
| [< '`}` >] -> Trbrace
| [< '`]` >] -> Trbracket
| [< '`)` >] -> Trparen
| [< '`#` >] -> Tsharp
| [< '`/`; (function [< '`\\` >] -> Tslashbslash | [< >] -> Tslash) t >] -> t
| [< '`\\`; '`/` >] -> Tbslashslash
| [< '`*` >] -> Tstar
| [< '_ >] -> raise Parse_error

and lparen = function
  [< '`*`; skip_comm _; s >] -> next_token_loop s
| [< '`:` >] -> Tlparencolon
| [< >] -> Tlparen

and ident keyw = ident_loop
where rec ident_loop len = function
  [< '`a`..`z` | `A`..`Z` | `0`..`9` | `'` | `_` as c; s >] ->
      ident_loop (add_in_buff len c) s
| [< s >] ->
    let str = get_buff len in
    try Tkw(hash_assoc str keyw) with _ -> Tident str
;;

let rec reset_lexer = function
  [< '`\n` >] -> ()
| [< '_; reset_lexer x >] -> x
| [< >] -> ()
;;



let token_stream keyw cs =
  stream_from (fun _ -> next_token keyw cs)
;;

let cnt = nref 0;;

let stream_of_string str =
  cnt.v <- -1; str_rec 0
  where rec str_rec i =
    if i >= string_length str then [< >]
    else [< 'nth_char str i; str_rec(i+1) >]
;;

let stream_of_channel ic =
  cnt.v <- 0;
  stream_from (fun _ -> cnt.v <- cnt.v+1; input_char ic)
;;

let print_syntax_error() =
  prerr_string "Syntax error";
  if cnt.v >= 0 then (
    prerr_string ", char ";
    prerr_int cnt.v
  );
  prerr_endline ""
;;

let string_of_token rev_keywords = function
    Tampersand -> "&"
  | Tbar -> "|"
  | Tbslashslash -> "\\/"
  | Tcolon -> ":"
  | Tcolonequal -> ":="
  | Tcomma -> ","
  | Tdot -> "."
  | Tequal -> "="
  | Tequalequal -> "=="
  | Tgreater -> ">"
  | Tident s -> s
  | Tint n -> string_of_int n
  | Tkw kw  -> hash_assoc kw rev_keywords
  | Tlbrace -> "{"
  | Tlbracket -> "["
  | Tless -> "<"
  | Tlessminus -> "<-"
  | Tlessminusgreater -> "<->"
  | Tlparen -> "("
  | Tminus -> "-"
  | Tminusgreater -> "->"
  | Tplus -> "+"
  | Trbrace -> "}"
  | Trbracket -> "]"
  | Trparen -> ")"
  | Tsemi -> ";"
  | Tsemisemi -> ";;"
  | Tsharp -> "#"
  | Tslash -> "/"
  | Tslashbslash -> "/\\"
  | Tstar -> "*"
  | Tstring s -> "\"" ^ (string_for_read s) ^ "\""
  | Ttilde -> "~"
  | Tlparencolon -> "(:"
  | Tlbracketlbrace -> "[{"
;;
