structure Tokens :> TOKENS =
struct

  open Parsing

  infixr 4 << >>
  infixr 3 &&
  infix  2 -- ##
  infix  2 wth suchthat return guard
  infixr 1 ||



  (* Report an error with its position. *)

  fun error s pos = print (Pos.toString pos ^ ": " ^ s ^ "\n")

  (* Succeeds if p succeeds at the current point, but doesn't
     consume anything. *)

  fun ahead p = lookahead p (fn _ => succeed ())

  (* Ignores the result of p *)

  fun ignore p = p return ()



  (**** Tokenizer ****)

  datatype token =
      Word of string
    | Number of int

  (* Predicates for different kinds of characters. *)

  fun isLetter c = Char.isAlpha c orelse Char.contains "_'" c
  fun isSymbolic c = Char.contains "!%&$#+-/:<=>?@\\~`^|*" c
  fun isIdent c = isLetter c orelse Char.isDigit c

  (* A sequence of alphanumeric characters starting with an
     alphabetic character. *)

  val letters = satisfy isLetter && repeat (satisfy isIdent) wth op::

  (* A "word" of letters or symbolic characters. *)

  val word = letters wth implode
          || repeat1 (satisfy isSymbolic) wth implode
          || any wth Char.toString

  (* A sequence of digits. *)

  val integer = repeat1 (satisfy Char.isDigit)
                    wth (Option.valOf o Int.fromString o implode)

  (* A positive or negative integer constant. *)

  val number = integer || (literal #"~" >> integer) wth op~

  (* A comment, potentially with nested comments inside. *)

  fun comment () = string [#"(",#"*"]
                && (repeat ($insideComment) && string [#"*",#")"]
                    guard error "Unterminated comment.")

  (* Either a nested comment or a single character (which is not
     start of a nested comment or the comment terminator). *)

  and insideComment () =
         ignore ($comment)
      || any -- (fn #"*" => ahead (satisfy (fn x => x <> #")"))
                  | #"(" => ahead (satisfy (fn x => x <> #"*"))
                  | _    => succeed ())

  (* White space. *)

  val space = repeat (ignore ($comment) || ignore (satisfy Char.isSpace))

  (* A token (skipping over white space).  The token is marked with
     its position. *)

  val token = space >> (!! (number wth Number || word wth Word))

  fun litWord s = literal (Word s) return s
  fun litNumber i = literal (Number i) return i

  val anyWord = any -- (fn Word s => succeed s | _ => fail)
  val anyNumber = any -- (fn Number i => succeed i | _ => fail)



  (**** Statements ****)

  (* A non-semicolon token. *)

  val junk = satisfy (fn s => s <> Word ";")

  (* Tries a "statement" parser p.  If the parser fails, reports
     an error, skips to the next semicolon, and tries again. *)

  fun recover p =
      let fun try () = p << litWord ";" guard error "Syntax error."
                    || repeat junk >> litWord ";" >> $try
       in $try end

end
