(* Parser *)
(* Frank Pfenning <fp@cs.cmu.edu> *)

(*
 * Handwritten shift/reduce parser to support
 * best possible error messages
 *)

signature PARSE =
sig

    val parse : string -> Ast.env (* may raise ErrorMsg.Error *)
    val parse_preamble : string -> Ast.env (* may raise IO.Io _ *)

end  (* signature PARSE *)


structure Parse :> PARSE =
struct

structure A = Ast
structure PS = ParseState
structure M = Stream
structure T = Terminal
structure L = Lex

(******************)
(* Error Messages *)
(******************)
              
fun pp_tok t = "'" ^ T.toString t ^ "'"

fun pp_toks (nil) = ""
  | pp_toks (t::nil) = " or " ^ pp_tok t
  | pp_toks (t::ts) = pp_tok t ^ ", " ^ pp_toks ts

fun ^^(s1,s2) = s1 ^ "\n[Hint: " ^ s2 ^ "]"
infix ^^

exception ParseError of Lex.lexresult M.front

fun parse_error (region, s) tf =
    ( ErrorMsg.error (PS.ext region) s
    ; raise ParseError tf )

fun msg_expected t' t =
    ("expected " ^ pp_tok t' ^ ", found: " ^ pp_tok t)

fun error_expected (region, t', t) tf =
    ( ErrorMsg.error (PS.ext region) (msg_expected t' t)
    ; raise ParseError tf )

fun error_expected_h (region, t', t, error_hint) tf =
    ( ErrorMsg.error (PS.ext region) (msg_expected t' t ^^ error_hint)
    ; raise ParseError tf )

fun msg_expected_list ts t =
    "expected one of " ^ pp_toks ts ^ ", found: " ^ pp_tok t

fun error_expected_list (region, ts, t) tf =
    ( ErrorMsg.error (PS.ext region) (msg_expected_list ts t)
    ; raise ParseError tf )

fun error_expected_list_h (region, ts, t, error_hint) tf =
    ( ErrorMsg.error (PS.ext region) (msg_expected_list ts t ^^ error_hint)
    ; raise ParseError tf )
 
fun location (NONE) = "_"
  | location (SOME(mark)) = Mark.show(mark)

(*******************)
(* Data structures *)
(*******************)

type region = int * int
type prec = int                 (* precedence *)
datatype vari = Plus | With | None  (* variadic operator *)
datatype tg = T | U
type tptg = A.tp * tg

datatype exp_tp = EX of A.exp | TP of A.tp

(* stack items for shift/reduce parsing *)
datatype stack_item =
   Tok of T.terminal * region                          (* lexer token *)
 | Prefix of  (A.exp -> A.exp) * region                (* prefix expression operator *)
 | Infix of (exp_tp * A.exp -> A.exp) * region         (* infix expression operator *)
 | TpPrefix of (A.tp -> A.tp) * region                 (* prefix type operator *)
 | TpInfix of prec * vari * (tptg * tptg -> tptg) * region (* infix type operator, all right assoc *)
 | TaggedTp of (A.tag * A.tp) list * region            (* tagged type, ('i : tau) or () *)
 | Tp of (A.tp * tg) * region                          (* type *)
 | Exp of A.exp * region                               (* expression *)
 | Pat of A.pat * region                               (* pattern *)
 | TpPat of A.tpvarname * region                       (* type variable pattern *)
 | TExps of (A.tag * A.exp) list * region              (* list of tagged expressions *)
 | Branches of (A.pat * A.exp) list * region           (* list of branches *)
 | Dec of A.dec * region                               (* top-level declaration *)
 | Negation of region                                  (* negating a declaration *)
 | Error                                               (* lexing or parsing error *)

datatype stack
  = Bot
  | $ of stack * stack_item

infix 2 $

fun pp_item (Tok _) = "Tok"
  | pp_item (Prefix _) = "Prefix"
  | pp_item (Infix _) = "Infix"
  | pp_item (TpPrefix _) = "TpPrefix"
  | pp_item (TpInfix _) = "TpInfix"
  | pp_item (TaggedTp _) = "TaggedTp"
  | pp_item (Tp _) = "Tp"
  | pp_item (Exp _) = "Exp"
  | pp_item (Pat _) = "Pat"
  | pp_item (TpPat _) = "TpPat"
  | pp_item (TExps _) = "TExps"
  | pp_item (Branches _) = "Branches"
  | pp_item (Dec _) = "Dec"
  | pp_item (Negation _) = "Negation"
  | pp_item (Error) = "Error"

fun pp_stack Bot = "Bot"
  | pp_stack (S $ item) = pp_stack S ^ " $ " ^ pp_item item

(* This is a hand-written shift/reduce parser
 * I have tried to resist the temptation to optimize or transform,
 * since it is very easy to make mistakes.
 *
 * Parsing functions are named p_<nonterminal>, possibly with a suffix
 * for intermediate parsing states.  Reducing functions are named
 * r_<nonterminal>, possibly with a suffix for intermediate states.
 * With few exceptions, a parsing functions have type
 *
 * p_<nonterminal> : stack * L.lexresult M.Front -> stack * L.lexresult M.Front
 * r_<nonterminal> : stack -> stack
 *
 * Note that in input and output of the parsing function, the first
 * token of the lexer stream is exposed (type L.lexresult M.Front) which
 * make for easy matching and composition of these functions.
 *
 * Generally p_<nt> will consume terminals for an <nt> from the lexresult
 * stream and return with the resulting abstract syntax for <nt> on the stack.
 * Generally r_<nt> will consume a mix of terminal and nonterminals on
 * the stack and push the abstract syntax for an <nt> onto the stack.
 *
 * While parsing expression with infix, prefix, and postfix operators
 * we continue parsing, extending the expression until we encounter
 * a terminal that completes that is not part of an expression.
 *
 * p_<nt> is a function that parses nonterminal <nt>
 * r_<nt> is a function that reduces nonterminal <nt>
 * c_<cond> is a function that checks condition <cond>
 * e_<error> is a function that reports error <error>
 * m_<nt> is a function that marks nonterminal <nt> with region information
 *)

(***********************)
(* Parsing Combinators *)
(***********************)

(* Always call 'first ST' to extract the first token for examination *)
fun first (S, M.Cons((t, r), ts')) = t
fun second (S, ft) = ft

fun shift (S, M.Cons((t, r), ts')) = (S $ Tok(t, r), M.force ts')
fun reduce reduce_fun (S, ft) = (reduce_fun S, ft)

fun drop (S, M.Cons((t, r), ts')) = (S, M.force ts') (* use sparingly *)
fun push item (S, ft) = (S $ item, ft)

fun >>(f,g) = fn x => g(f(x))
fun |>(x,f) = f x

infixr 2 >>
infix 1 |>

(* region manipulation *)
fun join (left1, right1) (left2, right2) = (left1, right2)
fun here (S, M.Cons((t, r), ts')) = r
fun last (Bot) = 0
  | last (S $ Tok(t,(l,r))) = r
  | last (S $ Prefix(_,(l,r))) = r
  | last (S $ Infix(_,(l,r))) = r
  | last (S $ TpPrefix(_,(l,r))) = r
  | last (S $ TpInfix(_,_,_,(l,r))) = r
  | last (S $ TaggedTp(_,(l,r))) = r
  | last (S $ Tp(_,(l,r))) = r
  | last (S $ Exp(_,(l,r))) = r
  | last (S $ Pat(_,(l,r))) = r
  | last (S $ TpPat(_,(l,r))) = r
  | last (S $ TExps(_,(l,r))) = r
  | last (S $ Branches(_,(l,r))) = r
  | last (S $ Dec(_,(l,r))) = r
  | last (S $ Negation(l,r)) = r
  | last (S $ Error) = 0 (* should not be asked *)
val nowhere = (0,0)

(****************************)
(* Building abstract syntax *)
(****************************)

fun m_exp (exp, (left, right)) = A.Marked (Mark.mark' (exp, PS.ext (left, right)))

(* flatten sums, where tags 'l and 'r are defaults, for
 * backward compatiblity with binary sums
 *)
fun arrow ((tau1,_),(tau2,_)) = (A.Arrow(tau1,tau2),U)
fun times ((tau1,_),(tau2,_)) = (A.Times(tau1,tau2),U)
                                 
fun plus((A.Plus(sum1),T),(A.Plus(sum2),T)) = (A.Plus(sum1 @ sum2),T)
  | plus((tau1,_),(tau2,_)) = (A.Plus([("l",tau1),("r",tau2)]),U)

(* Note that '&' is right associative *)
fun with_ ((A.With(prod1),T),(A.With(prod2),T)) = (A.With(prod1 @ prod2),T)
  | with_ ((tau1,_),(tau2,_)) = (A.With([("l",tau1),("r",tau2)]),U)

fun pair_pack (EX(e1), e2) = A.Pair(e1,e2)
  | pair_pack (TP(tau1), e2) = A.Pack(tau1,e2)

(***********)
(* Parsing *)
(***********)

(*
 * Refer to the grammar in readme.txt
 * Comments refer to the nonterminals shown there in <angle brackets>
 *)

(* <dec> *)
fun p_dec ST = case first ST of
    T.TYPE => ST |> shift >> p_id >> p_terminal T.EQ >> p_tp_only >> reduce r_dec
  | T.DECL => ST |> shift >> p_id >> p_terminal T.COLON >> p_tp_only >> reduce r_dec
  | T.DEFN => ST |> shift >> p_id >> p_terminal T.EQ >> p_exp >> reduce r_dec
  | T.NORM => ST |> shift >> p_nat_opt >> p_id >> p_terminal T.EQ >> p_exp >> reduce r_dec
  | T.CONV => ST |> shift >> p_exp >> p_terminal T.EQ >> p_exp >> reduce r_dec
  | T.EVAL => ST |> shift >> p_nat_opt >> p_id >> p_terminal T.EQ >> p_exp >> reduce r_dec
  | T.FAIL => ST |> shift >> reduce r_dec
  | T.PRAGMA _ => ST |> shift >> reduce r_dec
  | T.EOF => ST
  | t => parse_error (here ST, "unexpected token " ^ pp_tok t ^ " at top level") (second ST)

and r_dec (S $ Tok(T.TYPE,r1) $ Tok(T.IDENT(a),_) $ Tok(T.EQ,_) $ Tp((tau,_),r2)) =
    S $ Dec(A.Type(a,tau,PS.ext(join r1 r2)), join r1 r2) (* generalize to allow parameters *)
  | r_dec (S $ Tok(T.DECL,r1) $ Tok(T.IDENT(x),_) $ Tok(T.COLON,_) $ Tp((tau,_),r2)) =
    S $ Dec(A.Decl(x,tau,PS.ext(join r1 r2)), join r1 r2)
  | r_dec (S $ Tok(T.DEFN,r1) $ Tok(T.IDENT(x),_) $ Tok(T.EQ,_) $ Exp(e,r2)) =
    S $ Dec(A.Defn(x,e,PS.ext(join r1 r2)), join r1 r2)
  | r_dec (S $ Tok(T.NORM,r1) $ Tok(T.NAT(n),_) $ Tok(T.IDENT(x),_) $ Tok(T.EQ,_) $ Exp(e,r2)) =
    S $ Dec(A.Norm(x,e,SOME(n),PS.ext(join r1 r2)), join r1 r2)
  | r_dec (S $ Tok(T.NORM,r1) $ Tok(T.IDENT(x),_) $ Tok(T.EQ,_) $ Exp(e,r2)) =
    S $ Dec(A.Norm(x,e,NONE,PS.ext(join r1 r2)), join r1 r2)
  | r_dec (S $ Tok(T.CONV,r1) $ Exp(e1,_) $ Tok(T.EQ,_) $ Exp(e2,r2)) =
    S $ Dec(A.Conv(e1,e2,PS.ext(join r1 r2)), join r1 r2)
  | r_dec (S $ Tok(T.EVAL,r1) $ Tok(T.NAT(n),_) $ Tok(T.IDENT(x),_) $ Tok(T.EQ,_) $ Exp(e,r2)) =
    S $ Dec(A.Eval(x,e,SOME(n),PS.ext(join r1 r2)), join r1 r2)
  | r_dec (S $ Tok(T.EVAL,r1) $ Tok(T.IDENT(x),_) $ Tok(T.EQ,_) $ Exp(e,r2)) =
    S $ Dec(A.Eval(x,e,NONE,PS.ext(join r1 r2)), join r1 r2)
  | r_dec (S $ Tok(T.FAIL,r)) = S $ Negation(r)
  | r_dec (S $ Tok(T.PRAGMA(p,line),r)) =
    S $ Dec(A.Pragma(p,line,PS.ext(r)),r)
  (* | r_dec S = ( TextIO.print (pp_stack S ^ "\n") ; raise Match ) *)

and p_nat_opt ST = case first ST of
    T.NAT(n) => ST |> shift
  | _ => ST

(* p_tp allows a type or a tagged type
 * p_tp_only does not allow a tagged type
 *)
and p_tp_only ST = ST |> p_tp >> reduce (r_tp_only ST)
and r_tp_only ST (S $ Tp((tau,t),r)) = S $ Tp((tau,t),r)
  | r_tp_only ST (S $ TaggedTp(i_tau,r)) =
    parse_error (r, "ambiguous tagged type") (second ST)
  | r_tp_only ST S = parse_error ((last S, #1 (here ST)), "empty type") (second ST)

and p_tp ST = case first ST of
    T.IDENT(a) => ST |> shift >> reduce r_atom_tp >> p_tp_jux
  | T.NAT(1) => ST |> shift >> reduce r_atom_tp >> p_tp_jux
  | T.NAT(0) => ST |> shift >> reduce r_atom_tp >> p_tp_jux
  | T.NAT(n) => parse_error (here ST, "invalid natural number (only 1 and 0 are types)") (second ST)
  | T.EXCL => ST |> shift >> p_id >> p_terminal T.DOT >> reduce r_atom_tp >> p_tp
  | T.QUEST => ST |> shift >> p_id >> p_terminal T.DOT >> reduce r_atom_tp >> p_tp
  | T.DOLLAR => ST |> shift >> p_id >> p_terminal T.DOT >> reduce r_atom_tp >> p_tp
  | T.BACKSLASH => ST |> shift >> p_id >> p_terminal T.DOT >> reduce r_atom_tp >> p_tp
  | T.ARROW => ST |> drop >> p_tp_prec (TpInfix(1, None, arrow, here ST))
  | T.PLUS => ST |> drop >> p_tp_prec (TpInfix(2, Plus, plus, here ST))
  | T.AMPERSAND => ST |> drop >> p_tp_prec (TpInfix(2, With, with_, here ST))
  | T.STAR => ST |> drop >> p_tp_prec (TpInfix(3, None, times, here ST))
  | T.LABEL(i) => ST |> shift >> p_terminal T.COLON >> p_tp >> reduce r_atom_tp >> p_tp_jux
  | T.LPAREN => ST |> shift >> p_tp >> p_terminal T.RPAREN >> reduce r_atom_tp >> p_tp_jux
  | _ => ST |> reduce (r_tp ST) (* type complete: reduce *)

(* call after an atom, not a prefix operator *)
and p_tp_jux (ST as (S,ft)) = case S of
    S $ Tp((tau1,_),r1) $ Tp((tau2,_),r2) => p_tp (S $ Tp((A.TApp(tau1,tau2),U),join r1 r2), ft)
  | S $ Tp _ $ TaggedTp(_,r) => parse_error (r, "type folled by tagged type") ft
  | S $ TaggedTp(_,r) $ Tp _ => parse_error (r, "tagged type followed by type") ft
  | S $ Tp _ => p_tp ST  (* shift: Tp leading or after prefix *)
  | S $ TaggedTp _ => p_tp ST (* shift leading tagged type *)

(* operators are all weak prefixes or right associative, so shift *)
(* todo: push operator on the stack, as in other _prec functions? *)
and p_tp_prec (opr as TpInfix(p',v',f',r')) (ST as (S,ft)) = case S of
    S $ TaggedTp(i_tau,r) => (case v'
                                  of Plus => p_tp_prec opr (S $ Tp((A.Plus(i_tau),T),r), ft)
                                   | With => p_tp_prec opr (S $ Tp((A.With(i_tau),T),r), ft)
                                   | None => parse_error (r, "lonely tagged type") ft)
  | S $ Tp((tau1,tg1),r1) $ TpInfix(p,v,f,_) $ Tp((tau2,tg2),r2) =>
    if p > p' then p_tp_prec opr (S $ Tp(f((tau1,tg1),(tau2,tg2)), join r1 r2), ft) (* reduce *)
    else ST |> push opr >> p_tp (* shift, for p = p' since right associative *)
  | S $ TpInfix(p,v,f,r) => parse_error (join r r', "consecutive infix operators") ft
  | S $ TpPrefix(f,r) => parse_error (join r r', "prefix followed by infix operator") ft
  | S $ Tp _ => ST |> push opr >> p_tp (* shift *)
  | _ => parse_error (r', "leading infix operator") ft

and r_atom_tp (S $ Tok(T.IDENT(a),r)) = S $ Tp((A.TpVar(a),U),r)
  | r_atom_tp (S $ Tok(T.NAT(1),r)) = S $ Tp((A.One,U),r)
  | r_atom_tp (S $ Tok(T.NAT(0),r)) = S $ Tp((A.Plus(nil),U),r)
  | r_atom_tp (S $ Tok(T.LPAREN,r1) $ Tp((tau,tg),_) $ Tok(T.RPAREN,r2)) = S $ Tp((tau,tg),join r1 r2)
  | r_atom_tp (S $ Tok(T.LPAREN,r1) $ TaggedTp(i_tau,_) $ Tok(T.RPAREN,r2)) = S $ TaggedTp(i_tau,join r1 r2)
  | r_atom_tp (S $ Tok(T.LPAREN,r1) $ Tok(T.RPAREN,r2)) = S $ TaggedTp([], join r1 r2)
  | r_atom_tp (S $ Tok(T.EXCL,r1) $ Tok(T.IDENT(a),_) $ Tok(T.DOT,r2)) =
    S $ TpPrefix(fn tau => A.Forall(a,tau), join r1 r2)
  | r_atom_tp (S $ Tok(T.QUEST,r1) $ Tok(T.IDENT(a),_) $ Tok(T.DOT,r2)) =
    S $ TpPrefix(fn tau => A.Exists(a,tau), join r1 r2)
  | r_atom_tp (S $ Tok(T.DOLLAR,r1) $ Tok(T.IDENT(a),_) $ Tok(T.DOT,r2)) =
    S $ TpPrefix(fn tau => A.Rho(a,tau), join r1 r2)
  | r_atom_tp (S $ Tok(T.BACKSLASH,r1) $ Tok(T.IDENT(a),_) $ Tok(T.DOT,r2)) =
    S $ TpPrefix(fn tau => A.TLam(a,tau), join r1 r2)
  | r_atom_tp (S $ Tok(T.LABEL(i),r1) $ Tok(T.COLON,_) $ Tp((tau,_),r2)) = (* tagged irrelevant here *)
    S $ TaggedTp([(i,tau)],join r1 r2)

and r_tp ST (S $ TpPrefix(f,r1) $ Tp((tau,_),r2)) = r_tp ST (S $ Tp((f tau,U), join r1 r2))
  | r_tp ST (S $ TpPrefix(f,r1) $ TaggedTp(i_tau,r2)) = parse_error (r2, "mere tagged type") (second ST)
  | r_tp ST (S $ Tp((tau1,tg1),r1) $ TpInfix(p,v,f,_) $ Tp((tau2,tg2),r2)) =
    r_tp ST (S $ Tp(f((tau1,tg1),(tau2,tg2)), join r1 r2))
  | r_tp ST (S $ Tp((tau1,tg1) ,r1) $ TpInfix(p,v,f,r2) $ TaggedTp(i_tau,r3)) =
    (case v
      of Plus => r_tp ST (S $ Tp((tau1,tg1),r1) $ TpInfix(p,v,f,r2) $ Tp((A.Plus(i_tau),T),r3))
       | With => r_tp ST (S $ Tp((tau1,tg1),r1) $ TpInfix(p,v,f,r2) $ Tp((A.With(i_tau),T),r3))
       | None => parse_error(r3, "ambiguous tagged type") (second ST)
    )
  | r_tp ST (S $ Tp((tau1,_),r1) $ Tp((tau2,_),r2)) = parse_error (join r1 r2, "consecutive types") (second ST)
  | r_tp ST (S $ Tp((_,_),r1) $ TaggedTp(_,r2)) = parse_error (join r1 r2, "consecutive types") (second ST)
  | r_tp ST (S $ TaggedTp(_,r1) $ Tp((_,_),r2)) = parse_error (join r1 r2, "consecutive types") (second ST)
  | r_tp ST (S $ Tp((tau,tg),r)) = S $ Tp((tau,tg),r)  (* must come after prefix and infix cases *)
  | r_tp ST (S $ TaggedTp(i_tau,r)) = S $ TaggedTp(i_tau,r)
  | r_tp ST (S $ TpPrefix(f,r)) = parse_error (join r (here ST), "incomplete type") (second ST)
  | r_tp ST (S $ TpInfix(p,v,f,r)) = parse_error (join r (here ST), "incomplete type") (second ST)
  | r_tp ST S = S (* empty type; allow for () *)
  (* | r_tp ST S = ( TextIO.print (pp_stack S) ; raise Match ) *)
  (* | r_tp ST S = parse_error ((last S, #1 (here ST)), "empty type") (second ST) *)

and p_exp ST = case first ST of
    T.IDENT(x) => ST |> shift >> reduce r_atom >> p_exp_prec
  | T.BACKSLASH => ST |> shift >> p_id >> p_tp_opt >> p_terminal T.DOT >> reduce r_atom >> p_exp_prec
  | T.SLASHBACK => ST |> shift >> p_id >> p_terminal T.DOT >> reduce r_atom >> p_exp_prec
  | T.COMMA => ST |> drop >> push (Infix(pair_pack, here ST)) >> p_exp_prec
  | T.LABEL(i) => ST |> drop >> push (Prefix(fn e => A.Inject(i,e), here ST)) >> p_exp_prec
  | T.DOT => ST |> shift >> p_label >> p_exp_prec
  | T.CASE => ST |> shift >> p_exp >> p_terminal T.OF >> p_branches >> reduce r_atom >> p_exp_prec
  | T.FOLD => ST |> drop >> push (Prefix(A.Fold, here ST)) >> p_exp_prec
  | T.UNFOLD => ST |> drop >> push (Prefix(A.Unfold, here ST)) >> p_exp_prec
  | T.DOLLAR => ST |> shift >> p_id >> p_tp_opt >> p_terminal T.DOT >> reduce r_atom >> p_exp_prec
  | T.LBRACKET => ST |> shift >> p_tp >> p_terminal T.RBRACKET >> reduce r_atom >> p_exp_prec
  | T.LPAREN => ST |> shift >> p_exp_opt >> p_terminal T.RPAREN >> reduce r_atom >> p_exp_prec
  | T.LBANANA => ST |> shift >> push (TExps(nil, here ST)) >> p_record >> p_terminal T.RBANANA >> reduce r_atom >> p_exp_prec
  | _ => ST |> reduce (r_exp ST) (* expression completed: reduce *)

and p_record ST = ST |> p_label >> p_terminal T.RIGHTARROW >> p_exp >> reduce r_texp >> p_recordnext
and p_recordnext ST = case first ST of
    T.BAR => ST |> drop >> p_record
  | _ => ST

and r_texp (S $ TExps(texps,r1) $ Tok(T.LABEL(i),_) $ Tok(T.RIGHTARROW,_) $ Exp(e,r2)) =
    S $ TExps(texps @ [(i,e)], join r1 r2)

and p_branches ST = case first ST of
    T.LPAREN => ST |> shift >> push (Branches(nil, here ST)) >> p_branchseq_opt >> p_terminal T.RPAREN
                   >> reduce r_branches
  | t => parse_error (here ST, "expected '(', found " ^ pp_tok t) (second ST)                                

and p_branchseq_opt ST = case first ST of
    T.RPAREN => ST (* empty list of branches; do not push or drop *)
  | _ => p_branchseq ST

and p_branchseq ST = ST |> p_pat >> p_terminal T.RIGHTARROW >> p_exp >> reduce r_branch >> p_branchnext
and p_branchnext ST = case first ST of
    T.BAR => ST |> drop >> p_branchseq
  | _ => ST

and r_branch (S $ Branches(branches,r1) $ Pat(p,_) $ Tok(T.RIGHTARROW,_) $ Exp(e,r2)) =
    S $ Branches(branches @ [(p,e)], join r1 r2)

and r_branches (S $ Tok(T.LPAREN,r1) $ Branches(branches,_) $ Tok(T.RPAREN,r2)) = S $ Branches(branches, join r1 r2)

and p_pat ST = case first ST of
    T.LPAREN => ST |> shift >> p_pat_opt >> p_terminal T.RPAREN >> reduce r_pat_atom >> p_pat_prec
  | T.LBRACKET => ST |> shift >> p_id >> p_terminal T.RBRACKET >> reduce r_pat_atom >> p_pat_prec
  | T.IDENT(x) => ST |> shift >> reduce r_pat_atom >> p_pat_prec
  | T.LABEL(i) => ST |> shift >> p_pat_prec
  | T.COMMA => ST |> shift >> p_pat_prec
  | T.FOLD => ST |> shift >> p_pat_prec
  | _ => ST |> reduce (r_pat ST)

and p_pat_prec (ST as (S,ft)) = case S of
    (* adding atom *)
    S $ Pat(_,r1) $ Pat(_,r2) => parse_error (join r1 r2, "consecutive patterns") ft
  | S $ Pat _ => p_pat ST (* always shift: prefix incomplete, infix is right associative *)

  | S $ Pat(_,r1) $ TpPat(_,r2) => parse_error (r2, "misplaced type variable") ft
  | S $ Tok(T.LABEL _, r1) $ TpPat(_,r2) => parse_error (r2, "misplaced type variable") ft
  | S $ Tok(T.FOLD, r1) $ TpPat(_,r2) => parse_error (r2, "misplaced type variable") ft

  | S $ TpPat _ => p_pat ST (* shift if leading or following infix op *)

  (* adding prefix *)
  | S $ Tok(T.LABEL(i),_) => p_pat ST (* shift prefix operator, which is incomplete *)
  | S $ Tok(T.FOLD,_) => p_pat ST (* shift prefix operator, which is incomplete *)

  (* adding infix *)
  | S $ Tok(T.LABEL(i),r1) $ Pat(p,r2) $ Tok(T.COMMA,r3) => (* reduce: all prefix have higher priority than infix *)
    p_pat_prec (S $ Pat(A.InjectPat(i,p),join r1 r2) $ Tok(T.COMMA,r3), ft)
  | S $ Tok(T.FOLD,r1) $ Pat(p,r2) $ Tok(T.COMMA,r3) => (* reduce *)
    p_pat_prec (S $ Pat(A.FoldPat(p),join r1 r2) $ Tok(T.COMMA,r3), ft)

  | S $ Pat _ $ Tok(T.COMMA, _) => p_pat ST (* shift: infix is right associative *)
  | S $ TpPat _ $ Tok(T.COMMA, _) => p_pat ST (* shift: infix is right associative *)

  (* error conditions *)
  | S $ Tok(T.COMMA,r1) $ Tok(T.COMMA,r2) => parse_error (join r1 r2, "consecutive infix operators") ft
  | S $ Tok(T.COMMA,r) => parse_error (r, "leading infix operator") ft

and p_pat_opt ST = case first ST of
    T.RPAREN => ST
  | _ => ST |> p_pat

and r_pat_atom (S $ Tok(T.IDENT(x),r)) = S $ Pat(A.VarPat(x),r) (* mark patterns? *)
  | r_pat_atom (S $ Tok(T.LPAREN,r1) $ Tok(T.RPAREN,r2)) = S $ Pat(A.UnitPat,join r1 r2)
  | r_pat_atom (S $ Tok(T.LPAREN,r1) $ Pat(p,_) $ Tok(T.RPAREN,r2)) = S $ Pat(p, join r1 r2)
  | r_pat_atom (S $ Tok(T.LBRACKET,r1) $ Tok(T.IDENT(a),_) $ Tok(T.RBRACKET,r2)) = S $ TpPat(a,join r1 r2)
  (* | r_pat_atom S = ( TextIO.print (pp_stack S) ; raise Match ) *)

and r_pat ST (S $ Tok(T.LABEL(i),r1) $ Pat(p,r2)) = r_pat ST (S $ Pat(A.InjectPat(i,p),join r1 r2))
  | r_pat ST (S $ Tok(T.FOLD,r1) $ Pat(p,r2)) = r_pat ST (S $ Pat(A.FoldPat(p),join r1 r2))
  | r_pat ST (S $ Pat(p1,r1) $ Tok(T.COMMA,_) $ Pat(p2,r2)) = r_pat ST (S $ Pat(A.PairPat(p1,p2),join r1 r2))
  | r_pat ST (S $ TpPat(a,r1) $ Tok(T.COMMA,_) $ Pat(p2,r2)) = r_pat ST (S $ Pat(A.PackPat(a,p2),join r1 r2))
  | r_pat ST (S $ Pat(p,r)) = (S $ Pat(p,r))

  | r_pat ST (S $ TpPat(_,r)) = parse_error (r, "misplaced type variable") (second ST)
  | r_pat ST (S $ Tok(T.LABEL(i),r)) = parse_error (join r (here ST), "incomplete prefix pattern") (second ST)
  | r_pat ST (S $ Tok(T.FOLD,r)) = parse_error (join r (here ST), "incomplete prefix pattern") (second ST)
  | r_pat ST (S $ Pat(_,r) $ Tok(T.COMMA,_)) = parse_error (join r (here ST), "incomplete infix pattern") (second ST)
  | r_pat ST (S $ TpPat(_,r) $ Tok(T.COMMA,_)) = parse_error (join r (here ST), "incomplete infix pattern") (second ST)
  | r_pat ST S = parse_error ((last S, #1 (here ST)), "empty pattern") (second ST)

and p_tp_opt ST = case first ST of
    T.COLON => ST |> shift >> p_tp
  | _ => ST                            

and p_exp_opt ST = case first ST of
    T.RPAREN => ST  (* "()" for the unit element *)
  | _ => ST |> p_exp

(* decide whether to shift or reduce *)
and p_exp_prec (ST as (S,ft)) = case S of
   (* adding expression *)
    S $ Exp(e1,r1) $ Exp(e2,r2) => (* reduce, juxtaposition is left associative *)
    p_exp (S $ Exp(m_exp(A.App(e1,e2),join r1 r2), join r1 r2), ft)
  | S $ Exp(e1,r1) $ Tp((tau2,_),r2) =>
    p_exp (S $ Exp(m_exp(A.TpApp(e1,tau2),join r1 r2), join r1 r2), ft)
  | S $ Exp(e,r1) $ Tok(T.DOT,_) $ Tok(T.LABEL(i),r2) =>
    p_exp (S $ Exp(m_exp(A.Project(e,i),join r1 r2), join r1 r2), ft)
  | S $ Exp _ => p_exp ST (* shift: Exp or Tp leading, or after prefix or infix *)
  | S $ Tp _ => p_exp ST (* shift: Exp or Tp leading, or after prefix or infix *)

  | S $ Prefix _ => p_exp ST (* shift: all prefix operators have higher precedence than infix *)

  (* adding infix *)
  | S $ Prefix(f,r1) $ Exp(e,r2) $ Infix(f',r') => (* reduce: all prefix have higher priority than infix *)
    p_exp_prec (S $ Exp(m_exp(f e, join r1 r2), join r1 r2) $ Infix(f',r'), ft)
  | S $ Prefix(f,r1) $ Tp((tau,_),r2) $ Infix(f',r') => (* error: prefix ops cannot be applied to types *)
    parse_error (r2, "misplaced type") ft
  | S $ Tp _ $ Infix _ => p_exp ST (* shift, only infix operator is right associative *)
  | S $ Exp _ $ Infix _ => p_exp ST (* shift, only infix operator is right associative *)
  | S $ Infix(_,r1) $ Infix(_,r2) => parse_error (join r1 r2, "consecutive infix operators") ft
  | S $ Infix(_,r) => parse_error (r, "leading infix operator") ft
  | S $ Tok(T.DOT,r1) $ Tok(T.LABEL(i),r2) => parse_error (join r1 r2, "leading postfix operator") ft
  | S => ( TextIO.print (pp_stack S) ; raise Match )

and r_atom (S $ Tok(T.IDENT(x),r)) = S $ Exp(m_exp(A.Var(x),r), r)
  | r_atom (S $ Tok(T.LPAREN,r1) $ Tok(T.RPAREN,r2)) = S $ Exp(m_exp(A.Unit, join r1 r2), join r1 r2)
  | r_atom (S $ Tok(T.LPAREN,r1) $ Exp(e,_) $ Tok(T.RPAREN,r2)) = S $ Exp(m_exp(e, join r1 r2), join r1 r2)
  | r_atom (S $ Tok(T.LBANANA,r1) $ TExps(texps,_) $ Tok(T.RBANANA,r2)) = S $ Exp(m_exp(A.Record(texps), join r1 r2), join r1 r2)
  | r_atom (S $ Tok(T.BACKSLASH,r1) $ Tok(T.IDENT(x),_) $ Tok(T.DOT,r2)) =
    S $ Prefix(fn e => A.Lam(x,NONE,e), join r1 r2)
  | r_atom (S $ Tok(T.BACKSLASH,r1) $ Tok(T.IDENT(x),_) $ Tok(T.COLON,_) $ Tp((tau,_),_) $ Tok(T.DOT,r2)) =
    S $ Prefix(fn e => A.Lam(x,SOME(tau),e), join r1 r2)
  | r_atom (S $ Tok(T.SLASHBACK,r1) $ Tok(T.IDENT(a),_) $ Tok(T.DOT,r2)) =
    S $ Prefix(fn e => A.TpLam(a,e), join r1 r2)
  | r_atom (S $ Tok(T.LBRACKET,r1) $ Tp((tau,tg),_) $ Tok(T.RBRACKET,r2)) = S $ Tp((tau,tg), join r1 r2)
  | r_atom (S $ Tok(T.DOLLAR,r1) $ Tok(T.IDENT(x),_) $ Tok(T.DOT,r2)) =
    S $ Prefix(fn e => A.Fix(x,NONE,e), join r1 r2)
  | r_atom (S $ Tok(T.DOLLAR,r1) $ Tok(T.IDENT(x),_) $ Tok(T.COLON,_) $ Tp((tau,_),_) $ Tok(T.DOT,r2)) =
    S $ Prefix(fn e => A.Fix(x,SOME(tau),e), join r1 r2)
  | r_atom (S $ Tok(T.CASE,r1) $ Exp(e,_) $ Tok(T.OF,_) $ Branches(branches,r2)) = S $ Exp(m_exp(A.Case(e,branches), join r1 r2), join r1 r2)
  (* | r_atom S = ( TextIO.print (pp_stack S) ; raise Match ) *)
  (* left/right/fold/unfold are prefix operators *)

and r_exp ST (S $ Exp(e1,r1) $ Exp(e2,r2)) = r_exp ST (S $ Exp(m_exp(A.App(e1,e2),join r1 r2), join r1 r2))
  | r_exp ST (S $ Exp(e1,r1) $ Tp((tau2,_),r2)) = r_exp ST (S $ Exp(m_exp(A.TpApp(e1,tau2),join r1 r2), join r1 r2))
  | r_exp ST (S $ Exp(e,r1) $ Tok(T.DOT,_) $ Tok(T.LABEL(i),r2)) = r_exp ST (S $ Exp(m_exp(A.Project(e,i),join r1 r2), join r1 r2))
  | r_exp ST (S $ Prefix(f,r1) $ Exp(e,r2)) = r_exp ST (S $ Exp(m_exp(f e, join r1 r2), join r1 r2))
  | r_exp ST (S $ Exp(e1,r1) $ Infix(f,_) $ Exp(e2,r2)) = r_exp ST (S $ Exp(m_exp(f(EX(e1),e2),join r1 r2), join r1 r2))
  | r_exp ST (S $ Tp((tau1,_),r1) $ Infix(f,_) $ Exp(e2,r2)) = r_exp ST (S $ Exp(m_exp(f(TP(tau1),e2),join r1 r2), join r1 r2))
  | r_exp ST (S $ Exp(e,r)) = S $ Exp(e,r)

  | r_exp ST (S $ Tp((tau,_),r)) = parse_error (r, "misplaced type") (second ST) (* cannot follow prefix or infix op *)
  | r_exp ST (S $ Prefix(f,r)) = parse_error (join r (here ST), "incomplete prefix expression") (second ST)
  | r_exp ST (S $ Exp(_,r) $ Infix(f,_)) = parse_error (join r (here ST), "incomplete infix expression") (second ST)
  | r_exp ST (S $ Tp((_,_),r) $ Infix(f,_)) = parse_error (join r (here ST), "incomplete infix expression") (second ST)
  | r_exp ST S = parse_error ((last S, #1 (here ST)), "empty expression") (second ST)

(* <id> *)
and p_id ST = case first ST of
    T.IDENT(id) => ST |> drop >> push (Tok(T.IDENT(id), here ST))
  | t => parse_error (here ST, "expected identifier, found " ^ pp_tok t) (second ST)

and p_label ST = case first ST of
    T.LABEL(i) => ST |> shift
  | t => parse_error (here ST, "expected tag, found " ^ pp_tok t) (second ST)

(* parse any token (terminal symbol) 't_needed' *)
and p_terminal t_needed ST = case first ST of
    t => if t_needed = t
	 then ST |> shift
	 else error_expected (here ST, t_needed, t) (second ST)

fun recover (tf as M.Nil) = (Bot $ Error, tf)
  | recover (tf as M.Cons((t,r), tf')) =
    ( case t
       of T.TYPE => (Bot $ Error, tf)
        | T.DECL => (Bot $ Error, tf)
        | T.DEFN => (Bot $ Error, tf) (* token stream including t *)
        | T.NORM => (Bot $ Error, tf)
        | T.CONV => (Bot $ Error, tf)
        | T.EVAL => (Bot $ Error, tf)
        | T.FAIL => (Bot $ Error, tf)
        | T.EOF => (Bot $ Error, tf)
        | _ => (* skip all other tokens *)
          recover (M.force tf') )

fun robust_p_dec ST =
    p_dec ST
    handle Lex.LexError ts => recover (M.force ts)
         | ParseError tf => recover tf

(* (<pragma> | <dec>)* *)
fun parse_decs negated token_front =
    let 
        val ST = if negated then ErrorMsg.suppress (fn () => robust_p_dec (Bot, token_front))
                 else robust_p_dec (Bot, token_front)
        (* val () = if !ErrorMsg.anyErrors then raise ErrorMsg.Error else () *)
    in
        case ST
         of (Bot $ Error, token_front) => (* error in preceding declaration *)
            if negated then ( if !Flags.verbosity >= 1
                              then TextIO.print "(* ignored syntax error in negated declaration *)\n"
                              else ()
                            ; parse_decs false token_front )
            else raise ErrorMsg.Error
          | (Bot $ Negation(r), token_front) =>
            if negated then ( ErrorMsg.error (PS.ext r) "double negation"
                            ; raise ErrorMsg.Error  )
            else parse_decs true token_front
          | (Bot, M.Cons((T.EOF, r), token_front)) => (* whole file processed *)
            if negated then ( ErrorMsg.error (PS.ext r) "negation not followed by declaration"
                            ; raise ErrorMsg.Error )
            else []
          | (Bot $ Dec(dec,r), token_front) =>
            (if negated then A.Not(dec,PS.ext r) else dec) :: parse_decs false token_front
          (* should be the only possibilities *)
    end

(* parse filename = decs
 * first apply lexer, the parser to the resulting token stream
 * raise ErrorMsg.Error if not lexically or syntactically correct
 * or if file does not exist or cannot be opened
 *)
fun parse filename =
    SafeIO.withOpenIn filename (fn instream =>
      let val () = PS.pushfile filename (* start at pos 0 in filename *)
          val token_stream = Lex.makeLexer (fn _ => TextIO.input instream)
          val decs = parse_decs false (M.force token_stream)
          val () = PS.popfile ()
      in decs end)
    handle e as IO.Io _ => ( ErrorMsg.error NONE (exnMessage e)
                           ; raise ErrorMsg.Error )

(* <pragma>*, ignoring remainder of token stream *)
fun parse_preamble_decs token_front =
    let
        val ST = p_dec (Bot, token_front) (* may raise ErrorMsg.Error *)
    in
        case ST
         of (Bot $ Dec(dec as A.Pragma _, _), token_front) =>
            (* part of preamble *)
            dec :: parse_preamble_decs token_front
          | (Bot $ _, token_front) =>
            (* either end of file or declaration which is not a pragma *)
            nil
    end handle ErrorMsg.Error => nil (* turn error into nil *)

(* parse preamble = pragmas *)
fun parse_preamble filename =
    SafeIO.withOpenIn filename (fn instream =>
      let val () = PS.pushfile filename
          val token_stream = Lex.makeLexer (fn _ => TextIO.input instream)
          val pragmas = parse_preamble_decs (M.force token_stream)
          val () = PS.popfile ()
      in
          pragmas
      end)
    (* may raise IO.Io _, must be handled by caller *)

end (* structure Parse *)
