(* Lecture 19: Lexing *) (* Author: Frank Pfenning *) (* Some parsing code for Lecture 20 is included at bottom *) use "/afs/andrew/scs/cs/15-212-X/code/stream.sml"; signature MSTREAM_IO = sig val readTerminal : string -> char MStream.stream val readFile : string -> char MStream.stream end; structure MStreamIO :> MSTREAM_IO = struct fun fromLine f nil = MStream.delay f | fromLine f (c::cs) = MStream.cons (c, fromLine f cs) fun readTerminal (prompt) = let fun getLine () = ( TextIO.output (TextIO.stdOut, prompt) ; TextIO.flushOut (TextIO.stdOut) ; if TextIO.endOfStream TextIO.stdIn then MStream.Empty else MStream.expose (fromLine getLine (String.explode (TextIO.inputLine TextIO.stdIn))) ) in MStream.delay getLine end fun readFile (fileName) = let val instream = TextIO.openIn fileName fun getLine () = if TextIO.endOfStream instream then ( TextIO.closeIn instream ; MStream.Empty ) else MStream.expose (fromLine getLine (String.explode (TextIO.inputLine instream))) in MStream.delay getLine end end; (* structure MStreamIO *) signature LEXER = sig datatype token = INTEGER of int | PLUS | MINUS | STAR | SLASH | LPAREN | RPAREN | SEMICOLON exception Error of string val lex : char MStream.stream -> token MStream.stream end; structure Lexer :> LEXER = struct datatype token = INTEGER of int | PLUS | MINUS | STAR | SLASH | LPAREN | RPAREN | SEMICOLON exception Error of string val ord0 = Char.ord(#"0") fun charVal (c) = Char.ord(c)-ord0 fun lex (s) = MStream.delay (fn () => lex' (MStream.expose s)) and lex' (f as MStream.Cons (c, s)) = if Char.isSpace(c) (* Space, Newline, Tab, Formfeed *) then lex' (MStream.expose s) else if Char.isDigit(c) then lexInt' (f,0) else lexSym' f | lex' (MStream.Empty) = MStream.Empty (* lexSym' (f:char front) where f not Empty *) and lexSym' (MStream.Cons(#"+",s)) = MStream.Cons(PLUS, lex s) | lexSym' (MStream.Cons(#"-",s)) = MStream.Cons(MINUS, lex s) | lexSym' (MStream.Cons(#"*",s)) = MStream.Cons(STAR, lex s) | lexSym' (MStream.Cons(#"/",s)) = MStream.Cons(SLASH, lex s) | lexSym' (MStream.Cons(#"(",s)) = MStream.Cons(LPAREN, lex s) | lexSym' (MStream.Cons(#")",s)) = MStream.Cons(RPAREN, lex s) | lexSym' (MStream.Cons(#";",s)) = MStream.Cons(SEMICOLON, lex s) | lexSym' (MStream.Cons(c,s)) = raise Error ("Illegal character: " ^ Char.toString(c)) and lexInt' (f as MStream.Cons(c,s),n) = if Char.isDigit(c) then lexInt' (MStream.expose s, 10*n + charVal(c)) else MStream.Cons(INTEGER(n), MStream.delay (fn () => lex' f)) | lexInt' (MStream.Empty,n) = MStream.Cons(INTEGER(n), MStream.empty) end; (* structure Lexer *) signature PARSER = sig structure Lexer : LEXER datatype exp = Integer of int | Plus of exp * exp | Minus of exp * exp | Times of exp * exp | Divide of exp * exp | Neg of exp exception Error of string val parse : Lexer.token MStream.stream -> exp MStream.stream val toString : exp -> string end; (* signature PARSER *) functor Parser (structure Lexer' : LEXER) :> PARSER where type Lexer.token = Lexer'.token = struct structure Lexer = Lexer' datatype exp = Integer of int | Plus of exp * exp | Minus of exp * exp | Times of exp * exp | Divide of exp * exp | Neg of exp exception Error of string (* Grammar: ::= SEMICOLON ::= PLUS | MINUS | ::= STAR | SLASH | ::= MINUS | atom ::= INTEGER(n) | LPAREN RPAREN *) (* precedence of operators *) type prec = int datatype stackItem = Exp of exp | BinOp of prec * (exp * exp -> exp) | UnOp of prec * (exp -> exp) (* Stack invariant, refinements of stackItem list *) (*

::= nil | | ::= Exp _ :: nil | Exp _ :: ::= BinOp _ :: | UnOp _ ::

::= Exp _ :: *) structure S = MStream structure L = Lexer (* Enforcing stack invariant *) (* val shift : stackItem *

->

*) fun shift (Exp _, Exp _::p') = raise Error ("Missing operator") | shift (BinOp _, BinOp _::p') = raise Error ("Consective infix operators") | shift (BinOp _, UnOp _::p') = raise Error ("Infix operator following prefix operator") | shift (BinOp _, nil) = raise Error ("Leading infix operator") | shift (opr,p) = opr::p (* val reduce : ->

*) fun reduce (Exp(e2)::BinOp(_,con)::Exp(e1)::p') = Exp(con(e1,e2))::p' | reduce (Exp(e)::UnOp(_,con)::p') = Exp(con(e))::p' (* no other cases should be possible by stack invariant *) (* val reduceAll :

-> exp *) fun reduceAll (Exp(e)::nil) = e | reduceAll (BinOp _::p') = raise Error ("Incomplete infix expression") | reduceAll (UnOp _::p') = raise Error ("Incomplete prefix expression") | reduceAll (nil) = raise Error ("Empty expression") | reduceAll (p) = reduceAll (reduce p) (* val parse' : L.token front *

-> exp * L.token front *) fun parse' (S.Cons(L.INTEGER(n), s), p) = parse' (S.expose s, shift(Exp(Integer(n)),p)) | parse' (S.Cons(L.PLUS, s), p) = decideOpr (BinOp (1, Plus), s, p) | parse' (S.Cons(L.MINUS, s), p as (Exp(_)::p')) = (* binary minus *) decideOpr (BinOp (1, Minus), s, p) | parse' (S.Cons(L.MINUS, s), p) = (* unary minus, shift *) (* p = nil or p : *) parse' (S.expose s, shift(UnOp(3,Neg), p)) | parse' (S.Cons(L.STAR, s), p) = decideOpr (BinOp (2, Times), s, p) | parse' (S.Cons(L.SLASH, s), p) = decideOpr (BinOp (2, Divide), s, p) | parse' (S.Cons(L.LPAREN, s), p) = parseRParen (parse' (S.expose s, nil), p) | parse' (f as S.Cons(L.RPAREN, s), p) = (reduceAll p, f) | parse' (f as S.Cons(L.SEMICOLON, s), p) = (reduceAll p, f) | parse' (S.Empty, p) = raise Error ("Unexpected end of stream") and parseRParen ((e, S.Cons(L.RPAREN, s)), p) = parse' (S.expose s, shift(Exp(e),p)) | parseRParen ((e, _), _) = raise Error ("Unmatched left parenthesis") and decideOpr (opr as BinOp (prec, _), s, p as (Exp(_)::BinOp(prec',_)::Exp(_)::p')) = if prec > prec' then parse' (S.expose s, shift(opr,p)) (* shift *) else decideOpr (opr, s, reduce(p)) (* reduce *) | decideOpr (opr as BinOp (prec, _), s, p as (Exp(_)::UnOp(prec', _)::p')) = if prec > prec' then parse' (S.expose s, shift(opr,p)) (* shift *) else decideOpr (opr, s, reduce(p)) (* reduce *) | decideOpr (opr, s, p) = parse' (S.expose s, shift(opr,p)) (* shift *) (* val parse : Lexer.token MStream.stream -> exp MStream.stream *) fun parse (s) = S.delay (fn () => parseSemi(parse'(S.expose s, nil))) and parseSemi (e, S.Cons(L.SEMICOLON, s)) = S.Cons(e, parse s) | parseSemi (e, S.Cons(L.RPAREN, s)) = raise Error ("Unmatched right parenthesis") | parseSemi (e, S.Empty) = raise Error ("Expression terminated by end of file") (* no other cases should be possible *) (* Convert to string with all parentheses *) fun toString (Integer(n)) = Int.toString (n) | toString (Plus(e1,e2)) = "(" ^ toString e1 ^ "+" ^ toString e2 ^ ")" | toString (Minus(e1,e2)) = "(" ^ toString e1 ^ "-" ^ toString e2 ^ ")" | toString (Times(e1,e2)) = "(" ^ toString e1 ^ "*" ^ toString e2 ^ ")" | toString (Divide(e1,e2)) = "(" ^ toString e1 ^ "/" ^ toString e2 ^ ")" | toString (Neg(e)) = "(" ^ "-" ^ toString e ^ ")" end; (* functor Parser *) structure Parser :> PARSER = Parser (structure Lexer' = Lexer); signature TOP = sig val top : unit -> unit val parseFile : string -> unit end; functor Top (structure MStreamIO : MSTREAM_IO structure Parser : PARSER) :> TOP = struct structure Lexer = Parser.Lexer fun parseAndPrint (charStream) = let val tokenStream = Lexer.lex charStream val expStream = Parser.parse tokenStream fun loop (s) = loop' (MStream.expose s) and loop' (MStream.Cons(e,s)) = ( print (Parser.toString (e) ^ "\n") ; loop (s) ) | loop' (MStream.Empty) = () in loop (expStream) handle Lexer.Error (msg) => print ("Lexing Error: " ^ msg ^ "\n") | Parser.Error (msg) => print ("Parsing Error: " ^ msg ^ "\n") | exn => ( print ("Unrecognized Error\n") ; raise exn ) end fun top () = parseAndPrint (MStreamIO.readTerminal "# ") fun parseFile (fileName) = (* rely on streams which are garbage collected to be closed? *) parseAndPrint (MStreamIO.readFile (fileName)) end; (* functor Top *) structure Top :> TOP = Top (structure MStreamIO = MStreamIO structure Parser = Parser);