(* Lecture 10: Exceptions *) (* Regular expression matching *) signature regExp = sig datatype regExp = Char of char | Times of regExp * regExp | Epsilon | Plus of regExp * regExp | Empty | Star of regExp val accept : regExp -> string -> bool end; structure regExp :> regExp = struct datatype regExp = Char of char | Times of regExp * regExp | Epsilon | Plus of regExp * regExp | Empty | Star of regExp (* val acc : regExp -> char list -> (char list -> bool) -> bool *) fun acc (Char(c)) (c1::s) k = if (c = c1) then k s else false | acc (Char(c)) (nil) k = false | acc (Times(r1,r2)) s k = acc r1 s (fn s' => acc r2 s' k) | acc (Epsilon) s k = k s | acc (Plus(r1,r2)) s k = acc r1 s k orelse acc r2 s k | acc (Empty) s k = false | acc (Star(r)) s k = k s orelse acc r s (fn s' => if s = s' then false else acc (Star(r)) s' k) (* val accept : regExp -> string -> bool *) fun accept r s = acc r (String.explode s) (fn nil => true | (_::_) => false); end; (* General rewriting *) signature REWRITE = sig exception Fail type 'a rewriter = 'a -> 'a (* may raise Fail *) (* infixr THEN ORELSE *) val THEN : 'a rewriter * 'a rewriter -> 'a rewriter val ID : 'a rewriter val ORELSE : 'a rewriter * 'a rewriter -> 'a rewriter val FAIL : 'a rewriter val TRY : 'a rewriter -> 'a rewriter val REPEAT : 'a rewriter -> 'a rewriter end; structure Rewriter :> REWRITE = struct exception Fail type 'a rewriter = 'a -> 'a (* may raise Fail *) infixr THEN ORELSE fun (f THEN g) x = g(f(x)) fun ID x = x fun (f ORELSE g) x = f x handle Fail => g x fun FAIL x = raise Fail fun TRY f x = (f ORELSE ID) x fun REPEAT f x = TRY (f THEN (REPEAT f)) x end; (* Simplifying regular expressions *) signature SIMP_REGEXP = sig val simplify : RegExp.regExp -> RegExp.regExp end; structure SimpRegExp :> SIMP_REGEXP = struct local open Rewriter infixr THEN ORELSE open RegExp in fun times' (r1, Epsilon) = r1 | times' (r1, r2) = Times(r1,r2); (* Using equations (RS)T => R(ST), eR => R, Re => R re-associate all top-level Times operators. assocRTimes : regExp rewriter, unfailing *) fun assocRTimes (Times(Times(r1,r2),r3)) = assocRTimes (Times(r1,Times(r2,r3))) | assocRTimes (Times(Epsilon,r)) = assocRTimes (r) | assocRTimes (Times(r1, r2)) = times' (r1, assocRTimes r2) | assocRTimes (r) = r fun plus' (r1, Empty) = r1 | plus' (r1, r2) = Plus(r1,r2); (* Using equations (R+S)+T => R+(S+T), e+R => R, R+e => R re-associate all top-level Plus operators. assocRTimes : regExp rewriter, unfailing *) fun assocRPlus (Plus(Plus(r1,r2),r3)) = assocRPlus (Plus(r1,Plus(r2,r3))) | assocRPlus (Plus(Empty,r)) = assocRPlus (r) | assocRPlus (Plus(r1, r2)) = plus' (r1, assocRPlus r2) | assocRPlus (r) = r (* Applying f to every subexpression "in parallel" *) (* f should be unfailing *) fun par f (r as Char _) = f r | par f (Times(r1,r2)) = f (Times(par f r1, par f r2)) | par f (r as Epsilon) = f r | par f (Plus(r1,r2)) = f (Plus(par f r1, par f r2)) | par f (r as Empty) = f r | par f (Star(r)) = f (Star(par f r)) fun assocR r = par assocRTimes (par assocRPlus r) (* Rewrite leftmost-outermost matching subexpression with f *) (* f should fail if it is not applicable *) fun lmom f r = (f r handle Fail => lmom' f r) and lmom' f (Times(r1,r2)) = (Times(lmom f r1, r2) handle Fail => Times(r1, lmom f r2)) | lmom' f (Plus(r1,r2)) = (Plus(lmom f r1, r2) handle Fail => Plus(r1, lmom f r2)) | lmom' f (Star(r1)) = Star (lmom f r1) | lmom' f r = raise Fail (* Factoring, using equations RS+RT => R(S+T), RS+R => R(S+e), RO => O *) (* Assumes Times has been right-associated *) (* val factor : regExp rewriter *) fun factor (Plus(Times(r1,s),Times(r2,t))) = if r1 = r2 then Times(r1,Plus(s,t)) else raise Fail | factor (Plus(Times(r1,s),r2)) = if r1 = r2 then Times(r1,Plus(s,Epsilon)) else raise Fail | factor (Times(r,Empty)) = Empty | factor _ = raise Fail (* val factorAll : regExp rewriter, unfailing *) fun factorAll r = (REPEAT (lmom factor)) r val simplify = assocR THEN factorAll end (* local open *) end; (* structure SimpRegExp *) local open RegExp in val C0 = Char(#"0") val C1 = Char(#"1") val r3 = Times(Times(Times(Star(Plus(C0,C1)),C0),C0), Star(Plus(C0,C1))) (* (((0+1)*0)0)(0+1)* *) val r4 = Plus(Times(Star(Plus(C1,Times(C0,C1))),C0), Star(Plus(C1,Times(C0,C1)))) (* (1+01)*0 + (1+01)* *) val r5 = Plus(Times(Star(Plus(C1,Times(C0,C1))),C0), Times(Star(Plus(C1,Times(C0,C1))),C1)) (* (1+01)*0 + (1+01)*1 *) end; val r3' = SimpRegExp.simplify r3; val r4' = SimpRegExp.simplify r4; val r5' = SimpRegExp.simplify r5;