(***************************************************************** ** ExtendedFloat.sml ** sml ** ** Aleksandar Nanevski ** ** Implementation of extended floats. Defines a datatype for ** expansions of size 2, for the efficiency purposes. ** The implementation mainly follows that of Shewchuk, except that ** the arrays are replaced by lists. ** ** WARNING: The implementation assumes correct rounding to nearest ** with rounding-to-even tie-breaking. If the processor is in different ** mode, this code will not work correctly. ** ** ** TODO: ** - distill should be written using sequences ****************************************************************) structure ExtendedFloat :> EXTENDED_FLOAT = struct open Real infix == (* quads are pairs of SML reals (which are *) (* double precision floats, hence the name) *) (* elements are in order of increasing magnitude *) (* elements are nonadjacent *) (* and can contain zeros *) datatype quad = Quad of real*real (* Extended Floats are lists of SML reals *) (* components are in order of increasing magnitudes *) (* components are strongly nonoverlapping *) (* zeros are allowed in the expansions *) datatype xfloat = XFloat of real list (* |a| <= |b| *) fun << (a, b) = ((b > a) = (b > ~a)) (* |a| >= |b| *) fun >> (a, b) = ((a > b) = (a > ~b)) infix << >> (* add/subtract/multiply/square real inputs *) (* Two_Sum, Two_Diff, Two_Fast_Sum, *) (* Two_Fast_Diff, Two_Product, Two_Square *) (* in Shewchuk *) (* *) (* results are nonadjacent when round-to-even *) fun sum (a, b) = let val x = a + b val c = x - a in Quad ((a - (x - c)) + (b - c), x) end fun diff (a, b) = let val x = a - b val c = a - x in Quad ((a - (x + c)) + (c - b), x) end (* condition: a >> b (i.e |a| >= |b|) *) fun sumfast (a, b) = let val x = a + b in Quad (b - (x - a), x) end (* condition: a >> b (i.e |a| >= |b|) *) fun diffast (a, b) = let val x = a - b in Quad ((a - x) - b, x) end local (* 1 + 2^((precision+2) div 2) *) val split = 134217729.0 fun split' a = let val c = split * a val ah = c - (c - a) in Quad (a - ah, ah) end in fun prod (a, b) = let val Quad(al, ah) = split' a val Quad(bl, bh) = split' b val x = a * b in Quad (al*bl - (x - ah*bh - al*bh - ah*bl), x) end fun sq a = let val x = a * a val Quad(al, ah) = split' a in Quad (al*al - (x - ah*ah - (ah+ah)*al), x) end end (* add/sub/mult/square quad inputs *) (* to produce an xfloat *) (* *) (* preserve the nonadjacency and strong *) (* overlapping of the input arguments *) fun addQuad (Quad(al,ah), b) = let val (Quad(x0,h)) = sum(al, b) val (Quad(x1,x2)) = sum(h, ah) in XFloat [x0,x1,x2] end fun subQuad (Quad(al,ah), b) = let val (Quad(x0,h)) = diff(al, b) val (Quad(x1,x2)) = sum(h, ah) in XFloat [x0,x1,x2] end fun isubQuad (b, Quad(al, ah)) = let val (Quad(x0,h)) = diff(b, al) val (Quad(x1,x2)) = diff(h, ah) in XFloat [x0, x1, x2] end fun sumQuad (Quad(al,ah), Quad(bl,bh)) = let val (Quad(x0,h)) = sum(al,bl) val (Quad(tl,th)) = sum(ah,h) val (Quad(x1,h)) = sum(tl,bh) val (Quad(x2,x3)) = sum(th,h) in XFloat [x0,x1,x2,x3] end fun diffQuad (Quad(al,ah), Quad(bl,bh)) = let val (Quad(x0,h)) = diff(al,bl) val (Quad(tl,th)) = sum(ah,h) val (Quad(x1,h)) = diff(tl,bh) val (Quad(x2,x3)) = sum(th,h) in XFloat [x0,x1,x2,x3] end fun scaleQuad (b, Quad(al,ah)) = let val (Quad(x0,h)) = prod(al,b) val (Quad(tl,th)) = prod(ah,b) val (Quad(x1,h)) = sum(tl,h) val (Quad(x2,x3)) = sumfast(th,h) in XFloat [x0,x1,x2,x3] end fun sqQuad (Quad(al, ah)) = let val (Quad(x0,bh)) = sq al val (Quad(cl,ch)) = prod(al+al, ah) val (Quad(x1,dh)) = sum(bh,cl) val (XFloat gs) = sumQuad(sum(dh,ch), sq ah) in XFloat (x0::x1::gs) end fun double (Quad(al, ah)) = Quad(2.0*al, 2.0*ah) (* first projection from a quad *) (* *) (* it is always the error term from the *) (* floating point operation whose exact *) (* version created the quad *) fun err (Quad(x, y)) = x (* we usually don't need to get the *) (* second projection, since it is *) (* always already obtained by the *) (* inexact operation; still... *) fun approxQuad (Quad(x, y)) = y fun toResErr (Quad(x, y)) = {res = y, err = x} fun grow' (nil, y) = if (y == 0.0) then nil else [y] | grow' (x::xt, y) = let val (Quad (h, q)) = sum(x, y) in if (h == 0.0) then grow'(xt, q) else h::(grow' (xt, q)) end fun shrink' (y, nil) = if (y == 0.0) then nil else [y] | shrink' (y, x::xt) = let val (Quad (h, q)) = diff(y, x) in if (h == 0.0) then shrink'(q, xt) else h::(shrink' (q, xt)) end (* add/subtract a real from an expansion *) (* (grow_expansion_zeroelim in Shewchuk) *) (* *) (* result is nonadjacent (strongly nonoverlapping) *) (* if the argument is *) fun addExp (XFloat xs, y) = XFloat (grow'(xs, y)) fun subExp (xx as XFloat xs, y) = (case xs of nil => if (y == 0.0) then xx else XFloat [~y] | (x::xt) => let val (Quad (h, q)) = diff(x, y) in if (h == 0.0) then XFloat(grow'(xt, q)) else XFloat(h::(grow'(xt, q))) end) fun isubExp (y, xx as XFloat xs) = (case xs of nil => if (y == 0.0) then xx else XFloat [y] | (x::xt) => let val (Quad (h, q)) = diff(x, y) in if (h == 0.0) then XFloat(shrink'(q, xt)) else XFloat(h::(shrink'(q, xt))) end) (* add/subtract two expansions in linear time *) (* fast_expansion_sum_zeroelim in Shewchuk *) (* *) (* the result is strongly nonoverlapping if *) (* the inputs are (and hence preserves the *) (* invariant of the xfloat datatype) *) (* *) (* does not preserve nonadjacency nor *) (* nonoverlapping *) fun sumExp (xx as (XFloat xs), yy as (XFloat ys)) = case (xs, ys) of (nil, _) => yy | (_, nil) => xx | (x::xt, y::yt) => if (x >> y) then XFloat (fastMerge'(yt, xs, y)) else XFloat (fastMerge'(xt, ys, x)) (* second argument is guaranteed to be non-nil *) and fastMerge' (nil, y::yt, q) = let val (Quad(h, q)) = sumfast(y, q) in if (h == 0.0) then grow'(yt, q) else h::(grow'(yt, q)) end | fastMerge' (xs as (x::xt), ys as (y::yt), q) = let val (t, s, a) = if (x >> y) then (yt, xs, y) else (xt, ys, x) val (Quad(h, q)) = sumfast(a, q) in if (h == 0.0) then merge'(t, s, q) else h::(merge'(t, s, q)) end and merge' (nil, ys, q) = grow'(ys, q) | merge' (xs as (x::xt), ys as (y::yt), q) = let val (t, s, a) = if (x >> y) then (yt, xs, y) else (xt, ys, x) val (Quad(h, q)) = sum(a, q) in if (h == 0.0) then merge'(t, s, q) else h::(merge'(t, s, q)) end (* subtraction *) (* *) (* a bit longer (and probably slower) than the *) (* corresponding sum function because of the *) (* noncommutativity. *) (* can this be improved? *) fun diffExp (xx as (XFloat xs), yy as (XFloat ys)) = case (xs, ys) of (nil, _) => XFloat (map ~ ys) | (_, nil) => xx | (x::xt, y::yt) => if (x >> y) then XFloat (fastMerge''(xs, yt,~y)) else XFloat (fastMerge''(xt, ys, x)) (* must check if the second argument is nil *) and fastMerge''(nil, ys, q) = grow'(map ~ ys, q) | fastMerge''(xs, nil, q) = grow'(xs, q) | fastMerge''(xs as (x::xt), ys as (y::yt), q) = if (x >> y) then let val (Quad (h, q)) = sumfast(~y, q) in if (h == 0.0) then merge''(xs, yt, q) else h::(merge''(xs, yt, q)) end else let val (Quad (h, q)) = sumfast(x, q) in if (h == 0.0) then merge''(xt, ys, q) else h::(merge''(xt, ys, q)) end and merge''(nil, ys, q) = grow' (map ~ ys, q) | merge''(xs, nil, q) = grow' (xs, q) | merge''(xs as (x::xt), ys as (y::yt), q) = let val (xx, yy, a) = if (x >> y) then (xs, yt, ~y) else (xt, ys, x) val (Quad(h, q)) = sum(a, q) in if (h == 0.0) then merge''(xx, yy, q) else h::(merge''(xx, yy, q)) end (* multiply two quads *) fun prodQuad (a as Quad(_, _), Quad(bl,bh)) = sumExp(scaleQuad(bl, a), scaleQuad(bh, a)) (* multiply an expansion by a real *) (* (scale_expansion in Shewchuk) *) fun scaleExp (y, xx as XFloat xs) = let fun scale'(nil, q) = if (q == 0.0) then nil else [q] | scale'(x::xt, q) = let val (Quad(h1, q1)) = prod(x, y) val (Quad(h2, q2)) = sum(q, h1) val (Quad(h3, q3)) = sumfast(q1, q2) in if (h2 == 0.0) then if (h3 == 0.0) then scale'(xt, q3) else h3::(scale'(xt, q3)) else if (h3 == 0.0) then h2::(scale'(xt, q3)) else h2::h3::(scale'(xt, q3)) end in if y == 0.0 then XFloat nil else case xs of nil => xx | (x::xt) => let val (Quad(h, q)) = prod(x, y) in if (h == 0.0) then XFloat(scale'(xt, q)) else XFloat(h::(scale'(xt, q))) end end (* find equivalent but shorter expansion *) (* no zero components in the result *) (* the result is always nonadjacent *) fun compressExp (xx as XFloat xs) = let fun pass0 nil = nil | pass0 (xs as (x::xt)) = if (x == 0.0) then pass0 xt else xs fun pass1 (x::xt) = case xt of nil => (x, nil) | _ => let val (Q, g) = pass1 xt val (Quad(q, Q)) = sumfast(Q, x) in if (q == 0.0) then (Q, g) else (q, Q::g) end fun pass2 (Q, nil) = [Q] | pass2 (Q, h::ht) = let val (Quad(q, Q)) = sumfast(h, Q) in if (q == 0.0) then pass2(Q, ht) else q::(pass2(Q, ht)) end in case pass0 xs of nil => XFloat nil | _ => XFloat (pass2 (pass1 xs)) end (* approximate an expansion with a real *) fun approxExp (XFloat xs) = let fun approx' nil acc = acc | approx' (x::xt) acc = approx' xt (x + acc) in approx' xs 0.0 end structure XFloat : XFLOAT = struct structure Q = NormalizedRational type xfloat = xfloat type t = xfloat val zero = XFloat [] val one = XFloat [1.0] (* assumes that diffExp eliminates zeros *) (* from the resulting expansion *) fun op == (xx, yy) = let val (XFloat zz) = diffExp(xx, yy) in case zz of nil => true | _ => false end fun op < (xx, yy) = let val (XFloat zz) = diffExp(xx, yy) in case zz of nil => false | (z::zt) => Real.<(z, 0.0) end fun op > (xx, yy) = let val (XFloat zz) = diffExp(xx, yy) in case zz of nil => false | (z::zt) => Real.>(z, 0.0) end fun op <= (xx, yy) = let val (XFloat zz) = diffExp(xx, yy) in case zz of nil => true | (z::zt) => Real.<(z, 0.0) end fun op >= (xx, yy) = let val (XFloat zz) = diffExp(xx, yy) in case zz of nil => true | (z::zt) => Real.>(z, 0.0) end fun != (xx, yy) = let val (XFloat zz) = diffExp(xx, yy) in case zz of nil => true | _ => false end fun fromReal x = XFloat [x] val approx = approxExp (* coercion into rational numbers *) (* NONE if not Finite *) (* *) (* by putting it in this file *) (* we can avoid exporting toList *) (* even though it looks reasonable *) (* to have a destructor *) fun toRational xx = (* remove the zeros and compress *) let val XFloat xs = compressExp xx in case xs of nil => Q.fromInt 0 | (x::xt) => let val {odd=m, exp=exp} = NumberTheory.fromReal x in sum' (xt, exp, m) end end and sum' (nil, exp, acc) = Q.fromManExp{man=acc, exp=exp} | sum' (x::xt, exp, acc) = let val {odd=m, exp=e} = NumberTheory.fromReal x in let val m' = IntInf.<<(m, Word.fromInt (Int.-(e, exp))) in sum' (xt, exp, IntInf.+(acc, m')) end end (* sum up a list of expansions *) fun distill (xss: real list) = let fun distill0' nil acc = acc | distill0' (x::nil) acc = (Quad(0.0, x))::acc | distill0' (x1::x2::xt) acc = distill0' xt (sum(x1,x2)::acc) fun distill1' nil acc = acc | distill1' ((Quad(x,y))::nil) acc = (XFloat[x, y])::acc | distill1' (x1::x2::xt) acc = distill1' xt (sumQuad(x1, x2)::acc) fun distill2' [h] nil = h | distill2' nil acc = distill2' acc nil | distill2' (x::nil) acc = distill2' (x::acc) nil | distill2' (x1::x2::xt) acc = distill2' xt (sumExp(x1, x2)::acc) in case xss of nil => XFloat nil | _ => let val d0 = distill0' xss nil val d1 = distill1' d0 nil in distill2' d1 nil end end (* multiply two expansion *) fun prodExp (xx as (XFloat xs), yy as (XFloat ys)) = let fun concat' nil = [] | concat' ((XFloat xs)::tl) = xs @ (concat' tl) in distill (concat'(map (fn t => scaleExp(t, xx)) ys)) end fun double (XFloat xs) = XFloat (map (fn x => 2.0 * x) xs) fun op ~ (XFloat xs) = XFloat (map Real.~ xs) val op + = sumExp val op - = diffExp val op * = prodExp (* can be meaningless, if overflow or underflow *) (* happened, but that's a question to handle separately *) fun sign (XFloat xs) = case xs of nil => 0 | _ => Real.sign (List.last xs) fun min (xx, yy) = if sign(diffExp(xx, yy)) = 1 then yy else xx fun max (xx, yy) = if sign(diffExp(xx, yy)) = 1 then xx else yy (* these things can be done quicker by storing the sign *) (* separately in pair with the expansion, or by copying the *) (* code for diffExp here, thus avoiding the unnecessary *) (* reversing of the expansion *) fun compare (xx, yy) = let val zz = diffExp(xx, yy) in case (sign zz) of 0 => EQUAL | 1 => GREATER | ~1 => LESS end fun abs (xx as XFloat xs) = case xs of nil => xx | _ => let val x = List.last xs in if Real.>=(x, 0.0) then xx else (XFloat (map Real.~ xs)) end val add = addExp val sub = subExp val isub = isubExp val scale = scaleExp val compress = compressExp (* printout of an expansion in *DECIMAL* *) (* *) (* NOTE: take the results with a grain of *) (* salt since the conversion into decimal *) (* also incurs roundoff errors *) (* *) (* ex: sum2(0.5, 0.9) prints as *) (* 1.11022302463e~16 + 1.4 *) (* *) (* the reason is that 0.9 is not *) (* representable exactly in binary *) (* but 0.5 + 0.9 prints out as 1.4 anyway *) fun toString xx = let fun tostring' nil = "" | tostring' [x] = Real.toString x | tostring' (x::xt) = (Real.toString x)^", "^(tostring' xt) in case (compressExp xx) of XFloat nil => "0.0" | XFloat xs => tostring' xs end fun toList (XFloat xs) = xs end (* structure XFloat *) fun fromReal x = Quad(0.0, x) fun toXFloat (Quad(x, y)) = XFloat [x, y] val approx = approxQuad val op + = sumQuad val op - = diffQuad val op * = prodQuad fun op ~ (Quad(x, y)) = Quad(Real.~ x, Real.~ y) val sq2 = sqQuad val add = addQuad val sub = subQuad val isub = isubQuad val scale = scaleQuad fun abs (x as Quad(lo, hi)) = if Real.>(hi, 0.0) then x else Quad(Real.~ lo, Real.~ hi) fun add2 (xss as XFloat xs, Quad (lo, hi)) = if Real.==(lo, 0.0) then XFloat(grow' (xs, hi)) else sumExp (xss, XFloat [lo, hi]) fun sub2 (xss as XFloat xs, Quad (lo, hi)) = if Real.==(lo, 0.0) then XFloat(grow' (xs, Real.~ hi)) else sumExp (xss, XFloat [Real.~ lo, Real.~ hi]) fun isub2 (Quad (lo, hi), xss as XFloat xs) = if Real.==(lo, 0.0) then XFloat(shrink' (hi, xs)) else diffExp (XFloat [lo, hi], xss) fun scale2 (Quad (lo, hi), xss as XFloat xs) = if Real.==(lo, 0.0) then scaleExp (hi, xss) else sumExp (scaleExp (lo, xss), scaleExp (hi, xss)) fun toString (Quad(x, y)) = (Real.toString x) ^ " + " ^ (Real.toString y) end