structure ReEncode :> sig
  val optimize : Il.exp -> Il.exp
  (* val test : string -> string *)
end =
  struct

    exception REFail

    structure M = Meaning
    structure T = Token

    (*
        simple random number gerator
     *)

  local val v : Word32.word ref = 
              ref (Word32.xorb
                   (Posix.Process.pidToWord (Posix.ProcEnv.getpid ()),
                    Word32.fromInt
                    (LargeInt.toInt (Time.toSeconds (Time.now ())))))
  in
    fun randn () = 
      let 
        val vv = !v
        val x = (vv * 0w28574) + 0w39852
        val x = Word32.xorb (x, 0w2309761)
        val x = x mod 0w65537
      in
        v := x;
        abs (Word32.toInt (Word32.andb(x, 0wx7FFFFFFF)))
      end
  end

    (*
        first we convert the document into a property * string vector

        this process also merges whitespace with non-whitespace whenever
        possible
     *)

    fun PS_vec exn =
      let
        val white = ref false

        val allwhite = StringUtil.all (StringUtil.charspec " \n\r\t")

        fun cnt (Il.Tag (_, e), k) = cnt (e, k)
          | cnt (Il.Seq el, k) = foldl cnt k el
          | cnt (Il.Text s, k) = k + 1

        val arr = Array.array (cnt (exn, 0) + 1, (M.null, ""))

        fun cal (Il.Tag (tl, e), (p, k)) = cal (e, (M.addTags (p, tl), k))
          | cal (Il.Seq el, (p, k)) = foldl (fn (e, k) => cal (e, (p, k))) k el
          | cal (Il.Text s, (p, k)) =
              let
                val (p', s') = Array.sub (arr, k)
                val white' = allwhite s
              in
                if (s' = "") then
                  (* fresh segment! *)

                  (white := white'; Array.update (arr, k, (p, s)); k)
                else
                  (* old segment - try to merge *)

                  (white := (!white andalso white');
                  if M.propertyEqual (p, p') then
                    (Array.update (arr, k, (p', s' ^ s)); k)
                  else
                    (* can we do a whitespace merge? *)

                    let val q = M.propertyEqual (M.prop2SpaceProp p,
                                  M.prop2SpaceProp p')
                    in
                      if q andalso !white then
                        (Array.update (arr, k, (p, s' ^ s)); k)
                      else if q andalso white' then
                        (Array.update (arr, k, (p', s' ^ s)); k)
                      else
                        (* no merge possible - make new segment *)

                        (Array.update (arr, k + 1, (p, s)); k + 1)
                    end)
              end

        val k = cal (exn, (M.null, 0))
        val k = if "" = #2 (Array.sub (arr, k)) then k else k + 1
      in
        Array.extract (arr, 0, SOME k)
      end

    (*
        now we take the property * string vector, and try isolate all of the
        distinct properties
     *)

    fun P_vec psvec =
      let
        val arr = Array.array (Vector.length psvec + 1, M.null)

        fun insert (k, p) =
              let val p' = Array.sub (arr, k)
              in
                if M.propertyEqual (p, p') then
                  k
                else
                  if k > 0 andalso M.propertyEqual (p', M.null) then
                    (Array.update (arr, k, p); k)
                  else
                    insert (k + 1, p)
              end

        val k = Vector.foldl (fn ((p, _), k) => Int.max (k, insert (0, p)))
                              0 psvec

        (*
           this algorithm is quadratic in the size of this vector, so
           we have to scale back a bit.. and just hope that the random
           contexts are the right contexts.
         *)

        val len = (Array.length arr) - 1
        val _ = Array.modifyi
                  (fn (i, _) => Array.sub (arr, 1 + (randn() mod len)))
                  (arr, 1, NONE)
      in
        Array.extract (arr, 0, SOME (Int.min(k + 1, 60)))
      end

    (*
        This function finds the optimal tag sequence to transition
        between two contexts. It raises NoTran if none is possible.
     *)

    exception NoTran

    fun TRAN_opt (p0, p1) =
      let
        val (c0, c1) = (M.getColor p0, M.getColor p1)

        val col =
              if c0 <> c1 then
                if c0 <= 7 andalso c1 > 7 then
                  raise NoTran
                else
                  [T.Clr c1]
              else
                nil

        val (s0, s1) = (M.getSize p0, M.getSize p1)
        val sz =
              if s0 <> s1 then
                if s0 <= 9 andalso s1 > 9 then
                  raise NoTran
                else
                  [T.Num s1]
              else
                nil

        val (U0, U1) = (M.getUnderline p0, M.getUnderline p1)
        val (B0, B1) = (M.isSetAttribute (p0,M.B), M.isSetAttribute (p1,M.B))
        val (I0, I1) = (M.isSetAttribute (p0,M.I), M.isSetAttribute (p1,M.I))
        val (S0, S1) = (M.isSetAttribute (p0,M.S), M.isSetAttribute (p1,M.S))
        val (T0, T1) = (M.isSetAttribute (p0,M.Tt), M.isSetAttribute (p1,M.Tt))

        val pl = if (U1 < U0)
                    orelse (not B1 andalso B0)
                    orelse (not I1 andalso I0)
                    orelse (not S1 andalso S0)
                    orelse (not T1 andalso T0)
                  then [T.PL] else nil

        val p0 = M.addTags (p0, pl)

        val U0 = M.getUnderline p0
        val B0 = M.isSetAttribute (p0,M.B)
        val I0 = M.isSetAttribute (p0,M.I)
        val S0 = M.isSetAttribute (p0,M.S)
        val T0 = M.isSetAttribute (p0,M.Tt)

        val u = List.tabulate (U1 - U0, fn _ => T.U)
        val b = if not B0 andalso B1 then [T.Att T.B] else nil
        val i = if not I0 andalso I1 then [T.Att T.I] else nil
        val s = if not S0 andalso S1 then [T.S] else nil
        val t = if not T0 andalso T1 then [T.TT] else nil

        val (E0, E1) = (M.isSetAttribute (p0,M.Em), M.isSetAttribute (p1,M.Em))
        fun xor (a,b) = (not a andalso b) orelse (not b andalso a)
        val e = if not S1 andalso (xor (E0, E1)) then [T.EM] else nil
      in
        pl @ col @ sz @ u @ b @ i @ s @ t @ e
      end

    (*
        Compute the cost of a transition
     *)

    fun TRAN_cost pp = foldl (fn (t, k) => 2 * (T.tagsize t) + k + 1) 0
                        (TRAN_opt pp) handle NoTran => ~1

    (*
        Now we make a 2d array of transition costs between the
        distinct contexts we found earlier.
     *)

    fun COST_array pvec =
      let
        val len = Vector.length pvec

        fun cal (x, y) = TRAN_cost (Vector.sub (pvec, x), Vector.sub (pvec, y))
      in
        Array2.tabulate Array2.RowMajor (len, len, cal)
      end

    (*
        The main event..
     *)

    fun optimize exn =
      let
        datatype newcode = LEAF of int | NODE of int * newcode list

        (* setup *)

        val psvec = PS_vec exn
        val num_sec = Vector.length psvec
        (* val _ = print ("num_sec = " ^ (Int.toString num_sec) ^ "\n") *)

        val pvec = P_vec psvec
        val num_ctx = Vector.length pvec
        (* val _ = print ("num_ctx = " ^ (Int.toString num_ctx) ^ "\n") *)

        val carr = COST_array pvec

        val arr1 = Array2.tabulate Array2.RowMajor
                    (num_ctx, num_sec,
                    fn (ctx, seq) => (TRAN_cost
                        (Vector.sub (pvec, ctx), #1 (Vector.sub (psvec, seq))),
                          LEAF seq))

        val arr2 = Array2.tabulate Array2.RowMajor
                    (num_ctx, num_sec, fn _ => (~1, LEAF 0))

        (* do all of the work *)

        fun iterate (arr1, arr2, len, size) =
          let
            exception Impossible

            val size2 = size div 2

            fun partition (0, l) = l
              | partition (n, l) =
                  let val k = size2 + (randn() mod size2)
                  in
                    if k > n then n::l
                    else partition (n - k, k::l)
                  end

            fun compress (_, _, nil) = ()
              | compress (seq, beg, len::tl) =
                  (Array2.modifyi
                    Array2.ColMajor
                    (fn (ctx, _, _) => Vector.foldri
                      (fn (_, (C, nc), (C', nc')) =>
                        case nc' of
                        NODE (t, nl) =>
                          if C < 0 orelse C' < 0 then
                            (~1, NODE (t, nc::nl))
                          else
                            (C' + C, NODE (t, nc::nl))
                        | _ => raise Impossible)
                      (0, NODE (ctx, nil))
                      (Array2.row (arr1, ctx), beg, SOME len))
                    {base=arr1,row=0,nrows=SOME num_ctx,col=seq,ncols=SOME 1};
                  compress (seq + 1, beg + len, tl))

            fun compare len =
                  Array2.modifyi
                    Array2.ColMajor
                    (fn (ctx, seq, _) => Vector.foldli
                      (fn (ctx', (C, nc), (C', nc')) =>
                        let val t = Array2.sub (carr, ctx, ctx')
                        in
                          if t < 0 orelse C < 0 orelse C + t >= C' then
                            (C', nc')
                          else
                            (C, nc)
                        end)
                      (num_sec * num_ctx * 100, LEAF ~1)
                      (Array2.column (arr1, seq), 0, NONE))
                    {base=arr2,row=0,nrows=NONE,col=0,ncols=NONE}

            val p = partition (len, nil)
            val l = List.length p
            val _ = compress (0, 0, p)
            val _ = compare l
          in
            if l = 1 then arr2 else iterate (arr2, arr1, l, size * 2)
          end

        val arr = iterate (arr1, arr2, num_sec, 2)

        (* convert back to Il form *)

        fun convert (k, NODE (k', tl)) =
              let val tran = TRAN_opt (Vector.sub (pvec,k),Vector.sub (pvec,k'))
              in
                Il.Tag (tran, Il.Seq (map (fn x => convert (k', x)) tl))
              end
          | convert (k, LEAF k') =
              let
                val (p, s) = Vector.sub (psvec, k')
                val tran = TRAN_opt (Vector.sub (pvec,k), p)
              in
                Il.Tag (tran, Il.Text s)
              end
      in
        convert (0, #2 (Array2.sub (arr, 0, 0)))
      end


  end
