(* ctreeify.sml
 *
 * COPYRIGHT (c) 1995 AT&T Bell Laboratories.
 *
 * Builds the ctree and list for frags from a CPS function.
 *
 *  Control.CG.misc4 := 256; to enable optallocptr
 *  Control.CG.misc4 := 512; to disable treeification
 *)

signature CTREEIFY = sig
  val ctreeify : 
         (CPS.function * Label.label IntStrMap.intstrmap *
    	          Label.label IntStrMap.intstrmap * CPS.cty Intmap.intmap)
    	  -> CTree.funCtree * CTree.var list * (Label.label * Frag.frag) list
			   
end


structure CTreeify : CTREEIFY = struct

  fun error msg = ErrorMsg.impossible ("CTreeify." ^ msg)

  structure CT = CTree
  structure T = System.Tags
  structure P = CPS.P
 
(*  val _ = if MachSpec.quasistack then error "quasistack" else () *)

  local
    (* Given a list of expr in their iburg order, *)
    (* returns the "order" variable. *)
    fun good ([]) = 0
      | good ([_]) = 1
      | good (l as [x,y]) =
	  let
	    val x' = case x of (CT.EX (CT.VAR xx,_,_)) => xx | _ => ~1
	    val y' = case y of (CT.EX (CT.VAR yy,_,_)) => yy | _ => ~1
	  in
	    if x'<y' then
	      2
	    else
	      3
	  end
      | good (l as [x,y,z]) =
	  let 
	    val x' = case x of (CT.EX (CT.VAR xx,_,_)) => xx | _ => ~1
	    val y' = case y of (CT.EX (CT.VAR yy,_,_)) => yy | _ => ~1
	    val z' = case z of (CT.EX (CT.VAR zz,_,_)) => zz | _ => ~1
	  in
	    if x'<y' then
	      if y'<z' then
		4
	      else (* z'<=y' *)
		if x'<z' then
		  5
		else (* z'<=x' *)
		  8
	    else (* y'<=x' *)
	      if x'<z' then
		7
	      else (* z'<=x' *)
		if y'<z' then
		  6
		else (* z'<=y' *)
		  9
	  end
      | good l = error ("good : match failed on listlen="
			^(makestring (List.length l)))
	    
  in
    (* gd : returns an expr made of base and childrenvars (in the iburg
     * order), by finding the order of evaluation. *)
    fun gd (base, children) =
      CT.EX (base, children, good children)
  end


  (*
   * The problem with the cached floats is that we must forbid
   * any operation that may cause a float to be flushed during
   * the building of a record.  Currently, it only means that
   * we don't want to move something that returns an freg under
   * a RSTORE operation.  This is so because the only
   * operations that consume an freg and return someting return
   * an freg.  It wouldn't be true anymore if there were a
   * primop like reg: FLOOR(freg).
   *)


  (*
   * Encoding for dependencies :
   *
   * ~1 : node removed
   *
   *  0 : pure
   *  1 : raises overflow
   *  2 : looker
   *  3 : setter
   *  4 : raises overflow and divzero
   *  5 : flow control (SWITCH/BRANCH/APP)
   *  6 : formal parameter (unmoveable thing)
   *  7 : returns an freg (maybe cached)
   *  8 : returns an freg (maybe cached) and raises overflow
   *  9 : returns an freg (maybe cached) and raises overflow and divzero
   * 10 : returns an freg (maybe cached) and looker (SUBSCRIPTF)
   * 11 : RSTORE/RSTOREF/RSTOREF0/RSTOREDESC/RSET (uses allocptr)
   * 12 : RINC/MKSPECIAL (uses & updates allocptr)
   * 13 : RSTORESTORE (uses allocptr and uses storeptr)
   * 14 : RSETSTORE (uses allocptr and updates storeptr)
   * (check optallocptr if you add things here)
   * (change the constant in canswap if you add things here)
   *)

  local
    val O = true
    val x = false
    val depvect = #[
        (* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14*)
           O, O, O, O, O, O, x, O, O, O, O, O, O, O, O,
 (* 1*)	   O, O, O, x, x, O, x, O, O, x, O, O, O, O, O,
 (* 2*)	   O, O, O, x, O, O, x, O, O, O, O, O, O, O, O,
 (* 3*)	   O, x, x, x, x, O, x, O, x, x, x, O, O, O, O,
 (* 4*)	   O, x, O, x, x, O, x, O, x, x, O, O, O, O, O,
 (* 5*)	   O, O, O, O, O, x, x, O, O, O, O, O, O, O, O,
 (* 6*)	   x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
 (* 7*)	   O, O, O, O, O, O, x, O, O, O, O, x, x, x, x,
 (* 8*)	   O, O, O, x, x, O, x, O, O, x, O, x, x, x, x,
 (* 9*)	   O, x, O, x, x, O, x, O, x, x, O, x, x, x, x,
 (*10*)	   O, O, O, x, O, O, x, O, O, O, O, x, x, x, x,
 (*11*)	   O, O, O, O, O, O, x, x, x, x, x, O, x, O, O,
 (*12*)	   O, O, O, O, O, O, x, x, x, x, x, x, x, x, x,
 (*13*)	   O, O, O, O, O, O, x, x, x, x, x, O, x, O, x,
 (*14*)	   O, O, O, O, O, O, x, x, x, x, x, O, x, x, x
	  ]
  in
    fun canswap (a,b) = a<0 orelse b<0 orelse Vector.sub (depvect, a*15+b)
      handle zap => (print ("handle.canswap a="^(makestring a)
			    ^" b="^(makestring b)^"\n"); raise zap)
  end

  val noinfo = {ctree = CT.EX (CT.INT 0,[],0), deptype = ~1}
  val unmoveable = {ctree = CT.EX (CT.INT 0,[],0), deptype = 6}

  (* Applies f to the kids in the reverse of the order of evaluation. *)
  fun revevalordermap (f, 0, []) = []
    | revevalordermap (f, 1, [x]) = [f x]
    | revevalordermap (f, 2, [x,y]) =
        let val (y',x') = (f y,f x) in [x',y'] end
    | revevalordermap (f, 3, [x,y]) =
        let val (x',y') = (f x,f y) in [x',y'] end
    | revevalordermap (f, 4, [x,y,z]) =
        let val (z',y',x') = (f z,f y,f x) in [x',y',z'] end
    | revevalordermap (f, 5, [x,y,z]) =
        let val (y',z',x') = (f y,f z,f x) in [x',y',z'] end
    | revevalordermap (f, 6, [x,y,z]) =
        let val (x',z',y') = (f x,f z,f y) in [x',y',z'] end
    | revevalordermap (f, 7, [x,y,z]) =
        let val (z',x',y') = (f z,f x,f y) in [x',y',z'] end
    | revevalordermap (f, 8, [x,y,z]) =
        let val (y',x',z') = (f y,f x,f z) in [x',y',z'] end
    | revevalordermap (f, 9, [x,y,z]) =
        let val (x',y',z') = (f x,f y,f z) in [x',y',z'] end
    | revevalordermap (_,n,l) = error("revevalordermap : match failed on n="
				      ^(makestring n)^" and listlen="
				      ^(makestring (List.length l)))
	

  val curinfoarray = ref (Array.array (1, noinfo))

  fun printSeq(blk,ctree,info) = 
      if Word.andb(Word.fromInt(!Control.CG.misc4), 0w128) = 0w0 
      then ()
      else CT.printSeq(blk,ctree,info)

  fun makeFunseq ((_,f,vl,cl,cpsexpr), stringTable, realTable, typTable) =
    let
      local
	open IntStrMap
      in
	fun enterString(s,lab) = add stringTable (StrgHash.hashString s,s,lab)
	fun lookString s       = map stringTable (StrgHash.hashString s,s)
	fun enterReal(r,lab)   = add realTable   (StrgHash.hashString r,r,lab)
	fun lookReal r         = map realTable   (StrgHash.hashString r,r)
      end

      val BOGt = CPS.PTRt (NONE)

      val addtypbinding = Intmap.add typTable
      val typmap = Intmap.map typTable

      fun grabty(CPS.VAR v)   = typmap v
	| grabty(CPS.LABEL v) = typmap v
	| grabty(CPS.REAL _)  = CPS.FLTt
	| grabty(CPS.INT _)   = CPS.INTt
	| grabty(CPS.INT32 _) = CPS.INT32t
	| grabty(CPS.VOID)    = CPS.FLTt
	| grabty _ 	      = BOGt 

      (* given a CPS.lvar, give me the corresponding var *)
      exception LvarBind
      val lvartable : CTree.var Intmap.intmap = Intmap.new (32, LvarBind)
      val addlvarbinding		      = Intmap.add lvartable
      val varFromLvar			      = Intmap.map lvartable
    
      val curvar 		      = ref 0	(* reset at each function *)
      fun newVar lvar 		      = (addlvarbinding (lvar, !curvar); 
					 !curvar) before (curvar := !curvar + 1)
      fun genVar () 		      = !curvar before (curvar := !curvar + 1)

      (* local frag list, returned to the main loop *)
      val fragList = ref ([] : (Label.label * Frag.frag) list)
      fun addfrag frag = fragList := frag :: (!fragList)


      fun findString s =
	lookString s handle _ =>
	  let val lab = Label.newLabel "" in
	    addfrag (lab, Frag.STRINGfrag s);
	    enterString (s, lab); 
	    lab
	  end
    
      fun findReal r =
	lookReal r handle _ =>
	  let val lab = Label.newLabel "" in
	    addfrag (lab, Frag.REALfrag r);
	    enterReal(r, lab);
	    lab
	  end

      (* getVal : CPS.value -> expr *)
      fun getVal v =
	case v of
	  CPS.VAR lvar => CT.EX (CT.VAR (varFromLvar lvar),[],0)
	| CPS.LABEL lvar => CT.EX (CT.LABEL lvar,[],0)
	| CPS.INT i => CT.EX (CT.INT i,[],0)
	| CPS.INT32 i => CT.EX (CT.INT32 i,[],0)
	| CPS.REAL r => CT.EX (CT.REAL (findReal r),[],0)
	| CPS.STRING s => CT.EX (CT.STRING (findString s),[],0)
	| _ => error "getVal"

      (* seqFromCps : (acc * CPS.cexp) -> newacc * int *)
      (*   acc/newacc is (int * var * expr) list *)
      (*   the last int is the number of the last variable of the sequence *)
      fun seqFromCps (origacc, cexp) = let
	  (*
	   * acc contains a list of (deptype, var, expr)
	   *
	   * It is CRUCIAL to generate vars in the order of evaluation.
	   *)

	  (* It is very important to generate the "variable" for a node
	   * like BR_INT or SWITCH before all its descendants. *)
	  (* accu is to ensure that the var is generated after its children *)

	  fun recordDesc (kind, content) = let val l = List.length content
	  in
	    case (kind, l)  of
		(Access.RK_I32BLOCK, _) => T.make_desc (l*4, T.tag_string)
	      | (Access.RK_VECTOR, _)   => T.make_desc (l, T.tag_record)
	      | (_, 2)                  => T.desc_pair
	      | _                       => T.make_desc (l, T.tag_record)
	  end

	  fun mkRecord (align, desc, size, store, content, lvar, typ, 
							   passedacc) = let
	      val storedesc = 
		(11, genVar (), CT.EX (CT.RSTOREDESC(align,0,desc), [], 0))
	      fun loop ([],_,acc) = acc
		| loop ((v,p)::rest,n,acc) = let
		    val expr = getVal v
		    val stexpr = CT.EX (store(n,p), [expr], 1)
		    val newvar = genVar ()
		    val addacc = (11, newvar, stexpr)
		  in
		    loop(rest, n+size, addacc::acc)
		  end
	      val newacc = loop (content, 4, storedesc::passedacc)
	      val varset = newVar lvar
	      val varinc = genVar ()
	  in
	      addtypbinding (lvar,typ);
	      (11, varset, CT.EX (CT.RSET 4, [], 0))
	      :: (12, varinc, CT.EX (CT.RINC (length content * size + 4), [], 0))
	      :: newacc
	  end

	  fun mkApp([],[]) = CT.EX (CT.ANIL,[],0)
	    | mkApp(v::rest, CPS.FLTt::tl) =
		CT.EX (CT.ACONSF, [getVal v, mkApp(rest,tl)], 2)
	    | mkApp(v::rest, _::tl) = 
		CT.EX (CT.ACONS, [getVal v, mkApp(rest,tl)], 2)

	  fun childseq (clist,acc) = let
	      val lstvar = genVar ()		       (* last var for this seq *)
	      fun next (c, (brs,passedacc)) =
		let
		  val firstvar = !curvar
		  val (newacc, lastvar) = seqFromCps (passedacc, c)
		  val br = (firstvar, lastvar)
		in
		  (br::brs, newacc)
		end
	      val (brs, nacc) = foldl next ([], acc) clist
	  in
	     (lstvar, rev brs, nacc)
	  end

	  local
	    fun dobr brkind (typ,v1,v2,c1,c2,acc) =
	      let
		val children = [getVal v1, getVal v2]
		val(lstvar, [b1,b2], newacc) = childseq ([c1,c2],acc)
	      in
	        ((5, lstvar, gd (brkind  (typ,b1,b2), children)) :: newacc,
		 lstvar)
	      end
	  in
	    val dobrint    = dobr CT.BR_INT
	    val dobrword32 = dobr CT.BR_WORD32
	    val dobrfloat  = dobr CT.BR_FLOAT
	  end

	  fun dobrboxed (v,c1,c2,acc) =
	    let
	      val child = [getVal v]
	      val (lstvar, [b1,b2], newacc) = childseq ([c1,c2],acc)
	    in
	      ((5, lstvar, CT.EX (CT.BR_BOXED (b1,b2), child, 1)) :: newacc,
	       lstvar)
	    end

	  (* records a store operation into mem[v+2*(w-1)]. *)
	  fun doboxedupdate (v,w,x,c,acc) = let
	      val addrvar = genVar()
	      val addrtree = CT.EX(CT.VAR addrvar, [], 0)
	      val int0 = CT.EX(CT.INT 0, [], 0)
	      val addrexpr = (0, addrvar, gd (CT.CALCADDR, [getVal v, getVal w]))
	      val updelem = 
		    (3, genVar (), gd (CT.UNBOXEDUPDATE, [addrtree, int0, getVal x]))
	      val addrelem = 
		    (11, genVar(), CT.EX(CT.RSTORE(0,CPS.OFFp 0), [addrtree], 1))
 	      val storeelem = (13, genVar (), CT.EX (CT.RSTORESTORE 4, [], 0))
 	      val setelem = (14, genVar (), CT.EX (CT.RSETSTORE 0, [], 0))
 	      val incelem = (12, genVar (), CT.EX (CT.RINC 8, [], 0))
	  in
	      loop(incelem::setelem::storeelem::addrelem::updelem::addrexpr::acc, c)
	  end

	  and accu (deptype, lvar, typ, expr, acc, c) = 
	      (addtypbinding(lvar,typ);
	       loop ((deptype, newVar lvar, expr)::acc, c))
	
	  and accugen (deptype, expr, acc, c) =
	    loop ((deptype, genVar (), expr)::acc, c)
	
	  and loop (acc, cexp) =
	    case cexp of
	      CPS.RECORD ((Access.RK_SPILL | Access.RK_CONT),vl,w,e) =>
		loop(acc,CPS.RECORD(Access.RK_RECORD,vl,w,e))
	    | CPS.RECORD (Access.RK_FCONT, vl, w, e) =>
		loop(acc, CPS.RECORD(Access.RK_FBLOCK, vl, w, e))
	    | CPS.RECORD (Access.RK_FBLOCK, vl, lvar, e) => let
		val len = List.length vl
		val desc = 
		  if len=1 then T.desc_reald 
		  else T.make_desc(len, T.tag_realdarray)
		fun store(n, CPS.OFFp 0) = CT.RSTOREF0 n
		  | store arg = CT.RSTOREF arg
		fun remapVl () = let 
		    fun f (r as CPS.REAL _, _)  = (r, CPS.OFFp 0)
		      | f np = np
		in map f vl
		end
		val vl' = remapVl()
		val acc' = mkRecord(true, desc, 8, store, vl', lvar, BOGt, acc)
	      in
		loop(acc', e)
	      end
	    | CPS.RECORD (kind, content, lvar, c) => let
		val desc = recordDesc(kind, content)
		val acc' = mkRecord(false,desc,4,CT.RSTORE,content,lvar,BOGt,acc)
              in loop(acc', c)
	      end
	    | CPS.SELECT (ofs, v, lvar, typ as CPS.FLTt, c) =>
		accu (0, lvar, typ, CT.EX (CT.SELECTF ofs, [getVal v], 1), acc, c)
	    | CPS.SELECT (ofs, v, lvar, typ, c) =>
		accu (0, lvar, typ, CT.EX (CT.SELECT ofs, [getVal v], 1), acc, c)
	    | CPS.OFFSET (ofs, v, lvar, c) =>
		accu (0, lvar, BOGt, CT.EX (CT.OFFSET ofs, [getVal v], 1), acc, c)
	    | CPS.LOOKER (P.!, [v], lvar, typ, c) =>
		accu (2, lvar, typ, CT.EX (CT.DEREF, [getVal v], 1), acc, c)
	    | CPS.LOOKER (P.gethdlr, [], lvar, typ, c) =>
		accu (2, lvar, typ, CT.EX (CT.GETHDLR, [], 0), acc, c)
	    | CPS.LOOKER (P.subscript, [v,w], lvar, typ, c) =>
		accu (2, lvar, typ, gd (CT.SUBSCRIPT, [getVal v, getVal w]), acc, c)
	    | CPS.LOOKER (P.numsubscript {kind=P.FLOAT 64},
			  [v,w], lvar, typ, c) =>
		accu (10, lvar, typ, gd (CT.SUBSCRIPTF, [getVal v, getVal w]), acc, c)
	    | CPS.LOOKER (P.getvar, [], lvar, typ, c) =>
		accu (2, lvar, typ, CT.EX (CT.GETVAR, [], 0), acc, c)
	    | CPS.LOOKER (P.deflvar, _, _, _, c) =>  error "seqFromCps.deflvar"
	    | CPS.LOOKER (P.numsubscript {kind=P.INT 8},
			  [v,w], lvar, typ, c) =>
		accu (2, lvar, typ, gd (CT.ORDOF, [getVal v, getVal w]), acc, c)
	    | CPS.LOOKER (P.getspecial, [v], lvar, typ, c) =>
		accu (2, lvar, typ, CT.EX (CT.GETSPECIAL, [getVal v], 1), acc, c)
	    | CPS.LOOKER (P.getpseudo, [v], lvar, typ, c)  => 
		accu (2, lvar, typ, CT.EX(CT.GETPSEUDO,[getVal v], 1), acc, c)
	    | CPS.LOOKER _ => (PPCps.prcps cexp; error "seqFromCps.LOOKER")

	    | CPS.SETTER (P.numupdate{kind=P.INT 8}, [v,w,x], c) =>
		accugen (3, gd (CT.STORE,
				[getVal v,getVal w,getVal x]), acc, c)
	    | CPS.SETTER (P.unboxedupdate, [v,w,x], c) =>
		accugen (3, gd (CT.UNBOXEDUPDATE,
				[getVal v, getVal w, getVal x]), acc, c)
	    | CPS.SETTER (P.boxedupdate, [v,w,x], c) =>
		doboxedupdate (v,w,x,c,acc)
	    | CPS.SETTER (P.update, [v,w,x], c) =>
		doboxedupdate (v,w,x,c,acc)
	    | CPS.SETTER (P.numupdate {kind=P.FLOAT 64},
			  [v,w,x], c) =>
		accugen (3, gd (CT.UPDATEF,
				[getVal v, getVal w, getVal x]), acc, c)
	    | CPS.SETTER (P.sethdlr, [v], c) =>
		accugen (3, CT.EX (CT.SETHDLR, [getVal v], 1), acc, c)
	    | CPS.SETTER (P.setvar, [v], c) =>
		accugen (3, CT.EX (CT.SETVAR, [getVal v], 1), acc, c)
	    | CPS.SETTER (P.setpseudo, [v,i], c) =>
		accugen (3, gd (CT.SETPSEUDO, [getVal v, getVal i]), acc, c)
	    | CPS.SETTER (P.setspecial, [v,w], c) =>
		accugen (3, gd (CT.SETSPECIAL, [getVal v, getVal w]), acc, c)

	    | CPS.SETTER (P.uselvar, _, c) => loop(acc, c)
	    | CPS.SETTER (P.free,_,c) => loop(acc,c)
	    | CPS.SETTER (P.acclink,_,c) => loop(acc,c)
	    | CPS.SETTER (P.setmark, _, c) => loop(acc, c)
	    | CPS.SETTER _ => (PPCps.prcps cexp; error "seqFromCps.SETTER")

	    | CPS.ARITH (P.arith{oper=P.+,kind=P.FLOAT 64}, [v,w], lvar, typ, c) =>
		accu (8, lvar, typ, gd (CT.FADD, [getVal v, getVal w]), acc, c)
	    | CPS.ARITH (P.arith{oper=P./,kind=P.FLOAT 64}, [v,w], lvar, typ, c) =>
		accu (9, lvar, typ, gd (CT.FDIV, [getVal v, getVal w]), acc, c)
	    | CPS.ARITH (P.arith{oper=P.*,kind=P.FLOAT 64}, [v,w], lvar, typ, c) =>
		accu (8, lvar, typ, gd (CT.FMUL, [getVal v, getVal w]), acc, c)
	    | CPS.ARITH (P.arith{oper=P.-,kind=P.FLOAT 64}, [v,w], lvar, typ, c) =>
		accu (8, lvar, typ, gd (CT.FSUB, [getVal v, getVal w]), acc, c)
	    | CPS.ARITH (P.arith{oper=P.~,kind=P.FLOAT 64}, [v], lvar, typ, c) =>
		accu (7, lvar, typ, CT.EX (CT.FNEG, [getVal v], 1), acc, c)
	    | CPS.ARITH (P.arith{oper=P.~,kind=P.INT 31}, [v], lvar, typ, c) =>
		accu (1, lvar, typ, CT.EX (CT.NEG, [getVal v], 1), acc, c)
	    | CPS.ARITH (P.round{floor=true,fromkind=P.UINT 32, tokind=P.INT 31},
			 [v], lvar, typ, c) =>
		accu (1, lvar, typ, CT.EX (CT.W32TOINT, [getVal v], 1), acc, c)
	    | CPS.ARITH (P.arith{oper=P.*,kind=P.INT 31},
			 [v,w], lvar, typ, c) =>
		accu (1, lvar, typ, gd (CT.MUL{signed=true}, [getVal v, getVal w]), acc, c)
	    | CPS.ARITH (P.arith{oper=P.+,kind=P.INT 31},
			 [v,w], lvar, typ, c) =>
		accu (1, lvar, typ, gd (CT.ADD{signed=true}, [getVal v, getVal w]), acc, c)
	    | CPS.ARITH (P.arith{oper=P.-,kind=P.INT 31},
			 [v,w], lvar, typ, c) =>
		accu (1, lvar, typ, gd (CT.SUB{signed=true}, [getVal v, getVal w]), acc, c)
	    | CPS.ARITH (P.arith{oper=P./,kind=P.INT 31},
			 [v,w], lvar, typ, c) =>
		accu (4, lvar, typ, gd (CT.DIV{signed=true}, [getVal v, getVal w]), acc, c)
	    | CPS.ARITH _ => (PPCps.prcps cexp; error "seqFromCps.ARITH ")

	    | CPS.PURE (P.length, [v], lvar, typ, c) =>
		accu (0, lvar, typ, CT.EX (CT.LENGTH, [getVal v], 1), acc, c)
	    | CPS.PURE (P.objlength, [v], lvar, typ, c) =>
		accu (0, lvar, typ, CT.EX (CT.OBJLENGTH, [getVal v], 1), acc, c)
	    | CPS.PURE (P.makeref, [v], lvar, typ, c) => let
		val desc = T.make_desc (1, T.tag_array)
		val content = [(v,CPS.OFFp 0)]
		val acc' = mkRecord(false, desc, 4, CT.RSTORE, content, lvar, typ, acc)
	      in
		loop(acc', c)
	      end
	    | CPS.PURE (P.pure_arith{oper=P.rshift,kind}, [v,w], lvar, typ, c) =>
		let val shiftop = case kind of P.INT 31 => CT.RSHIFT
				        | P.UINT 31 => CT.RSHIFT
					| P.UINT 32 => CT.W32RSHIFT
					| _ => error "rshift"
		in
		  accu (0, lvar, typ, gd (shiftop, [getVal v, getVal w]), acc, c)
		end
	    | CPS.PURE (P.pure_arith{oper=P.rshiftl,kind}, [v,w], lvar, typ, c) =>
		let val shiftop = case kind of P.INT 31 => CT.RSHIFTL
				        | P.UINT 31 => CT.RSHIFTL
					| P.UINT 32 => CT.W32RSHIFTL
					| _ => error "rshiftl"
		in
		  accu (0, lvar, typ, gd (shiftop, [getVal v, getVal w]), acc, c)
		end
	    | CPS.PURE (P.pure_arith{oper=P.lshift,kind}, [v,w], lvar, typ, c) =>
		let val shiftop = case kind of P.INT 31 => CT.LSHIFT
					| P.UINT 31 => CT.LSHIFT
					| P.UINT 32 => CT.W32LSHIFT
					| _ => error "lshift"
		in
		  accu (0, lvar, typ, gd (shiftop, [getVal v, getVal w]), acc, c)
		end
	    | CPS.PURE (P.pure_arith{oper=P.orb,kind}, [v,w], lvar, typ, c) =>
		let val orb = case kind of P.INT 31 => CT.ORB | P.UINT 31 => CT.ORB
					 | P.UINT 32 => CT.W32ORB | _ => error "orb"
		in
		    accu (0, lvar, typ, gd (orb, [getVal v, getVal w]), acc, c)
		end
	    | CPS.PURE (P.pure_arith{oper=P.andb,kind}, [v,w], lvar, typ, c) =>
		let
		  val andb = case kind of P.INT 31 => CT.ANDB | P.UINT 31 => CT.ANDB
				        | P.UINT 32 => CT.W32ANDB
					| _ => error "andb"
		in
		    accu (0, lvar, typ, gd (andb, [getVal v, getVal w]), acc, c)
		end
	    | CPS.PURE (P.pure_arith{oper=P.xorb,kind}, [v,w], lvar, typ, c) =>
		let
		  val xorb = case kind of P.INT 31 => CT.XORB | P.UINT 31 => CT.XORB
				        | P.UINT 32 => CT.W32XORB
					| _ => error "xorb"
		in
		    accu (0, lvar, typ, gd (xorb, [getVal v, getVal w]), acc, c)
		end
	    | CPS.PURE (P.pure_arith{oper=P.notb,kind}, [v], lvar, typ, c) =>
		let
		  val notb = case kind of P.INT 31 => CT.NOTB | P.UINT 31 => CT.NOTB
				        | P.UINT 32 => CT.W32NOTB
					| _ => error "notb"
		in
		  accu (0, lvar, typ, CT.EX (notb, [getVal v], 1), acc, c)
		end
	    | CPS.PURE (P.pure_arith{oper=P.~,kind=P.FLOAT 64},
			[v], lvar, typ, c) =>
		accu (7, lvar, typ, CT.EX (CT.FNEG, [getVal v], 1), acc, c)
	    | CPS.PURE (P.pure_arith{oper=P.abs,kind=P.FLOAT 64},
			[v], lvar, typ, c) =>
		accu (7, lvar, typ, CT.EX (CT.FABS, [getVal v], 1), acc, c)
	    | CPS.PURE (P.real{fromkind=P.INT 31,tokind=P.FLOAT 64},
			[v], lvar, typ, c) =>
		accu (7, lvar, typ, CT.EX (CT.REALOFINT, [getVal v], 1), acc, c)
	    | CPS.PURE (P.real{fromkind=P.INT 31, tokind=P.UINT 32},
			[v], lvar, typ, c) =>
		accu (0, lvar, typ, CT.EX (CT.W32FROMINT, [getVal v], 1), acc, c)
	    | CPS.PURE (P.real{fromkind=P.UINT 31, tokind=P.UINT 32},
			[v], lvar, typ, c) =>
		accu (0, lvar, typ, CT.EX (CT.W31TOW32, [getVal v], 1), acc, c)
	    | CPS.PURE (P.real{fromkind=P.UINT 32, tokind=P.UINT 31},
			[v], lvar, typ, c) =>
		accu (0, lvar, typ, CT.EX (CT.W31FROMW32, [getVal v], 1), acc, c)
	    | CPS.PURE (P.real{fromkind=P.UINT 31, tokind=P.INT 31}, 
			[v], lvar, typ, c) =>
		accu(0, lvar, typ, CT.EX (CT.W31TOINT, [getVal v], 1), acc, c)
	    | CPS.PURE (P.real{fromkind=P.INT 31, tokind=P.UINT 31}, 
			[v], lvar, typ, c) =>
		accu(0, lvar, typ, CT.EX (CT.W31FROMINT, [getVal v], 1), acc, c)
	    | CPS.PURE (P.subscriptv, [v,w], lvar, typ, c) =>
		accu (0, lvar, typ, gd (CT.SUBSCRIPTV,[getVal v, getVal w]), acc, c)
	    | CPS.PURE (P.gettag, [v], lvar, typ, c) =>
		accu (0, lvar, typ, CT.EX (CT.GETTAG, [getVal v], 1), acc, c)
	    | CPS.PURE (P.pure_numsubscript {kind=P.INT 8},
			[v,w], lvar, typ, c) =>
		accu (2, lvar, typ, gd (CT.ORDOF, [getVal v, getVal w]), acc, c)
	    | CPS.PURE (P.mkspecial, [v,w], lvar, typ, c) =>
		accu (12, lvar, typ, gd (CT.MKSPECIAL,[getVal v, getVal w]), acc, c)
	    | CPS.PURE (P.wrap, [u], lvar, typ, c) => 
	        accu (0, lvar, typ, CT.EX(CT.WRAP,[],0), acc, c)
	    | CPS.PURE (P.unwrap,[u],lvar,typ,c) => 
		accu (0, lvar, typ, CT.EX(CT.UNWRAP,[],0), acc, c)
	    | CPS.PURE (P.cast,[u],lvar,typ,c)   => 
		accu (0, lvar, typ, CT.EX(CT.CAST,[],0), acc, c)
	    | CPS.PURE (P.getcon,[u],w,t,e) => loop(acc,CPS.SELECT(0,u,w,t,e))
	    | CPS.PURE (P.getexn,[u],w,t,e) => loop(acc,CPS.SELECT(0,u,w,t,e))
	    | CPS.PURE (P.fwrap,[u],w,_,e)  => 
		  loop(acc,CPS.RECORD(Access.RK_FBLOCK,[(u,CPS.OFFp 0)],w,e))
	    | CPS.PURE (P.funwrap,[u],w,_,e) => loop(acc,CPS.SELECT(0,u,w,CPS.FLTt,e))
	    | CPS.PURE (P.iwrap,_,_,_,_)  => error "iwrap not implemented"
	    | CPS.PURE (P.iunwrap,_,_,_,_)  => error "iunwrap not implemented"
	    | CPS.PURE (P.i32wrap,[u],w,_,e)  =>
		  loop(acc,CPS.RECORD(Access.RK_I32BLOCK,[(u,CPS.OFFp 0)],w,e))
	    | CPS.PURE (P.i32unwrap,[u],w,_,e)  =>
		  loop(acc,CPS.SELECT(0,u,w,CPS.INT32t,e))

	    | CPS.PURE (P.pure_arith{oper, kind}, [v,w], lvar, typ, c) => let
	        val unsigned = {signed=false}
		val ctOp = 
		  (case (kind, oper)
		    of (P.UINT 32, P.+)  => CT.W32ADD
	  	     | (P.UINT 32, P.-)  => CT.W32SUB
		     | (P.UINT 32, P.* ) => CT.W32MUL
		     | (P.UINT 32, P./)  => CT.W32DIV
		     | (P.UINT 31, P.+)  => CT.ADD unsigned
		     | (P.UINT 31, P.-)  => CT.SUB unsigned
		     | (P.UINT 31, P.* ) => CT.MUL unsigned
		     | (P.UINT 31, P./)  => CT.DIV unsigned
		     | _ => (PPCps.prcps cexp; error "seqFromCps.PURE(1)")
		   (*esac*))
	      in
		accu(0, lvar, typ, gd(ctOp, [getVal v, getVal w]), acc, c)
	      end
	    | CPS.PURE _ => (PPCps.prcps cexp; error "seqFromCps.PURE(2)")

	    | CPS.APP (CPS.VAR f, args) => let
                  val cty = map grabty args
		  val expr = mkApp(args,cty)
		  val appvar = genVar ()
		in
		  ((5, appvar, CT.EX (CT.APP_VAR(f,cty), [expr], 1)) :: acc,
		   appvar)
		end
	    | CPS.APP (CPS.LABEL l, args) => let
		  val cty = map grabty args 
		  val expr = mkApp(args,cty)
		  val appvar = genVar ()
		in
		  ((5, appvar, CT.EX (CT.APP_LABEL(l,cty), [expr], 1)) :: acc,
		   appvar)
		end

	    | CPS.SWITCH (v,_,clist) =>
		let
		  val child = [getVal v]	 (* must be evaluated first, *)
		  val (lstvar, brs, newacc) = childseq (clist,acc)
		in
		  ((5, lstvar, CT.EX (CT.SWITCH brs, child, 1)) :: newacc,
		   lstvar)
		end

	    | CPS.BRANCH (P.boxed, [v], _, c1, c2) =>
		dobrboxed (v,c1,c2,acc)
	    | CPS.BRANCH (P.unboxed, [v], _, c1, c2) =>
		dobrboxed (v,c2,c1,acc)
	    | p as CPS.BRANCH (P.cmp cmp, [v1,v2], _, c1, c2) =>
		let
		    val dobranch = case cmp of
		            {kind=P.INT 31, ...} => dobrint
		          | {kind=P.UINT 32, ...} => dobrword32
			  | {kind=P.UINT 31, ...} => dobrword32
			  | {kind=P.FLOAT 64, ...} => dobrfloat
			  | {kind, ...} =>
				(PPCps.prcps p; error "dobranch ")
		    val (br,c1',c2') = case #oper cmp of
			    P.<   => (CT.B_LT,c1,c2)
			  | P.<=  => (CT.B_LE,c1,c2)
			  | P.>   => (CT.B_LE,c2,c1)
			  | P.>=  => (CT.B_LT,c2,c1)
			  | P.ltu => (CT.B_LTU,c1,c2)
			  | P.leu => (CT.B_LEU,c1,c2)
			  | P.gtu => (CT.B_LEU,c2,c1)
			  | P.geu => (CT.B_LTU,c2,c1)
			  | P.eql => (CT.B_EQ,c1,c2)
			  | P.neq => (CT.B_EQ,c2,c1)
		in
		    dobranch (br,v1,v2,c1',c2',acc)
		end
	    | CPS.BRANCH (P.peql, [v1,v2], _, c1, c2) =>
		dobrint (CT.B_EQ,v1,v2,c1,c2,acc)
	    | CPS.BRANCH (P.pneq, [v1,v2], _, c1, c2) =>
		dobrint (CT.B_EQ,v1,v2,c2,c1,acc)
	    | CPS.BRANCH (P.streq,[n,v,w],c,d,e) => let
		val children = [getVal n, getVal v, getVal w]
		val (lstvar, [b1,b2], newacc) = childseq ([d,e],acc)
	      in
		((5, lstvar, gd (CT.BR_STRING (b1,b2), children)):: newacc, 
		 lstvar)
	      end
	    | CPS.BRANCH (P.strneq,[n,v,w],c,d,e) =>
		  loop(acc,CPS.BRANCH(P.streq,[n,v,w],c,e,d))
	    | CPS.BRANCH _ => error "seqFromCps.BRANCH"
	    | _ => error "seqFromCps.FIX"
	in
	  loop (origacc,cexp)
	end (* fun seqFromCps *)


      (********************************************************************)

      (*
       * find_info builds usesarray and infoarray.
       * From there on, usesarray and infoarray are consistent and can be
       * used without further optimizations.
       *)

      fun find_info (seq, usesarray, infoarray) =
	let
	  fun recur (CT.EX (base, children, _)) =
	    (app recur children;
	     case base of
	       CT.VAR v =>
		 (
		  Array.update (usesarray, v, Array.sub (usesarray, v)+1)
		  handle zap => (print ("handle.find_info"
					^(makestring v)^"\n"); raise zap)
		    )
	     | _ => ()
		 )
	  fun doelem (deptype,var,expr) =
	    (recur expr;
	     Array.update (infoarray, var, {ctree=expr, deptype=deptype})
	     )
	in
	  app doelem seq
	end



      (********************************************************************)



      (*
       * optallocptr removes some incrementations of the allocptr by
       * grouping them together.  Of course, we have to modify the offsets
       * of all accesses to allocptr between.
       *)

      (*
       * Currently, the optimization stops at each branch.
       *)

      (* Note:
       * This optimization has been turned off because it does not
       * interact well with aligned floating point records. 
       * If the allocation pointer is guaranteed to be aligned
       * at the beginning of every function, then it will be easy to enable
       * this optimization.
       *)

      fun optallocptr (fstvar, nbvars, infoarray : CTree.ctreeInfo) =
	let
	  fun scan (v, orig, acc) =
	    if v>=nbvars then () else
	      let
		val {deptype,ctree} = Array.sub (infoarray, v)
		  handle zap => (print ("handle.scan"
					^(makestring v)^"\n"); raise zap)
	      in
		case deptype of
		  1 => search (v+1) (* raises overflow *)
		| 4 => search (v+1) (* raises overflow/divzero *)
		| 5 => search (v+1) (* flow control *)
		| 7 => search (v+1) (* returns freg *)
		| 8 => search (v+1) (* returns freg + raises overflow *)
		| 9 => search (v+1) (* returns freg + raises overflow/divzero *)
		| 10 => search (v+1) (* returns freg + looker *)
		| 11 => (* RSTORE/RSTOREDESC/RSET : uses allocptr *)
		    scan (v+1, orig, v::acc)
		| 13 => (* RSTORESTORE : uses allocptr and uses storeptr *)
		    scan (v+1, orig, v::acc)
		| 14 => (* RSETSTORE : uses allocptr and updates storeptr *)
		    scan (v+1, orig, v::acc)
		| 12 =>	(* RINC/MKSPECIAL : uses & updates allocptr *)
		    (case ctree of
		       (CT.EX (CT.RINC _,_,_)) =>
			 let
			   val {ctree=ex,...} = Array.sub (infoarray, orig)
			     handle zap => 
			       (print ("handle.scan2"
				       ^(makestring orig)^"\n"); raise zap)
			   val _ = Array.update (infoarray, orig, noinfo);
			   val (CT.EX (CT.RINC delta,_,_)) = ex
			   fun patch [] = ()
			     | patch (x::r) = let
				 val {ctree=e,deptype} = Array.sub (infoarray,x)
				   handle zap => 
				     (print ("handle.scan3"
					     ^(makestring x)^"\n"); raise zap)
				 val newe = case e of
				   CT.EX (CT.RSTORE (i,p),c,ord) =>
				     CT.EX (CT.RSTORE (i+delta,p),c,ord)
				 | CT.EX (CT.RSTOREDESC (b,i,d),c,ord) =>
				     CT.EX (CT.RSTOREDESC (b,i+delta,d),c,ord)
				 | CT.EX (CT.RSTORESTORE i,c,ord) =>
				     CT.EX (CT.RSTORESTORE (i+delta),c,ord)
				 | CT.EX (CT.RSET i,c,ord) =>
				     CT.EX (CT.RSET (i+delta),c,ord)
				 | CT.EX (CT.RSETSTORE i,c,ord) =>
				     CT.EX (CT.RSETSTORE (i+delta),c,ord)
				 | CT.EX (CT.RINC i,c,ord) =>
				     CT.EX (CT.RINC (i+delta),c,ord)
				 | _ => error "optallocptr.patch"
			       in
				 Array.update (infoarray, x,
					       {ctree=newe,deptype=deptype});
				 patch r
			       end
			 in
			   patch (v::acc);
			   scan (v+1, v, [])
			 end
		     | _ => search (v+1)
			 )
		| _ => scan (v+1, orig, acc)
	      end
	  and search v =
	    if v>=nbvars then () else
	      let
		val {ctree,...} = Array.sub (infoarray, v)
		  handle zap => (print ("handle.search"
					^(makestring v)^"\n"); raise zap)
	      in
		case ctree of
		  CT.EX (CT.RINC _,_,_) =>
		    scan (v+1, v, [])
		| _ =>
		    search (v+1)
	      end
	in
	  search fstvar
	end



      (********************************************************************)



      (* dependance (var, expr) : returns true if var is referenced in expr *)
      fun dependance (var, expr:CTree.expr) =
	let
	  fun dep (CT.EX (CT.VAR v,children,_)) =
	    v=var orelse List.exists dep children
	    | dep (CT.EX (_,children,_)) = List.exists dep children
	in
	  dep expr
	end


      fun treeify (fstvar, nbvars, usesarray, infoarray) =
	let
	  fun buildtree (buildvar, expr) =
	    let
	      fun build (expr as (CT.EX (base,children,order))) =
		case base of
		  CT.VAR v =>
		    let
		      val {ctree=dexpr, deptype} = Array.sub (infoarray, v)
			handle zap => (print ("handle.build"
					      ^(makestring v)^"\n"); raise zap)
		      val nbuses = Array.sub (usesarray, v)
			handle zap => (print ("handle.build2"
					      ^(makestring v)^"\n"); raise zap)
		      fun testswap cur =
			if cur = buildvar then
			  true
			else
			  let
			    fun testnext [] = error "treeify.testnext, nothing"
			      | testnext [(f,_)] = testswap f
			      | testnext ((f1,_)::(seql as ((f2,_)::_))) =
				if f2>buildvar then
				  testswap f1
				else
				  testnext seql
			    val {ctree=ex as (CT.EX (base,_,_)), deptype=dep} =
			      Array.sub (infoarray, cur)
			      handle zap => (print ("handle.testswap"
						    ^(makestring cur)^"\n"); raise zap)
			  in
			    if (canswap (deptype,dep) andalso
				not (dependance (v, ex))) then
			      case base of
				CT.SWITCH seql => testnext seql
			      | CT.BR_BOXED (s1, s2) => testnext [s1,s2]
			      | CT.BR_INT (_, s1, s2) => testnext [s1,s2]
			      | CT.BR_WORD32 (_, s1, s2) => testnext [s1,s2]
			      | CT.BR_FLOAT (_, s1, s2) => testnext [s1,s2]
			      | CT.BR_STRING (s1,s2) => testnext [s1,s2]
			      | _ => testswap (cur+1)
			    else
			      false
			  end
		    in
		      if (nbuses = 1 andalso
			  deptype<>6 andalso
			  testswap (v+1)) then
			(Array.update (infoarray, v, noinfo);	(* remove it *)
			 build dexpr				  (* recurse *)
			 )
		      else
			expr (* can't move, so keep a CT.VAR *)
		    end
		| _ =>
		    let
		      (* in the REVERSE of the order of evaluation *)
		      val newchildren = revevalordermap (build,order,children)
		    in
		      CT.EX (base,newchildren,order)
		    end
	    in
	      build expr
	    end

	  fun do_next (var) =
	    if var<fstvar then () else
	      let
		val {ctree,deptype} = Array.sub (infoarray, var)
		  handle zap => (print ("handle.do_next"
					^(makestring var)^"\n"); raise zap)
	      in
		if deptype = ~1 orelse deptype = 6 then
		  ()
		else
		  Array.update (infoarray, var, {ctree=buildtree (var,ctree),
						 deptype=deptype});
		  do_next (var-1)
	      end
	in
	  do_next (nbvars-1)
	end



      (********************************************************************)


      val varfmls = map newVar vl				 (* starts at 0 *)
      val fstvar = !curvar
      val (expl,lastvar) = seqFromCps ([],cpsexpr)
      val nbvars = !curvar
      val usesarray = Array.array (nbvars, 0)
      (* we want the variables that are not in expl, i.e, the
         parameters to the function, to be unmoveable. *)
      val infoarray = Array.array (nbvars, unmoveable)
    in
      (* now compute the uses *)
      find_info (expl, usesarray, infoarray);
      curinfoarray := infoarray;
      printSeq ("before: ",(fstvar,lastvar),!curinfoarray);

      if Word.andb (Word.fromInt(!Control.CG.misc4), 0w256) = 0w0 then () else
	(optallocptr (fstvar, nbvars, infoarray);
	 curinfoarray := infoarray;
	 printSeq ("optallocptr: ",(fstvar,lastvar),!curinfoarray)
	);

      if Word.andb (Word.fromInt(!Control.CG.misc4), 0w512) <> 0w0 then () else
	(treeify (fstvar, nbvars, usesarray, infoarray);
	 curinfoarray := infoarray;
	 printSeq ("treeification: ",(fstvar,lastvar),!curinfoarray)
	);

      (((fstvar,lastvar), infoarray), varfmls, !fragList)
    end

  val ctreeify = makeFunseq

end (* structure CTreeify *)
