(* alpha32.sml 
 *
 * COPYRIGHT (c) 1995 AT&T Bell Laboratories.
 *
 * generates machine code from the mltree.
 *)

functor Alpha32
  (structure Flowgen : FLOWGRAPHgen
     sharing Flowgen.I = Alpha32Instr) : MLTREECOMP = 
struct
  structure Flowgraph = Flowgen.F
  structure F = Flowgen
  structure T = MLTree
  structure I = Alpha32Instr
  structure C = Alpha32Cells
  structure M = Alpha32MCProps

  (*********************************************************

       Trap Shadows, Floating Exceptions, and Denormalized
	Numbers on the DEC Alpha

		Andrew W. Appel and Lal George
		  Nov 28, 1995

  See section 4.7.5.1 of the Alpha Architecture Reference Manual.

  The Alpha has imprecise exceptions, meaning that if a floating
  point instruction raises an IEEE exception, the exception may
  not interrupt the processor until several successive instructions have
  completed.  ML, on the other hand, may want a "precise" model
  of floating point exceptions.

  Furthermore, the Alpha hardware does not support denormalized numbers
  (for "gradual underflow").  Instead, underflow always rounds to zero.
  However, each floating operation (add, mult, etc.) has a trapping
  variant that will raise an exception (imprecisely, of course) on
  underflow; in that case, the instruction will produce a zero result
  AND an exception will occur.  In fact, there are several variants
  of each instruction; three variants of MULT are:

  MULT  s1,s2,d       truncate denormalized result to zero; no exception
  MULT/U  s1,s2,d     truncate denormalized result to zero; raise UNDERFLOW
  MULT/SU  s1,s2,d    software completion, producing denormalized result

  The hardware treats the MULT/U and MULT/SU instructions identically,
  truncating a denormalized result to zero and raising the UNDERFLOW
  exception.  But the operating system, on an UNDERFLOW exception,
  examines the faulting instruction to see if it's an /SU form, and if so,
  recalculates s1*s2, puts the right answer in d, and continues,
  all without invoking the user's signal handler.

  Because most machines compute with denormalized numbers in hardware,
  to maximize portability of SML programs, we use the MULT/SU form.
  (and ADD/SU, SUB/SU, etc.)  But to use this form successfully,
  certain rules have to be followed.  Basically, d cannot be the same
  register as s1 or s2, because the opsys needs to be able to 
  recalculate the operation using the original contents of s1 and s2,
  and the MULT/SU instruction will overwrite d even if it traps.

  More generally, we may want to have a sequence of floating-point
  instructions.  The rules for such a sequence are:

  1. The sequence should end with a TRAPB (trap barrier) instruction.
     (This could be relaxed somewhat, but certainly a TRAPB would
      be a good idea sometime before the next branch instruction or
      update of an ML reference variable, or any other ML side effect.)
  2. No instruction in the sequence should destroy any operand of itself
     or of any previous instruction in the sequence.
  3. No two instructions in the sequence should write the same destination
     register.

  We can achieve these conditions by the following trick in the
  Alpha code generator.  Each instruction in the sequence will write
  to a different temporary; this is guaranteed by the translation from
  ML-RISC.  At the beginning of the sequence, we will put a special
  pseudo-instruction (we call it DEFFREG) that "defines" the destination
  register of the arithmetic instruction.  If there are K arithmetic
  instructions in the sequence, then we'll insert K DEFFREG instructions
  all at the beginning of the sequence.
  Then, each arithop will not only "define" its destination temporary
  but will "use" it as well.  When all these instructions are fed to
  the liveness analyzer, the resulting interference graph will then
  have inteference edges satisfying conditions 2 and 3 above.

  Of course, DEFFREG doesn't actually generate any code.  In our model
  of the Alpha, every instruction generates exactly 4 bytes of code
  except the "span-dependent" ones.  Therefore, we'll specify DEFFREG
  as a span-dependent instruction whose minimum and maximum sizes are zero.

  At the moment, we do not group arithmetic operations into sequences;
  that is, each arithop will be preceded by a single DEFFREG and
  followed by a TRAPB.  To avoid the cost of all those TRAPB's, we
  should improve this when we have time.  Warning:  Don't put more 
  than 31 instructions in the sequence, because they're all required
  to write to different destination registers!  

  What about multiple traps?  For example, suppose a sequence of
  instructions produces an Overflow and  a Divide-by-Zero exception?
  ML would like to know only about the earliest trap, but the hardware
  will report BOTH traps to the operating system.  However, as long
  as the rules above are followed (and the software-completion versions
  of the arithmetic instructions are used), the operating system will
  have enough information to know which instruction produced the
  trap.  It is very probable that the operating system will report ONLY
  the earlier trap to the user process, but I'm not sure.

  For a hint about what the operating system is doing in its own
  trap-handler (with software completion), see section 6.3.2 of
  "OpenVMS Alpha Software" (Part II of the Alpha Architecture
  Manual).  This stuff should apply to Unix (OSF1) as well as VMS.

  ****************************************************************)

  fun error msg = ErrorMsg.impossible ("Alpha32." ^ msg)

  val itow = Word.fromInt
  val wtoi = Word.toInt

  val emit = F.emitInstr

  fun newReg () = C.newReg()
  fun newFreg() = C.newFreg()

  fun cond T.LT  = I.CC_LT
    | cond T.LTU = I.CC_LTU
    | cond T.LE  = I.CC_LE
    | cond T.LEU = I.CC_LEU
    | cond T.EQ  = I.CC_EQ
    | cond T.GEU = I.CC_GEU
    | cond T.NEQ = I.CC_NEQ

  fun swapcc I.CC_LT  = I.CC_GT
    | swapcc I.CC_LTU = I.CC_GTU
    | swapcc I.CC_LE  = I.CC_GE
    | swapcc I.CC_LEU = I.CC_GEU
    | swapcc I.CC_EQ  = I.CC_EQ
    | swapcc I.CC_GEU = I.CC_LEU
    | swapcc I.CC_NEQ = I.CC_NEQ
    | swapcc _ = error "swapcc"	


  fun loadImmed (n, rd) = let
      val low = wtoi(Word.andb(itow n, 0w65535))(* unsigned low 16 bits *)
      val high = n div 65536
      val (lowsgn, highsgn) =			(* Sign-extend *)
         if low <= 32767 then (low, high) else (low -65536, high+1)
  in
    emit(I.LDA(rd, C.zeroR, I.IMMop lowsgn));
    if highsgn = 0 then () else emit(I.LDAH(rd, rd, I.IMMop highsgn))
  end
	   
  fun loadImmed32 (n, rd) =
    let
      open Word32
      val low = andb(n, 0w65535)		(* unsigned low 16 bits *)
      val high = n div 0w65536
      val (lowsgn, highsgn) = 		(* Sign-extend *)
         if low <= 0w32767 then (low, high)
                          else (low-0w65536, high+0w1)
      val highsgn' = andb(highsgn, 0w65535)
    in
      emit(I.LDA(rd, C.zeroR, I.IMMop (toInt lowsgn)));
      
      if highsgn' = 0w0
	  then ()
      else if highsgn' < 0w32768
	  then emit(I.LDAH(rd, rd, I.IMMop (toInt highsgn)))
      else (* highsgn >= 0w2768 *)
          (emit(I.LDAH(rd, rd, I.IMMop (toIntX (highsgn - 0w65536))));
	   emit(I.ZAP(rd,I.IMMop 0xf0,rd)))
    end
    
  fun orderedFArith (exp1, exp2, T.LR) = (fregAction exp1, fregAction exp2)
    | orderedFArith (exp1, exp2, T.RL) = let
        val f2 = fregAction exp2
      in
	(fregAction exp1, f2)
      end

  and stmAction exp = let
    fun jump(exp, labs) = emit(I.JMPL((C.asmTmpR, (regAction exp, 0)), labs))

    fun fbranch(cc, exp1, exp2, lab, order) = let
      val (f1, f2) = orderedFArith(exp1, exp2, order)
      val tmpR = newFreg()
    in
      emit(I.DEFFREG(tmpR));
      case (cond cc)
       of I.CC_LT => emit(I.CMPTLT(f1, f2, tmpR))
	| I.CC_LE => emit(I.CMPTLE(f1, f2, tmpR))
	| I.CC_EQ => emit(I.CMPTEQ(f1, f2, tmpR))
	| _ => error "fbranch"
      (*esac*);
      emit(I.TRAPB);
      emit(I.FBNE(tmpR, lab))
    end

    fun branch(cond, exp1, exp2, lab, order) = let
	fun zapHi r = emit(I.ZAP(r, I.IMMop 0xf0, r))
	val tmpR = newReg()
	val (r1, o2) = 
	  case order 
	   of T.LR => (regAction exp1, opndAction exp2)
	    | T.RL => let val o2' = opndAction exp2
		in
		  (regAction(exp1), o2')
		end
	fun emitBr(cmp, br) = (emit(cmp(r1, o2, tmpR)); emit(br (tmpR, lab)))
	fun emitUnsignedBr(cmp, br) = 
	   (case (r1, o2)
	     of (r1, I.REGop r2) => (zapHi r1; zapHi r2; emitBr(cmp, br))
	      | (r1, o2) => (zapHi r1; emitBr(cmp, br))
	   (*esac*))
    in
	case cond 
	 of I.CC_LTU => emitUnsignedBr(I.CMPULT, I.BNE)
	  | I.CC_LEU => emitUnsignedBr(I.CMPULE, I.BNE)
	  | I.CC_GTU => emitUnsignedBr(I.CMPULE, I.BEQ)
	  | I.CC_GEU => emitUnsignedBr(I.CMPULT, I.BEQ)
	  | I.CC_LT  => emitBr(I.CMPLT,  I.BNE)
	  | I.CC_LE  => emitBr(I.CMPLE,  I.BNE)
	  | I.CC_GT  => emitBr(I.CMPLE,  I.BEQ)
	  | I.CC_GE  => emitBr(I.CMPLT,  I.BEQ)
	  | I.CC_EQ  => emitBr(I.CMPEQ,  I.BNE)
	  | I.CC_NEQ => emitBr(I.CMPEQ,  I.BEQ)
    end
  in
    case exp
     of T.BR lab => emit(I.BR(C.zeroR, lab))
      | T.JMP exp => jump(exp, [])
      | T.GOTO(exp, labs) => jump(exp, labs)
      | T.BCC(T.NEQ, T.ANDB(exp, T.LI 1), T.LI 0, lab, ord) => 
	  emit(I.BLBS(regAction exp, lab))
      | T.BCC(cc, exp, T.LI n, lab, ord) => 
          branch(cond cc, exp, T.LI n, lab, ord)
      | T.BCC(cc, T.LI n, exp, lab, ord) => 
          branch(swapcc(cond cc), exp, T.LI n, lab, ord)
      | T.BCC(cc, e1, e2, lab, ord) => branch(cond cc, e1, e2, lab, ord)
      | T.FBCC arg  => fbranch arg
      | T.CALL(exp, def, use) => let
	  fun addList([],acc) = acc
	    | addList(T.REG r::regs,acc) = addList(regs, C.addReg(r,acc))
	    | addList(T.FREG f::regs,acc) = addList(regs, C.addFreg(f,acc))
        in
	  emit(I.JSR((C.gclinkregR,(regAction exp,0)),
		     addList(def,C.empty),
		     addList(use,C.empty)))
        end
      | T.BARRIER _ => emit(I.TRAPB)
      | T.TESTLIMIT(e1, e2) => 
	 emit(I.SUBQ(regAction e1, I.REGop(regAction e2), C.exhaustedR))
      | T.CHECKLIMIT lab => emit(I.BGE(C.exhaustedR,lab))
      | T.STORE8(ea, r) => let
	  val rs = regAction r
	  val (rd, disp) = eaAction ea
	  val t1 = newReg()
	  val t2 = newReg()
	  val t3 = newReg()
        in
	  emit(I.LDQ_U(t1, (rd, disp)));
	  emit(I.LDA(t2, rd, I.IMMop disp));
	  emit(I.INSBL(rs, I.REGop(t2), t3));
	  emit(I.MSKBL(t1, I.REGop(t2), t1));
	  emit(I.BIS(t1, I.REGop(t3), t1));
	  emit(I.STQ_U(t1, (rd, disp)))
        end
      | T.STORE32(ea, r) => emit(I.STL(regAction r, eaAction ea))
      | T.STORED(ea, f) => emit(I.STT(fregAction f, eaAction ea))
      | T.MV(dst, exp) => let
	  val rd = C.regMap dst
	  val rs = case exp of T.REG _ => C.regMap exp | _ => regActionRd(exp, rd)
        in
	  if rs = rd then () else emit(I.BIS(rs, I.REGop C.zeroR, rd))
	end
      | T.FMV(fdst, exp) => let
	  val fd = C.fregMap fdst
	  val fs = 
	    case exp of T.FREG _ => C.fregMap exp | _ => fregActionFd(exp, fd)
	in
	  if fs = fd then () else emit(I.CPYS(fs, fs, fd))
	end
      | T.LBASE(exp, lab) => 
	  emit(I.LADDR(regAction exp,
		       I.NEGLAB(lab,M.constBaseRegOffset),
		       C.baseptrR))

      | T.LDREGMASK regs => let
	  val regs' = map C.regMap regs
	  fun hasPseudoRegs [] = false
	    | hasPseudoRegs (T.REG n ::regs) = n > 31 orelse hasPseudoRegs regs
	    | hasPseudoRegs (_::regs) = hasPseudoRegs regs
	  fun loadregmask () =
	    (emit(I.LDA(C.maskRegR, C.zeroR, I.LOMASKop regs'));
	     emit(I.LDAH(C.maskRegR, C.maskRegR, I.HIMASKop regs')))
	in
	  if hasPseudoRegs regs then
	    loadregmask()
	  else
	    loadImmed (Alpha32Mask.regmask regs', C.maskRegR);
	  ()
	end
      | T.SEQ(exp1, exp2) => (stmAction exp1; stmAction exp2)
      | T.REG r => ()
      | _ => (T.prTree (T.CODE exp);
	      error("stmAction: missing rules"))
  end

  and opndAction (T.LI value) =				
      if value <= 255 andalso value >= 0 then I.IMMop value
      else let
	  val tmpR = newReg()
        in
	  loadImmed (value, tmpR);
	  I.REGop tmpR
        end
    | opndAction(T.LI32 value) =
      if Word32.<=(value, 0w255) then I.IMMop (Word32.toInt value)
      else let 
	  val tmpR = newReg () 
	in
	  loadImmed32 (value, tmpR);
	  I.REGop tmpR
	end
    | opndAction exp = I.REGop (regAction exp)

  and regAction (r as T.REG _) = C.regMap r
    | regAction exp = regActionRd(exp, newReg())

  and regActionRd(exp, rd) = let
    fun orderedRR(e1, e2, T.LR) = (regAction e1, regAction e2)
      | orderedRR(e1, e2, T.RL) = let
          val r2 = regAction e2
        in
	  (regAction e1, r2)
        end

    fun orderedArith(arith, e1, e2, ord) = let
      val (reg, opnd) = 
	 case ord
	  of T.LR => (regAction e1, opndAction e2)
	   | T.RL => let 
	       val opnd = opndAction e2
	     in
	       (regAction e1, opnd)
	     end
    in
      emit(arith(reg, opnd, rd));
      rd
    end

    fun commOrderedArith(arith, e1 ,e2, ord) = let
      fun f(e1 as T.LI _, e2) = orderedArith(arith, e2, e1, ord)
	| f(e1 as T.LI32 _, e2) = orderedArith(arith, e2, e1, ord)
	| f(e1, e2) = orderedArith(arith, e1, e2, ord)
    in
      f(e1, e2)
    end
      

    fun orderedArithTrap arg = orderedArith arg before emit(I.TRAPB)
    fun commOrderedArithTrap arg = commOrderedArith arg before emit(I.TRAPB)

    fun orderedMullTrap (e1, e2, ord, rd) = let
      val (reg, opnd) = case ord
	of T.LR => (regAction e1, opndAction e2)
         | T.RL => let
	     val opnd = opndAction e2
           in
	     (regAction e1, opnd)
	   end

      fun emitMulvImmed (reg, 0, rd) = emit(I.LDA(rd, C.zeroR, I.IMMop 0))
	| emitMulvImmed (reg, 1, rd) = emit(I.ADDL(reg, I.REGop C.zeroR, rd))
	| emitMulvImmed (reg, multiplier, rd) = let
	    fun log2 0w1 = 0 | log2 n = 1 + (log2 (Word.>> (n, 0w1)))

	    fun exp2 n = Word.<<(0w1, n)

	    fun bitIsSet (x,n) = Word.andb(x,exp2 n) <> 0w0

	    fun loop (~1) = ()
	      | loop n =
		  (if bitIsSet(itow multiplier, itow n) then
		     emit(I.ADDLV(reg, I.REGop rd, rd))
		   else ();
		   if n>0 then emit(I.ADDLV(rd, I.REGop rd, rd)) else ();
		   loop (n-1))
	  in
	    emit(I.ADDLV(reg, I.REGop reg, rd));
	    loop ((log2 (itow multiplier)) - 1)
	  end
    in
      case opnd 
       of (I.IMMop multiplier) => emitMulvImmed (reg, multiplier, rd)
	| _ => emit (I.MULLV (reg, opnd, rd))
      (*esac*);
      emit(I.TRAPB);
      rd
    end
  in
    case exp
     of T.LI n              => (loadImmed(n, rd); rd)
      | T.LI32 w            => (loadImmed32(w, rd); rd)
      | T.ADD(e1, e2)       => commOrderedArith(I.ADDL, e1, e2, T.LR)
      | T.SUB(e1, e2, ord)  => orderedArith(I.SUBL, e1, e2, ord)
      | T.MULU(e1, e2)	    => commOrderedArith(I.MULL, e1, e2, T.LR)
      | T.ADDT(e1, e2)	    => commOrderedArithTrap(I.ADDLV, e1, e2, T.LR)
      | T.SUBT(e1, e2, ord) => orderedArithTrap(I.SUBLV, e1, e2, ord)
      | T.ANDB(e1, e2)	    => commOrderedArith(I.AND, e1, e2, T.LR)
      | T.ORB(e1, e2) 	    => commOrderedArith(I.BIS, e1, e2, T.LR)
      | T.XORB(e1, e2)	    => commOrderedArith(I.XOR, e1, e2, T.LR)
      | T.MULT(e1, e2) 	    => orderedMullTrap(e1, e2, T.LR, rd)
      | T.SLL(e1, e2, ord)  => orderedArith(I.SLL, e1, e2, T.LR)
      | T.SRA(e1, e2, ord)  => let
	    val (reg, opnd) = (regAction e1, opndAction e2)
          in
	    (* sign extend longword argument *)
	    emit(I.SGNXL (reg,reg)); 
	    emit(I.SRA(reg, opnd, rd));
	    rd
          end
      | T.SRL(e1, e2, ord)  => let
	  val (reg, opnd) = (regAction e1, opndAction e2)
        in
	  emit(I.ZAP(reg,I.IMMop 0xf0,reg));
	  emit(I.SRL(reg, opnd, rd));
	  rd
        end
      | T.DIVT arg => let
	  val (r1, r2) = orderedRR arg
	  val ftmp1 = newFreg() val ftmp2 = newFreg() val ftmp3 = newFreg()
	  val ftmp4 = newFreg() val ftmp5 = newFreg() val ftmp6 = newFreg()
        in
	  emit(I.STQ(r1, (C.stackptrR, M.tmpOffset1)));
	  emit(I.STQ(r2, (C.stackptrR, M.tmpOffset2)));
	  emit(I.LDT(ftmp1, (C.stackptrR, M.tmpOffset1)));
	  emit(I.LDT(ftmp2, (C.stackptrR, M.tmpOffset2)));
	  emit(I.CVTQT(C.zeroR, ftmp1, ftmp3));
	  emit(I.CVTQT(C.zeroR, ftmp2, ftmp4));
	  emit(I.DIVT(ftmp3, ftmp4, ftmp5));
	  emit(I.CVTTQ(C.zeroR, ftmp5, ftmp6));
	  emit(I.STT(ftmp6, (C.stackptrR, M.tmpOffset1)));
	  emit(I.LDQ(rd, (C.stackptrR, M.tmpOffset1)));
	  emit(I.TRAPB);
	  rd
        end

      | T.DIVU arg => let
	  val (r1, r2) = orderedRR arg
	  val ftmp1 = newFreg() val ftmp2 = newFreg() val ftmp3 = newFreg()
          val ftmp4 = newFreg() val ftmp5 = newFreg() val ftmp6 = newFreg()
        in
	  emit(I.ZAP(r1, I.IMMop 0xf0, r1));
	  emit(I.STQ(r1, (C.stackptrR, M.tmpOffset1)));
	  emit(I.ZAP(r2, I.IMMop 0xf0, r2));
	  emit(I.STQ(r2, (C.stackptrR, M.tmpOffset2)));
	  emit(I.LDT(ftmp1, (C.stackptrR, M.tmpOffset1)));
	  emit(I.LDT(ftmp2, (C.stackptrR, M.tmpOffset2)));
	  emit(I.CVTQT(C.zeroR, ftmp1, ftmp3));
	  emit(I.CVTQT(C.zeroR, ftmp2, ftmp4));
	  emit(I.DIVT(ftmp3, ftmp4, ftmp5));
	  emit(I.CVTTQ(C.zeroR, ftmp5, ftmp6));
	  emit(I.STT(ftmp6, (C.stackptrR, M.tmpOffset1)));
	  emit(I.LDQ(rd, (C.stackptrR, M.tmpOffset1)));
	  emit(I.TRAPB);
	  rd
	end
      | T.LOAD32 exp => (emit(I.LDL(rd, eaAction exp)); rd)
	(* Load and sign-extend a byte from a  non-aligned address  *)
      | T.LOAD8 exp => let
	  val tmpR0 = newReg()
	  val tmpR1 = newReg()
	  val (rt, disp) = eaAction exp
	in
	  emit(I.LDQ_U(tmpR0, (rt, disp)));
	  emit(I.LDA(tmpR1, rt, I.IMMop(disp)));
	  emit(I.EXTBL(tmpR0, I.REGop tmpR1, rd));
	  rd
	end
      | T.LADDR(lab, k) => 
          (emit(I.LADDR(C.baseptrR,
		       I.POSLAB(lab,k-M.constBaseRegOffset),
		       rd));
	   rd)
      | T.SEQ(e1, e2) => (stmAction e1; regAction e2)
      | _ => (T.prTree (T.CODE exp);
	      error "regAction")
  end (* regActionRd *)

  and eaAction exp = let
    fun makeEA(r, n) = 
      if ~32768 <= n andalso n <= 32767 then (r, n)
      else let
	  val tmpR = newReg()
	  val low = wtoi(Word.andb(itow n, 0w65535))(* unsigned low 16 bits *)
	  val high = n div 65536
	  val (lowsgn, highsgn) =			 (* Sign-extend *)
	    if low <= 32767 then (low, high) else (low -65536, high+1)
	in
	  (emit(I.LDAH(tmpR, r, I.IMMop(highsgn)));
	   (tmpR, lowsgn))
	end
  in
    case exp
     of T.ADD(exp, T.LI n)    => makeEA(regAction exp, n)
      | T.ADD(T.LI n, exp)    => makeEA(regAction exp, n)
      | T.SUB(exp, T.LI n, _) => makeEA(regAction exp, ~n)
      | exp                   => makeEA(regAction exp, 0)
  end (* eaAction *)

  and fregAction (f as T.FREG _) = C.fregMap f
    | fregAction exp = fregActionFd(exp, newFreg())

  and fregActionFd(exp, fd) = let
    (* macho comment goes here *)
    fun doFloatArith(farith, e1, e2, fd, order) = let
      val (f1, f2) = orderedFArith(e1, e2, order)
    in
      emit(I.DEFFREG fd);
      emit(farith(f1, f2, fd));
      emit(I.TRAPB);
      fd
    end
  in
    case exp 
     of T.FREG _	     => C.fregMap exp
      | T.FADDD(e1, e2)      => doFloatArith(I.ADDT, e1, e2, fd, T.LR)
      | T.FMULD(e1, e2)      => doFloatArith(I.MULT, e1, e2, fd, T.LR)
      | T.FSUBD(e1, e2, ord) => doFloatArith(I.SUBT, e1, e2, fd, ord)
      | T.FDIVD(e1, e2, ord) => doFloatArith(I.DIVT, e1, e2, fd, ord)
      | T.FABSD exp          => (emit(I.CPYS(C.zeroR, fregAction exp, fd)); fd)
      | T.FNEGD exp          => let
          val fs = fregAction exp 
	in
	  emit(I.CPYSN(fs, fs, fd));  fd
	end
      | T.CVTI2D exp         => 
         (emit(I.STQ(regAction exp, (C.stackptrR, M.tmpOffset1)));
	  emit(I.LDT(fd, (C.stackptrR, M.tmpOffset1)));
	  emit(I.CVTQT(C.zeroR, fd, fd)); 
	  fd)
      | T.LOADD exp	     => (emit(I.LDT(fd, eaAction exp)); fd)
      | T.SEQ(e1, e2)        => (stmAction e1; fregAction e2)
      | exp => error "fregAction"
  end

  fun mltreeComp mltree = let
    fun mltc(T.MARK)             = F.mark()
      | mltc(T.LABEL lab)        = F.defineLabel lab
      | mltc(T.JMPTABLE arg)     = F.jmpTable arg
      | mltc(T.REALCONST arg)    = F.mlRealConst arg
      | mltc(T.STRINGCONST arg)  = F.mlStringConst arg
      | mltc T.BEGINCLUSTER      = F.beginCluster()
      | mltc T.ENDCLUSTER        = F.endCluster[C.maxReg(),C.maxFreg()]
      | mltc(T.ESCAPEBLOCK regs) = F.exitBlock regs
      | mltc(T.CODE mlrisc)      = (stmAction mlrisc)
  in
    if Word.andb (itow(!Control.CG.misc4), 0w2048) = 0w0 then ()
    else T.prTree mltree;
    mltc mltree
  end 
end
