(* hppa.sml
 * 
 * COPYRIGHT (c) 1996 AT&T Bell Laboratories.
 *
 * generates machine code from the mltree.
 *)
functor Hppa
  (structure Flowgen : FLOWGRAPHgen
     sharing Flowgen.I = HppaInstruction) : MLTREECOMP = 
struct
  structure Flowgraph = Flowgen.F
  structure F = Flowgen
  structure T = MLTree
  structure I = HppaInstruction
  structure C = HppaCells
  structure M = struct
    val constBaseRegOffset = 8192
    (* runtime system dependent constants. *)
    val float64TmpOffset = 0				(* must be < 16 *)
    val float32TmpOffset = float64TmpOffset		(* must be < 16 *)
    val cvti2dOffset = ~4		

    val udivOffset = ~16				(* must be < 1024 *)
    val divOffset = ~20					(*    ditto	  *)
    val mulOffset = ~24					(*    ditto	  *)
  end

  fun error msg = ErrorMsg.impossible ("Alpha32." ^ msg)

  val itow = Word.fromInt
  val wtoi = Word.toInt

  val emit = F.emitInstr

  fun newReg () = C.newReg()
  fun newFreg() = C.newFreg()

  datatype ea = DISPea of int * int  | INDXea of int * int

  fun im5 n = n < 16 andalso n >= ~16
  fun im11 n = n < 1024 andalso n >= ~1024
  fun im14 n = n < 8192 andalso n >= ~8192

  fun split n = let
    val w = Word.fromInt(n)
  in
    (Word.toInt(Word.~>>(w, 0w11)), Word.toInt(Word.andb(w, 0wx7ff)))
  end
  val zeroR = 0
  fun emitMove(rs, rd) = emit(I.ARITH{a=I.OR,r1=rs, r2=zeroR, t=rd})

  fun loadImmedRd(n, rd) = 
    if im14 n then (emit(I.LDO{i=I.IMMED n, b=0, t=rd}); rd)
    else let
        val (hi, lo) = split n
	val tmpR = newReg()
      in
	emit(I.LDIL{i=I.IMMED hi, t=tmpR});
	emit(I.LDO{i=I.IMMED lo, b=tmpR, t=rd});
	rd
      end

  fun loadImmed n = loadImmedRd(n, newReg())

  fun loadWord32Rd(w, rd) = let
    val toInt = Word32.toInt
  in
    if Word32.<(w, 0w8192) then emit(I.LDO{i=I.IMMED(toInt w), b=0, t=rd})
    else let 
        val tmpR = newReg()
	val hi = Word32.~>>(w, 0w11)
	val lo = Word32.andb(w, 0wx7ff)
      in
	emit(I.LDIL{i=I.IMMED(toInt hi), t=tmpR});
	emit(I.LDO{i=I.IMMED(toInt lo), b=tmpR, t=rd})
      end;
    rd
  end
  fun loadWord32 w = loadWord32Rd(w, newReg())

  (* The call instructions put the return address in C.returnPtr *)
  fun call(go, defs, uses) = 
    emit(I.CALL{t=go, defs=C.addReg(C.returnPtr, defs), uses=uses});


  fun invokeMilliCode(offset, exp1, exp2, ord, t) = let
    fun addList([], set) = set
      | addList(r::regs, set) = addList(regs, C.addReg(r,set))
    val tmpR = newReg()
    val defs = C.addReg(C.milliret, C.empty)
    val uses = addList([C.milliarg1, C.milliarg2, tmpR], C.empty)
    val (r1, r2) = orderedRR(exp1, exp2, ord)
  in
    emitMove(r1, C.milliarg1);
    emitMove(r2, C.milliarg2);
    emit(I.ARITHI{ai=I.LDW, r=C.stackptrR, i= offset, t=tmpR});
    call(tmpR, defs, uses);
    emitMove(C.milliret, t);
    t
  end


  and orderedRR(exp1, exp2, T.LR) = (regAction exp1, regAction exp2)
    | orderedRR(exp1, exp2, T.RL) = let val r2 = regAction exp2
      in 
	(regAction exp1, r2)
      end
  
  and orderedFF(exp1, exp2, T.LR) = (fregAction exp1, fregAction exp2)
    | orderedFF(exp1, exp2, T.RL) = let val f2 = fregAction exp2
      in (fregAction exp1, f2)
      end

  and eaAction(T.ADD(exp, T.LI n)) = DISPea(regAction exp, n)
    | eaAction(T.ADD(T.LI n, exp)) = DISPea(regAction exp, n)
    | eaAction(T.ADD(exp1, exp2)) = INDXea(regAction exp1, regAction exp2)
    | eaAction(T.SUB(exp, T.LI n, _)) = DISPea(regAction exp, ~n)
    | eaAction exp = DISPea(regAction exp, 0)
    
  and stmAction exp = let
    fun store(ea, reg, instr) = let
      val (b, d) = 
	  case eaAction ea
	    of DISPea (bd as (base, disp)) => 
		 if im14 disp then bd 
		 else let 
		     val (hi21, lo11) = split disp
		     val tmpR1 = newReg()
		     val tmpR2 = newReg()
		   in
		     emit(I.LDIL{i=I.IMMED hi21, t=tmpR1});
		     emit(I.ARITH{a=I.ADD, r1=base, r2=tmpR1, t=tmpR2});
		     (tmpR2, lo11)
		   end
	     | INDXea(r1,r2) => let 
		 val t = newReg()
	       in
		 emit (I.ARITH {a=I.ADD, r1=r1, r2=r2, t=t});
		 (t, 0)
	       end
    in
      emit (I.STORE {st=instr, b=b, d=d, r=regAction reg})
    end

    fun fstore(ea, freg) = let
      val r = fregAction freg
    in
      case eaAction ea
	of DISPea(b,d) => 
	    if im5 d then
	      emit(I.FSTORE {fst=I.FSTDS, b=b, d=d, r=r})
	    else
	      emit(I.FSTOREX{fstx=I.FSTDX, b=b, x=loadImmed d, r=r})
	 | INDXea(b,x) => emit(I.FSTOREX{fstx=I.FSTDX, b=b, x=x, r=r})
    end

    fun branch(bc, r1, r2, t) = let
	val flab = Label.newLabel ""
	fun emitBranch(cmp, ic, r1, r2) = 
	  (emit(I.BCOND{cmp=cmp, bc=ic, r1=r1, r2=r2, t=t, f=flab});
	   F.defineLabel flab)
    in
      (case bc
	of T.LT  => emitBranch(I.COMBT, I.LT, r1, r2)
	 | T.LE  => emitBranch(I.COMBT, I.LE, r1, r2)
	 | T.EQ  => emitBranch(I.COMBT, I.EQ, r1, r2)
	 | T.LTU => emitBranch(I.COMBT, I.LTU, r1, r2)
	 | T.LEU => emitBranch(I.COMBT, I.LEU, r1, r2)
	 | T.GEU => emitBranch(I.COMBT, I.LEU, r2, r1)
	 | T.NEQ  => emitBranch(I.COMBF, I.EQ,  r1, r2)
      (*esac*))
    end
  in
    case exp
     of T.BR lab    => emit(I.B lab)
      | T.JMP exp   => emit(I.BV {b=regAction exp, x=zeroR, labs=[]})
      | T.GOTO(exp, labs) => emit(I.BV{b=regAction exp, x=zeroR, labs=labs})
      | T.BCC(cc, exp, T.LI n, lab, ord) => let
          val r = regAction exp
        in
	  if im5 n then let
	      val flab = Label.newLabel ""
	      fun emitBranch(cmpi, ic) = 
	        (emit(I.BCONDI{cmpi=cmpi, bc=ic, i=n, r2=r, t=lab, f=flab});
	         F.defineLabel flab)
	    in
	      case cc
	       of T.LT  => emitBranch(I.COMIBF, I.LE)
		| T.LE  => emitBranch(I.COMIBF, I.LT)
		| T.EQ  => emitBranch(I.COMIBT, I.EQ)
		| T.LTU => emitBranch(I.COMIBF, I.LEU)
		| T.LEU => emitBranch(I.COMIBF, I.LTU)
		| T.GEU => emitBranch(I.COMIBT, I.LEU)
		| T.NEQ  => emitBranch(I.COMIBF, I.EQ)
	    end
	  else 
	    branch(cc, r, loadImmed n, lab)
	end
      (* | T.BCC(cc, T.LI n, exp, lab, order) => *)
      | T.BCC(cc, exp1, exp2, lab, order) => let
	  val (r1, r2) = orderedRR(exp1, exp2, order)
        in
	  branch(cc, r1, r2, lab)
	end
      | T.FBCC(cc, exp1, exp2, lab, order) => let
	  fun fpcond T.LT = I.GE
	    | fpcond T.LE = I.GT
	    | fpcond T.EQ = I.NE
	    | fpcond T.NEQ = I.EQ
	    | fpcond _ = error "fcond"
	  val (f1,f2) = orderedFF(exp1, exp2, order)
	  val fallThrough = Label.newLabel ""
	in
	  emit(I.FCMP(fpcond cc, f1, f2));
	  emit(I.FTEST);
	  emit(I.FBCC{t=lab, f=fallThrough});
	  F.defineLabel fallThrough
	end
      | T.CALL(exp, def, use) => let 
	  fun addList([], acc) = acc
	    | addList(T.REG r::regs, acc) = addList(regs, C.addReg(r, acc))
	    | addList(T.FREG f::regs, acc) = addList(regs, C.addFreg(f, acc))
	    | addList(_::_, _) = error ""
	in
	  (* All called addresses must be byte-8 aligned *)
	  (* We could regard the call as a branch instruction whose
	   * sole branchtarget is the FALLTRHOUGH
	   *)
	  call(regAction exp, addList(def, C.empty), addList(use,C.empty))
	end

      | T.TESTLIMIT(exp1, exp2) => let
	  val (r1, r2) = orderedRR(exp1, exp2, T.LR)
	in
	  emit(I.COMCLR{cc=I.GT, r1=r1, r2=r2, t=C.exhaustedR});
	  emit(I.LDO{i=I.IMMED 1, b=0, t=C.exhaustedR})
	end
      | T.CHECKLIMIT lab =>  let 
	  val flab = Label.newLabel""
	in
	  emit(I.BCOND{cmp=I.COMBT, bc=I.EQ, r1=C.exhaustedR, r2=zeroR, 
		       t=lab, f=flab});
	  F.defineLabel flab
	end
      | T.STORE8(ea, r)  => store(ea, r, I.STB)
      | T.STORE32(ea, r) => store(ea, r, I.STW)
      | T.STORED(ea, f)  => fstore(ea, f)
      | T.MV(dst, exp) => let
	  val rd = C.regMap dst
	  val rs = case exp of T.REG _ => C.regMap exp | _ => regActionRd(exp, rd)
	in
	  if rs = rd then () else emitMove(rs, rd)
	end
      | T.FMV(fdst, exp) => let
	  val fd = C.fregMap fdst
	  val fs = 
	    case exp of T.FREG _ => C.fregMap exp | _ => fregActionFd(exp, fd)
	in
	  if fs = fd then () else emit(I.FUNARY{fu=I.FCPY, f=fs, t=fd})
	end
      | T.LBASE(exp, lab) => 
	  emit(I.LADDR{b=regAction exp,
		       i=I.LabExp(I.NEGLAB(lab,M.constBaseRegOffset)), 
		       t=C.baseptrR})

      | T.LDREGMASK regs  => let
	  val regs' = map C.regMap regs
	  fun hasPseudoRegs [] = false
	    | hasPseudoRegs (n ::regs) = n > 31 orelse hasPseudoRegs regs
	in
	  if hasPseudoRegs regs' then 
	    (emit(I.LDIL{i=I.HIMaskop regs', t=C.maskRegR});
	     emit(I.LDO{b=C.maskRegR, i=I.LOMaskop regs', t=C.maskRegR}))
	  else (loadImmedRd (HppaMask.regmask regs', C.maskRegR); ())
	end
      | T.SEQ(exp1, exp2) => (stmAction exp1; stmAction exp2)
      | T.REG r => ()
      | _ => (T.prTree (T.CODE exp); error "stmtAction: missing rules")
  end

  and regAction(r as T.REG _) = C.regMap r
    | regAction exp = regActionRd(exp, newReg())

  and regActionRd(exp, rd) = let
    datatype opnd = IMMDop of int | REGop of int

    fun opndAction(T.LI n) = 
        if im11 n then IMMDop n else REGop(loadImmed n)
      | opndAction(T.LI32 w) = 
	if Word32.<=(w, 0w1024) then IMMDop(Word32.toInt w)
	else REGop(loadWord32 w)
      | opndAction exp = REGop(regAction exp)


    fun immedArith(exp1, exp2, order, immdOp, arithOp) = let
      val (opnd, r2) = 
	case order 
	 of T.LR => (opndAction exp1, regAction exp2)
	  | T.RL => let val opnd' = opndAction exp1
	    in
	      (opnd', regAction exp2)
	    end
    in
      case opnd 
       of IMMDop n => emit(I.ARITHI{ai=immdOp, r=r2, i=n, t=rd})
        | REGop r1 => emit(I.ARITH{a=arithOp, r1=r1, r2=r2, t=rd})
      (*esac*);
      rd
    end

    fun commImmedArith(exp1, exp2 as T.LI _, ord, immdOp, arithOp) =
         immedArith(exp2, exp1, ord, immdOp, arithOp)
      | commImmedArith(exp1, exp2 as T.LI32 _, ord, immdOp, arithOp) =
	 immedArith(exp2, exp1, ord, immdOp, arithOp)
      | commImmedArith arg = immedArith arg

    local
      fun shift (immdSht, varSht) = let
	fun f(exp, T.LI n, _) = 
	    if n < 0 orelse n > 31 then error "regActionRd:shift"
	    else let
	        val rs = regAction exp
	      in
		emit(I.SHIFT{s=immdSht, r=rs, p=31-n, len=32-n, t=rd});
		rd
	      end
	  | f(exp1, exp2, order) = let
	      val (r1, r2) = orderedRR(exp1, exp2, order)
	      val tmp = newReg()
	    in
	      emit(I.ARITHI{ai=I.SUBI, i=31, r=r2, t=tmp});
	      emit(I.MTCTL{r=tmp, t=11});
	      emit(I.SHIFTV{sv=varSht, r=r1, len=32, t=rd});
	      rd
	    end
      in
	f
      end
    in
      val sll = shift (I.ZDEP, I.ZVDEP)
      val srl = shift (I.EXTRU, I.VEXTRU)
      val sra = shift (I.EXTRS, I.VEXTRS)
    end

    fun arith(exp1, exp2, oper) =
      (emit(I.ARITH{a=oper, r1=regAction exp1, r2=regAction exp2, t=rd});
       rd)

    fun load(ea, rd, instri, instrx) = 
      (case eaAction ea
	of DISPea(b, d) => 
	      if im14 d then emit(I.ARITHI{ai=instri, i=d, r=b, t=rd})
	      else emit(I.ARITH{a=instrx, r1=b, r2=loadImmed d, t=rd})
	 | INDXea(b,x) => emit(I.ARITH{a=instrx, r1=b, r2=x, t=rd})
       (*esac*);
      rd)
    (* unsigned multiply is performed in floating point registers *)
    fun unsignedMultiply(exp1, exp2, t) = let
      val r1 = regAction exp1
      val r2 = regAction exp2
      val f1 = newFreg()
      val f2 = newFreg()
      val f3 = newFreg()
    in
      emit(I.STORE{st=I.STW, b=C.stackptrR, d=M.float32TmpOffset, r=r1});
      emit(I.FLOAD{fl=I.FLDWS, b=C.stackptrR, d=M.float32TmpOffset, t=f1});
      emit(I.STORE{st=I.STW, b=C.stackptrR, d=M.float32TmpOffset, r=r2});
      emit(I.FLOAD{fl=I.FLDWS, b=C.stackptrR, d=M.float32TmpOffset, t=f2});
      emit(I.FARITH{fa=I.XMPYU, r1=f1, r2=f2, t=f3});
      emit(I.FSTORE{fst=I.FSTWS, b=C.stackptrR, d=M.float32TmpOffset, r=f3});
      emit(I.ARITHI{ai=I.LDW, r=C.stackptrR, i=M.float32TmpOffset, t=t});
      t
    end

  in
    case exp
     of T.LI n => (loadImmedRd(n, rd); rd)
      | T.LI32 w => (loadWord32Rd(w, rd); rd)
      | T.ADD(exp1, exp2)      => commImmedArith(exp1, exp2, T.LR, I.ADDI, I.ADD)
      | T.ADDT(exp1, exp2)     => commImmedArith(exp1, exp2, T.LR, I.ADDIO, I.ADDO)
      | T.SUB(exp, T.LI n, _)  => immedArith(T.LI(~n), exp, T.LR, I.ADDIO, I.ADDO)
      | T.SUBT(exp, T.LI n, _)    => immedArith(T.LI(~n), exp, T.LR, I.ADDIO, I.ADDO)
      | T.SUB(exp1, exp2, ord) => immedArith(exp1, exp2, ord, I.SUBI, I.SUB)
      | T.SUBT(exp1, exp2, ord) => immedArith(exp1, exp2, ord, I.SUBIO, I.SUBO)
      | T.SLL arg	       => sll arg
      | T.SRL arg	       => srl arg
      | T.SRA arg              => sra arg
      | T.ANDB(exp1, exp2)     => arith(exp1, exp2, I.AND)
      | T.ORB(exp1, exp2)      => arith(exp1, exp2, I.OR)
      | T.XORB(exp1, exp2)     => arith(exp1, exp2, I.XOR)
      | T.DIVU(exp1, exp2, ord)=> invokeMilliCode(M.udivOffset, exp1, exp2, ord, rd)
      | T.DIVT(exp1, exp2, ord)=> invokeMilliCode(M.divOffset, exp1, exp2, ord, rd)
      | T.MULT(exp1, exp2)     => invokeMilliCode(M.mulOffset, exp1, exp2, T.LR, rd)
      | T.MULU(exp1, exp2)     => unsignedMultiply(exp1, exp2, rd)
      | T.LOAD8(ea)	       => load(ea, rd, I.LDB, I.LDBX)
      | T.LOAD32(ea)           => load(ea, rd, I.LDW, I.LDWX)
      | T.LADDR(lab, k)        => 
          (emit(I.LADDR{b=C.baseptrR,
			i=I.LabExp(I.POSLAB(lab,k-M.constBaseRegOffset)),
			t=rd});
	   rd)
      | T.SEQ(exp1, exp2)      => (stmAction exp1; regAction exp2)
      | _  => (T.prTree(T.CODE exp); error "regActionRd: missing rules")
  end (* regActionRd *)

  and fregAction (f as T.FREG _) = C.fregMap f
    | fregAction exp = fregActionFd(exp, newFreg())

  and fregActionFd(exp, fd) = let
    fun orderedFarith(exp1, exp2, ord, arithOp) = let
      val (f1, f2) = orderedFF(exp1, exp2, ord)
    in
      emit(I.FARITH{fa=arithOp, r1=f1, r2=f2, t=fd});
      emit(I.FSTORE{fst=I.FSTDS, b=C.stackptrR, d=M.float64TmpOffset, r=fd});
      fd
    end
      
  in
    case exp 
     of T.LOADD(ea) => 
        (case eaAction ea
	  of INDXea(r1, r2) => emit(I.FLOADX{flx=I.FLDDX, b=r1, x=r2, t=fd})
	   | DISPea(r, n) => 
	      if im5 n then
		emit(I.FLOAD{fl=I.FLDDS, b=r, d=n, t=fd})
	      else 
		emit(I.FLOADX{flx=I.FLDDX, b=r, x=loadImmed n, t=fd})
	(*esac*);
	fd)
      | T.FADDD(exp1, exp2)	 => orderedFarith(exp1, exp2, T.LR, I.FADD)
      | T.FSUBD(exp1, exp2, ord) => orderedFarith(exp1, exp2, ord, I.FSUB)
      | T.FMULD(exp1, exp2)	 => orderedFarith(exp1, exp2, T.LR, I.FMPY)
      | T.FDIVD(exp1, exp2, ord) => orderedFarith(exp1, exp2, ord, I.FDIV)
      | T.FABSD exp => (emit(I.FUNARY{fu=I.FABS, f=fregAction exp, t=fd}); fd)
      | T.FNEGD exp => (emit(I.FARITH{fa=I.FSUB, r1=0, r2=fregAction exp, t=fd}); fd)
      | T.CVTI2D exp => 
	 (emit(I.STORE{st=I.STW, b=C.stackptrR, d=M.cvti2dOffset, 
		     r=regAction exp});
	  emit(I.FLOAD{fl=I.FLDWS, b=C.stackptrR, d=M.cvti2dOffset, t=fd});
	  emit(I.FUNARY{fu=I.FCNVXF, f=fd, t=fd});
	  fd)
      | T.SEQ(e1, e2) => (stmAction e1; fregAction e2)
      | _ => (T.prTree(T.CODE exp); error "fregAction: missing rule")
  end

  fun mltreeComp mltree = let
    fun mltc(T.MARK)             = F.mark()
      | mltc(T.LABEL lab)        = F.defineLabel lab
      | mltc(T.JMPTABLE arg)     = F.jmpTable arg
      | mltc(T.REALCONST arg)    = F.mlRealConst arg
      | mltc(T.STRINGCONST arg)  = F.mlStringConst arg
      | mltc T.BEGINCLUSTER      = F.beginCluster()
      | mltc T.ENDCLUSTER        = F.endCluster[C.maxReg(),C.maxFreg()]
      | mltc(T.ESCAPEBLOCK regs) = F.exitBlock regs
      | mltc(T.CODE mlrisc)      = (stmAction mlrisc)
  in
    if Word.andb (itow(!Control.CG.misc4), 0w2048) = 0w0 then ()
    else T.prTree mltree;
    mltc mltree
  end 
end