(* alpha32Props.sml
 *
 * COPYRIGHT (c) 1996 Bell Laboratories.
 *)

structure Alpha32Props =
  struct
    structure I = Alpha32Instr
    structure C = Alpha32Cells
    structure S = SortedList

    (* error msg: report an internal failure through ErrorMsg.impossible,
     * with msg prefixed by "alpha32Props." so the origin is identifiable.
     *)
    fun error msg =
      let val tagged = "alpha32Props." ^ msg
      in ErrorMsg.impossible tagged
      end

    (* safe f s: a wrapped version of f.  Any exception raised while
     * evaluating f x is caught and reported via error, with s naming
     * the function that failed.
     *)
    fun safe f s x =
      (f x) handle _ => error ("uncaught exception in " ^ s)

    (* Coarse classification of an instruction.  instrKind in this
     * structure only ever produces IK_JUMP and IK_INSTR.
     *)
    datatype kind = IK_JUMP | IK_NOP | IK_INSTR
    (* One possible successor of a control transfer, as returned by
     * branchTargets: a known label, the next instruction in sequence,
     * or a transfer whose destination is not one of the known labels.
     *)
    datatype target = LABELLED of Label.label | FALLTHROUGH | ESCAPES

    (* instrKind instr: IK_JUMP for every integer and floating-point
     * branch, for JMPL and for the BRANCH pseudo-instruction;
     * IK_INSTR for everything else.
     *)
    fun instrKind instr =
      (case instr of
         (* unconditional and integer conditional branches *)
         I.BR _     => IK_JUMP
       | I.BEQ _    => IK_JUMP
       | I.BGE _    => IK_JUMP
       | I.BGT _    => IK_JUMP
       | I.BLE _    => IK_JUMP
       | I.BLT _    => IK_JUMP
       | I.BNE _    => IK_JUMP
       | I.BLBS _   => IK_JUMP
       | I.BLBC _   => IK_JUMP
         (* floating-point conditional branches *)
       | I.FBEQ _   => IK_JUMP
       | I.FBGE _   => IK_JUMP
       | I.FBGT _   => IK_JUMP
       | I.FBLE _   => IK_JUMP
       | I.FBLT _   => IK_JUMP
       | I.FBNE _   => IK_JUMP
         (* indirect jump and the span-dependent branch *)
       | I.JMPL _   => IK_JUMP
       | I.BRANCH _ => IK_JUMP
       | _          => IK_INSTR)

    (* branchTargets instr: the list of possible successors of a
     * control-transfer instruction.
     *   - BR and BRANCH go only to their label (BRANCH's second label
     *     is ignored here);
     *   - conditional branches go to their label or fall through;
     *   - JMPL with an empty label list escapes, otherwise it may go
     *     to any of the listed labels.
     * Any other instruction is reported through error.
     *)
    fun branchTargets instr =
      let
        (* successors of a two-way conditional branch on lab *)
        fun conditional lab = [LABELLED lab, FALLTHROUGH]
      in
        case instr of
          I.BR(_, lab)     => [LABELLED lab]
        | I.BEQ(_, lab)    => conditional lab
        | I.BGE(_, lab)    => conditional lab
        | I.BGT(_, lab)    => conditional lab
        | I.BLE(_, lab)    => conditional lab
        | I.BLT(_, lab)    => conditional lab
        | I.BNE(_, lab)    => conditional lab
        | I.BLBS(_, lab)   => conditional lab
        | I.BLBC(_, lab)   => conditional lab
        | I.FBEQ(_, lab)   => conditional lab
        | I.FBGE(_, lab)   => conditional lab
        | I.FBGT(_, lab)   => conditional lab
        | I.FBLE(_, lab)   => conditional lab
        | I.FBLT(_, lab)   => conditional lab
        | I.FBNE(_, lab)   => conditional lab
        | I.JMPL(_, [])    => [ESCAPES]
        | I.JMPL(_, labs)  => map LABELLED labs
        | I.BRANCH(lab, _) => [LABELLED lab]
        | _                => error "branchTargets"
      end

    (* nop (): a fresh no-op instruction, encoded as a BIS whose two
     * sources and destination are all C.zeroR.
     *)
    fun nop () = I.BIS (C.zeroR, I.REGop C.zeroR, C.zeroR)

    (** Note that the allocation pointer optimization is incompatible
     ** with the optimization in creating records.
     ** Consider (ALPHA notation) :
     ** 	st 	allocptr,72,i1		([],[allocptr,i1])
     **		add	allocptr,60,i2		([MEM,i2],[allocptr])
     **		ld      i2,12,l4		([l4],[i2,MEM])
     ** 
     ** The ld and st may get swapped as there is no dependency 
     ** between them.
     **)

    (* Resource usage: integer registers.
     *
     * defUseR instr returns (defs, uses): the integer registers the
     * instruction writes and the integer registers it reads.
     * Instructions without a case below report ([],[]).  Any exception
     * raised while computing the result is reported as "defUseR".
     *)
    fun defUseR instr =
      let
        (* (dst, (base, _)): writes dst, reads base *)
        fun load (dst, (base, _)) = ([dst], [base])
        (* (src, (base, _)): writes nothing, reads base and src *)
        fun store (src, (base, _)) = ([], [base, src])
        (* (dst, base, _) address computation: writes dst, reads base *)
        fun address (dst, base, _) = ([dst], [base])
        (* three-operand form; a register second operand is also read *)
        fun arith (src, I.REGop src2, dst) = ([dst], [src, src2])
          | arith (src, _, dst)            = ([dst], [src])
        (* floating-point memory access: only the base register counts here *)
        fun fpMem (_, (base, _)) = ([], [base])
        (* conditional branch: reads the tested register *)
        fun condBranch (reg, _) = ([], [reg])
        (* trapping instructions additionally read limitptr, allocptr, exnptr *)
        fun trapping (defs, uses) =
          (defs, C.limitptrR :: C.allocptrR :: C.exnptrR :: uses)
      in
        case instr of
          (* address arithmetic and integer memory access *)
          I.LDA arg   => address arg
        | I.LDAH arg  => address arg
        | I.LDL arg   => load arg
        | I.LDQ arg   => load arg
        | I.LDQ_U arg => load arg
        | I.STL arg   => store arg
        | I.STQ arg   => store arg
        | I.STQ_U arg => store arg

          (* control transfer *)
        | I.BR (reg, _)   => ([reg], [])
        | I.JMPL (arg, _) => load arg
        | I.JSR ((rd, (rs, _)), def, use) => (rd :: #1 def, rs :: #1 use)
        | I.BEQ arg  => condBranch arg
        | I.BGE arg  => condBranch arg
        | I.BGT arg  => condBranch arg
        | I.BLE arg  => condBranch arg
        | I.BLT arg  => condBranch arg
        | I.BNE arg  => condBranch arg
        | I.BLBS arg => condBranch arg
        | I.BLBC arg => condBranch arg

          (* integer arithmetic and comparison *)
        | I.ZAP arg    => arith arg
        | I.ADDL arg   => arith arg
        | I.ADDLV arg  => trapping (arith arg)
        | I.ADDQ arg   => arith arg
        | I.SUBL arg   => arith arg
        | I.SUBLV arg  => trapping (arith arg)
        | I.SUBQ arg   => arith arg
        | I.MULL arg   => arith arg
        | I.MULLV arg  => trapping (arith arg)
        | I.CMPULE arg => arith arg
        | I.CMPULT arg => arith arg
        | I.CMPEQ arg  => arith arg
        | I.CMPLE arg  => arith arg
        | I.CMPLT arg  => arith arg
        | I.SGNXL (s, d) => arith (s, I.REGop 31, d)

          (* logical and shift *)
        | I.AND arg => arith arg
        | I.BIS arg => arith arg
        | I.XOR arg => arith arg
        | I.SRA arg => arith arg
        | I.SRL arg => arith arg
        | I.SLL arg => arith arg

          (* byte manipulation *)
        | I.INSBL arg => arith arg
        | I.EXTBL arg => arith arg
        | I.EXTQH arg => arith arg
        | I.MSKBL arg => arith arg
        | I.MSKLH arg => arith arg

          (* floating-point memory access *)
        | I.LDT arg => fpMem arg
        | I.STT arg => fpMem arg

        | I.LADDR (base, _, rd) => ([rd], [base])

          (* floating-point operations that may trap, and the trap barrier *)
        | I.CVTTQ _ => trapping ([], [])
        | I.ADDT _  => trapping ([], [])
        | I.SUBT _  => trapping ([], [])
        | I.MULT _  => trapping ([], [])
        | I.DIVT _  => trapping ([], [])
        | I.TRAPB   => trapping ([], [])

        | _ => ([], [])
      end
      handle _ => error "defUseR"

    (* Resource usage: floating-point registers.
     *
     * defUseF instr returns (defs, uses) over floating-point registers.
     * Instructions without a case below report ([],[]).  Any exception
     * raised while computing the result is reported as "defUseF".
     *)
    fun defUseF instr =
      let
        (* plain three-register form: writes dst, reads both sources *)
        fun binary (src1, src2, dst) = ([dst], [src1, src2])
        (* arithmetic form: the destination is listed among the uses too *)
        fun arithmetic (src1, src2, dst) = ([dst], [src1, src2, dst])
        (* floating-point conditional branch: reads the tested register *)
        fun condBranch (freg, _) = ([], [freg])
      in
        case instr of
          I.DEFFREG freg => ([freg], [])

        | I.FBEQ arg => condBranch arg
        | I.FBGE arg => condBranch arg
        | I.FBGT arg => condBranch arg
        | I.FBLE arg => condBranch arg
        | I.FBLT arg => condBranch arg
        | I.FBNE arg => condBranch arg

        | I.LDT (freg, _) => ([freg], [])
        | I.STT (freg, _) => ([], [freg])

        | I.CPYS arg   => binary arg
        | I.CPYSN arg  => binary arg
        | I.CVTQT arg  => binary arg
        | I.CVTTQ arg  => binary arg
        | I.CMPTEQ arg => binary arg
        | I.CMPTLT arg => binary arg
        | I.CMPTLE arg => binary arg

        | I.ADDT arg => arithmetic arg
        | I.SUBT arg => arithmetic arg
        | I.MULT arg => arithmetic arg
        | I.DIVT arg => arithmetic arg

        | I.JSR (_, def, use) => (#2 def, #2 use)
        | _ => ([], [])
      end
      handle _ => error "defUseF"

    (* Resource usage: cells other than registers.
     *
     * defUseM instr returns (defs, uses) over three pseudo-cells,
     * numbered MEM = 0, STACK = 1 and TRAP = 2.  Instructions without
     * a case below report ([],[]).  Any exception raised while
     * computing the result is reported as "defUseM".
     *)
    fun defUseM instr =
      let
        val MEM   = 0
        val STACK = 1
        val TRAP  = 2

        (* true when bit 0w256 of Control.CG.misc4 is clear *)
        fun misc4BitClear () =
          Word.andb (Word.fromInt (!Control.CG.misc4), 0w256) = 0w0

        (* a load reads STACK when based on the stack pointer, else MEM *)
        fun loadOp (_, (base, _)) =
          ([], [if base = C.stackptrR then STACK else MEM])

        (* a store writes STACK when based on the stack pointer; a store
         * based on the allocation pointer writes nothing when the misc4
         * bit is set; every other store writes MEM *)
        fun storeOp (_, (base, _)) =
          if base = C.stackptrR then ([STACK], [])
          else if base = C.allocptrR andalso not (misc4BitClear ()) then ([], [])
          else ([MEM], [])

        (* with the misc4 bit set, an add from the allocation pointer
         * is treated as writing MEM; otherwise it touches nothing *)
        fun addOp (src, _, _) =
          if not (misc4BitClear ()) andalso src = C.allocptrR
            then ([MEM], [])
            else ([], [])

        (* Trapping instructions use memory to ensure that exactly the
         * appropriate side effects happen before a trap. *)
        val trapping = ([TRAP], [MEM])
      in
        case instr of
          I.ADDL arg  => addOp arg

        | I.LDL arg   => loadOp arg
        | I.LDQ arg   => loadOp arg
        | I.LDQ_U arg => loadOp arg
        | I.LDT arg   => loadOp arg

        | I.STL arg   => storeOp arg
        | I.STQ arg   => storeOp arg
        | I.STQ_U arg => storeOp arg
        | I.STT arg   => storeOp arg

        | I.ADDLV _ => trapping
        | I.SUBLV _ => trapping
        | I.MULLV _ => trapping
        | I.ADDT _  => trapping
        | I.SUBT _  => trapping
        | I.MULT _  => trapping
        | I.DIVT _  => trapping
        | I.CVTTQ _ => trapping

        | I.TRAPB => ([], [MEM, TRAP])

        | _ => ([], [])
      end
      handle _ => error "defUseM"

    (* defUse instr: combined resource usage of instr, as
     *   ((regDefs, fregDefs, miscDefs), (regUses, fregUses, miscUses))
     * obtained from defUseR, defUseF and defUseM in that order.
     * Any exception is reported as "defUse".
     *)
    fun defUse instr =
      (let
         val (regDefs,  regUses)  = defUseR instr
         val (fregDefs, fregUses) = defUseF instr
         val (miscDefs, miscUses) = defUseM instr
       in
         ((regDefs, fregDefs, miscDefs), (regUses, fregUses, miscUses))
       end)
      handle _ => error "defUse"

    (** register allocation functions **)

    (* Spill-slot management.  A single counter tracks the next free
     * offset from the stack pointer; it starts at 120 and may never be
     * advanced beyond 4096.
     *)
    local
      val initialSpillOffset = 120
      val spillOffset = ref initialSpillOffset

      (* advance the counter to n, refusing values above 4096 *)
      fun newOffset n =
        if n <= 4096
          then spillOffset := n
          else error "newOffset - spill area is too small"
    in
      (* reset the counter to its initial value *)
      fun spillInit () = spillOffset := initialSpillOffset

      (* spill1 n: reserve 4 bytes for integer register n and return
       * (store sequence, reload sequence) for that slot *)
      fun spill1 n =
        let
          val slot = !spillOffset
          val addr = (C.stackptrR, slot)
        in
          newOffset (slot + 4);
          ([I.STL (n, addr)], [I.LDL (n, addr)])
        end

      (* spill2 n: round the counter up to a multiple of 8, reserve
       * 8 bytes for floating-point register n and return
       * (store sequence, reload sequence) for that slot *)
      fun spill2 n =
        let
          val toWord  = Word.fromInt
          val rounded = Word.andb (toWord (!spillOffset) + 0w7, toWord ~8)
          val slot    = Word.toInt rounded
          val addr    = (C.stackptrR, slot)
        in
          newOffset (slot + 8);
          ([I.STT (n, addr)], [I.LDT (n, addr)])
        end

      (* there is no third register class to spill *)
      fun spill3 _ = error "spill3"
    end

    (* Register pickers, one per register class, built by applying the
     * GetReg functor to the register count and available-register
     * value exported by C.
     *)
    structure GR = GetReg(val nRegs = C.nrRegs
			  val available = C.availRegs)

    structure FR = GetReg(val nRegs = C.nrFregs
			  val available= C.availFregs)
    (* getreg1 picks from GR (built from C.nrRegs), getreg2 from FR
     * (built from C.nrFregs); getreg3 always reports an error, as
     * spill3 does.
     *)
    val getreg1   = GR.getreg 
    val getreg2   = FR.getreg
    fun getreg3 _ =  error "getreg3"

    (* moveInstr instr: true when instr is recognised as a register
     * move.  The recognised shapes are
     *   - BIS with register 31 as first source or as register operand,
     *   - ADDL / SUBL with immediate 0 or register 31 as second operand,
     *   - LDA / LDAH with immediate operand 0,
     *   - CPYS whose two source registers are the same.
     * Exceptions raised by the test are reported via safe.
     *)
    val moveInstr =
      let
        fun isMove instr =
          (case instr of
             I.BIS (_, I.REGop 31, _)  => true
           | I.BIS (31, _, _)          => true
           | I.ADDL (_, I.IMMop 0, _)  => true
           | I.ADDL (_, I.REGop 31, _) => true
           | I.SUBL (_, I.IMMop 0, _)  => true
           | I.SUBL (_, I.REGop 31, _) => true
           | I.LDA (_, _, I.IMMop 0)   => true
           | I.LDAH (_, _, I.IMMop 0)  => true
           | I.CPYS (src1, src2, _)    => src1 = src2
           | _                         => false)
      in
        safe isMove "moveInstr"
      end

    (** basic block scheduling **)

    (* Architecture flag, fixed at false for this target.
     * NOTE(review): presumably "the architecture has no branch delay
     * slots" -- confirm against the code that reads this flag.
     *)
    val branchDelayedArch = false
     (* 
      * These numbers are true of the DECchip 21064-AA implementation. A real 
      * scheduler should deal with 1) multiple issue/cycle 2) producer/consumer 
      * latency table  
      *)
    (* latency instr: the latency figure assigned to instr, by
     * instruction class.  Anything not listed has latency 1.
     *)
    fun latency instr =
      (case instr of
         (* Load class *)
         I.LDL _    => 5
       | I.LDQ_U _  => 5
       | I.LDQ _    => 5
       | I.LDT _    => 5
         (* Shift/Logical *)
       | I.SRA _    => 2
       | I.SRL _    => 2
       | I.SLL _    => 2
       | I.INSBL _  => 2
       | I.EXTBL _  => 2
       | I.EXTQH _  => 2
       | I.MSKBL _  => 2
       | I.MSKLH _  => 2
         (* Integer compare *)
       | I.CMPULE _ => 3
       | I.CMPULT _ => 3
       | I.CMPEQ _  => 3
       | I.CMPLE _  => 3
       | I.CMPLT _  => 3
         (* Integer multiply *)
       | I.MULL _   => 21
       | I.MULLV _  => 21
         (* Floating point *)
       | I.CPYS _   => 6
       | I.CPYSN _  => 6
       | I.CVTQT _  => 6
       | I.CVTTQ _  => 6
       | I.CMPTEQ _ => 6
       | I.CMPTLT _ => 6
       | I.CMPTLE _ => 6
       | I.ADDT _   => 6
       | I.SUBT _   => 6
       | I.MULT _   => 6
         (* Floating point divide *)
       | I.DIVT _   => 63
         (* everything else *)
       | _          => 1)

    (* needsNop: returns 0 for every argument. *)
    val needsNop = fn _ => 0

    (* isSdi instr: true for the span-dependent instructions handled by
     * sdiSize/expand that are listed here (LADDR and DEFFREG), false
     * otherwise.
     *)
    fun isSdi instr =
      (case instr of
         I.LADDR _   => true
       | I.DEFFREG _ => true
       | _           => false)

    (* minSize instr: smallest size of instr in bytes -- 0 for DEFFREG
     * (expand turns a size-0 DEFFREG into no instructions), 4 for
     * everything else.
     *)
    fun minSize instr =
      (case instr of
         I.DEFFREG _ => 0
       | _           => 4)

    (* maxSize instr: largest size of instr in bytes -- 0 for DEFFREG,
     * 8 for LADDR (which expand may turn into two instructions), 4 for
     * everything else.
     *)
    fun maxSize instr =
      (case instr of
         I.DEFFREG _ => 0
       | I.LADDR _   => 8
       | _           => 4)


	(*** MUST BE CHANGED TO MAKE USE OF loc ***)
    (* sdiSize (instr, labMap, loc): size in bytes of a span-dependent
     * instruction, where labMap gives the value of a label.  The loc
     * argument is currently not consulted.
     *   DEFFREG : 0
     *   LADDR   : 4 when the label expression fits in a signed 16-bit
     *             field, otherwise 8
     *   BRANCH  : 4 when the distance between the two labels lies in
     *             [~4194304, 4194303]; otherwise 8 when the target
     *             label value fits in a signed 16-bit field, else 12
     * Any other instruction is an error.  The exported sdiSize is the
     * version wrapped by safe.
     *)
    fun sdiSize (instr, labMap, loc) =
      let
        (* inclusive range test *)
        fun within (lo, hi) v = lo <= v andalso v <= hi
        val fits16 = within (~32768, 32767)
      in
        case instr of
          I.DEFFREG _ => 0
        | I.LADDR (_, labexp, _) =>
            let
              val value =
                (case labexp of
                   I.POSLAB (lab, k) => k + labMap lab
                 | I.NEGLAB (lab, k) => k - labMap lab)
            in
              if fits16 value then 4 else 8
            end
        | I.BRANCH (lab, newlab) =>
            let
              val target   = labMap lab
              val distance = target - labMap newlab
            in
              if within (~4194304, 4194303) distance then 4
              else if fits16 target then 8
              else 12
            end
        | _ => error "sdiSize"
      end
    val sdiSize = safe sdiSize "sdiSize"


    (* All sdis must use a dedicated physical register as a temporary,
     * or suffer the consequences of graph coloring.
     * expand uses this register to hold the target address when a
     * BRANCH has to be expanded into an LDA[/LDAH] + JMPL sequence.
     *)
    val asmTmpReg = C.asmTmpR

    (* expand (instr, size, lookup): the concrete instruction sequence
     * for a span-dependent instruction, given the size in bytes chosen
     * for it.  The lookup argument is not consulted.
     *   DEFFREG, size 0  : no instructions
     *   LADDR,   size 4  : a single LDA
     *   LADDR,   size 8  : LDA followed by LDAH
     *   BRANCH,  size 4  : a direct BR
     *   BRANCH,  size 8  : LDA into asmTmpReg, then JMPL through it
     *   BRANCH,  size 12 : LDA + LDAH into asmTmpReg, then JMPL
     * Any other combination is an error.  The exported expand is the
     * version wrapped by safe.
     *)
    fun expand (instr, size, lookup : int -> int) =
      (case (instr, size) of
         (I.DEFFREG _, 0) => []

       | (I.LADDR (base, labexp, rd), 4) =>
           [I.LDA (rd, base, I.LOLABop labexp)]
       | (I.LADDR (base, labexp, rd), 8) =>
           [I.LDA (rd, base, I.LOLABop labexp),
            I.LDAH (rd, rd, I.HILABop labexp)]
       | (I.LADDR _, _) => error "expand:LADDR"

       | (I.BRANCH (lab, _), 4) =>
           [I.BR (C.zeroR, lab)]
       | (I.BRANCH (lab, _), 8) =>
           [I.LDA (asmTmpReg, C.zeroR, I.LOLABop (I.POSLAB (lab, 0))),
            I.JMPL ((C.zeroR, (asmTmpReg, 0)), [lab])]
       | (I.BRANCH (lab, _), 12) =>
           [I.LDA (asmTmpReg, C.zeroR, I.LOLABop (I.POSLAB (lab, 0))),
            I.LDAH (asmTmpReg, asmTmpReg, I.HILABop (I.POSLAB (lab, 0))),
            I.JMPL ((C.zeroR, (asmTmpReg, 0)), [lab])]
       | (I.BRANCH _, _) => error "expand: BRANCH"

       | _ => error "expand")
    val expand = safe expand "expand"

    (* mayNeedNops: returns 0 for every argument. *)
    val mayNeedNops = fn _ => 0

    (* resources: r0-30+f0-30+mem+stack+trap
     *
     * Total number of resource identifiers: one per integer register,
     * one per floating-point register, plus the three non-register
     * cells numbered 0..2 in defUseM (mem, stack, trap).
     * NOTE(review): the count uses C.nrRegs and C.nrFregs in full,
     * while bdefUse drops register 31 of each class -- so the ids for
     * those two registers appear to be reserved but never produced.
     *)
    val numResources = C.nrRegs + C.nrFregs + 3
    (* Mapping resources to distinct integers.
     *
     * Resource ids are laid out as: integer register r -> r,
     * floating-point register f -> f + C.nrRegs, and non-register
     * cell m -> m + C.nrRegs + C.nrFregs.
     *)
    local
      (* Prepend the ids of the integer registers in the first argument
       * onto acc, skipping register 31.  Registers are consed on one at
       * a time, so they end up in reverse order. *)
      fun regRIds ([], acc) = acc
	| regRIds (31 :: regs, acc) = regRIds(regs, acc)
	| regRIds (r::regs, acc) = regRIds(regs, r::acc)

      (* Same for floating-point registers, offset by C.nrRegs;
       * register 31 is skipped here too. *)
      fun fregRIds ([], acc) = acc
	| fregRIds (31 :: regs, acc) = fregRIds(regs, acc)
	| fregRIds (r::regs, acc) = fregRIds(regs, (r + C.nrRegs) ::acc)

      (* Ids of the non-register cells reported by defUseM. *)
      fun miscRIds mregs = map (fn m => m + C.nrRegs + C.nrFregs) mregs
    in
      (* bdefUse [rMap, fMap] insn: the (defs, uses) of insn as lists
       * of resource ids.  Register numbers reported by defUse are
       * first translated through the arrays rMap (integer registers)
       * and fMap (floating-point registers).
       *
       * LADDR and BRANCH are rejected with an error.
       *
       * The first argument must be a list of exactly two maps; any
       * other list is reported through error instead of surfacing as
       * an anonymous Match exception.
       *)
      fun bdefUse [rMap,fMap] = let
	  val mapR = map (fn r => Array.sub(rMap,r))
	  val mapF = map (fn f => Array.sub(fMap,f))
        in
	  fn I.LADDR _  => error "bdefUse LADDR"
	   | I.BRANCH _ => error "bdefUse BRANCH"  
	   | insn => let val ((rd,fd,ed),(ru,fu,eu)) = defUse insn
	     in
		 (regRIds(mapR rd,fregRIds(mapF fd,miscRIds ed)),
		  regRIds(mapR ru,fregRIds(mapF fu,miscRIds eu)))
	     end
        end
	| bdefUse _ = error "bdefUse"
    end (*local*)
  end
