;;; -*- Package: SPARC -*-
;;;
;;; **********************************************************************
;;; This code was written as part of the Spice Lisp project at
;;; Carnegie-Mellon University, and has been placed in the public domain.
;;; If you want to use this code or any part of Spice Lisp, please contact
;;; Scott Fahlman (FAHLMAN@CMUC). 
;;; **********************************************************************
;;;
;;; $Header: bit-bash.lisp,v 1.6 92/03/11 21:41:04 wlott Exp $
;;;
;;; Stuff to implement bit bashing.
;;;
;;; Written by William Lott.
;;;

(in-package "SPARC")


;;;; Blitting macros.  Used only at assemble time.

#+assembler
(eval-when (compile eval)

;;; The main dispatch.  Assumes that the following TNs are bound:
;;;   dst, dst-offset  --  where to put the stuff.
;;;   src, src-offset  --  where to get the stuff.
;;;   dst-bit-offset, src-bit-offset
;;;   length  -- the number of bits to move.
;;;   temp1, temp2 -- random descriptor temp
;;;   ntemp1, ntemp2, ntemp3  --  random non-descriptor temps.

(defmacro do-copy ()
  '(let ((wide-copy (gen-label))
	 (copy-right (gen-label))
	 (copy-left (gen-label)))
     (compute-bit/word-offsets src src-offset src-bit-offset)
     (compute-bit/word-offsets dst dst-offset dst-bit-offset)
     
     (inst add temp1 dst-bit-offset length)
     (inst cmp temp1 (fixnum 32))
     (inst b :ge wide-copy)
     (inst cmp src dst)

     (narrow-copy)

     (emit-label wide-copy)
     (inst b :ltu copy-right)
     (inst nop)
     (inst b :gtu copy-left)
     (inst cmp src-bit-offset dst-bit-offset)
     (inst b :lt copy-right)
     (inst nop)
     (inst b :eq done)
     (emit-label copy-left)
     (wide-copy-left)
     (emit-label copy-right)
     (wide-copy-right)))

(defmacro compute-bit/word-offsets (ptr offset bit)
  `(progn
     (inst and ,bit ,offset (fixnum 31))
     (inst sra ntemp1 ,offset 7)
     (inst sll ntemp1 2)
     (inst add ,ptr ntemp1)))

     
(defmacro narrow-copy ()
  '(let ((aligned (gen-label))
	 (only-one-src-word (gen-label))
	 (merge-and-write (gen-label)))
     (inst cmp src-bit-offset dst-bit-offset)
     (inst b :eq aligned)
     (inst ld ntemp1 src)

     ;; It's not aligned, so get the source bits and align them.
     (inst sra ntemp2 src-bit-offset 2)
     (inst add temp1 src-bit-offset length)
     (inst subcc temp1 (fixnum 32))
     (inst b :le only-one-src-word)
     (inst sll ntemp1 ntemp2)

     (inst ld ntemp3 src 4)
     (inst sub ntemp2 zero-tn ntemp2)
     (inst srl ntemp3 ntemp2)
     (inst or ntemp1 ntemp3)

     (emit-label only-one-src-word)
     (inst li ntemp3 (make-fixup "_bit_bash_high_masks" :foreign))
     (inst add ntemp3 length)
     (inst ld ntemp3 ntemp3)
     (inst sra ntemp2 dst-bit-offset 2)
     (inst and ntemp1 ntemp3)
     (inst srl ntemp1 ntemp2)
     (inst b merge-and-write)
     (inst srl ntemp3 ntemp2)

     (emit-label aligned)
     (inst li ntemp3 (make-fixup "_bit_bash_high_masks" :foreign))
     (inst add ntemp3 length)
     (inst ld ntemp3 ntemp3)
     (inst sra ntemp2 dst-bit-offset 2)
     (inst srl ntemp3 ntemp2)
     (inst and ntemp1 ntemp3)

     (emit-label merge-and-write)
     ;; ntemp1 has the bits we need to deposit, and ntemp3 has the mask.
     ;; Both are aligned with dst.
     (inst ld ntemp2 dst)
     (inst not ntemp3)
     (inst and ntemp2 ntemp3)
     (inst or ntemp1 ntemp2)
     (inst b done)
     (inst st ntemp1 dst)))


(defmacro wide-copy-left ()
  '(let ((aligned (gen-label)))
     ;; Check to see if they are aligned.
     (inst cmp src-bit-offset dst-bit-offset)
     (inst b :eq aligned)

     ;; Calc src-shift
     (inst sub src-shift dst-offset src-offset)
     (inst and src-shift (fixnum 31))

     (macrolet
	 ((middle (&rest before-last-inst)
	    `(progn
	       (inst ld ntemp1 src)
	       (inst ld ntemp2 src 4)
	       (inst add src 4)
	       (inst sra ntemp3 src-shift 2)
	       (inst srl ntemp2 ntemp2 ntemp3)
	       (inst sub ntemp3 zero-tn ntemp3)
	       (inst sll ntemp1 ntemp1 ntemp3)
	       ,@before-last-inst
	       (inst or ntemp1 ntemp2)))
	  (get-src (where)
	     (ecase where
	       (:left
		'(let ((dont-load (gen-label))
		       (done-load (gen-label)))
		   (inst cmp src-bit-offset dst-bit-offset)
		   (inst b :ltu dont-load)
		   (middle (inst b done-load))
		   (emit-label dont-load)
		   (inst sra ntemp3 src-shift 2)
		   (inst srl ntemp1 ntemp3)
		   (emit-label done-load)))
	       (:middle '(middle))
	       (:right
		'(let ((dont-load (gen-label))
		       (done-load (gen-label)))
		   (inst cmp src-shift final-bits)
		   (inst b :geu dont-load)
		   (middle (inst b done-load))
		   (emit-label dont-load)
		   (inst sra ntemp3 src-shift 2)
		   (inst neg ntemp3)
		   (inst sll ntemp1 ntemp3)
		   (emit-label done-load))))))
       (wide-copy-left-aux)
       (inst b done)
       (inst nop))

     (macrolet
	 ((get-src (where)
	    (declare (ignore where))
	    '(progn
	       (inst ld ntemp1 src)
	       (inst add src 4))))
       (emit-label aligned)
       (wide-copy-left-aux))))

(defmacro wide-copy-left-aux ()
  '(let ((left-aligned (gen-label))
	 (loop (gen-label))
	 (check-right (gen-label))
	 (final-bits temp1)
	 (interior temp2))
     (inst cmp dst-bit-offset)
     (inst b :eq left-aligned)
     (inst nop)
     
     (get-src :left)
     (inst li ntemp3 (make-fixup "_bit_bash_high_masks" :foreign))
     (inst add ntemp3 dst-bit-offset)
     (inst ld ntemp3 ntemp3)
     (inst ld ntemp2 dst)
     (inst add dst 4)
     (inst and ntemp2 ntemp3)
     (inst not ntemp3)
     (inst and ntemp1 ntemp3)
     (inst or ntemp2 ntemp1)
     (inst st ntemp2 dst -4)

     (emit-label left-aligned)

     (inst add final-bits length dst-bit-offset)
     (inst and final-bits (fixnum 31))
     (inst sub ntemp1 length final-bits)
     (inst srl ntemp1 7)
     (inst cmp ntemp1)
     (inst b :eq check-right)
     (inst sll interior ntemp1 2)

     (emit-label loop)
     (get-src :middle)
     (inst st ntemp1 dst)
     (check-for-interrupts)
     (inst subcc interior 4)
     (inst b :gt loop)
     (inst add dst 4)

     (emit-label check-right)
     (inst cmp final-bits)
     (inst b :eq done)
     (inst nop)

     (get-src :right)
     (inst li ntemp3 (make-fixup "_bit_bash_high_masks" :foreign))
     (inst add ntemp3 final-bits)
     (inst ld ntemp3 ntemp3)
     (inst ld ntemp2 dst)
     (inst and ntemp1 ntemp3)
     (inst not ntemp3)
     (inst and ntemp2 ntemp3)
     (inst or ntemp1 ntemp2)
     (inst b done)
     (inst st ntemp1 dst)))
     
(defmacro wide-copy-right ()
  ;; SRC is smaller than DSR, so we are copying to the right.  That means we
  ;; must start at the end and work towards the start, otherwise we will
  ;; overwrite stuff we need later.
  ;; 
  '(let ((aligned (gen-label))
	 (final-bits temp1))
     ;; Compute final-bits, the number of bits in the final dst word.
     (inst add ntemp3 dst-bit-offset length)
     (inst and final-bits ntemp3 (fixnum 31))

     ;; Increase src and dst so they point to the end of the buffers instead
     ;; of the beginning.
     ;; 
     (inst srl ntemp3 7)
     (inst sll ntemp3 2)
     (inst add dst ntemp3)

     (inst add ntemp3 src-bit-offset length)
     (inst sub ntemp3 final-bits)
     (inst srl ntemp3 7)
     (inst sll ntemp3 2)

     ;; Check to see if they are aligned.
     (inst sub src-shift dst-bit-offset src-bit-offset)
     (inst andcc src-shift (fixnum 31))
     (inst b :eq aligned)
     (inst add src ntemp3)

     (macrolet
	 ((merge (&rest before-last-inst)
	    `(progn
	       (inst sra ntemp3 src-shift 2)
	       (inst srl ntemp2 ntemp2 ntemp3)
	       (inst sub ntemp3 zero-tn ntemp3)
	       (inst sll ntemp1 ntemp1 ntemp3)
	       ,@before-last-inst
	       (inst or ntemp1 ntemp2)))
	  (get-src (where)
	    (ecase where
	      (:left
	       '(let ((dont-load (gen-label))
		      (done-load (gen-label)))
		  (inst cmp src-bit-offset dst-bit-offset)
		  (inst b :ltu dont-load)
		  (inst ld ntemp2 src 4)
		  (inst ld ntemp1 src)
		  (merge (inst b done-load))
		  (emit-label dont-load)
		  (inst sra ntemp3 src-shift 2)
		  (inst srl ntemp1 ntemp2 ntemp3)
		  (emit-label done-load)))
	      (:middle
	       '(progn
		  (inst ld ntemp1 src)
		  (inst ld ntemp2 src 4)
		  (merge)
		  (inst sub src 4)))
	      (:right
	       '(let ((dont-load (gen-label))
		      (done-load (gen-label)))
		  (inst cmp src-shift final-bits)
		  (inst b :geu dont-load)
		  (inst ld ntemp1 src)
		  (inst ld ntemp2 src 4)
		  (merge (inst b done-load))
		  (emit-label dont-load)
		  (inst sra ntemp3 src-shift 2)
		  (inst neg ntemp3)
		  (inst sll ntemp1 ntemp3)
		  (emit-label done-load))))))
       (wide-copy-right-aux)
       (inst b done)
       (inst nop))

     (macrolet
	 ((get-src (where)
	    `(progn
	       (inst ld ntemp1 src)
	       ,@(unless (eq where :right)
		   '((inst sub src 4))))))
       (emit-label aligned)
       (wide-copy-right-aux))))

(defmacro wide-copy-right-aux ()
  '(let ((right-aligned (gen-label))
	 (loop (gen-label))
	 (check-left (gen-label))
	 (interior temp2))

     ;; Check to see if the destination is word aligned on the right.
     (inst cmp final-bits)
     (inst b :eq right-aligned)
     (inst nop)
     
     ;; It isn't aligned on the right.
     (get-src :right)
     (inst li ntemp3 (make-fixup "_bit_bash_high_masks" :foreign))
     (inst add ntemp3 final-bits)
     (inst ld ntemp3 ntemp3)
     (inst ld ntemp2 dst)
     (inst and ntemp1 ntemp3)
     (inst andn ntemp2 ntemp3)
     (inst or ntemp2 ntemp1)
     (inst st ntemp2 dst)

     (emit-label right-aligned)
     (inst sub dst 4)
     (inst sub src 4)

     (inst sub ntemp1 length final-bits)
     (inst srl ntemp1 7)
     (inst cmp ntemp1)
     (inst b :eq check-left)
     (inst sll interior ntemp1 2)

     (emit-label loop)
     (get-src :middle)
     (inst st ntemp1 dst)
     (check-for-interrupts)
     (inst subcc interior 4)
     (inst b :gt loop)
     (inst sub dst 4)

     (emit-label check-left)
     (inst cmp dst-bit-offset)
     (inst b :eq done)
     (inst nop)

     (get-src :left)
     (inst li ntemp3 (make-fixup "_bit_bash_high_masks" :foreign))
     (inst add ntemp3 dst-bit-offset)
     (inst ld ntemp3 ntemp3)
     (inst ld ntemp2 dst)
     (inst andn ntemp1 ntemp3)
     (inst and ntemp2 ntemp3)
     (inst or ntemp1 ntemp2)
     (inst st ntemp1 dst)))
     
(defmacro check-for-interrupts ()
  nil)

) ; eval-when (compile eval)



;;;; The actual routines.

(define-assembly-routine (copy-to-system-area
			  (:policy :fast-safe)
			  (:translate copy-to-system-area)
			  (:arg-types * tagged-num
				      system-area-pointer tagged-num
				      tagged-num))
			 ((:arg src-arg descriptor-reg a0-offset)
			  (:arg src-offset any-reg a1-offset)
			  (:arg dst sap-reg nl0-offset)
			  (:arg dst-offset any-reg a2-offset)
			  (:arg length any-reg a3-offset)
			  (:res res descriptor-reg null-offset)
			  
			  (:temp src interior-reg lip-offset)
			  (:temp temp1 descriptor-reg a4-offset)
			  (:temp temp2 descriptor-reg a5-offset)
			  (:temp src-shift descriptor-reg cname-offset)
			  (:temp src-bit-offset descriptor-reg lexenv-offset)
			  (:temp dst-bit-offset descriptor-reg l0-offset)
			  (:temp ntemp1 non-descriptor-reg nl1-offset)
			  (:temp ntemp2 non-descriptor-reg nl2-offset)
			  (:temp ntemp3 non-descriptor-reg nl3-offset)
			  (:temp retaddr non-descriptor-reg nargs-offset))

  (declare (ignore res))

  ;; Save the return address.
  (inst sub retaddr src code-tn)

  (inst sub src src-arg other-pointer-type)
  (inst and ntemp1 dst 3)
  (inst xor dst ntemp1)
  (inst sll ntemp1 5)
  (inst add dst-offset ntemp1)
  (do-copy)
  done

  ;; Restore the return address.
  (inst add src retaddr code-tn))

(define-assembly-routine (copy-from-system-area
			  (:policy :fast-safe)
			  (:translate copy-from-system-area)
			  (:arg-types system-area-pointer tagged-num
				      * tagged-num tagged-num))
			 ((:arg src sap-reg nl0-offset)
			  (:arg src-offset any-reg a0-offset)
			  (:arg dst-arg descriptor-reg a1-offset)
			  (:arg dst-offset any-reg a2-offset)
			  (:arg length any-reg a3-offset)
			  (:res res descriptor-reg null-offset)
			  
			  (:temp dst interior-reg lip-offset)
			  (:temp temp1 descriptor-reg a4-offset)
			  (:temp temp2 descriptor-reg a5-offset)
			  (:temp src-shift descriptor-reg cname-offset)
			  (:temp src-bit-offset descriptor-reg lexenv-offset)
			  (:temp dst-bit-offset descriptor-reg l0-offset)
			  (:temp ntemp1 non-descriptor-reg nl1-offset)
			  (:temp ntemp2 non-descriptor-reg nl2-offset)
			  (:temp ntemp3 non-descriptor-reg nl3-offset)
			  (:temp retaddr non-descriptor-reg nargs-offset))
  (progn res) ; don't complain that it's unused.

  ;; Save the return address.
  (inst sub retaddr dst code-tn)

  (inst and ntemp1 src 3)
  (inst xor src ntemp1)
  (inst sll ntemp1 5)
  (inst add src-offset ntemp1)
  (inst sub dst dst-arg other-pointer-type)
  (do-copy)
  done

  ;; Restore the return address.
  (inst add dst retaddr code-tn))

(define-assembly-routine (system-area-copy
			  (:policy :fast-safe)
			  (:translate system-area-copy)
			  (:arg-types system-area-pointer tagged-num
				      system-area-pointer tagged-num
				      tagged-num))
			 ((:arg src sap-reg nl1-offset)
			  (:arg src-offset any-reg a0-offset)
			  (:arg dst sap-reg nl0-offset)
			  (:arg dst-offset any-reg a1-offset)
			  (:arg length any-reg a2-offset)
			  (:res res descriptor-reg null-offset)
			  
			  (:temp temp1 descriptor-reg a4-offset)
			  (:temp temp2 descriptor-reg a5-offset)
			  (:temp src-shift descriptor-reg cname-offset)
			  (:temp src-bit-offset descriptor-reg lexenv-offset)
			  (:temp dst-bit-offset descriptor-reg l0-offset)
			  (:temp ntemp1 non-descriptor-reg nl2-offset)
			  (:temp ntemp2 non-descriptor-reg nl3-offset)
			  (:temp ntemp3 non-descriptor-reg nl4-offset))
  (progn res) ; don't complain that it's unused.

  (inst and ntemp1 src 3)
  (inst xor src ntemp1)
  (inst sll ntemp1 5)
  (inst add src-offset ntemp1)
  (inst and ntemp1 dst 3)
  (inst xor dst ntemp1)
  (inst sll ntemp1 5)
  (inst add dst-offset ntemp1)
  (do-copy)
  done)

(define-assembly-routine (bit-bash-copy
			  (:policy :fast-safe)
			  (:translate bit-bash-copy)
			  (:arg-types * tagged-num * tagged-num tagged-num))
			 ((:arg src-arg descriptor-reg a0-offset)
			  (:arg src-offset any-reg a1-offset)
			  (:arg dst-arg descriptor-reg a2-offset)
			  (:arg dst-offset any-reg a3-offset)
			  (:arg length any-reg a4-offset)
			  (:res res descriptor-reg null-offset)
			  
			  (:temp src non-descriptor-reg nl0-offset)
			  (:temp dst non-descriptor-reg nl1-offset)
			  (:temp temp1 descriptor-reg a5-offset)
			  (:temp temp2 descriptor-reg l0-offset)
			  (:temp src-shift descriptor-reg cname-offset)
			  (:temp src-bit-offset descriptor-reg lexenv-offset)
			  (:temp dst-bit-offset descriptor-reg l1-offset)
			  (:temp ntemp1 non-descriptor-reg nl2-offset)
			  (:temp ntemp2 non-descriptor-reg nl3-offset)
			  (:temp ntemp3 non-descriptor-reg nl4-offset))
  (progn res) ; don't complain that it's unused.

  (let ((done (gen-label)))
    (pseudo-atomic ()
      (inst sub src src-arg other-pointer-type)
      (inst sub dst dst-arg other-pointer-type)
      (macrolet
	  ((check-for-interrupts ()
	     '(let ((label (gen-label)))
		(inst andcc zero-tn alloc-tn 1)
		(inst b :eq label)
		(inst nop)
		(inst sub src src-arg)
		(inst sub dst dst-arg)
		(inst unimp pending-interrupt-trap)
		(inst add alloc-tn 4) ; Turn pseudo-atomic back on.
		(inst add src src-arg)
		(inst add dst dst-arg)
		(emit-label label))))
	(do-copy))
      (emit-label done))))
