//------------------------------------------------------------------------------------
//                                                                      
//                   I N T E L   P R O P R I E T A R Y                   
//                                                                       
//      COPYRIGHT (c)  1998-99 BY  INTEL  CORPORATION.  ALL RIGHTS          
//      RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//      BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//      RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//      LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//      MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//      THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                       
//                         INTEL  CORPORATION                            
//                                                                      
//                      2200 MISSION COLLEGE BLVD                        
//                                                                       
//                SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                       
//------------------------------------------------------------------------------------
// refdes_macros.uc
// common macros used by SA1200 reference designs
//
// Version = 1.0.NoBldNum
//
// system: SA1200
// subsystem: microcode
// usage: example
// author: dfh 10/2/98
// revisions:
//
//
// ------------------------------SA1200 microcode----------------------------


//------------------------------RECEIVE SCHEDULER macros----------------------------


// Fbus_SetupRdyProg
//
//		program fbi to retrieve ready bits from macs (simplified)
//		and autopush them to receive and transmit schedulers
//		Note: this is done in coordination w/ tx_scheduler.uc use of autopush
//
//	parameters:
// 
// 	RDYBUS_TEMPLATE_PROG1
//	tx rdybus polling, autopush in <31:16>
//
//	tx macs, tx autopush constant to RDYBUS_TEMPLATE_PROG1 31:16
//			example:	0x9cd7	= tx0-1, txauto
//						0x1c37	= tx0-6, txauto
//
//	rx rdybus polling, autopush in <15:0>
//
//	rx macs, rx autopush constant to RDYBUS_TEMPLATE_PROG1 15:0
//			example:	0x0cdb  = rec0-1, recauto
//						0x0c3b	= rec0-6, recauto
//	sync_count	rate ready bits are sampled
//			example:	42 = fast
//						60 = slow
//
#macro Fbus_SetupRdyProg
// Writes the ready-bus CSRs in this order: RDYBUS_TEMPLATE_PROG1, PROG2, PROG3,
// RDYBUS_TEMPLATE_CTL, RDYBUS_SYNCH_COUNT_DEFAULT.
// PROG1, CTL and SYNCH_COUNT take their value from the matching DEF_* symbol when
// it is #defined, otherwise from the literal default below. PROG2 and PROG3 are
// always written with 0x1f1f1f1f.
// Uses write transfer registers $xfer4 and $xfer5; the context is swapped out on
// every csr write that carries ctx_swap.
.local temp

#ifdef DEF_RDYBUS_TEMPLATE_PROG1
    	immed32[temp, DEF_RDYBUS_TEMPLATE_PROG1]
#else
	immed32[temp, 0x9cd70cdb]
#endif
	alu[$xfer4, --, B, temp]
	csr[write, $xfer4, RDYBUS_TEMPLATE_PROG1], ctx_swap
	
	// build 0x1f1f1f1f one 16-bit half at a time
	immed_w0[temp, 0x1f1f]
	immed_w1[temp, 0x1f1f] 
	alu[$xfer5, --, B, temp]
	csr[write, $xfer5, RDYBUS_TEMPLATE_PROG2]				;nop
	csr[write, $xfer5, RDYBUS_TEMPLATE_PROG3], ctx_swap		;nop


#ifdef DEF_RDYBUS_TEMPLATE_CTL
	immed32[temp, DEF_RDYBUS_TEMPLATE_CTL]
#else
	immed32[temp, 0xb00]				; default: multi-mac, bi-directional, master
#endif
	alu[$xfer4, --, B, temp]
	// no ctx_swap here: this write is issued back to back with the next one
	csr[write, $xfer4, RDYBUS_TEMPLATE_CTL]

#ifdef DEF_RDYBUS_SYNCH_COUNT_DEFAULT
	immed32[temp, DEF_RDYBUS_SYNCH_COUNT_DEFAULT]
#else
	immed32[temp, 60]				; default: 60
#endif
	alu[$xfer5, --, B, temp]
	csr[write, $xfer5, RDYBUS_SYNCH_COUNT_DEFAULT], ctx_swap
.endlocal
#endm


// RxSched_SendReceiveRequest									; normal case 8 insns
//
//		assemble and send the receive request to fbi
//
#macro RxSched_SendReceiveRequest[portnum, threadnum]
// Builds a receive request from portnum and threadnum and writes it to rcv_req.
// Besides its arguments the macro reads and writes fbi_req_outstanding and this_rr,
// and uses write transfer register $xfer0; none of these are declared here, so the
// including file must provide them.
// threadnum is inserted twice: at bit 6 (thread field) and at bit 18 (element #1).

// present format of RCV_REQ (3/13/98 transactor) is
// +-+------+-----+----+-----+-----+-----+----+-----+------+-------+------+
// | |fetch9| msg |stat| elem| elem|seq# |1or2|misc | sig  | thread| port |
// | |      | pkt |    | #2  | #1  |     |    |     | sched|       |      |
// | |   29 |28:27| 26 |25:22|21:18|17:16| 15 |14:12|  11  | 10:6  | 5:0  |
// +-+------+-----+----+-----+-----+-----+----+-----+------+-------+------+
// this will cause FBI to set RCV_CNTL

; write receive request register
; this code does one element xfers
 
	alu[--, --, B, fbi_req_outstanding]						; test for previous receive request outstanding 
	br=0[rec_req_avail_check#], defer[1]					; if request is not outstanding skip the ctx_arb
	immed[fbi_req_outstanding, 1]							; defer slot: set request outstanding on both paths
	ctx_arb[fbi]											; wait for the fbi signal of the previous request

rec_req_avail_check#:
	// The two instructions after br_inp_state are its defer[2] slots. They are
	// re-executed on every pass of the retry loop; both are idempotent.
	br_inp_state[rec_req_avail, _got_rr_avail#], defer[2]
#ifdef FETCH9
	alu_shf[this_rr, portnum, OR, 1, <<29]					; insert fetch9 and port num into new receive request
#else
	alu[this_rr, --, B, portnum]
#endif
	alu_shf[this_rr, this_rr, OR, threadnum, <<6]			; insert current thread
	
	// need a ctx_arb here to allow the other thread to run
	// without this ctx_arb have problems processing min-sized packets with small inter-packet gap
	ctx_arb[voluntary]
 	br[rec_req_avail_check#] 

_got_rr_avail#:

write_rr#:
	alu_shf[$xfer0, this_rr, OR, threadnum, <<18]			; insert element num (element num = current thread)
    csr[write, $xfer0, rcv_req], sig_done					; send req to FBI
#endm


#macro RxSched_GetRecRdy[rec_rdy_true]
// Computes the filtered receive-ready bits into rec_rdy_true:
//   rec_rdy_true = @rec_rdy AND NOT port_mask_current AND NOT port_mask_prev
// If none of the remaining bits is also set in port_mask_fairness, the fairness
// mask is reset to 0xffff.
// Reads port_mask_current, port_mask_prev and port_mask_fairness, which are not
// parameters and must be defined by the including file.
get_rec_rdy#:
	alu[rec_rdy_true, @rec_rdy, AND~, port_mask_current]	; block ports just assigned last iteration
	alu[rec_rdy_true, rec_rdy_true, AND~, port_mask_prev]	; block ports recorded in the previous mask
	alu[--, rec_rdy_true, AND, port_mask_fairness]			; any ready port still allowed by the fairness mask?
	br>0[have_ports_rdy#], guess_branch						; only update fairness mask if no other ports ready
	immed[port_mask_fairness, 0xffff]						; none left: re-arm the fairness mask
have_ports_rdy#:
#endm


// RxSched_SendExtendedReceiveRequest									; normal case 8 cycles
//		send receive request with additional input arg of other_fields
//
#macro RxSched_SendExtendedReceiveRequest[portnum, threadnum, other_fields]
// Same flow as RxSched_SendReceiveRequest, but the request is built in the caller's
// other_fields register, which is modified in place: portnum is ORed into the low
// bits and threadnum is ORed in at bit 6. threadnum is ORed in again at bit 18
// (element #1) when the value is copied to $xfer0.
// Also reads and writes fbi_req_outstanding, which must be defined by the including file.

// present format of RCV_REQ (3/13/98 transactor) is
// +-+------+-----+----+-----+-----+-----+----+-----+------+-------+------+
// | |fetch9| msg |stat| elem| elem|seq# |1or2|misc | sig  | thread| port |
// | |      | pkt |    | #2  | #1  |     |    |     | sched|       |      |
// | |   29 |28:27| 26 |25:22|21:18|17:16| 15 |14:12|  11  | 10:6  | 5:0  |
// +-+------+-----+----+-----+-----+-----+----+-----+------+-------+------+
// this will cause FBI to set RCV_CNTL

; write receive request register
; this code does one element xfers
	alu[--, --, B, fbi_req_outstanding]							; test for previous receive request outstanding 
	br=0[rec_req_avail_check#], defer[1]						; if request is not outstanding skip the ctx_arb
	immed[fbi_req_outstanding, 1]								; defer slot: set request outstanding on both paths
	ctx_arb[fbi]												; wait for the fbi signal of the previous request
rec_req_avail_check#:
	// the next two instructions are the defer[2] slots; they repeat on every retry
	// and are idempotent because they only OR the same bits in again
	br_inp_state[rec_req_avail, _got_rr_avail#], defer[2]
	alu_shf[other_fields, other_fields, OR, portnum]			; insert port num into new receive request
	alu_shf[other_fields, other_fields, OR, threadnum, <<6]		; insert current thread

	// request register not available yet: yield, then test again
	ctx_arb[voluntary]
 	br[rec_req_avail_check#] ;, defer[1]

_got_rr_avail#:
write_rr#:
    alu_shf[$xfer0, other_fields, OR, threadnum, <<18]		; insert element num into new receive request
	csr[write, $xfer0, rcv_req], sig_done						; send req to FBI
#endm



// RxSched_RequestSlowPort										; ~27 insns
//
//		schedule the receive request for a 100M port
//
#macro RxSched_RequestSlowPort[current_threadx2]
// Picks one ready port and issues a receive request for it on behalf of the
// thread identified by current_threadx2 (thread number times two).
// If no port is found the macro yields once (ctx_arb[voluntary]) and exits
// without sending a request.
// Works on registers that are not parameters and must exist in the including
// file: rec_rdy_true, port_mask_fairness, port_mask_current, thread_done_copy,
// @thread_done_capture, current_inv_threadx2, current_port, current_thread,
// current_inv_port.
	RxSched_GetRecRdy[rec_rdy_true]									; get filtered rec ready bits

// find a ready port
find_port#:
	find_bset_with_mask[port_mask_fairness, rec_rdy_true], clr_results	; 5 insns, find first port ready to start
	// the next three instructions do useful work while the find result is pending
	alu[thread_done_copy, thread_done_copy, OR, @thread_done_capture]	; merge newly captured thread-done bits
	immed[@thread_done_capture, 0]									; clear captured bits
	alu_shf[current_inv_threadx2, current_threadx2, B-A, 1, <<5]	; 32 - current_threadx2, for left indirect shift
	load_bset_result1[current_port]									; otherwise get a new ready port

	// NOTE(review): the branch below presumably tests the condition codes left by
	// load_bset_result1 (valid bit 8 set => result > 0) - confirm against the
	// instruction reference. The two following instructions are its defer slots.
	br>0[proceed#], defer[2]										; if a port was found, proceed to send rec req
	alu_shf[current_port, current_port, AND~, 1, <<8]				; clear the valid bit
	alu_shf[current_thread, --, B, current_threadx2, >>1]			; current threadx2 / 2

	ctx_arb[voluntary]
	br[req_done#]
proceed#:
req_send#:
// update thread_done_copy	
	// each "alu[--, x, B, 0]" only exists to supply x as the shift amount of
	// the <<indirect instruction that follows it; keep the pairs adjacent
	alu[--, current_inv_threadx2, B, 0]								; clear the wait encode 2 bits of 
	alu[thread_done_copy, thread_done_copy, AND~, 0x3, <<indirect]	; the thread_done_copy
// update port masks
	alu_shf[current_inv_port, current_port, B-A, 1, <<5]			; 32 - current_port for left indirect shift
	alu[--, current_inv_port, B, 0]									; setup left indirect shift
	alu_shf[port_mask_current, port_mask_current, OR, 1, <<indirect]; block the port next iteration
	alu[--, current_inv_port, B, 0]									; setup left indirect shift
	alu_shf[port_mask_fairness, port_mask_fairness, AND~, 1, <<indirect]; remove the port from the fairness mask

	RxSched_SendReceiveRequest[current_port, current_thread]		; 8 insns. send the receive request
req_done#:
#endm


// RxSched_UpdatePortMask										; update 6 insns. no update 3 insns
//
//		if fbi ready count has incremented, move current mask to prev mask
//		new ready true = fbi rec rdy and not prev mask
//
#macro RxSched_UpdatePortMask[port_mask_prev, port_mask_current]
// When @rec_rdy_count differs from the saved copy, ages the port masks:
//   port_mask_prev2 <- port_mask_prev1 <- port_mask_current
//   port_mask_prev   = port_mask_prev1 OR port_mask_prev2
//   port_mask_current = 0
// and removes the port_mask_prev bits from rec_rdy_true.
// rec_rdycnt_copy, port_mask_prev1, port_mask_prev2 and rec_rdy_true are not
// parameters and must be defined by the including file.
 	alu[--, @rec_rdy_count, -, rec_rdycnt_copy]					; compare new count with old count
	br=0[port_mask_done#], defer[1], guess_branch				; if no change don't update port_masks
	alu[rec_rdycnt_copy, --, B, @rec_rdy_count]					; defer slot: save a copy of fbi rec_rdy_count on both paths
	alu[port_mask_prev2, --, B, port_mask_prev1]				; age the older mask
	alu[port_mask_prev1, --, B, port_mask_current]				; ports assigned since the last update
	alu[port_mask_prev, port_mask_prev1, OR, port_mask_prev2]	; use to mask off rdy bits this iteration
	immed[port_mask_current, 0]									; if change, start with fresh port mask
	alu[rec_rdy_true, rec_rdy_true, AND~, port_mask_prev]		; drop ready bits of recently assigned ports

port_mask_done#:
#endm


//--------------------------------RECEIVE macros-----------------------------


// Rx_Receive
//	read receive control, check for cancel or MAC receive failure
//		outputs:
//			xfer_rcv_ctl		read transfer reg gets rcv_ctl
//		next labels:
//			cancel_label		go here on cancel (port was not ready)
//			fail_discard_label	go here on rxfail (bad packet from the mac)
//
#macro Rx_Receive[xfer_rcv_ctl, cancel_label, fail_discard_label]
// Waits for the start_receive signal, reads RCV_CNTL into xfer_rcv_ctl and then
//   - branches to cancel_label       when the msg field (31:30) equals 3
//   - branches to fail_discard_label when the rxfail bit (19) is set
//   - otherwise falls through.
// Writes the register "exception", which is not a parameter.
// NOTE(review): both immed[exception, ...] instructions sit in defer[1] slots, so
// they run whether or not their branch is taken; on fall-through exception is
// left holding MAC_RXFAIL. Callers presumably only read it at the two labels - confirm.
	ctx_arb[start_receive]								; wait for fbi signal when receive control can be read
 
    csr[read, xfer_rcv_ctl, RCV_CNTL], ctx_swap			; read the rcv_ctrl register


// format of RCV_CTL is
// +-----+-----+-----+------+----+-----+-----+----+----+-----------+---+---+
// | msg |port |seq# |rxfail|err |elem2|elem1|1or2|seq | validbytes|EOP|SOP|
// |31:30|29:24|23:20|  19  | 18 |17:14|13:10| 9  | 8  |    7:2    | 1 | 0 |
// +-----+-----+-----+------+----+-----+-----+----+----+-----------+---+---+
//

// check for cancel, caused by port not being ready
//
    alu_shf [--, 0x3, -, xfer_rcv_ctl, >>30]			; 3 - msg: zero only when msg == 3
	br=0[cancel_label], defer[1]
	immed[exception, MAC_CANCEL]							; the port was not ready, there is no packet

// rxfail acts like an EOP
	alu[--, 1, AND, xfer_rcv_ctl, >>19]					; isolate the rxfail bit
	br>0[fail_discard_label], defer[1]
	immed[exception, MAC_RXFAIL]					; bad packet from the mac device
#endm


 
// Rx_SetBufAddrs
//	set sdram address and address offset to be used in storing packet in sdram and descriptor in sram
//  used at SOP
//
//	inputs:
//		xfer_descriptor		read xfer reg with descriptor from previous pop
//	outputs:
//		packet_buf_addr		sdram address for packet data
//		buf_handle			new relative buf address  for packet link info
//
#macro Rx_SetBufAddrs[packet_buf_addr, buf_handle, xfer_descriptor]
// Turns the descriptor address popped from the free list (xfer_descriptor) into
//   buf_handle      : ((xfer_descriptor << 16) - (SRAM_BUFF_DESCRIPTOR_BASE << 16))
//                     + (1 << 16), with element count 1 ORed into the low bits
//   packet_buf_addr : buffer_base + (buf_handle >> 10)
// If xfer_descriptor is zero the macro pops the free list again (swapping
// context) and retries until a non-zero descriptor is returned.
// buffer_base is chosen at assembly time by PACKET_FREELIST (0 or 1).
.local buffer_base descriptor_base
started#:
// test free buffer pool delivered by the pop 
//
    alu[--, --, B, xfer_descriptor]										; if zero, the free list is empty
    br=0[no_buf_available#], defer[1]									; discard packet, wait for buffers to free up

// calculate sram and sdram addresses
// NOTE(review): the first instruction emitted below is the defer[1] slot of the
// branch above. That is harmless here because the retry path recomputes
// everything from started#.
#if (PACKET_FREELIST == 0)
    immed32[buffer_base, SDRAM_PKT_BUFFER_BASE]
#endif
#if (PACKET_FREELIST == 1)
    immed32[buffer_base, SDRAM_PKT_BUFFER_HIGH_BASE]
#endif

	immed[descriptor_base, SRAM_BUFF_DESCRIPTOR_BASE, <<16]
	alu_shf[buf_handle, descriptor_base, B-A, xfer_descriptor, <<16]	; calc buffer offset relative to base (buffer size = 2 lw)
	alu[buf_handle, buf_handle, +, 1, <<16]								; add 1 to not use pointer word
	alu[buf_handle, 1, OR, buf_handle]									; for sop, insert packet element count 1
// 8 bytes sram corresponds to 128 sdram offsets(x16), but multiply x8 because sdram is quadword addressed
	br[done#], defer[1]
// note: xfer_descriptor is allocated in chunks of 4 words, therefore this represents a shift <<6 + <<2
// to get 256x8B or 2KB
    alu_shf [packet_buf_addr, buffer_base, +, buf_handle, >>10]			; defer slot: adjust offset for buffer size 256quadwords
no_buf_available#:
// retry until transmit frees a descriptor
// note: third argument here is ignored for sram pop
	sram[pop, xfer_descriptor, xfer_descriptor, 0, FREELIST_BTYPE_SRAM8_SDRAM2K], ctx_swap
	br[started#]
done#:
.endlocal
#endm

#macro Rx_SetPktBufAddr[packet_buf_addr, buf_handle, rec_state]
	// Computes the sdram address for the next mpacket of the packet described by
	// buf_handle and then increments the mpacket count held in buf_handle:
	//   packet_buf_addr = buffer_base + (buf_handle >> 10) + low 16 bits of (buf_handle << 3)
	//   buf_handle      = buf_handle + 1
	// buffer_base is selected as follows:
	//   ALT_BANKS and REC_STATE_SAVE : at run time from bit 29 of rec_state
	//                                  (0 = SDRAM_PKT_BUFFER_BASE, 1 = SDRAM_PKT_BUFFER_HIGH_BASE)
	//   ALT_BANKS only               : at assembly time from PACKET_FREELIST
	//   otherwise                    : SDRAM_PKT_BUFFER_BASE
	// buffer_base is not declared here and must be defined by the including file.
	// calculate sram and sdram addresses
	#ifdef ALT_BANKS
		#ifdef REC_STATE_SAVE
			// Load the low base first and override it only when bit 29 is set.
			// immed32 is an assembler pseudo-instruction that may expand to more
			// than one instruction, so it must not be placed in a branch defer
			// slot: only its first half would run before the branch takes effect.
  			immed32[buffer_base, SDRAM_PKT_BUFFER_BASE]
			alu[--, 1, AND, rec_state, >>29]
			br=0[set_addr#]
    		immed32[buffer_base, SDRAM_PKT_BUFFER_HIGH_BASE]
set_addr#:

		#else	// not REC_STATE_SAVE
			#if (PACKET_FREELIST == 0)
  				immed32[buffer_base, SDRAM_PKT_BUFFER_BASE]
			#endif
			#if (PACKET_FREELIST == 1)
 				immed32[buffer_base, SDRAM_PKT_BUFFER_HIGH_BASE]
			#endif
		#endif	// not REC_STATE_SAVE
	#else		// not ALT_BANKS
	    immed32[buffer_base, SDRAM_PKT_BUFFER_BASE]
	#endif 		// not ALT_BANKS

	alu_shf[packet_buf_addr, buffer_base, +, buf_handle, >>10]			; adjust offset <<3 for buffer size
    // tlaw -- <<3 = *8
	alu_shf[packet_buf_addr, packet_buf_addr, +16, buf_handle, <<3]		; add mpacket_count * 8 quadwords (64 bytes)
	alu[buf_handle, 1, +, buf_handle]									; increment mpacket count
#endm

// tlaw - Nov 13, 2000
#macro Rx_SetPktBufAddr_w_base[packet_buf_addr_base, packet_buf_addr, buf_handle, rec_state]
	// Same computation as Rx_SetPktBufAddr, but also returns the address of the
	// start of the packet buffer:
	//   packet_buf_addr_base = buffer_base + (buf_handle >> 10)
	//   packet_buf_addr      = packet_buf_addr_base + low 16 bits of (buf_handle << 3)
	//   buf_handle           = buf_handle + 1
	// buffer_base is selected as follows:
	//   ALT_BANKS and REC_STATE_SAVE : at run time from bit 29 of rec_state
	//                                  (0 = SDRAM_PKT_BUFFER_BASE, 1 = SDRAM_PKT_BUFFER_HIGH_BASE)
	//   ALT_BANKS only               : at assembly time from PACKET_FREELIST
	//   otherwise                    : SDRAM_PKT_BUFFER_BASE
	// buffer_base is not declared here and must be defined by the including file.
	// calculate sram and sdram addresses
	#ifdef ALT_BANKS
		#ifdef REC_STATE_SAVE
			// Load the low base first and override it only when bit 29 is set.
			// immed32 is an assembler pseudo-instruction that may expand to more
			// than one instruction, so it must not be placed in a branch defer
			// slot: only its first half would run before the branch takes effect.
  			immed32[buffer_base, SDRAM_PKT_BUFFER_BASE]
			alu[--, 1, AND, rec_state, >>29]
			br=0[set_addr#]
    		immed32[buffer_base, SDRAM_PKT_BUFFER_HIGH_BASE]
set_addr#:

		#else	// not REC_STATE_SAVE
			#if (PACKET_FREELIST == 0)
  				immed32[buffer_base, SDRAM_PKT_BUFFER_BASE]
			#endif
			#if (PACKET_FREELIST == 1)
 				immed32[buffer_base, SDRAM_PKT_BUFFER_HIGH_BASE]
			#endif
		#endif	// not REC_STATE_SAVE
	#else		// not ALT_BANKS
	    immed32[buffer_base, SDRAM_PKT_BUFFER_BASE]
	#endif 		// not ALT_BANKS

	alu_shf[packet_buf_addr_base, buffer_base, +, buf_handle, >>10]			; adjust offset <<3 for buffer size
    // tlaw -- <<3 = *8
	alu_shf[packet_buf_addr, packet_buf_addr_base, +16, buf_handle, <<3]		; add mpacket_count * 8 quadwords (64 bytes)
	alu[buf_handle, 1, +, buf_handle]									; increment mpacket count
#endm



// Rx_SaveFastState
//	save buf_handle, status	for another thread to pickup
//		inputs:
//			buf_handle	31:16	relative_buf_addr 
//						15:0	mpacket_count
//			status			
//
#macro Rx_SaveFastState[buf_handle, status, mseq]
// Packs the receive state into one longword and writes it to the fast-port
// mailbox slot that follows mseq (offset (mseq + 2) AND 0x1e).
// Layout of the stored word, as built below:
//   31     valid bit (always set)
//   28:24  mpacket count (low 5 bits of buf_handle)
//   18:8   relative buffer address (11 bits taken from buf_handle 31:16)
//   other bits are carried over from status unchanged
// status is modified in place. Uses write transfer register $xfer0 and swaps
// context on the sram write.
// Mailbox selection: SRAM_MPACKET_MAILBOX_1 if FAST_PORT1 is defined, otherwise
// SRAM_MPACKET_MAILBOX_2.
.local tempa tempc mseqp2
	#ifdef FAST_PORT1
		immed[tempa, SRAM_MPACKET_MAILBOX_1]
	#else ;FAST_PORT2
		immed[tempa, SRAM_MPACKET_MAILBOX_2]
	#endif
	alu[mseqp2, mseq, +, 2]
	alu[mseqp2, 0x1e, AND, mseqp2]					; wrap: even offsets 0-30
	alu[status, status, AND~, 0x1f, <<24]			; clear out old mpacket count
	alu[tempc, buf_handle, and, 0x1f]				; extract mpacket count
	alu[status, status, or, tempc, <<24]			; stick mpacket count into 28:24
	immed32[tempc, 0x7ff]
	alu[status, status, AND~, tempc, <<8]			; clear out old rel buf addr
	alu[tempc, tempc, AND, buf_handle, >>16]		; extract rel buf addr from 31:16, and into 10:0
	alu[status, status, or, tempc, <<8]				; stick rel buf addr in 18:8
	alu[$xfer0, status, OR, 1, <<31]				; set valid bit

	sram[write, $xfer0, tempa, mseqp2, 1], ctx_swap	; write longword containing state	
.endlocal
#endm




// Rx_RestoreFastState
//	restore buf_handle, status from another thread on fast source port
//		outputs:
//			buf_handle	31:16	relative_buf_addr 
//						15:0	mpacket_count
//			status			
//
#macro Rx_RestoreFastState[buf_handle, status]
// Fetches the state word written by Rx_SaveFastState and unpacks it:
//   buf_handle 4:0   <- stored bits 28:24 (mpacket count)
//   buf_handle 26:16 <- stored bits 18:8  (relative buffer address)
//   status           <- the stored word as read (valid bit 31 still set)
// The word is read with test_and_clear_bits on bit 31, so the slot is marked
// invalid as it is consumed. The macro loops, swapping context on every attempt,
// until it reads a word whose bit 31 was set.
// The mailbox slot offset is taken from $xfer7 (bits 17:14, kept as an even value);
// NOTE(review): $xfer7 is not a parameter - presumably it still holds the receive
// control word read by the caller; confirm.
// NOTE(review): if neither FAST_PORT1 nor FAST_PORT2 is defined, tempa is never
// loaded (Rx_SaveFastState falls back to mailbox 2 in that case).
.local tempa tempb tempc
	#ifdef FAST_PORT1
		immed[tempa, SRAM_MPACKET_MAILBOX_1]
	#endif
	#ifdef FAST_PORT2
		immed[tempa, SRAM_MPACKET_MAILBOX_2]
	#endif
	alu_shf[tempb, 0x1e, AND, $xfer7, >>13]			; get mpacket sequence
	alu[$xfer0, --, B, 1, <<31]						; bit to test

attempt_restore#:
	sram[bit_wr, $xfer0, tempa, tempb, test_and_clear_bits], ctx_swap	; retrieve vbit/buf_handle	
	alu[--, 1, AND, $xfer0, >>31]					; if bit 31 off, state is invalid
	br=0[attempt_restore#], defer[1]
	alu[buf_handle, 0x1f, AND, $xfer0, >>24]		; defer slot: move from 28:24 to 4:0

	immed32[tempa, 0x7ff00]
	alu[tempa, tempa, AND, $xfer0]					; extract rel_buf_addr from 18:8
	alu[buf_handle, buf_handle, or, tempa, <<8]		; shift rel_buf_addr up to 26:16
	alu[status, --, B, $xfer0]
.endlocal
#endm


// Rx_TestAndInvalidateFastState
//	perform test and set with invalidate
//
#macro Rx_TestAndInvalidateFastState[mseq]
// Test-and-clear bit 31 of the mailbox longword at offset mseq, retrying (with a
// context swap per attempt) until the value read back had bit 31 set.
// On exit the slot's valid bit is clear. Uses transfer register $xfer5.
// NOTE(review): if neither FAST_PORT1 nor FAST_PORT2 is defined, tempa is never loaded.
.local tempa
	#ifdef FAST_PORT1
		immed[tempa, SRAM_MPACKET_MAILBOX_1]
	#endif
	#ifdef FAST_PORT2
		immed[tempa, SRAM_MPACKET_MAILBOX_2]
	#endif
	alu[$xfer5, --, B, 1, <<31]							; bit to test and clear

attempt_restore#:
	sram[bit_wr, $xfer5, tempa, mseq, test_and_clear_bits], ctx_swap	; clear bit 31, read back the previous value
	alu[--, 1, AND, $xfer5, >>31]						; if bit 31 off, state is invalid
	br=0[attempt_restore#]								; slot was not valid yet: try again
.endlocal
#endm


// Rx_InvalidateFastState
//	invalidate, no test and set
//
#macro Rx_InvalidateFastState[mseq]
// Clear bit 31 (the valid bit) of the mailbox longword at offset mseq.
// Nothing is tested and nothing is retried. Uses write transfer register $xfer0
// and swaps context on the sram operation.
// Mailbox base: SRAM_MPACKET_MAILBOX_1 under FAST_PORT1, SRAM_MPACKET_MAILBOX_2
// under FAST_PORT2 (the latter wins if both are defined).
.local mbox_base
	alu[$xfer0, --, B, 1, <<31]										; mask selecting bit 31 only
	#ifdef FAST_PORT1
		immed[mbox_base, SRAM_MPACKET_MAILBOX_1]
	#endif
	#ifdef FAST_PORT2
		immed[mbox_base, SRAM_MPACKET_MAILBOX_2]
	#endif
	sram[bit_wr, $xfer0, mbox_base, mseq, clear_bits], ctx_swap		; clear the masked bit in the slot
.endlocal
#endm


// Rx_ValidateNextFastState
//
#macro Rx_ValidateNextFastState[mseq]
// Write a longword containing only the valid bit (bit 31) to the mailbox slot
// that follows mseq, i.e. offset (mseq + 2) AND 0x1e.
// Uses write transfer register $xfer1. The sram write is issued without
// ctx_swap, so the macro does not wait for it to complete.
// Mailbox base: SRAM_MPACKET_MAILBOX_1 if FAST_PORT1 is defined, otherwise
// SRAM_MPACKET_MAILBOX_2.
.local mbox_base next_slot
	alu[$xfer1, --, B, 1, <<31]						; data to write: valid bit only
	#ifdef FAST_PORT1
		immed[mbox_base, SRAM_MPACKET_MAILBOX_1]
	#else ;FAST_PORT2
		immed[mbox_base, SRAM_MPACKET_MAILBOX_2]
	#endif
	alu[next_slot, mseq, +, 2]						; advance to the following slot
	alu[next_slot, 0x1e, AND, next_slot]			; wrap: even offsets 0-30
	sram[write, $xfer1, mbox_base, next_slot, 1]	; mark the next slot valid
.endlocal
#endm


// state_save
// save state to a scratch location, set valid bit 31 on.
//
//		xfer_reg			write transfer reg to be used
//		state				state to be saved
//		const_scratch_base	scratch location
//		offset				offset from const_scratch_base				  
//
#macro state_save[xfer_reg, state, const_scratch_base, offset]
// Writes (state OR bit 31) as one longword to scratch at const_scratch_base + offset
// and swaps context until the write completes.
// tempa is used as the address register but is not declared here; the including
// file must define it, and its previous contents are overwritten.
	immed[tempa, const_scratch_base]
	alu[xfer_reg,  state, OR, 1, <<31]							; set valid bit 31
	scratch[write, xfer_reg, tempa, offset, 1], ctx_swap
#endm


// state_restore
// restore state from scratch, state is valid if bit 31 is on.
//
//		xfer_reg			write transfer reg for test and clear valid bit 31
//							read transfer reg for return data 
//		state				state to be restored
//		const_scratch_base	scratch location
//		offset				offset from const_scratch_base				  
//
#macro state_restore[xfer_reg, state, const_scratch_base, offset]
// Test-and-clear bit 31 of the scratch longword at const_scratch_base + offset,
// retrying (with a context swap per attempt) until the value read back had
// bit 31 set; then returns that value with bit 31 cleared in state.
// xfer_reg is written with the bit mask and read for the returned data.
// tempa is used as the address register but is not declared here; the including
// file must define it, and its previous contents are overwritten.
	immed[tempa, const_scratch_base]
	alu_shf[xfer_reg, --, B, 1, <<31]					// mask: bit 31
attempt_restore#:
	scratch[bit_wr, xfer_reg, tempa, offset, test_and_clear_bits], ctx_swap
	alu_shf[--, 1, AND, xfer_reg, >>31]					// was the valid bit set?
	br=0[attempt_restore#]								// no: state not saved yet, retry
	alu[state, xfer_reg, AND~, 1, <<31]				// clear the valid bit
#endm


// enqueue packets from FAST PORT in order
// read the fbi fast port sequence number until it matches our sequence number
//
#macro FastPort_SeqCheck[CSR_NAME, seq_num]
// Polls CSR_NAME until the value read equals seq_num.
// Every poll swaps the context out until the csr read completes, so other
// threads run while this one waits.
// $sequence_num is used as the read transfer register; it is not a parameter
// and must be defined by the including file.
sleep_wait_for_seq_change#:
	csr[read, $sequence_num, CSR_NAME], ctx_swap				; read the current seq number
	alu[--, seq_num, -, $sequence_num]							; is my number up?
	br!=0[sleep_wait_for_seq_change#]							; not yet: poll again
#endm



// define REC_PORT_BLOCK to cause receive threads to block the port
// if not defined, alternatively you can block ports in the receive scheduler by adding 17 instructions
// into the scheduler loop to save port to thread binding, set a bit in port_block mask when assigning,
// and to get port to thread binding, clear a bit port_block mask when receive thread done
//
// if receive threads maximum processing time is less than network time between packets
// port blocking for slow ports might be avoided
// 
//#define REC_PORT_BLOCK

// Rx_SaveSlowState
//
// bit 31 of buf_handle  is used as valid bit
//
#macro Rx_SaveSlowState[buf_handle, status, output_port]
// Packs the receive state into one longword and writes it to the per-port slot
// at REC_SLOW_PORT_STATE + (0x1e AND (status >> 23)), i.e. indexed by the input
// port taken from status 27:24, times two.
// Layout of the stored word, as built below:
//   31     valid bit (always set)
//   28:24  mpacket count (low 5 bits of buf_handle) - overwrites the input port
//   18:8   relative buffer address (11 bits taken from buf_handle 31:16)
//   7:0    low byte of output_port
//   other bits are carried over from status unchanged
// status is modified in place. Uses write transfer register $xfer0 and swaps
// context on the sram write.
.local temp6 temp7 temp8
	immed[temp6, REC_SLOW_PORT_STATE]
	alu[temp7, 0x1e, AND, status, >>23]				; input port * 2 (slot offset), taken before status is changed
	immed32[temp8, 0x7ff]
	alu[status, status, AND~, temp8, <<8]			; clear out old rel buf addr
	alu[temp8, temp8, AND, buf_handle, >>16]		; peel off rel buf addr
	alu[status, status, or, temp8, <<8]				; insert rel buf addr in bits 18:8
	alu[temp8, --, b, 0x1f]
	alu[status, status, AND~, temp8, <<24]			; clear bits 28:24 (input port field)
	alu[temp8, temp8, and, buf_handle]				; peel off mpacket count
	alu[status, status, or, temp8, <<24]			; insert mpacket count in 28:24 (over input port, not needed)
	ld_field[status, 0001, output_port]				; insert output port over byte cnt, eop, sop (which are no longer needed)

	alu[$xfer0, status, OR, 1, <<31]				; turn bit 31 on to validate state and unblock port
	sram[write, $xfer0, temp6, temp7, 1], ctx_swap	; write 1 longword containing state	
//	alu[$xfer0, buf_handle, AND~, 1, <<31]			; turn bit 31 off to validate state and unblock port
//	alu[$xfer1, --, B, status]
//	sram[write, $xfer0, temp6, temp7, 2], ctx_swap	; write 2 longwords containing state	
.endlocal
#endm


// tlaw - Nov 13, 2000
#macro Rx_SaveSlowState_w_pkt_label[buf_handle, status, pkt_label]
// Variant of Rx_SaveSlowState that stores a 2-bit packet label in bits 1:0 of
// the saved word instead of an output port in the low byte.
// Layout of the stored word, as built below:
//   31     valid bit (always set)
//   28:24  mpacket count (low 5 bits of buf_handle) - overwrites the input port
//   18:8   relative buffer address (11 bits taken from buf_handle 31:16)
//   1:0    pkt_label
//   other bits are carried over from status unchanged
// NOTE(review): pkt_label is ORed in without masking, so any bits set above
// bit 1 would leak into the saved word; callers presumably pass 0-2 only.
// status is modified in place. Uses write transfer register $xfer0 and swaps
// context on the sram write.
.local temp6 temp7 temp8
	immed[temp6, REC_SLOW_PORT_STATE]
	alu[temp7, 0x1e, AND, status, >>23]				; input port * 2 (slot offset), taken before status is changed
	immed32[temp8, 0x7ff]
	alu[status, status, AND~, temp8, <<8]			; clear out old rel buf addr
	alu[temp8, temp8, AND, buf_handle, >>16]		; peel off rel buf addr
	alu[status, status, or, temp8, <<8]				; insert rel buf addr in bits 18:8
	alu[temp8, --, b, 0x1f]
	alu[status, status, AND~, temp8, <<24]			; clear bits 28:24 (input port field)
	alu[temp8, temp8, and, buf_handle]				; peel off mpacket count
	alu[status, status, or, temp8, <<24]			; insert mpacket count in 28:24 (over input port, not needed)
//	ld_field[status, 0001, output_port]				; insert output port over byte cnt, eop, sop (which are no longer needed)
// tlaw - override output_port
    // 00 - normal
	// 01 - inbound
	// 10 - outbound
	alu[temp8, --, b, 0x3]
	alu[status, status, AND~, temp8]			; clear old bit 1:0
    alu[status, status, or, pkt_label]			; pkt_label

	alu[$xfer0, status, OR, 1, <<31]				; turn bit 31 on to validate state and unblock port
	sram[write, $xfer0, temp6, temp7, 1], ctx_swap	; write 1 longword containing state	
//	alu[$xfer0, buf_handle, AND~, 1, <<31]			; turn bit 31 off to validate state and unblock port
//	alu[$xfer1, --, B, status]
//	sram[write, $xfer0, temp6, temp7, 2], ctx_swap	; write 2 longwords containing state	
.endlocal
#endm

// Rx_EOPUnBlockSlowPort
//	unblock slow port
//	note: this is used between packets
//		to guarantee blocking between mpackets, Rx_SaveSlowState and Rx_RestoreSlowState are used
//
#macro Rx_EOPUnBlockSlowPort[status]
// Expands to code only when REC_PORT_BLOCK is defined.
// Writes a longword containing just bit 31 to the slow-port state slot of the
// input port found in status (offset = 0x1e AND (status >> 23)).
// Uses write transfer register $xfer0; the sram write is issued without
// ctx_swap, so the macro does not wait for it to complete.
#ifdef REC_PORT_BLOCK
.local state_base port_off
	alu[$xfer0, --, b, 1, <<31]						; data: bit 31 set unblocks the port
	alu[port_off, 0x1e, AND, status, >>23]			; input port * 2 = slot offset
	immed[state_base, REC_SLOW_PORT_STATE]
	sram[write, $xfer0, state_base, port_off, 1]	; write 1 longword
.endlocal
#endif
#endm


// Rx_RestoreSlowState
//
// bit 31 of buf_handle  is used as valid bit
//
#macro Rx_RestoreSlowState[output_port, buf_handle, status, receive_ctl]
// Fetches the state word written by Rx_SaveSlowState for the input port found
// in receive_ctl (slot offset = 0x1e AND (receive_ctl >> 23)) and unpacks it:
//   buf_handle 26:16 <- stored bits 18:8  (relative buffer address)
//   buf_handle 4:0   <- stored bits 28:24 (mpacket count)
//   output_port      <- stored low byte, upper bytes cleared
//   status           <- stored word with bits 28:24 replaced by the input port
//                       (port in 27:24, bit 28 zero)
// The word is read with test_and_clear_bits on bit 31, which marks the slot
// invalid (port blocked) as it is consumed. The macro loops, swapping context
// on every attempt, until it reads a word whose bit 31 was set.
// Uses transfer register $xfer0.
.local temp3 temp4 temp5
	immed[temp3, REC_SLOW_PORT_STATE]
	alu[temp4, 0x1e, AND, receive_ctl, >>23]			; input port * 2
	alu[$xfer0, --, B, 1, <<31]							; mask: bit 31
attempt_restore#:
	sram[bit_wr, $xfer0, temp3, temp4, test_and_clear_bits], ctx_swap	; retrieve 1st longword, mark the state invalid 
	alu[--, 1, AND, $xfer0, >>31]						; test bit 31
	br=0[attempt_restore#]								; if off, state is invalid and port is blocked, retry

	alu[status, --, B, $xfer0]
	immed32[temp5, 0x7ff]
	alu[buf_handle, temp5, AND, status, >>8]			; peel off rel buf addr
	alu[buf_handle, --, b, buf_handle, <<16] 			; shift buf_handle up to 26:16
	alu[temp5, --, b, 0x1f]
	alu[temp5, temp5, AND, status, >>24]				; peel off mpacket count
	alu[buf_handle, buf_handle, or, temp5]				; insert mpacket count in 4:0			
	ld_field_w_clr[output_port, 0001, status]			; set output port
	// The saved count occupies five bits (28:24), so all five must be cleared;
	// clearing only 27:24 would leave bit 28 set in status for counts >= 16.
	alu[status, status, AND~, 0x1f, <<24]				; clear the old mpacket count, bits 28:24
	alu[status, status, or, temp4, <<23]				; insert input port (temp4 set above = input port * 2)
.endlocal
#endm

// tlaw
#macro Rx_RestoreSlowState_w_pkt_label[pkt_label, buf_handle, status, receive_ctl]
// Fetches the state word written by Rx_SaveSlowState_w_pkt_label for the input
// port found in receive_ctl (slot offset = 0x1e AND (receive_ctl >> 23)) and
// unpacks it:
//   buf_handle 26:16 <- stored bits 18:8  (relative buffer address)
//   buf_handle 4:0   <- stored bits 28:24 (mpacket count)
//   pkt_label        <- stored bits 1:0
//   status           <- stored word with bits 28:24 replaced by the input port
//                       (port in 27:24, bit 28 zero)
// The word is read with test_and_clear_bits on bit 31, which marks the slot
// invalid (port blocked) as it is consumed. The macro loops, swapping context
// on every attempt, until it reads a word whose bit 31 was set.
// Uses transfer register $xfer0.
.local temp3 temp4 temp5
	immed[temp3, REC_SLOW_PORT_STATE]
	alu[temp4, 0x1e, AND, receive_ctl, >>23]			; input port * 2
	alu[$xfer0, --, B, 1, <<31]							; mask: bit 31
attempt_restore#:
	sram[bit_wr, $xfer0, temp3, temp4, test_and_clear_bits], ctx_swap	; retrieve 1st longword, mark the state invalid 
	alu[--, 1, AND, $xfer0, >>31]						; test bit 31
	br=0[attempt_restore#]								; if off, state is invalid and port is blocked, retry

	alu[status, --, B, $xfer0]
	immed32[temp5, 0x7ff]
	alu[buf_handle, temp5, AND, status, >>8]			; peel off rel buf addr
	alu[buf_handle, --, b, buf_handle, <<16] 			; shift buf_handle up to 26:16
	alu[temp5, --, b, 0x1f]
	alu[temp5, temp5, AND, status, >>24]				; peel off mpacket count
	alu[buf_handle, buf_handle, or, temp5]				; insert mpacket count in 4:0			
//	ld_field_w_clr[output_port, 0001, status]			; set output port
    alu[pkt_label, status, AND, 0x3]				; pkt_label bit 1:0

	// The saved count occupies five bits (28:24), so all five must be cleared;
	// clearing only 27:24 would leave bit 28 set in status for counts >= 16.
	alu[status, status, AND~, 0x1f, <<24]				; clear the old mpacket count, bits 28:24
	alu[status, status, or, temp4, <<23]				; insert input port (temp4 set above = input port * 2)
.endlocal
#endm

// Rx_SOPBlockSlowPort
//	take block slow port
//	note: this is used between packets
//		to guarantee blocking between mpackets, Rx_SaveSlowState and Rx_RestoreSlowState are used
//
#macro Rx_SOPBlockSlowPort[receive_ctl]
// Expands to code only when REC_PORT_BLOCK is defined.
// Test-and-clear bit 31 of the slow-port state slot of the input port found in
// receive_ctl (offset = 0x1e AND (receive_ctl >> 23)). Loops, swapping context
// on every attempt, until the value read back had bit 31 set, i.e. until this
// thread is the one that took the port. Uses transfer register $xfer0.
#ifdef REC_PORT_BLOCK
.local temp1 temp2
	immed[temp1, REC_SLOW_PORT_STATE]
	alu[temp2, 0x1e, AND, receive_ctl, >>23]		; input port * 2
	alu[$xfer0, --, B, 1, <<31]						; prepare to clear bit 31
attempt_restore#:
	sram[bit_wr, $xfer0, temp1, temp2, test_and_clear_bits], ctx_swap	; mark the state invalid, block the port
	alu[--, 1, AND, $xfer0, >>31]					; if bit 31 was off, state is invalid, port blocked
	br=0[attempt_restore#]							; someone else holds the port: retry

.endlocal
#endif
#endm


//----------------------------IP CHECKSUM macros-------------------------


// IP_VerifyChecksum_HwLE
//	ip header verify checksum, halfword aligned, little-endian
//
//		verify ip header checksum
//		20 byte ip header (10 halfwords)
//		registers starting on a halfword boundary
//		little-endian header arguments in 6 registers
//		22 uwords
//
//	parameters:
//		hw0		halfword 0 from bytes 2-3 of len_version (leftmost, little-endian)
//		hw12, hw34, hw56, hw78		two halfwords each, 1-8
//		hw9		halfword 9 in bytes 0-1 (rightmost, little-endian)
//
//
//	resulting condition code: 
//		non-zero = bad checksum , branch to errlabel
//	
//
#macro IP_VerifyChecksum_HwLE[hw0, hw12, hw34, hw56, hw78, hw9, errlabel]
// Sums the ten header halfwords with end-around carry and branches to errlabel
// when the folded 16-bit sum is not 0xffff. Falls through on a good checksum.
// The add_ea* instructions used for the byte-swapped adds are defined outside
// this file; their comments below describe the intended operation.
// The sequence depends on the carry flag passing from each add to the next,
// so the instruction order must not be changed.
.local temp accum

	add_ea32_eb[accum, hw0, hw12]				; accum = swap(hw0 right hw) + swap(hw12)
	add_ea_c[accum, hw34, accum]				; accum = swap(hw34) + accum +carry
	add_ea_c[accum, hw56, accum]				; accum = swap(hw56) + accum +carry
	add_ea_c[accum, hw78, accum]				; accum = swap(hw78) + accum +carry
	add_ea10_c[accum, hw9, accum]				; accum = swap(hw9) + accum +carry
    add_c[accum, accum, 0]						; add in previous carry
    ld_field_w_clr[temp, 1100, accum]			; get high 16 of the total
    alu_shf[accum, temp, +, accum, <<16]		; add low 16 bits to upper 16
	alu[temp, 1, +carry, temp, <<16]			; add last carry +1, temp B op=0
	alu[accum, accum, +, temp, <<16]			; add 1<<16 to 0xffff to get zero result
	br!=0[errlabel]								; bad checksum if not zero

.endlocal
#endm


// IP_VerifyChecksum_Hw
//  ip verify checksum. halfword aligned
//
//	if LITTLE_ENDIAN is #defined, do little-endian. otherwise do big-endian
//		
//	assumes no options, 20 byte header
//	if link layer was Ethernet, IP header aligns on a halfword boundary
//  therefore no options ip header consists  right 2 bytes of hw0,
//	followed by next 16 bytes in hw12, hw34, hw 56, hw 78
//  followed by left two bytes of hw9
//
//  branch to errlabel if bad checksum
//
#macro IP_VerifyChecksum_Hw[hw0, hw12, hw34, hw56, hw78, hw9, errlabel]
// With LITTLE_ENDIAN defined this simply expands IP_VerifyChecksum_HwLE.
// Otherwise (big-endian) it sums the low halfword of hw0, all of hw12..hw78 and
// the high halfword of hw9 with end-around carry, folds the total to 16 bits
// and branches to errlabel when the folded sum is not 0xffff.
// The sequence depends on the carry flag passing from each add to the next,
// so the instruction order must not be changed.
#ifdef LITTLE_ENDIAN
	IP_VerifyChecksum_HwLE[hw0, hw12, hw34, hw56, hw78, hw9, errlabel]
#else
.local temp accum
	// NOTE(review): +16 presumably adds only the low 16 bits of the B operand
	// (the right halfword of hw0) - confirm against the ALU reference
	alu[accum, hw12, +16, hw0]
	alu[accum, hw34, +carry, accum]
	alu[accum, hw56, +carry, accum]
	alu[accum, hw78, +carry, accum]
	alu[accum, accum, +carry, hw9, >>16]		; left halfword of hw9
	alu[accum, accum, +carry, 0]				; add in previous carry
    ld_field_w_clr[temp, 1100, accum]			; get high 16 of the total
    alu_shf[accum, temp, +, accum, <<16]		; add low 16 bits to upper 16
	alu[temp, 1, +carry, temp, <<16]			; add last carry +1, temp B op=0
	alu[accum, accum, +, temp, <<16]			; add 1<<16 to 0xffff to get zero result
	br!=0[errlabel]								; bad checksum if not zero
.endlocal
#endif
#endm



