// xmit_fill_8x100.uc
//		3 tx_fill threads, 8 ports x 100Mbit
//		used on 2 microengines
//		1 microengine takes even tfifo elements, the other takes odd tfifo elements
//		each element is statically bound to a port.

//		each microengine: define personality  ODD or not ODD
//
		
// tx_fill# is the entry label of the transmit-fill code.  The .local scope
// opened on the next line is shared by every macro and label below and is
// closed by the .endlocal at the end of this code.
//
// Values loaded in StartUp#:
//		queue_descriptor_base	SRAM_QUEUE_DESCRIPTOR_BASE
//		pkt_buffer_base			SDRAM_PKT_BUFFER_BASE
//		buf_descriptor_base		SRAM_BUFF_DESCRIPTOR_BASE
//		pwp_addr				XMIT_PWP_VECTOR (ports with packets queued)
//		bank_mask				3 <<21 (BANK_SIZE_16MB) or 3 <<20
//		const_0 / const_fc		0 / 0xfc
//		bit15on / bit20on		1 <<15 / 1 <<20
//
// NOTE(review): skip_bit_on, bit20_15on, pkt_label and tempa are used below but
// are not in this list - presumably they are declared elsewhere; confirm.
// prev_entry is listed here but is not referenced in the code below.
tx_fill#:
.local pwp_addr temp tfifo_entry queue bank_mask bank queue_descriptor_base pkt_buffer_base buf_descriptor_base const_0 const_fc bit15on bit20on port_info head_ptr tail_ptr ele_remaining buf_offset status_byte prev_entry
//-------------------------------macros--------------------------------------------


// Tx_SkipElement
//	if port is not ready, this is called to skip the associated tfifo element
//
//	Sequence:
//		1. (SDRAM_VALIDATE only) issue a dummy sdram t_fifo_wr for the element
//		2. write the element's status word with the skip bit (1 <<7) set
//		3. wait for this thread's inter-thread signal
//		4. spin until XMIT_PTR equals tfifo_entry or (tfifo_entry - 1) AND 0xf
//		5. write XMIT_VALIDATE for the element
//		6. signal the next fill thread (ctx1 -> THREAD_2, ctx2 -> THREAD_3,
//		   otherwise -> THREAD_1)
//
//	inputs:
//		tfifo_entry		tfifo element number to skip
//	uses from the enclosing scope:
//		temp (clobbered), bit15on, pkt_buffer_base, $tfifo_ctl_wd0, $xmit_ptr
//
#macro Tx_SkipElement[tfifo_entry]
.local bank _tfifo_entry_m1

	// when sdram is setting valid bits, we have to issue a dummy request
	// 
	#ifdef SDRAM_VALIDATE
		#ifdef ODD									; cause skips to go to different banks
			#ifdef BANK_SIZE_16MB
				alu_shf[bank, --, B, 1, <<21]		; bank_mask = 22:21
			#else									; not BANK_SIZE_16MB
				alu_shf[bank, --, B, 1, <<20]		; bank_mask - 21:20
			#endif									; not BANK_SIZE_16MB 
		#else										; not ODD	
			immed[bank, 0]
		#endif										; not ODD
		// the alu result is discarded; it only supplies the indirect_ref data
		// for the sdram instruction that immediately follows
		alu_shf[--, bit15on, or, tfifo_entry, <<7]	; put element no. in 10:7
		sdram[t_fifo_wr, --, pkt_buffer_base, bank, 1], indirect_ref, sig_done	; must send 1 auto validate
	#endif											; SDRAM_VALIDATE

	alu[$tfifo_ctl_wd0, tfifo_entry, OR, 1, <<7]			; set skip bit
	alu_shf[temp, --, B, tfifo_entry, <<1]					; status address = element no. x 2
	alu_shf[temp, temp, +, 1, <<7]							;	+ 128
    t_fifo_wr [$tfifo_ctl_wd0, temp, 0, 1],  ctx_swap		; write the status word

	ctx_arb[inter_thread]									; wait for our turn to validate

wait_for_xmit_ptr#:
	// The xmit_ptr must be pointing to either fifo element "tfifo_entry" or "tfifo_entry - 1"
	csr[read, $xmit_ptr, XMIT_PTR], ctx_swap
	alu[--, $xmit_ptr, -, tfifo_entry]
	br=0[write_validate#] 

	//check to see if ptr = tfifo_entry - 1
	alu[_tfifo_entry_m1, tfifo_entry, -, 1]
	alu[_tfifo_entry_m1, _tfifo_entry_m1, and, 0xf]			; limit to 0..15
	alu[--, $xmit_ptr, -, _tfifo_entry_m1]
	br!=0[wait_for_xmit_ptr#] 								; neither matched: re-read XMIT_PTR

write_validate#:

	alu_shf[--, bit15on, OR, tfifo_entry, <<5]				; setup indirect data from element no.
	fast_wr[0, XMIT_VALIDATE], indirect_ref					; tell fbi to run with that element no.

	//signal next thread telling it that it is its turn to validate
	br=ctx[1, ctx1#]
	br=ctx[2, ctx2#]
ctx3#:
	fast_wr[THREAD_1, inter_thd_sig]
	br[end_tx_skip#]
ctx2#:
	fast_wr[THREAD_3, inter_thd_sig]
	br[end_tx_skip#]
ctx1#:
	fast_wr[THREAD_2, inter_thd_sig]
end_tx_skip#:
.endlocal
#endm

// Tx_ReadAssignment
//		get next transmit assignment from the tx_scheduler
//
//		Yields with ctx_arb[voluntary] and re-tests abs_assign until its value
//		is non-negative (br>=0).  Then tests it against skip_bit_on:
//			result zero		-> branch to took_label with a_tfifo_entry and queue set
//			result non-zero	-> Tx_SkipElement[a_tfifo_entry], then branch to skip_label
//		In both cases sem_flip[abs_assign] is executed first.
//
//		input/output	abs_assign		skip bit, 7:4 = output port, 3:0 = queue
//		inputs:
//						took_label		branch target when the element is to be filled
//						skip_label		branch target after the element was skipped
//		outputs:
//						a_tfifo_entry	transmit fifo element (0-15):
//										port * 2 when FAST_PORT1 is defined, else port
//						queue			queue (port number in bits 7:4)
//		uses from the enclosing scope: const_0, skip_bit_on


#macro Tx_ReadAssignment[a_tfifo_entry, queue, abs_assign, took_label, skip_label]

wait_for_assignment#:
	ctx_arb[voluntary]								; let other threads go
	alu[--, --, b, abs_assign]
	br>=0[have_assignment#], defer[1]				; defer[1]: the next alu runs in the branch shadow, on the assumption that we have an assignment

	alu[queue, const_0, +8, abs_assign]	            ; peel off queue number (= zero)

	br[wait_for_assignment#]						; loop waiting for assignment

have_assignment#:

	alu[--, abs_assign, AND, skip_bit_on]							; test for skip
	// defer[2]: the sem_flip and the a_tfifo_entry alu selected by the #ifdef
	// below follow the branch - presumably these fill the two defer slots, so
	// a_tfifo_entry is also valid on the skip path; confirm sem_flip is one insn
	br=0[took_label], defer[2]
	sem_flip[abs_assign]											; let sched assign next
	#ifdef FAST_PORT1
		// If FAST_PORT1 is define then we are running eval card.  8 10/100 ports map to even fifo elements.
		alu[a_tfifo_entry, 0, +4, queue, >>3]                       ; tfifo_entry = port * 2
	#else
		// If FAST_PORT1 NOT defined, then running validation system.  16 10/100 ports map directly to fifo elements (1:1).
		alu[a_tfifo_entry, 0, +4, queue, >>4]                       ; tfifo_entry = port
	#endif
skip#:
	Tx_SkipElement[a_tfifo_entry]
	br[skip_label]
#endm

// port_info
// 	these absolute registers are used to hold descriptive information of long packets locally
// 	thus reducing sram accesses
// 	it can handle state info for up to 16 ports
// 	with 2 transmit engines, 32 ports could be supported (for 16x2, some work on jump offset needs to be done)


// Tx_RestorePortInfo							; 7 insns								
//	check for long packet continuation
//
//	Copies the absolute register @portN_in_prog of the selected port into
//	port_info.  The port is selected with a computed jump: temp = port * 2 and
//	every restore_N# entry except the last is exactly two instructions
//	(br + deferred alu), so do not add or remove instructions inside the table.
//
//	inputs:	queue		port number; the code uses (queue >>3) AND 0x1e,
//						i.e. queue bits 7:4
//	output: port_info	31:26 status_byte 
//						23:19 elements remaining
//						18:16 bank	
//						15:0 buf_offset 	
//	uses from the enclosing scope: temp (clobbered)
//
#macro Tx_RestorePortInfo[port_info, queue]	
	alu[temp, 0x1e, AND, queue, >>3]			; each target is offset by port*2
	jump[temp, restore_0#], 
		targets[restore_0#, restore_1#, restore_2#, restore_3#, restore_4#, restore_5#, restore_6#, restore_7#,
			restore_8#, restore_9#, restore_10#, restore_11#, restore_12#, restore_13#, restore_14#, restore_15#],
		defer[3]
	// three nops follow the defer[3] jump; there is no useful work to place there
	nop
	nop
	nop
restore_0#:
	br[end#], defer[1]
	alu[port_info, --, B, @port0_in_prog]
restore_1#:
	br[end#], defer[1]
	alu[port_info, --, B, @port1_in_prog]
restore_2#:
	br[end#], defer[1]
	alu[port_info, --, B, @port2_in_prog]
restore_3#:
	br[end#], defer[1]
	alu[port_info, --, B, @port3_in_prog]
restore_4#:
	br[end#], defer[1]
	alu[port_info, --, B, @port4_in_prog]
restore_5#:
	br[end#], defer[1]
	alu[port_info, --, B, @port5_in_prog]
restore_6#:
	br[end#], defer[1]
	alu[port_info, --, B, @port6_in_prog]
restore_7#:
	br[end#], defer[1]
	alu[port_info, --, B, @port7_in_prog]
restore_8#:
	br[end#], defer[1]
	alu[port_info, --, B, @port8_in_prog]
restore_9#:
	br[end#], defer[1]
	alu[port_info, --, B, @port9_in_prog]
restore_10#:
	br[end#], defer[1]
	alu[port_info, --, B, @port10_in_prog]
restore_11#:
	br[end#], defer[1]
	alu[port_info, --, B, @port11_in_prog]
restore_12#:
	br[end#], defer[1]
	alu[port_info, --, B, @port12_in_prog]
restore_13#:
	br[end#], defer[1]
	alu[port_info, --, B, @port13_in_prog]
restore_14#:
	br[end#], defer[1]
	alu[port_info, --, B, @port14_in_prog]
restore_15#:
	// last entry: no branch needed, falls through to end#
	alu[port_info, --, B, @port15_in_prog]
end#:	
#endm



// Tx_SavePortInfo									; 7 insns
//	save elements remaining, status byte and buffer offset in global port in progress info
//
//	ele_remaining is decremented in place BEFORE it is packed, so the caller's
//	register is modified and the stored count is the decremented value.
//	The three instructions after the defer[3] jump build port_info; the jump
//	then lands on save_N#, which copies port_info to @portN_in_prog.
//	Every save_N# entry except the last is exactly two instructions
//	(br + deferred alu) to match the port*2 jump offset.
//
//	inputs
//		status_byte		byte enables for last element			to 31:26
//						(shifted <<24, so this assumes its low two bits are
//						 zero - TODO confirm for all callers)
//		ele_remaining	elements left to send in the packet		to 23:19
//		bank			freelist id								to 18:16
//		buf_offset		elements offset in sdram (elements)		to 15:0
//		queue			port number; the code uses (queue >>3) AND 0x1e,
//						i.e. queue bits 7:4
//	uses from the enclosing scope: temp and port_info (both clobbered)
//
#macro Tx_SavePortInfo[status_byte, ele_remaining, bank, buf_offset, queue] 
	alu[temp, 0x1e, AND, queue, >>3]			; each target is offset by port*2
	alu_shf[ele_remaining, ele_remaining, -, 1]				; decrement elements remaining
	jump[temp, save_0#],  
		targets[save_0#, save_1#, save_2#, save_3#, save_4#, save_5#, save_6#, save_7#,
				save_8#, save_9#, save_10#, save_11#, save_12#, save_13#, save_14#, save_15#],
		defer[3]
	// the #ifdef selects exactly one instruction, so three instructions follow the jump
#ifdef BANK_SIZE_16MB
	alu_shf[port_info, buf_offset, OR, bank, >>5]
#else														; if 8MB BANK size
	alu_shf[port_info, buf_offset, OR, bank, >>4]
#endif
	alu_shf[port_info, port_info, OR, status_byte, <<24]
	alu_shf[port_info, port_info, OR, ele_remaining, <<19]
save_0#:
	br[end#], defer[1]
	alu[@port0_in_prog, --, B, port_info]
save_1#:
	br[end#], defer[1]
	alu[@port1_in_prog, --, B, port_info]
save_2#:
	br[end#], defer[1]
	alu[@port2_in_prog, --, B, port_info]
save_3#:
	br[end#], defer[1]
	alu[@port3_in_prog, --, B, port_info]
save_4#:
	br[end#], defer[1]
	alu[@port4_in_prog, --, B, port_info]
save_5#:
	br[end#], defer[1]
	alu[@port5_in_prog, --, B, port_info]
save_6#:
	br[end#], defer[1]
	alu[@port6_in_prog, --, B, port_info]
save_7#:
	br[end#], defer[1]
	alu[@port7_in_prog, --, B, port_info]
save_8#:
	br[end#], defer[1]
	alu[@port8_in_prog, --, B, port_info]
save_9#:
	br[end#], defer[1]
	alu[@port9_in_prog, --, B, port_info]
save_10#:
	br[end#], defer[1]
	alu[@port10_in_prog, --, B, port_info]
save_11#:
	br[end#], defer[1]
	alu[@port11_in_prog, --, B, port_info]
save_12#:
	br[end#], defer[1]
	alu[@port12_in_prog, --, B, port_info]
save_13#:
	br[end#], defer[1]
	alu[@port13_in_prog, --, B, port_info]
save_14#:
	br[end#], defer[1]
	alu[@port14_in_prog, --, B, port_info]
save_15#:
	// last entry: no branch needed, falls through to end#
	alu[@port15_in_prog, --, B, port_info]
end#:	
#endm



// Tx_UpdatePortInfo									; 7 insns								
//	decrement elements remaining in port_info and save it in global port in progress info
//
//	The decrement (subtract 1 <<19) is the first of the three instructions that
//	follow the defer[3] jump; the other two are nops.  The jump then lands on
//	update_N#, which copies port_info to @portN_in_prog.  Every update_N# entry
//	except the last is exactly two instructions (br + deferred alu) to match the
//	port*2 jump offset.
//
//	inputs:	
//			port_info	31:26 status_byte 
//						23:19 elements remaining
//						18:16 bank	
//						15:0 buf_offset 	
//			queue		port number; the code uses (queue >>3) AND 0x1e,
//						i.e. queue bits 7:4
//	outputs:
//			port_info	same layout, elements remaining reduced by one
//	uses from the enclosing scope: temp (clobbered)
//
#macro Tx_UpdatePortInfo[port_info, queue]	
	alu[temp, 0x1e, AND, queue, >>3]			; each target is offset by port*2
	jump[temp, update_0#], 
		targets[update_0#, update_1#, update_2#, update_3#, update_4#, update_5#, update_6#, update_7#,
			update_8#, update_9#, update_10#, update_11#, update_12#, update_13#, update_14#, update_15#],
		defer[3]
	alu_shf[port_info, port_info, -, 1, <<19]		; decrement elements remaining
	nop
	nop
update_0#:
	br[end#], defer[1]
	alu[@port0_in_prog, --, B, port_info]
update_1#:
	br[end#], defer[1]
	alu[@port1_in_prog, --, B, port_info]
update_2#:
	br[end#], defer[1]
	alu[@port2_in_prog, --, B, port_info]
update_3#:
	br[end#], defer[1]
	alu[@port3_in_prog, --, B, port_info]
update_4#:
	br[end#], defer[1]
	alu[@port4_in_prog, --, B, port_info]
update_5#:
	br[end#], defer[1]
	alu[@port5_in_prog, --, B, port_info]
update_6#:
	br[end#], defer[1]
	alu[@port6_in_prog, --, B, port_info]
update_7#:
	br[end#], defer[1]
	alu[@port7_in_prog, --, B, port_info]
update_8#:
	br[end#], defer[1]
	alu[@port8_in_prog, --, B, port_info]
update_9#:
	br[end#], defer[1]
	alu[@port9_in_prog, --, B, port_info]
update_10#:
	br[end#], defer[1]
	alu[@port10_in_prog, --, B, port_info]
update_11#:
	br[end#], defer[1]
	alu[@port11_in_prog, --, B, port_info]
update_12#:
	br[end#], defer[1]
	alu[@port12_in_prog, --, B, port_info]
update_13#:
	br[end#], defer[1]
	alu[@port13_in_prog, --, B, port_info]
update_14#:
	br[end#], defer[1]
	alu[@port14_in_prog, --, B, port_info]
update_15#:
	// last entry: no branch needed, falls through to end#
	alu[@port15_in_prog, --, B, port_info]
end#:	
#endm



// Tx_ReadLock
//	read-lock the queue descriptor (2 words) and isolate the head pointer.
//	This macro does NOT read the packet link; callers issue that sram read
//	themselves, using head_ptr.
//
//	inputs:
//		$queue_descriptor0		first transfer register to receive the descriptor
//		$packet_link0			not referenced in the macro body
//	inputs taken from the enclosing scope:
//		queue_descriptor_base, queue	address of the descriptor
//	outputs:
//		$queue_descriptor0		31:16 head relative address, 15:0 tail relative address
//		$queue_descriptor1		packet count (second word of the 2-word read)
//		head_ptr				queue head pointer ($queue_descriptor0 >>16)
//
//	the caller's following packet_link read provides:
//		$packet_link0			31:16 NPL, 15:0 element count or multicast pointer
//		$packet_link1			15:0 status with last element byte enables
//		
// packet_link format:
//										$packet_link0
//   		 +---------------------------------------------------------------------------+
//			 |						  NPL (next packet link)						     |
//			 |							     31:0									     |
//			 +---------------------------------------------------------------------------+
//
//										$packet_link1
// from		 +-----+-------+------+------+--------+-------------+---+-----+-------+-------+
// rec_state |FPORT|qselect|inport|F seq |freelist|qw/byte count|eop|qtype|discard|outport|
//			 |     |       |      |      |  bank  |             |   |     |       |  ele# |
//			 | 31  | 30:28 |27:24 |23:20 |  19:16 |     15:10   | 9 |  8  |   7   |  6:0  |
//			 +-----+-------+------+------+--------+-------------+---+-----+-------+-------+
//
// NOTE(review): the diagram shows NPL in 31:0 of $packet_link0 while the list
// above says 31:16 NPL, 15:0 element count - confirm which one is current.
//
#macro Tx_ReadLock[head_ptr, $queue_descriptor0, $packet_link0]
	sram[read_lock, $queue_descriptor0, queue_descriptor_base, queue, 2], priority, 
					ctx_swap						; read the queue descriptor 2 words
	alu_shf[head_ptr, --, B, $queue_descriptor0, >>16]		; isolate next packet link
#endm



// Tx_SendLastData
//	transfer the last element of a packet from sdram to a tfifo element, using
//	the quadword count taken from status_byte.  The transfer is issued with
//	sig_done and is not waited for here.
//
//	inputs: 
//		bank				OR-ed unshifted into the sdram offset
//		buf_offset			element offset in packet buffer memory (shifted <<3)
//		status_byte			last element quadword and byte enables;
//							(status_byte >>5) AND 7 is used as the quadword count
//		tfifo_entry			tfifo element 0-15
//	uses from the enclosing scope:
//		pkt_buffer_base, bit20on (or bit20_15on when SDRAM_VALIDATE is defined)

#macro Tx_SendLastData[bank, buf_offset, status_byte, tfifo_entry]
.local indirect qw_offset
	alu_shf[qw_offset, bank, OR, buf_offset, <<3]
	alu[indirect, 0x7, AND, status_byte, >>5]							; extract quadword count from status
	#ifdef SDRAM_VALIDATE
		alu[indirect, bit20_15on, OR, indirect, <<16]				; place quadword count in 19:16
	#else
		alu[indirect, bit20on, OR, indirect, <<16]					; place quadword count in 19:16
	#endif
	// the result is discarded; it only supplies the indirect_ref data for the
	// sdram instruction that immediately follows, so keep the two adjacent
	alu_shf[--, indirect, OR, tfifo_entry, <<7]						; put element no. in 10:7
	sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref, priority, sig_done		; no signal if sdram validate
	// note: ctx_arb[sdram] for this happens in Tx_Validate when no SDRAM_VALIDATE.
	// 		 when SDRAM_VALIDATE is defined, the sdram signal is not picked up.
.endlocal
#endm



// Tx_SendData
//		transfer 8 quadwords from sdram to tfifo element.  The transfer is
//		issued with sig_done and is not waited for here.
//
//	inputs: 
//		bank				OR-ed unshifted into the sdram offset
//		buf_offset			element offset in packet buffer memory (shifted <<3)
//		tfifo_entry			tfifo element 0-15
//	uses from the enclosing scope:
//		pkt_buffer_base, bit20on (or bit20_15on when SDRAM_VALIDATE is defined)

#macro Tx_SendData[bank, buf_offset, tfifo_entry]
.local indirect qw_offset
	alu_shf[qw_offset, bank, OR, buf_offset, <<3]
	#ifdef SDRAM_VALIDATE ; if SDRAM_VALIDATE then set bit 15, which tells the FBI to validate tfifo entry
		alu[indirect, bit20_15on, OR, 7, <<16]					; place quadword count 7 in 19:16
	#else
		alu[indirect, bit20on, OR, 7, <<16]						; place quadword count 7 in 19:16
	#endif
	// the result is discarded; it only supplies the indirect_ref data for the
	// sdram instruction that immediately follows, so keep the two adjacent
	alu_shf[--, indirect, OR, tfifo_entry, <<7]					; put element no. in 10:7
	sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref, priority, sig_done				; no signal if sdram validate
.endlocal
#endm



// Tx_ClearPortVector												; 5 insns
//	clear the ports with packets vector bit location to flag a queue going empty
//
//	Builds a one-bit mask for the port in $xfer3 and issues a scratch bit_wr
//	with clear_bits at pwp_addr.  No signal is requested for the scratch write.
//
//	inputs:
//		queue		8:4 port number
//					3:1 qselect for that port
//	uses from the enclosing scope:
//		pwp_addr, $xfer3 (overwritten; $packet_link1 is a synonym for $xfer3)
//			
#macro Tx_ClearPortVector[queue]
.local portnum
	alu_shf[portnum, --, B, queue, >>4]								; remember queue1 was shifted <<1 before
	alu_shf[portnum, portnum, B-A, 1, <<5]							; indirect shift left 32 - portnum
	// presumably this alu only supplies the shift amount for the <<indirect in
	// the next instruction - keep the two adjacent
	alu[--, portnum, B, 0]
	alu_shf[$xfer3, --, B, 1, <<indirect]
	scratch [bit_wr, $xfer3, pwp_addr, 0, clear_bits]					; clear bit for this port
.endlocal
#endm



// Tx_WriteUnlock													; 13 insns
//	insert new head pointer and update packet count for a queue
//
//	Decrements the packet count; when the result is not greater than zero the
//	port's bit in the ports-with-packets vector is cleared first.  Then writes
//	both descriptor words back with sram write_unlock.  No signal is requested
//	for the write.
//
//	inputs:
//		queue					offset from queue_descriptor_base
//		tail_ptr				15:0 previous tail; modified: its upper 16 bits
//								receive the new head
//		$packet_link0			new head (taken from its low 16 bits via <<16)
//		$queue_descriptor1		packet count
//	outputs:
//		$queue_descriptor0		new head (31:16) / existing tail (15:0)
//		$queue_descriptor1		decremented packet count (low 16 bits)
//
//	constant input:
//		queue_descriptor_base	array of all queues, each occupies 2 longwords
//
#macro Tx_WriteUnlock[queue, tail_ptr, $packet_link0, $queue_descriptor1]
.local que_packet_count
	alu[que_packet_count, $queue_descriptor1, -, 1]					; decrement the element count
	br>0[packets_remaining#]
		Tx_ClearPortVector[queue]									; 5 insns flag scheduler port has no packets
packets_remaining#:
	ld_field[tail_ptr, 1100, $packet_link0, <<16]					; merge existing tail with new head_ptr
	alu_shf[$queue_descriptor0, --, B, tail_ptr];
	ld_field_w_clr[$queue_descriptor1, 0011, que_packet_count]
	sram[write_unlock, $queue_descriptor0, queue_descriptor_base, queue, 2], priority
.endlocal
#endm



// Tx_Unlock
//	release the queue descriptor lock without writing the descriptor back
//
//	inputs:
//		queue					offset from queue_descriptor_base
//	uses from the enclosing scope:
//		queue_descriptor_base
//
#macro Tx_Unlock[queue]
	sram[unlock, --, queue_descriptor_base, queue, 1], priority
#endm



// Tx_Validate_8x100														; 8 insns + tfifo_wr ~30 
//	write status and transmit validate
//
//	Sequence:
//		1. write the status word to tfifo address (a_tfifo_entry x 2) + 128
//		2. wait for this thread's inter-thread signal
//		3. spin until XMIT_PTR equals a_tfifo_entry or (a_tfifo_entry - 1) AND 0xf
//		4. spin until the port's bit in XMIT_RDY_LO is set
//		5. (no SDRAM_VALIDATE) wait for the sdram signal of the earlier data transfer
//		6. write XMIT_VALIDATE for the element
//		7. signal the next fill thread (ctx1 -> THREAD_2, ctx2 -> THREAD_3,
//		   otherwise -> THREAD_1)
//
//	inputs:
//		$tfifo_ctl_wd0	status word to write (prepared by the caller)
//		a_tfifo_entry	tfifo element 0-15
//	uses from the enclosing scope:
//		temp (clobbered), bit15on, $xmit_ptr, $tx_rdy_copy
//
#macro Tx_Validate_8x100[$tfifo_ctl_wd0, a_tfifo_entry]
.local port _tfifo_entry_m1
	alu_shf[temp, 0, +8, a_tfifo_entry, <<1]						; status address = element no. x 2
	alu_shf[temp, temp, +, 1, <<7]									;	+ 128
    t_fifo_wr [$tfifo_ctl_wd0, temp, 0, 1], ctx_swap							; write the status word

	ctx_arb[inter_thread]	;wait for our turn

xmit_ptr_wait#:

	// two csr reads, only the second swaps out - presumably both have completed
	// when the thread resumes; TODO confirm the ordering guarantee
	csr[read, $xmit_ptr, XMIT_PTR]
	csr[read, $tx_rdy_copy, XMIT_RDY_LO], ctx_swap

	//wait for xmit_ptr to point to a_tfifo_entry or a_tfifo_entry - 1
	alu[--, $xmit_ptr, -, a_tfifo_entry]
	br=0[port_wait_loop#] 

	//check to see if ptr = tfifo_entry - 1
	alu[_tfifo_entry_m1, a_tfifo_entry, -, 1]
	alu[_tfifo_entry_m1, _tfifo_entry_m1, and, 0xf]		; limit to 0..15
	alu[--, $xmit_ptr, -, _tfifo_entry_m1]
	br!=0[xmit_ptr_wait#] 

port_wait_loop#:
	//  if port not ready wait for port to be ready
	#ifdef FAST_PORT1
		alu[port, --, b, a_tfifo_entry, >>1]                            ; port = fifo_entry / 2
	#else
		alu[port, --, b, a_tfifo_entry]                          	  	; port = fifo_entry
	#endif
	// presumably this alu only supplies the shift amount for the >>indirect in
	// the next instruction - keep the two adjacent
	alu[--, port, OR, 0]
	alu[--, 1, AND, $tx_rdy_copy, >>indirect]
	br>0[write_validate#]
	csr[read, $tx_rdy_copy, XMIT_RDY_LO], ctx_swap
	br[port_wait_loop#]

write_validate#:
	#ifndef SDRAM_VALIDATE
		ctx_arb[sdram]													; wait for packet data to be transferred to tfifo
	#endif
	alu_shf[--, bit15on, OR, a_tfifo_entry, <<5]					; setup indirect data from element no.
	fast_wr[0, XMIT_VALIDATE], indirect_ref							; tell fbi to run with that element no.

	//signal next thread telling it that it is its turn to write a valid bit
	br=ctx[1, ctx1#]
	br=ctx[2, ctx2#]
ctx3#:
	fast_wr[THREAD_1, inter_thd_sig]
	br[end_tx_validate#]
ctx2#:
	fast_wr[THREAD_3, inter_thd_sig]
	br[end_tx_validate#]
ctx1#:
	fast_wr[THREAD_2, inter_thd_sig]
end_tx_validate#:
.endlocal
#endm



// Tx_FreeBuf														; 3 insns
//	free the packet buffer
//
//	Issues an sram push of the buffer descriptor derived from the upper 16 bits
//	of queue_descriptor0, with the freelist id/bank passed through the indirect
//	reference.  No signal is requested for the push.
//
//	inputs:
//		queue_descriptor0	31:16 head relative address of the buffer to free
//		bank				sdram bank bits, shifted down (>>5 or >>4) for the
//							indirect reference
//	uses from the enclosing scope:
//		buf_descriptor_base, bit20on; tempa when PROFILE is defined
//
#macro Tx_FreeBuf[queue_descriptor0, bank]
.local descriptor_ptr
	#ifdef PROFILE
		immed[tempa, TOTAL_TRANSMITS]
		scratch[incr, --, tempa, 0, 1]
	#endif
	alu_shf[descriptor_ptr, 1, B-A, queue_descriptor0, >>16]	; relative address
	// the result is discarded; it only supplies the indirect_ref data for the
	// sram instruction that immediately follows, so keep the two adjacent
	#ifdef BANK_SIZE_16MB
		alu_shf[--, bit20on, OR, bank, >>5]						; merge ov bit with freelist id/bank
	#else														; if 8MB BANK size
		alu_shf[--, bit20on, OR, bank, >>4]						; merge ov bit with freelist id/bank
	#endif
	sram[push, --, descriptor_ptr, buf_descriptor_base, 0], indirect_ref
.endlocal
#endm



//-------------------------------end macros----------------------------------------

		
// Transfer register layout.  $xfer0..$xfer5 are declared in .xfer_order;
// the names below are synonyms for them.
// NOTE(review): $xfer6 and $xfer7 are given synonyms but are not part of the
// .xfer_order list above them - confirm that this is intended.
.xfer_order $xfer0 $xfer1 $xfer2 $xfer3 $xfer4 $xfer5
.operand_synonym $queue_descriptor0 $xfer0			; queue head and tail pointers from/to queue
.operand_synonym $queue_descriptor1 $xfer1			; queue element count from/to queue
.operand_synonym $packet_link0 $xfer2				; packet links descriptors from queue
.operand_synonym $packet_link1 $xfer3
.operand_synonym $tfifo_ctl_wd0 $xfer4				; status to tfifo
.operand_synonym $tfifo_ctl_wd1 $xfer5
.operand_synonym $tx_rdy_copy $xfer6				; copy of XMIT_RDY_LO
.operand_synonym $xmit_ptr $xfer7					; copy of XMIT_PTR


// StartUp
//	one-time initialization: thread id constants, base addresses, constants,
//	the 16 @portN_in_prog absolute registers, one sdram location, and the first
//	inter-thread signal.  Ends by branching to tx_validated#.
StartUp#:

	// thread numbers used as fast_wr targets for inter_thd_sig
	// NOTE(review): this test uses EVEN while every other conditional in this
	// file tests ODD.  A build that defines neither gets the 20..23 ids here
	// together with the non-ODD bank (0) below - confirm that the even engine
	// always defines EVEN.
	#ifdef EVEN
		#define THREAD_0 16
		#define THREAD_1 17
		#define THREAD_2 18
		#define THREAD_3 19
	#else ;ODD
		#define THREAD_0 20
		#define THREAD_1 21
		#define THREAD_2 22
		#define THREAD_3 23
	#endif


	immed[queue_descriptor_base, SRAM_QUEUE_DESCRIPTOR_BASE]
    immed32[pkt_buffer_base, SDRAM_PKT_BUFFER_BASE]
	immed[buf_descriptor_base, SRAM_BUFF_DESCRIPTOR_BASE]
	immed[$tfifo_ctl_wd1, 0]										; second status word always 0
	#ifdef BANK_SIZE_16MB
		alu_shf[bank_mask, --, B, 3, <<21]							; bank_mask = 22:21
	#else	
		alu_shf[bank_mask, --, B, 3, <<20]							; bank_mask - 21:20
	#endif


// registers to be used
//	queue_descriptor_base	SRAM_QUEUE_DESCRIPTOR_BASE
//	pkt_buffer_base			SDRAM_PKT_BUFFER_BASE
//	buf_descriptor_base		SRAM_BUFF_DESCRIPTOR_BASE
//	tempa					local variable
//	queue					queue offset
//	tfifo_entry				tfifo element

	// all-zero port info has elements remaining = 0, which port_info_restored#
	// treats as "start a new packet"
	immed[@port0_in_prog, 0]					; for long packet, hold port info
	immed[@port1_in_prog, 0]
	immed[@port2_in_prog, 0]
	immed[@port3_in_prog, 0]
	immed[@port4_in_prog, 0]
	immed[@port5_in_prog, 0]
	immed[@port6_in_prog, 0]
	immed[@port7_in_prog, 0]
	immed[@port8_in_prog, 0]
	immed[@port9_in_prog, 0]
	immed[@port10_in_prog, 0]
	immed[@port11_in_prog, 0]
	immed[@port12_in_prog, 0]
	immed[@port13_in_prog, 0]
	immed[@port14_in_prog, 0]
	immed[@port15_in_prog, 0]

	immed[pwp_addr, XMIT_PWP_VECTOR]				; scratch location for ports with packets queued
	immed[const_0, 0]
	immed[skip_bit_on, SKIP_BIT_SET]
	immed[const_fc, 0xfc]
	alu_shf[bit15on, --, B, 1, <<15]				; setup indirect ov bit to save a cycle
	alu_shf[bit20on, --, B, 1, <<20]				; setup indirect ov bit to save a cycle
	#ifdef SDRAM_VALIDATE ; if SDRAM_VALIDATE then set bit 15, which tells the FBI to validate tfifo entry
		alu_shf[bit20_15on, bit20on, or, bit15on]
		alu[temp, --, b, 0]
	#endif
	
	// initialize sdram locations so initial skips will pass the transactor check
	// (same bank selection as the dummy request in Tx_SkipElement)
	#ifdef ODD
		#ifdef BANK_SIZE_16MB
			alu_shf[bank, --, B, 1, <<21]			; bank_mask = 22:21
		#else	
			alu_shf[bank, --, B, 1, <<20]			; bank_mask - 21:20
		#endif
	#else
		immed[bank, 0]
	#endif
.xfer_order $$xfer0 $$xfer1
	immed[$$xfer0, 0]
	immed[$$xfer1, 0]
	sdram[write, $$xfer0, pkt_buffer_base, bank, 1]

	//have the first fill thread signal itself so that it will validate and not get stuck at ctx_arb[inter_thread]
	// contexts 2 and 3 skip the signal; any other context sends it to THREAD_1
	br=ctx[2, tx_validated#]
	br=ctx[3, tx_validated#]
	fast_wr[THREAD_1, inter_thd_sig]
	br[tx_validated#]											; go read first assignment

// port_info_restored
//	reached from tx_restore# once port_info holds the saved state of the port.
//	elements remaining (port_info 23:19) > 0 means a packet is in progress:
//	continue at tx_not_sop#.  Otherwise fall through to tx_sop#.
//	The bank extraction selected by the #ifdef follows the defer[1] branch, so
//	presumably it runs on both paths (tx_sop# recomputes bank from
//	$packet_link1 anyway).
port_info_restored#:
	alu[ele_remaining, 0x1f, AND, port_info, >>19]						; extract elements remaining
	br>0[tx_not_sop#], defer[1]
	#ifdef BANK_SIZE_16MB
		alu[bank, bank_mask, AND, port_info, <<5]
	#else
		alu[bank, bank_mask, AND, port_info, <<4]
	#endif

// values of pkt_label (bits 19:18 of $packet_link1)
#define PKT_LABEL_NORMAL	0
#define PKT_LABEL_INBOUND	1
#define PKT_LABEL_OUTBOUND	2

// tx_sop
//	start of packet: lock the queue, read the packet link, derive bank,
//	buf_offset, status_byte and ele_remaining, adjust the length for labelled
//	packets, then send the first element.  Ends with br[tx_sent#].
tx_sop#:
		Tx_ReadLock[head_ptr, $queue_descriptor0, $packet_link0]		; get head/tail descriptor, packet count
		sram[read, $packet_link0, buf_descriptor_base, head_ptr, 2],
					ctx_swap, priority, defer[1]					; read packet_link 2 words get next head, status 
		ld_field_w_clr[tail_ptr, 0011, $queue_descriptor0]
		ld_field_w_clr[ele_remaining, 0001, $packet_link1]
		#ifdef BANK_SIZE_16MB
			alu[bank, bank_mask, AND, $packet_link1, <<5]
		#else
			alu[bank, bank_mask, AND, $packet_link1, <<4]
		#endif


		alu_shf[buf_offset, 0, +16, $queue_descriptor0, >>13]			; relative ele addr for sop

		// tlaw - Nov 16, 2000
		alu[pkt_label, 0x3, AND, $packet_link1, >>18]				; 19:18 bit for packet_label
		alu[status_byte, const_fc, AND, $packet_link1, >>8]	
		
		// tlaw - Nov 20, 2000
		// update portinfo here
		//		status_byte		byte enables for last element			to 31:26
		//		ele_remaining	elements left to send in the packet		to 23:19
		//		bank			freelist id								to 18:16
		//		buf_offset		elements offset in sdram (elements)		to 15:0
		//		queue			8:4 port number 
		.if (pkt_label == PKT_LABEL_INBOUND)
			// status_byte was masked with 0xfc above, so the constants used
			// here are 4 <<5 = 128 and 8 <<5 = 256.
			// if status_byte >= 128, status_byte = status_byte - 128
			// else status_byte = 256 - (128 - status_byte), and
			//		ele_remaining - 1
			.if (status_byte >= 128)
			   alu[status_byte, status_byte, -, 4, <<5]
			.else
			   alu[status_byte, status_byte, B-A, 4, <<5]
			   alu[status_byte, status_byte, B-A, 8, <<5]
			   alu[ele_remaining, ele_remaining, -, 1]
			.endif
			
		.elif (pkt_label == PKT_LABEL_OUTBOUND)
			// add 128 (4 <<5) to status_byte for OUTBOUND pkt; on reaching 256
			// wrap it and send one more element
			alu[status_byte, status_byte, +, 4, <<5]
			.if (status_byte >= 256)
			   alu[status_byte, status_byte, -, 8, <<5]
			   alu[ele_remaining, ele_remaining, +, 1]
			.endif
		.endif
																	; sop: send the first element
		.if(ele_remaining == 1)											; if at eop/sop
			Tx_WriteUnlock[queue, tail_ptr, $packet_link0, $queue_descriptor1]		; modify head and packet count
			Tx_SendLastData[bank, buf_offset, status_byte, tfifo_entry]		; transfer packet data from sdram to tfifo
			ctx_arb[voluntary]
			// NOTE(review): Tx_WriteUnlock wrote $queue_descriptor0 just above;
			// this read presumably still returns the head fetched by the
			// read_lock (separate read/write transfer registers) - confirm.
			Tx_FreeBuf[$queue_descriptor0, bank]						; (3 insns) free the packet buffer				
			alu[status_byte, status_byte, OR, 3]						; eop sop for upcoming validate
		.else															; more than 1 remaining. not eop/ sop							
			Tx_Unlock[queue]		
			Tx_SendData[bank, buf_offset, tfifo_entry]						; send an mpacket
			Tx_SavePortInfo[status_byte, ele_remaining, bank, buf_offset, queue] ; save port info (decr ele_remaining)
			alu[status_byte, const_fc, OR, 1]							; not eop/ sop for upcoming validate
		.endif
		br[tx_sent#]
// tx_not_sop
//	continuation of a packet already in progress on this port.  Advances the
//	buffer offset kept in port_info 15:0 by one element, takes status_byte from
//	port_info 31:24, sends the element and stores the updated port_info.
//	On the last element the queue head/packet count are updated and the buffer
//	is freed.  Falls through to tx_sent#.
tx_not_sop#:															; not sop
		ctx_arb[voluntary]
		alu[buf_offset, 1, +16, port_info]								; get next buf_offset
		alu[port_info, 1, +, port_info]									; add 1 element to buf_offset in port_info
		ld_field_w_clr[status_byte, 0001, port_info, >>24]																; not sop
		.if(ele_remaining == 1)											; if at eop/ not sop
			Tx_SendLastData[bank, buf_offset, status_byte, tfifo_entry]		; transfer packet data from sdram to tfifo
			Tx_ReadLock[head_ptr, $queue_descriptor0, $packet_link0]	; get head/tail descriptor. packet count
			sram[read, $packet_link0, buf_descriptor_base, head_ptr, 1],
					ctx_swap, defer[1]					; read packet_link 1 word to get next head
			ld_field_w_clr[tail_ptr, 0011, $queue_descriptor0]
			Tx_WriteUnlock[queue, tail_ptr, $packet_link0, $queue_descriptor1]		; modify head and packet count
			// NOTE(review): as at tx_sop#, this read of $queue_descriptor0
			// follows a write to it in Tx_WriteUnlock - presumably it still
			// returns the head fetched by the read_lock; confirm.
			Tx_FreeBuf[$queue_descriptor0, bank]						; (3 insns) free the packet buffer				
			Tx_UpdatePortInfo[port_info, queue]							; decr ele_remaining, at port info
			alu[status_byte, status_byte, OR, 2]						; eop not sop for upcoming validate
		.else															; not eop/ not sop
			Tx_SendData[bank, buf_offset, tfifo_entry]						; send an mpacket
			Tx_UpdatePortInfo[port_info, queue]							; decr ele_remaining, at port info
			alu[status_byte, --, B, const_fc]							; not eop/ not sop for upcoming validate
		.endif
// tx_sent
//	build the tfifo status word: status_byte <<8 OR-ed with queue >>4 (the
//	port), then write it and validate the element.
//	transmit_01_done#, transmit_2_done#, transmit_3_done# and transmit_done#
//	all name the same instruction; none of them is branched to in this code.
tx_sent#:
	alu_shf[temp, --, B, status_byte, <<8]
	alu[$tfifo_ctl_wd0, temp, OR, queue, >>4]							; merge status_byte and port
transmit_01_done#:
transmit_2_done#:
transmit_3_done#:
transmit_done#:
	Tx_Validate_8x100[$tfifo_ctl_wd0, tfifo_entry]
// tx_validated
//	top of the per-thread loop.  Each context reads its own assignment register
//	(@assign1, @assign2 or @assign3).  A context that matches none of the three
//	br=ctx tests falls through to read1#.
//	Tx_ReadAssignment leaves through tx_restore# (element to be filled) or comes
//	back to tx_validated# (element skipped), so the read blocks never fall
//	through into each other.
tx_validated#:
	br=ctx[1, read1#]
	br=ctx[2, read2#]
	br=ctx[3, read3#]
read1#:								
	Tx_ReadAssignment[tfifo_entry, queue, @assign1, tx_restore#, tx_validated#]		; read the next assignment from the tx_scheduler
read2#:
	Tx_ReadAssignment[tfifo_entry, queue, @assign2, tx_restore#, tx_validated#]		; read the next assignment from the tx_scheduler
read3#:
	Tx_ReadAssignment[tfifo_entry, queue, @assign3, tx_restore#, tx_validated#]		; read the next assignment from the tx_scheduler
	
tx_restore#:
	Tx_RestorePortInfo[port_info, queue]								; check for long packet continuation

	br[port_info_restored#]												; iterate
	
// closes the .local scope opened after tx_fill#
.endlocal
