// xmit_fill_1gig.uc
//		3 tx_fill threads, 1 port x 1Gbit
//		1 microengine takes even tfifo elements, the other takes odd tfifo elements
//		each element is statically bound to a port.

//		each microengine: define personality  EVEN or not EVEN
//
		
// tx_fill: entry point of the transmit-fill thread code.
// The code from here to end_thread_init# runs once per context and loads
// next_thread_ir with the indirect-reference word that the validate macros
// later use with fast_wr[0, inter_thd_sig] to signal the next fill thread.
tx_fill#:
// NOTE(review): next_thread_ir, port_rdy_bit, skip_bit_on, const_x38, const_fc_x256,
// bit20_15on and orig_status_byte are used in this file but are not listed in the
// .local below, while const_fc, tail_ptr and prev_entry are listed but not used in
// the code visible here -- confirm against the assembler's declaration rules.
.local temp tfifo_entry queue bank_mask bank queue_descriptor_base pkt_buffer_base buf_descriptor_base const_0 const_1 const_2 port const_fc bit15on bit20on port_info head_ptr tail_ptr ele_remaining buf_offset status_byte prev_entry



	// set next_thread_ir = next thread number to be signaled by this thread upon fifo validation.
	//  the value is used as a fast_wr indirect reference.  data field is bits 14:5. bit 15 is set indicating
	//  override fast write data.
	//
	//  Signal order built below:
	//		EVEN:     ctx1 -> 18, ctx2 -> 19, ctx3 -> 17
	//		not EVEN: ctx1 -> 22, ctx2 -> 23, ctx3 -> 21
	//  Any context that is neither 1 nor 2 falls through to the ctx3# setup.

	br=ctx[1, ctx1#]
	br=ctx[2, ctx2#]

	#ifdef EVEN
ctx3#:
		alu[next_thread_ir, --, b, 17, <<5]					; thread number into data field 14:5
		alu[next_thread_ir, next_thread_ir, or, 1, <<15]	; set override bit 15
		br[end_thread_init#]
ctx2#:
		alu[next_thread_ir, --, b, 19, <<5]					; thread number into data field 14:5
		alu[next_thread_ir, next_thread_ir, or, 1, <<15]	; set override bit 15
		br[end_thread_init#]
ctx1#:
		// last case: no branch needed, falls through to end_thread_init#
		alu[next_thread_ir, --, b, 18, <<5]					; thread number into data field 14:5
		alu[next_thread_ir, next_thread_ir, or, 1, <<15]	; set override bit 15
	#else ; ODD
ctx3#:
		alu[next_thread_ir, --, b, 21, <<5]					; thread number into data field 14:5
		alu[next_thread_ir, next_thread_ir, or, 1, <<15]	; set override bit 15
		br[end_thread_init#]
ctx2#:
		alu[next_thread_ir, --, b, 23, <<5]					; thread number into data field 14:5
		alu[next_thread_ir, next_thread_ir, or, 1, <<15]	; set override bit 15
		br[end_thread_init#]
ctx1#:
		// last case: no branch needed, falls through to end_thread_init#
		alu[next_thread_ir, --, b, 22, <<5]					; thread number into data field 14:5
		alu[next_thread_ir, next_thread_ir, or, 1, <<15]	; set override bit 15
	#endif
end_thread_init#:


//-------------------------------macros--------------------------------------------


// Tx_SkipElement
//	if port is not ready, this is called to skip the associated tfifo element
//
//	Steps:
//		1. (SDRAM_VALIDATE only) issue a dummy 1-quadword sdram t_fifo_wr for the element
//		2. write the element's status word with the skip bit (bit 7) set
//		3. wait for the inter-thread signal (our turn to validate)
//		4. poll XMIT_PTR until it equals tfifo_entry or (tfifo_entry - 1) mod 16
//		5. read XMIT_RDY_LO; if this port's ready bit is set, reset @mpkt_tx_ok_cnt to 5
//		6. write XMIT_VALIDATE for the element and signal the next fill thread
//
//	inputs:
//		tfifo_entry		tfifo element to skip
//	also uses (from the enclosing scope):
//		temp, bit15on, pkt_buffer_base, port_rdy_bit, next_thread_ir,
//		$tfifo_ctl_wd0, $xmit_ptr, $tx_rdy_copy, @mpkt_tx_ok_cnt
//
#macro Tx_SkipElement[tfifo_entry]
.local bank _tfifo_entry_m1 _temp

	// when sdram is setting valid bits, we have to issue a dummy request
	// 
	#ifdef SDRAM_VALIDATE
		//cause skips to go to different banks
		// NOTE(review): thread init selects the personality with "#ifdef EVEN / #else",
		// whereas this tests ODD.  If the build never defines ODD, both engines use bank 0
		// here -- confirm which symbols the build defines.
		#ifdef ODD
			#ifdef BANK_SIZE_16MB
				alu_shf[bank, --, B, 1, <<21]			; bank_mask = 22:21
			#else										; not BANK_SIZE_16MB
				alu_shf[bank, --, B, 1, <<20]			; bank_mask = 21:20
			#endif										; not BANK_SIZE_16MB
		#else											; not ODD
			immed[bank, 0]
		#endif											; ODD
		alu_shf[--, bit15on, or, tfifo_entry, <<7]		; put element no. in 10:7 (indirect data for the next instruction)
		sdram[t_fifo_wr, --, pkt_buffer_base, bank, 1], indirect_ref, sig_done	; must send 1 auto validate
	#endif												; SDRAM_VALIDATE

	alu[$tfifo_ctl_wd0, tfifo_entry, OR, 1, <<7]					; set skip bit
	alu_shf[temp, --, B, tfifo_entry, <<1]							; status address = element no. x 2
	alu_shf[temp, temp, +, 1, <<7]									;	+ 128
    t_fifo_wr [$tfifo_ctl_wd0, temp, 0, 1],  ctx_swap, defer[1]		; write the status word
	alu[_tfifo_entry_m1, tfifo_entry, -, 1]							; (defer slot of the t_fifo_wr) element - 1

	ctx_arb[inter_thread], defer[1]									; wait for our turn
	alu[_tfifo_entry_m1, _tfifo_entry_m1, and, 0xf]					; (defer slot of the ctx_arb) limit to 0..15


wait_for_xmit_ptr#:
	// The xmit_ptr must be pointing to either fifo element "tfifo_entry" or "tfifo_entry - 1"
	csr[read, $xmit_ptr, XMIT_PTR], ctx_swap
	alu[--, $xmit_ptr, -, tfifo_entry]
	br=0[check_port_rdy#] 											; if xmit_ptr = tfifo_entry, then check port rdy

	alu[--, $xmit_ptr, -, _tfifo_entry_m1]							; check to see if ptr = tfifo_entry - 1
	br!=0[wait_for_xmit_ptr#] 										; keep polling unless xmit_ptr = tfifo_entry-1


check_port_rdy#:
	csr[read, $tx_rdy_copy, XMIT_RDY_LO], ctx_swap
	alu[--, $tx_rdy_copy, and, port_rdy_bit]						; port_rdy_bit set at startup to 1 << port_num
	br=0[write_validate#], defer[1]									; if port not rdy then don't reset counter

	alu[_temp, --, b, 5]											; (defer slot: runs on both paths) _temp = 5
	alu[@mpkt_tx_ok_cnt, --, b, _temp]								; port is ready: reset the counter to 5



write_validate#:
	alu_shf[--, bit15on, OR, tfifo_entry, <<5]						; setup indirect data from element no.
	fast_wr[0, XMIT_VALIDATE], indirect_ref							; tell fbi to run with that element no.

	//signal next thread indicating that it is its turn to validate a tfifo element
	alu[--, --, b, next_thread_ir]									; next_thread_ir set during thread initialization
	fast_wr[0, inter_thd_sig], indirect_ref							; signal next thread
end_tx_skip#:
.endlocal
#endm

// Tx_ReadAssignment_1f
//		get next transmit assignment from the tx_scheduler
//
//		Loops (yielding with ctx_arb[voluntary]) while abs_assign is negative.  Once it is
//		non-negative the skip bit is tested:
//			skip bit clear -> branch to took_label
//			skip bit set   -> run Tx_SkipElement, then branch to skip_label
//		On both exits sem_flip[abs_assign] has been issued and @fifo_elem has been advanced by 2.
//
//		input/output	abs_assign		skip bit, 7:4 = port/element, 3:1 = queue select within port	
//		inputs:
//						took_label		branch target when a real (non-skip) assignment was read
//						skip_label		branch target after a skipped element has been handled
//						port			not referenced in the macro body
//		outputs:
//						tfifo_entry		one of transmit fifo elements (0-15), taken from @fifo_elem
//						queue			output queue, bits 7:3 = output port number
//		also uses (from the enclosing scope): const_0, const_2, skip_bit_on, @fifo_elem


#macro Tx_ReadAssignment_1f[tfifo_entry, port, queue, abs_assign, took_label, skip_label]


wait_for_assignment#:
	ctx_arb[voluntary]								; let other threads go
	alu[--, --, b, abs_assign]						; set condition codes from abs_assign
	br>=0[have_assignment#], defer[2]				; defer[2] in the assumption that we have an assignment

	// the next two instructions are the defer slots of the branch above (run on both paths)
	alu[queue, const_0, +8, abs_assign]	            ; peel off queue number (= zero)
	alu[tfifo_entry, const_0, +4, @fifo_elem]       ; limit tfifo_entry to 4 bits

	br[wait_for_assignment#]						; loop waiting for assignment

have_assignment#:
	alu[--, abs_assign, AND, skip_bit_on]			; test for skip
	br=0[took_label], defer[2]                      ; branch if we got an assignment
	// defer slots of the branch above (run whether or not the element is skipped)
	sem_flip[abs_assign]							; let sched assign next
	alu[@fifo_elem, const_2, +4, @fifo_elem]        ; increment fifo element number for next thread

skip#:
	Tx_SkipElement[tfifo_entry]
	br[skip_label]
#endm



// port_info
// 	these absolute registers are used to hold descriptive information of long packets locally
// 	thus reducing sram accesses

// Tx_F_RestorePortInfo								
//	copy the saved per-port packet-in-progress word from its absolute register
//	(@fast_port1_info or @fast_port2_info, selected at assembly time) into port_info
//
//	inputs:	none (reads the absolute register chosen by FAST_PORT1 / FAST_PORT2)
//	output: port_info	31:26 status_byte 
//						23:19 elements remaining
//						18:16 bank	
//						15:0 buf_offset 	
//
//	If neither FAST_PORT1 nor FAST_PORT2 is defined the macro emits no code.
//
#macro Tx_F_RestorePortInfo[port_info]

	#ifdef FAST_PORT1
		alu[port_info, --, B, @fast_port1_info]		
	#endif
	#ifdef FAST_PORT2
		alu[port_info, --, B, @fast_port2_info]	
	#endif
#endm


// Tx_F_SavePortInfo
//	save elements remaining, status byte and buffer offset in global port in progress info
//
//	inputs
//		status_byte		byte enables for last element			to 31:26
//		ele_remaining	elements left to send in the packet		to 23:19
//		bank			freelist id								to 17:16
//		buf_offset		elements offset in sdram (elements)		to 15:0
//
//	side effects
//		ele_remaining is decremented in place before being packed
//		port_info (enclosing scope) receives the packed word
//		@fast_portN_mpacket_cnt_m1 and @fast_portN_info are written (N per FAST_PORT1/2)
//
//	The bank field must be packed so that the unpack done at port_info_restored#
//	(port_info <<5 for BANK_SIZE_16MB, <<4 otherwise, masked with bank_mask) recovers it.
//	With 16MB banks the bank bits live in 22:21, so the pack shift must be >>5; the
//	previous unconditional >>4 left them in 18:17 and the restored bank was wrong.
//
#macro Tx_F_SavePortInfo[status_byte, ele_remaining, bank, buf_offset] 
	alu_shf[ele_remaining, ele_remaining, -, 1]				; decrement elements remaining
	alu_shf[port_info, buf_offset, OR, status_byte, <<16]   ; from 15:10 to 31:26
	alu_shf[port_info, port_info, OR, ele_remaining, <<19]
	#ifdef BANK_SIZE_16MB
		alu_shf[port_info, port_info, OR, bank, >>5]        ; save bits bank[22:21] in bits port_info[17:16]
	#else													; if 8MB BANK size
		alu_shf[port_info, port_info, OR, bank, >>4]        ; save bits bank[21:20] in bits port_info[17:16]
	#endif

	#ifdef FAST_PORT1
		//save number of (mpackets - 1) so that we can push the same buffer address that the receive thread popped
		//pushing the correct buf address is difficult because we increment the offset for every mpacket
		alu[@fast_port1_mpacket_cnt_m1, --, b, ele_remaining]
		alu[@fast_port1_info, --, B, port_info]		             ; save port info
	#endif
	#ifdef FAST_PORT2
		//save number of (mpackets - 1) so that we can push the same buffer address that the receive thread popped
		//pushing the correct buf address is difficult because we increment the offset for every mpacket
		alu[@fast_port2_mpacket_cnt_m1, --, b, ele_remaining]
		alu[@fast_port2_info, --, B, port_info]		             ; save port info
	#endif
#endm

// Tx_F_UpdatePortInfo								
//	decrement elements remaining in port_info and save it in global port in progress info
//
//	The decrement is done in place by subtracting 1 << 19 from the whole word, so the
//	other fields are left untouched as long as the 23:19 field is non-zero on entry.
//
//	inputs:	
//			port_info	31:26 status_byte 
//						23:19 elements remaining
//						18:16 bank	
//						15:0 buf_offset 	
//	outputs:
//			port_info	same word with the 23:19 field decremented
//			@fast_port1_info / @fast_port2_info (per FAST_PORT1 / FAST_PORT2) receive the updated word
//
#macro Tx_F_UpdatePortInfo[port_info]	
	alu_shf[port_info, port_info, -, 1, <<19]		; decrement elements remaining
	#ifdef FAST_PORT1
		alu[@fast_port1_info, --, B, port_info]		
	#endif
	#ifdef FAST_PORT2
		alu[@fast_port2_info, --, B, port_info]		
	#endif
#endm


// fast port packet_link format:
//										$packet_link0
//   		 +---------------------------------------------------------------------------+
//			 |						  	element count								     |
//			 |							     31:0									     |
//			 +---------------------------------------------------------------------------+
//
//										$packet_link1
// from		 +-----+-------+------+------+--------+-------------+---+-----+-------+-------+
// rec_state |FPORT|qselect|inport|F seq |freelist|qw/byte count|eop|qtype|discard|outport|
//			 |     |       |      |      |  bank  |             |   |     |       |  ele# |
//			 | 31  | 30:28 |27:24 |23:20 |  19:16 |     15:10   | 9 |  8  |   7   |  6:0  |
//			 +-----+-------+------+------+--------+-------------+---+-----+-------+-------+
//



// Tx_SendLastData
//	start the sdram -> tfifo transfer for the last element of a packet; the quadword
//	count is taken from status_byte instead of being fixed
//
//	inputs: 
//		bank				bank bits, OR'ed into the sdram quadword offset
//		buf_offset			element offset in packet buffer memory
//		status_byte			last element quadword and byte enables
//		tfifo_entry			tfifo element 0-15
//	also uses (from the enclosing scope): pkt_buffer_base, bit20on (or bit20_15on with SDRAM_VALIDATE)
//
//	The transfer is issued with sig_done; the macro does not wait for it to complete.
#macro Tx_SendLastData[bank, buf_offset, status_byte, tfifo_entry]
.local indirect qw_offset
	alu_shf[qw_offset, bank, OR, buf_offset, <<3]	; element offset x 8 = quadword offset, merged with bank
	alu[indirect, 0x7, AND, status_byte, >>13]		; extract byte count from status 15:10, also divide by 8 for conversion to quadwords
	#ifdef SDRAM_VALIDATE
		alu[indirect, bit20_15on, OR, indirect, <<16]					; place quadword count in 19:16
	#else
		alu[indirect, bit20on, OR, indirect, <<16]						; place quadword count in 19:16
	#endif
	alu_shf[--, indirect, OR, tfifo_entry, <<7]							; put element no. in 10:7
	sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref, priority, sig_done	; transfer to tfifo, sig done
.endlocal
#endm



// Tx_SendData
//		transfer 8 quadwords from sdram to tfifo element
//
//	inputs: 
//		bank				bank bits, OR'ed into the sdram quadword offset
//		buf_offset			element offset in packet buffer memory
//		tfifo_entry			tfifo element 0-15
//	also uses (from the enclosing scope): pkt_buffer_base, bit20on (or bit20_15on with SDRAM_VALIDATE)
//
//	The transfer is issued with sig_done; the macro does not wait for it to complete.

#macro Tx_SendData[bank, buf_offset, tfifo_entry]
.local indirect qw_offset
	alu_shf[qw_offset, bank, OR, buf_offset, <<3]		; element offset x 8 = quadword offset, merged with bank
	#ifdef SDRAM_VALIDATE
		alu[indirect, bit20_15on, OR, 7, <<16]			; place quadword count 7 in 19:16
	#else
		alu[indirect, bit20on, OR, 7, <<16]				; place quadword count 7 in 19:16
	#endif
	alu_shf[--, indirect, OR, tfifo_entry, <<7]			; put element no. in 10:7
	sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref, priority, sig_done	; transfer to tfifo, sig done
.endlocal
#endm





// Tx_Validate														; 8 insns + tfifo_wr ~30 
//	write status and transmit validate for NON SOP packets only
//
// The MAC's transmit FIFO threshold for tx ready is 8*64 bytes.  So once we get a tx ready we
// can send a max of 8 mpackets without checking the tx ready bit again.  The exception to this rule is when 
// we want to send SOP mpacket:  the MAC can not handle more than 2 packets in its tfifo at a time.  This
// routine is called for NON SOP packets only.  
//
// This routine first writes the fifo status word. Then it waits for an inter thread signal from the 
// fill thread that was assigned to fill fifo element tfifo_entry - 2.  The other tx scheduler fills 
// tfifo_entry - 1.
//
// Variable @mpkt_tx_ok_cnt is used to keep track of how many mpackets can be written to the MAC without
// reading the xmit_rdy bits.  The MAC's xmit_rdy threshold is set to 8 mpackets * 64 bytes.  We must 
// limit the number of mpackets we have in the tfifo to 7.  If both schedulers allowed 8 mpackets in the
// tfifo then we could overwrite data that is being output by the hardware.
//
// Before reading the xmit_rdy bits, this routine first must make sure that the xmit_ptr is not more than
// 4 fifo elements away from our tfifo_entry.  This must be done for a few reasons: 1. Make sure the
// MAC is still accepting data from our fifo.  2. Make sure we do not overwrite data in our tfifo.  
// 3.  In order to set @mpkt_tx_ok_cnt.
// For performance reasons we do not want to wait for the xmit_ptr to equal our tfifo_entry. 
//
// In comparing xmit_ptr to tfifo_entry there are 3 possibilities:  they are equal, the 
// xmit_ptr > tfifo_entry (wrap condition), and xmit_ptr < tfifo_entry (no wrap condition).
//
// xmit_ptr = tfifo_entry condition:
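// Reset @mpkt_tx_ok_cnt to 5 (this is the value the code at wait_for_xmit_ptr# loads).
// NOTE(review): this comment previously said 6 (max mpackets in our tfifo = 7, minus 1 for the
// element we are validating here); the code uses 5 -- confirm which margin is intended.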
//
// No wrap condition: eg. $xmit_ptr = 4, and tfifo_entry = 8
// check $xmit_ptr - (tfifo_entry - 4) >= 0.  which is to say: $xmit_ptr - tfifo_entry + 4 >= 0  
// At most we can have 7 mpackets for this port in our TFIFO at a time.   Therefore,
// if our tfifo currently has less than or equal to 2 mpackets for our port right now (we don't want
// to wait until xmit_ptr = tfifo_entry for performance reasons), then we can send at least 5 more.  And
// since we are sending an mpacket right now, 5 - 1 = 4 = @mpkt_tx_ok_cnt reset value.
// Q. Why check xmit_ptr - 4?   A. Since this port fills every other tfifo element, 
// max of 2 mpackets in our tfifo for our port right now * 2 (every other fifo element) = 4.
//
// Wrap condition: eg. $xmit_ptr could be 12, and tfifo_entry could be 0
// check $xmit_ptr - [(tfifo_entry + 16) - 4] >= 0.  or $xmit_ptr - tfifo_entry - 12 >= 0
// The 16 is to account for the wrap.  The 4 is for the same reasons listed in the no wrap condition above.
// @mpkt_tx_ok_cnt is reset to 4 for same reasons as no wrap condition.
//
//
//	inputs:
//		tfifo_ctl_wd0	: Status word to be written to the transmit FIFO
//		tfifo_entry		: IXP-1200 transmit FIFO element to be validated
//		port			: Port that the data is to be sent to
//
// Control flow summary:
//		write status word -> (wait for sdram unless SDRAM_VALIDATE) -> wait for inter-thread signal
//		-> decrement @mpkt_tx_ok_cnt; if still > 0 go straight to write_validate#
//		-> otherwise poll XMIT_PTR until it is close enough, reload @mpkt_tx_ok_cnt (5 or 4),
//		   wait for the port ready bit, then validate and signal the next thread.
// Also uses from the enclosing scope: temp, const_1, bit15on, port_rdy_bit, next_thread_ir,
// $xmit_ptr, $tx_rdy_copy.  The "port" argument and the local _tfifo_entry_m1 are not
// referenced in the body.
#macro Tx_Validate[$tfifo_ctl_wd0, tfifo_entry, port]
.local _tfifo_entry_m1 _temp
	alu_shf[temp, 0, +8, tfifo_entry, <<1]					; status address = element no. x 2
	alu_shf[temp, temp, +, 1, <<7]								;	+ 128
    t_fifo_wr [$tfifo_ctl_wd0, temp, 0, 1], ctx_swap		; write the status word

	#ifndef SDRAM_VALIDATE
		ctx_arb[sdram]										; wait for packet data to be transferred to tfifo
	#endif

	ctx_arb[inter_thread]									; wait for my turn to validate

	alu[@mpkt_tx_ok_cnt, @mpkt_tx_ok_cnt, -, const_1]		; check for room in MAC's tfifo
	br>0[write_validate#]									; if there is enough room, then validate

wait_for_xmit_ptr#: 
	// We don't know if the port is still ready.
	// NOTE(review): this XMIT_RDY_LO read is issued without a ctx_swap or signal; the value
	// that is actually tested comes from the second read at check_port_rdy_loop# -- confirm
	// whether this first read is still needed.
	csr[read, $tx_rdy_copy, XMIT_RDY_LO]
	alu[_temp, --, b, 5]
	csr[read, $xmit_ptr, XMIT_PTR], ctx_swap, defer[1]
	alu[@mpkt_tx_ok_cnt, --, b, _temp]						; (defer slot) set default reset value 5 for case xmit_ptr = tfifo_entry

	alu[_temp, $xmit_ptr, -, tfifo_entry]					; figure out where the xmit_ptr is
	br=0[check_port_rdy#] 									; if xmit_ptr = tfifo_entry then make sure port is rdy before validate
	br>0[ptr_wrapped#]										; xmit ptr > t_fifo_entry -> wrap condition

	// No wrap condition
	alu[_temp, _temp, +, 4]									; xmit_ptr - tfifo_entry + 4
	br>=0[mpkt_ok_eq_4#]									; within 4 elements behind us: close enough
	br[wait_for_xmit_ptr#]									; the xmit_ptr is not close enough yet

ptr_wrapped#:

	// Wrap condition
	alu[_temp, _temp, -, 12]								; xmit_ptr - tfifo_entry - 12
	br<0[wait_for_xmit_ptr#]								; the xmit_ptr is not close enough yet

mpkt_ok_eq_4#:
	// At most, we are allowed 7 mpackets in our tfifo at a time.  Since 2 could be in our fifo
	// right now, we can send 5 mpackets before checking the tx_rdy bits again.  We are sending
	// one mpacket right now, so reset counter to 4.
	alu[_temp, --, b, 4]
	alu[@mpkt_tx_ok_cnt, --, b, _temp]

check_port_rdy#:
check_port_rdy_loop#:
	//  if port not ready wait for port to be ready
	//
	csr[read, $tx_rdy_copy, XMIT_RDY_LO], ctx_swap
	alu[--, $tx_rdy_copy, and, port_rdy_bit]			; port_rdy_bit set at startup to 1 << port_num
	br=0[check_port_rdy_loop#]							; ready bit clear: read again

write_validate#:

	alu_shf[--, bit15on, OR, tfifo_entry, <<5]		; setup indirect data from element no.
	fast_wr[0, XMIT_VALIDATE], indirect_ref			; tell fbi to run with that element no.

	alu[--, --, b, next_thread_ir]					; next_thread_ir set during thread initialization
	fast_wr[0, inter_thd_sig], indirect_ref			; signal next thread
end_tx_validate#:
.endlocal
#endm







// Tx_Validate_SOP														; 8 insns + tfifo_wr ~30 
//	Write status and transmit validate for start of packet mpacket.
//  For SOP mpacket we must make sure that the previous mpacket has been received by 
//  the MAC and then check the transmit ready bit.  This special step is required because
//  the MAC can not have more than 2 packets in its TFIFO at a time.
//
//	Unlike Tx_Validate, this macro never takes the @mpkt_tx_ok_cnt shortcut: it always polls
//	XMIT_PTR until it is at tfifo_entry or (tfifo_entry - 1) mod 16, then waits for the port
//	ready bit, validates, signals the next thread, and finally reloads @mpkt_tx_ok_cnt with 5.
//
//	inputs:
//		tfifo_ctl_wd0	: Status word to be written to the transmit FIFO
//		tfifo_entry		: IXP-1200 transmit FIFO element to be validated
//		port			: Port that the data is to be sent to (not referenced in the body)
//	also uses (from the enclosing scope): temp, bit15on, port_rdy_bit, next_thread_ir,
//		$xmit_ptr, $tx_rdy_copy, @mpkt_tx_ok_cnt
//
#macro Tx_Validate_SOP[$tfifo_ctl_wd0, tfifo_entry, port]
.local _tfifo_entry_m1 _temp
	alu_shf[temp, 0, +8, tfifo_entry, <<1]						; status address = element no. x 2
	alu_shf[temp, temp, +, 1, <<7]									;	+ 128
    t_fifo_wr [$tfifo_ctl_wd0, temp, 0, 1], ctx_swap							; write the status word

	#ifndef SDRAM_VALIDATE
		ctx_arb[sdram]											; wait for packet data to be transferred to tfifo
	#endif

	ctx_arb[inter_thread]										; wait for my turn to validate

wait_for_xmit_ptr#:
	csr[read, $xmit_ptr, XMIT_PTR], ctx_swap
	// The MAC status is valid only if the xmit_ptr is at tfifo_entry, or tfifo_entry - 1.
	alu[--, $xmit_ptr, -, tfifo_entry]
	br=0[check_port_rdy_loop#] 

	alu[_tfifo_entry_m1, tfifo_entry, -, 1]				; check to see if ptr = tfifo_entry - 1
	alu[_tfifo_entry_m1, _tfifo_entry_m1, and, 0xf]		; limit to 0..15
	alu[--, $xmit_ptr, -, _tfifo_entry_m1]
	br!=0[wait_for_xmit_ptr#] 							; xmit_ptr is not close enough yet

check_port_rdy_loop#:
	//  if port not ready wait for port to be ready
	//
	csr[read, $tx_rdy_copy, XMIT_RDY_LO], ctx_swap
	alu[--, $tx_rdy_copy, and, port_rdy_bit]			; port_rdy_bit set at startup to 1 << port_num
	br=0[check_port_rdy_loop#]							; ready bit clear: read again


write_validate#:
	alu_shf[--, bit15on, OR, tfifo_entry, <<5]			; setup indirect data from element no.
	fast_wr[0, XMIT_VALIDATE], indirect_ref				; tell fbi to run with that element no.

	alu[--, --, b, next_thread_ir]						; next_thread_ir set during thread initialization
	fast_wr[0, inter_thd_sig], indirect_ref				; signal next thread to validate

	// Set number of non-SOP mpackets that can be sent to the tfifo before checking the 
	// xmit_rdy bit again.  
	// The MAC xmit_rdy threshold must be at least 7*64 bytes.  The ref_des sets it to 8*64
	alu[_temp, --, b, 5]
	alu[@mpkt_tx_ok_cnt, --, b, _temp]

.endlocal
#endm




// Tx_F_FreeBuf
//	return a fully transmitted packet buffer to its freelist with an sram push
//
//	inputs:
//		buf_offset		element offset of the LAST mpacket sent; rewound in place to the first
//						mpacket by subtracting @fast_portN_mpacket_cnt_m1
//		bank			bank bits; shifted down and used as the freelist id in the indirect reference
//	side effects:
//		@fast_portN_mpacket_cnt_m1 is cleared for the next packet
//		with PROFILE defined, the scratch counter at TOTAL_TRANSMITS is incremented
//	also uses (from the enclosing scope): const_0, bit20on, buf_descriptor_base
//
#macro Tx_F_FreeBuf[buf_offset, bank]
.local descriptor_ptr
	#ifdef PROFILE
		immed[descriptor_ptr, TOTAL_TRANSMITS]
		scratch[incr, --, descriptor_ptr, 0, 1]
	#endif

	//buf_offset is an offset to the last mpacket transmitted.  we need to back up to top of the packet
	#ifdef FAST_PORT1
		alu[buf_offset, buf_offset, -, @fast_port1_mpacket_cnt_m1]
		alu[@fast_port1_mpacket_cnt_m1, --, b, const_0]                 ; initialize for next packet
	#endif
	#ifdef FAST_PORT2
		alu[buf_offset, buf_offset, -, @fast_port2_mpacket_cnt_m1]
		alu[@fast_port2_mpacket_cnt_m1, --, b, const_0]                 ; initialize for next packet
	#endif
		// descriptor_ptr = (buf_offset >> 3) - 1
		// NOTE(review): presumably this reverses the <<3 applied when buf_offset was built
		// from $packet_link0; the reason for the -1 is not visible here -- confirm.
		alu_shf[descriptor_ptr, 1, B-A, buf_offset, >>3]				; get back relative address

	// the shift below must match the bank position selected by BANK_SIZE_16MB (22:21 vs 21:20)
	#ifdef BANK_SIZE_16MB
		alu_shf[--, bit20on, OR, bank, >>5]								; merge ov bit with freelist id/bank
	#else																; if 8MB BANK size
		alu_shf[--, bit20on, OR, bank, >>4]								; merge ov bit with freelist id/bank
	#endif
		sram[push, --, descriptor_ptr, buf_descriptor_base, 0], indirect_ref
.endlocal
#endm


// Tx_F_ReadHead
//	read the 2-word packet link at the queue head into $packet_link0/$packet_link1 and
//	advance the queue's head pointer
//
//	inputs:
//		abs_head_ptr	absolute register holding this queue's head pointer (updated)
//		buf_offset		not referenced in the body
//	outputs:
//		head_ptr		index that was read: 1 + the 8-bit add operand taken from abs_head_ptr
//		abs_head_ptr	1 + the 8-bit add operand taken from head_ptr (written in the sram defer slot)
//	also uses (from the enclosing scope): const_1, queue_descriptor_base
//
#macro Tx_F_ReadHead[buf_offset, head_ptr, abs_head_ptr]
	alu[head_ptr, const_1, +8, abs_head_ptr]							; start at 1
	sram[read, $packet_link0, queue_descriptor_base, head_ptr, 2],
					optimize_mem, ctx_swap, defer[1]					; read packet_link 2 words get next head, status 
	alu[abs_head_ptr, const_1, +8, head_ptr]							; (defer slot) increment head ptr
#endm


// Tx_F_ReadPacketLink
//	read the next packet link for the selected queue and derive buf_offset from it
//
//	inputs:
//		queue			queue select in bits 3:1; chooses which @head_ptr_qN is used
//	outputs:
//		buf_offset		built from $packet_link0 shifted left by 3 (16-bit add form)
//		$packet_link0/$packet_link1 hold the link that was read (via Tx_F_ReadHead)
//	also uses (from the enclosing scope): temp, const_x38
//
//	Dispatch is a computed jump: temp = (queue << 2) & 0x38, i.e. queue-select x 8.
//	NOTE(review): each read_headN# entry below appears to expand to 5 instructions
//	(3 from Tx_F_ReadHead + br + deferred alu_shf) while the index stride is 8.  The existing
//	"peel off queue number (= zero)" comment in Tx_ReadAssignment_1f suggests only entry 0 is
//	exercised -- verify the table spacing before using a non-zero queue select.
#macro	Tx_F_ReadPacketLink[buf_offset, queue]		; (7 instr)
.local head_ptr
	alu[temp, const_x38, AND, queue, <<2]			; queue select == 3:1, jump target is 8 insns
	jump[temp, read_pl#], 
		targets[read_head0#, read_head1#, read_head2#,read_head3#, read_head4#,
				read_head5#, read_head6#, read_head7#], 
		defer[3]
	// three defer slots of the jump, filled with nops
	nop
	nop
	nop
read_pl#:
read_head0#:
	Tx_F_ReadHead[buf_offset, head_ptr, @head_ptr_q0]
	br[read_pl_end#], defer[1]
	alu_shf[buf_offset, 0, +16, $packet_link0, <<3]		; (defer slot) buf_offset from link word 0
read_head1#:
	Tx_F_ReadHead[buf_offset, head_ptr, @head_ptr_q1]
	br[read_pl_end#], defer[1]
	alu_shf[buf_offset, 0, +16, $packet_link0, <<3]		; (defer slot) buf_offset from link word 0
read_head2#:
	Tx_F_ReadHead[buf_offset, head_ptr, @head_ptr_q2]
	br[read_pl_end#], defer[1]
	alu_shf[buf_offset, 0, +16, $packet_link0, <<3]		; (defer slot) buf_offset from link word 0
read_head3#:
	Tx_F_ReadHead[buf_offset, head_ptr, @head_ptr_q3]
	br[read_pl_end#], defer[1]
	alu_shf[buf_offset, 0, +16, $packet_link0, <<3]		; (defer slot) buf_offset from link word 0
read_head4#:
	Tx_F_ReadHead[buf_offset, head_ptr, @head_ptr_q4]
	br[read_pl_end#], defer[1]
	alu_shf[buf_offset, 0, +16, $packet_link0, <<3]		; (defer slot) buf_offset from link word 0
read_head5#:
	Tx_F_ReadHead[buf_offset, head_ptr, @head_ptr_q5]
	br[read_pl_end#], defer[1]
	alu_shf[buf_offset, 0, +16, $packet_link0, <<3]		; (defer slot) buf_offset from link word 0
read_head6#:
	Tx_F_ReadHead[buf_offset, head_ptr, @head_ptr_q6]
	br[read_pl_end#], defer[1]
	alu_shf[buf_offset, 0, +16, $packet_link0, <<3]		; (defer slot) buf_offset from link word 0
read_head7#:
	Tx_F_ReadHead[buf_offset, head_ptr, @head_ptr_q7]
	br[read_pl_end#], defer[1]
	alu_shf[buf_offset, 0, +16, $packet_link0, <<3]		; (defer slot) buf_offset from link word 0
read_pl_end#:
.endlocal
#endm


//-------------------------------end macros----------------------------------------

		
// Transfer-register layout and the symbolic names used for them in this file.
// NOTE(review): $tx_rdy_copy maps to $xfer6, which is not listed in the .xfer_order
// below, and $xmit_ptr is used in the validate macros without a synonym here --
// confirm both are declared as intended.
.xfer_order $xfer0 $xfer1 $xfer2 $xfer3 $xfer4 $xfer5
.operand_synonym $packet_link0 $xfer2				; packet links descriptors from queue
.operand_synonym $packet_link1 $xfer3
.operand_synonym $tfifo_ctl_wd0 $xfer4				; status to tfifo
.operand_synonym $tfifo_ctl_wd1 $xfer5
.operand_synonym $tx_rdy_copy $xfer6				; copy of XMIT_RDY_LO read by the validate/skip macros


// StartUp
//	one-time register setup for a fill thread: base addresses, constants, the port
//	ready mask and the bank mask.  It then waits for the scheduler's inter-thread
//	signal, lets context 1 signal itself so the validate chain can start, writes one
//	zeroed quadword to sdram, and enters the main loop at tx_validated#.
StartUp#:
	#ifdef FAST_PORT1
		immed[queue_descriptor_base, XMIT_FPORT_DESCRIPTOR_BASE]
	#else ;FAST_PORT2
		immed[queue_descriptor_base, XMIT_FPORT_DESCRIPTOR_BASE_2]
	#endif
    immed32[pkt_buffer_base, SDRAM_PKT_BUFFER_BASE]
	immed[buf_descriptor_base, SRAM_BUFF_DESCRIPTOR_BASE]
	immed[$tfifo_ctl_wd1, 0]										; second status word always 0
	alu[const_1, --, b, 1]
    immed[const_x38, 0x38]											; mask used by Tx_F_ReadPacketLink's jump index

	// port_rdy_bit = 1 << port number; tested against XMIT_RDY_LO in the validate/skip macros
	#ifdef FAST_PORT1
		alu[port_rdy_bit, --, b, 1, <<FAST_PORT1]
	#endif
	#ifdef FAST_PORT2
		alu[port_rdy_bit, --, b, 1, <<FAST_PORT2]
	#endif

	#ifdef BANK_SIZE_16MB
		alu_shf[bank_mask, --, B, 3, <<21]							; bank_mask = 22:21
	#else	
		alu_shf[bank_mask, --, B, 3, <<20]							; bank_mask = 21:20
	#endif


// registers to be used
//	queue_descriptor_base	SRAM_QUEUE_DESCRIPTOR_BASE
//	pkt_buffer_base			SDRAM_PKT_BUFFER_BASE
//	buf_descriptor_base		SRAM_buf_descriptor_base
//	tempa					local variable
//	queue					queue offset
//	tfifo_entry				tfifo element


	immed[const_0, 0]
	immed[const_2, 2]
	immed[skip_bit_on, SKIP_BIT_SET]
	immed[const_fc_x256, 0xfc, <<8]					; 0xfc00: mask for status bits 15:10
	alu_shf[bit15on, --, B, 1, <<15]				; setup indirect ov bit to save a cycle
	alu_shf[bit20on, --, B, 1, <<20]				; setup indirect ov bit to save a cycle

	ctx_arb[inter_thread]							; wait for scheduler to initialize


	// The first fill thread signals itself so that it will validate and not get stuck at ctx_arb[inter_thread]
	// (contexts 2 and 3 skip the self-signal; 17 / 21 are the thread numbers signaled for ctx3 in thread init)
	br=ctx[2, skip_fast_wr#]
	br=ctx[3, skip_fast_wr#]
	#ifdef EVEN
		fast_wr[17, inter_thd_sig]
	#else ;ODD
		fast_wr[21, inter_thd_sig]
	#endif

	skip_fast_wr#:


	#ifdef SDRAM_VALIDATE
		alu_shf[bit20_15on, bit20on, or, bit15on]					; combined bits 20 and 15 for sdram t_fifo_wr indirect refs
	#endif

	// initialize sdram locations so initial skips will pass the transactor check
	// NOTE(review): this tests ODD while the personality is chosen with "#ifdef EVEN / #else"
	// elsewhere in the file -- confirm the build defines ODD for the odd engine.
	#ifdef ODD
	#ifdef BANK_SIZE_16MB
		alu_shf[bank, --, B, 1, <<21]								; bank_mask = 22:21
	#else	
		alu_shf[bank, --, B, 1, <<20]								; bank_mask = 21:20
	#endif
	#else
		immed[bank, 0]
	#endif

.xfer_order $$xfer0 $$xfer1
	immed[$$xfer0, 0]
	immed[$$xfer1, 0]
	// one zeroed quadword at pkt_buffer_base + bank; no signal or wait is attached to this write
	sdram[write, $$xfer0, pkt_buffer_base, bank, 1]
	
	br[tx_validated#]													; go read first assignment

// port_info_restored
//	entered from transmit_done# with port_info freshly loaded.  Extracts the
//	elements-remaining and bank fields, then either continues an in-progress packet
//	(tx_not_sop#) or starts a new one (the SOP processing below).
port_info_restored#:
	alu[ele_remaining, 0x1f, AND, port_info, >>19]						; extract elements remaining
	br>0[tx_not_sop#], defer[1]											; packet in progress: continue it
	// defer slot of the branch above: unpack bank from port_info 17:16 (runs on both paths)
	#ifdef BANK_SIZE_16MB
		alu[bank, bank_mask, AND, port_info, <<5]
	#else
		alu[bank, bank_mask, AND, port_info, <<4]
	#endif

// ******************************** SOP PROCESSING *****************************************

	// If we get here then port_info said that there were no mpackets remaining from the last packet to be 
	// output on this port.  The next step is to read the next packet's link.  We must be careful here to 
	// not allow two threads to read the next packet link at the same time.  The variable @reading_next_link_flag
	// is used to accomplish this.
	alu[--, --, b, @reading_next_link_flag]
	br=0[tx_sop#]                            	; if not reading new link, then go read it

	
	ctx_arb[voluntary]							; some other thread is reading the link now, so wait for it to read it and update port_info
	br[tx_restore#] 							; go read the freshly updated port info  


tx_sop#:
		// Set flag to prevent other threads from reading next packet link before we update port_info
		alu[@reading_next_link_flag, --, b, const_1]            
		Tx_F_ReadPacketLink[buf_offset, queue]					; (7) read pl, update fport_task
		ld_field_w_clr[ele_remaining, 0001, $packet_link1]		; element count = low byte of link word 1
		#ifdef BANK_SIZE_16MB
			alu[bank, bank_mask, AND, $packet_link1, <<5] 
		#else
			alu[bank, bank_mask, AND, $packet_link1, <<4]
		#endif
		alu_shf[orig_status_byte, $packet_link1, AND, const_fc_x256]																; keep bits 15:10 (qw/byte count) of link word 1

		.if(ele_remaining == 1)											; if at eop/sop
tx_eop_sop#:
			alu[@reading_next_link_flag, --, b, const_0]          	; clear flag now because we have eop
			Tx_SendLastData[bank, buf_offset, orig_status_byte, tfifo_entry]	; (6) transfer packet data from sdram to tfifo
			Tx_F_FreeBuf[buf_offset, bank]							; (3) free the packet buffer				
			alu_shf[status_byte, orig_status_byte, OR, 3, <<8]		; packet link byte count, eop sop
		.else
tx_not_eop_sop#:
			Tx_SendData[bank, buf_offset, tfifo_entry]				; send an mpacket
			Tx_F_SavePortInfo[orig_status_byte, ele_remaining, bank, buf_offset] ; save port info (decr ele_remaining)
			alu[@reading_next_link_flag, --, b, const_0]          	; clear flag after Tx_F_SavePortInfo
			// Increment mpacket count so that scheduler will schedule another mpacket for this port
			#ifdef FAST_PORT1
				immed[temp, XMIT_FPORT1_ELE_COUNT]										; elements queued to fast port
				scratch [incr, --, temp, 0, 1]						; increment the current count
			#else
			#ifdef FAST_PORT2
				immed[temp, XMIT_FPORT2_ELE_COUNT]										; elements queued to fast port
				scratch [incr, --, temp, 0, 1]						; increment the current count
			#endif ;FAST_PORT2
			#endif ;FAST_PORT1
			immed[status_byte, 0xfd00]								; 0xfc00 | 1<<8: full byte count, sop, no eop

		.endif
	#ifdef FAST_PORT1
		alu[$tfifo_ctl_wd0, status_byte, OR, FAST_PORT1]			; merge status_byte and port num
	#endif
	#ifdef FAST_PORT2
		alu[$tfifo_ctl_wd0, status_byte, OR, FAST_PORT2]			; merge status_byte and port num
	#endif

	Tx_Validate_SOP[$tfifo_ctl_wd0, tfifo_entry, port]

transmit_01_done#:													; label used during debug
	// go read the next assignment for this context.
	// NOTE(review): a context other than 1..3 would fall through into tx_not_sop# below.
	br=ctx[1, read1#]
	br=ctx[2, read2#]
	br=ctx[3, read3#]
// ***************************** END SOP PROCESSING *****************************************



// ***************************** NOT SOP PROCESSING *****************************************
// tx_not_sop
//	continue a packet that is already in progress (ele_remaining > 0 in port_info).
//	Advances the buffer offset by one element, sends either the last or a full
//	mpacket, updates the saved port info, then validates with Tx_Validate.
//	On entry: port_info, ele_remaining and bank were set up at port_info_restored#.
tx_not_sop#:															; not sop
		alu[buf_offset, 1, +16, port_info]								; get next buf_offset
		alu[port_info, 1, +, port_info]									; add 1 element to buf_offset in port_info
		.if(ele_remaining == 1)											; if at eop/ not sop
tx_eop_not_sop#:
			// Warning: Do not ctx_arb until call to Tx_F_UpdatePortInfo
			ld_field_w_clr[orig_status_byte, 0010, port_info, >>16]	; status bits 31:26 of port_info back down to 15:10
			Tx_SendLastData[bank, buf_offset, orig_status_byte, tfifo_entry]		; transfer packet data from sdram to tfifo
			Tx_F_FreeBuf[buf_offset, bank]							; free the packet buffer				
			Tx_F_UpdatePortInfo[port_info]							; decr ele_remaining, at port info
			alu_shf[status_byte, orig_status_byte, OR, 2, <<8]		; packet link byte count, eop no sop
		.else
			Tx_SendData[bank, buf_offset, tfifo_entry]				; send an mpacket
			Tx_F_UpdatePortInfo[port_info]							; decr ele_remaining, at port info
			immed[status_byte, 0xfc00]								; byte count 63(64), no eop no sop
tx_not_eop_not_sop#:
			// Increment mpacket count so that scheduler will schedule another mpacket for this port
			#ifdef FAST_PORT1
				immed[temp, XMIT_FPORT1_ELE_COUNT]					; elements queued to fast port
				scratch [incr, --, temp, 0, 1]						; increment the current count
			#endif 													; FAST_PORT1
			#ifdef FAST_PORT2
				immed[temp, XMIT_FPORT2_ELE_COUNT]					; elements queued to fast port
				scratch [incr, --, temp, 0, 1]						; increment the current count
			#endif 													; FAST_PORT2
		.endif
tx_sent#:
	#ifdef FAST_PORT1
		alu[$tfifo_ctl_wd0, status_byte, OR, FAST_PORT1]			; merge status_byte and port num
	#endif
	#ifdef FAST_PORT2
		alu[$tfifo_ctl_wd0, status_byte, OR, FAST_PORT2]			; merge status_byte and port num
	#endif
	Tx_Validate[$tfifo_ctl_wd0, tfifo_entry, port]
transmit_02_done#:													; label used during debug

// ***************************** END NOT SOP PROCESSING *************************************





// tx_validated
//	top of the main loop.  Each context reads its own assignment register
//	(@assign1 / @assign2 / @assign3).  A real assignment continues at tx_restore#;
//	a skipped element comes back here.  Every Tx_ReadAssignment_1f expansion ends
//	with a branch, so read1# does not fall through into read2#.
//	NOTE(review): a context other than 1..3 would fall through into read1#.
tx_validated#:
	br=ctx[1, read1#]
	br=ctx[2, read2#]
	br=ctx[3, read3#]
read1#:								
	Tx_ReadAssignment_1f[tfifo_entry, port, queue, @assign1, tx_restore#, tx_validated#]		; read the next assignment from the tx_scheduler
read2#:
	Tx_ReadAssignment_1f[tfifo_entry, port, queue, @assign2, tx_restore#, tx_validated#]		; read the next assignment from the tx_scheduler
read3#:
	Tx_ReadAssignment_1f[tfifo_entry, port, queue, @assign3, tx_restore#, tx_validated#]		; read the next assignment from the tx_scheduler

tx_restore#:
	Tx_F_RestorePortInfo[port_info]													; check for long packet continuation

transmit_done#:
	br[port_info_restored#]												; iterate
		
.endlocal
