//------------------------------------------------------------------------------------
//                                                                     
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  1998-1999 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                      
//-----------------------------------------------------------------------------------
// tx_fill.uc
// transmit filler thread
//-----------------------------------------------------------------------------------
//
// Version = 1.0.NoBldNum
//
// system: SA1200
// subsystem: transmit microcode
// usage: reference design
// author: dfh 12/08/97
// revisions:
//		dfh		5/20/98		base level 2
//		dfh		7/4/98		remove reference to XMIT_PW2E_ADDR
//		dfh		9/9/98		variable sized packets	
//		dfh		12/16/98	BL4 remove include config.h
//      mff     9/10/99     ifdef A1_CHIP - workaround optimize_mem hw bug
//
//-----------------------------------------------------------------------------------

// design:
//
// 1. Transmit Arbiter decides for each port which priority queue to use
//		It is assumed a port can have n priorities. (for example n=8)
//		This is placed in an 32x4 array shared as a global GPR with Scheduler
//
// 2. Transmit Scheduler assigns ports and T-FIFO Elements to TFill threads
//		the task assignment message consists of
//		+------+---------+----------+---------------+
//		|valid |  unused |  Element |    queue      |
//		| 31   |  30:12  |   11:8   |     7:0       |
//		+------+---------+----------+---------------+
//		valid			1 = assign is valid
//		Element			identifies the tfifo element
//		Queue			identifies the queue
//
// 3. Four TFill threads will be used to copy packet data in SDRAM to T-FIFO
//		TFill can calculate the packet location given port, queue and 
//		queue_descriptor_base
//
// 4. The Scheduler writes predetermined task assignment mailboxes for
//	assignments to the TFills. A 256 word block is used as a fifo for these assignments.
//
// 7. The TFill thread read task assignment mailboxes to pick up task assignment. If the
//	assignment valid bit is on, it clears it, increments the global message id so next
//	TFill thread can pickup the next assignment.
//
// 8. TFill thread restores state for the port in the assignment. If a long packet (> 64 bytes)
//	is in progress, it gets port_info which holds status byte, elements remaining, freelist,
//	and buffer offset. If state had 0 elements remaining, this is a new SOP.
// 
// 9. the four cases are
//		SOP/not EOP		read lock, get queue descriptor and packet link, transfer data, unlock
//						write status, save port info
//
//		SOP/EOP			read lock, get queue descriptor and packet link, transfer data,	
//						write port vector XMIT_PWP_VECTOR if queue going empty, 
//						update queue head pointer
//						if next packet link, update next packet link pointer
//						write unlock queue, write status
//
//		not SOP/not EOP	transfer data, update port info(decrement elements remaining), write status
//
//		not SOP/EOP		read lock, get queue descriptor and packet link, transfer data,	
//						write port vector XMIT_PWP_VECTOR if queue going empty, 
//						update queue head pointer
//						if next packet link, update next packet link pointer
//						update port info (decrement elements remaining to 0)
//						write unlock queue, write status	
//		
//
//		if multicast (descriptor kind = 1)
//			TBD there is a count down until it has been posted to all ports
//			then the descriptor is freed
//			
// 10. When TFill has moved data to tfifo,	it writes status validate to FBI



// this ucode goes to fbox 3


// sdram, sram, scratch shared addresses
#include "mem_map.h"

// standard macros
#include "stdmac.uc"

//-------------------------------macros--------------------------------------------

// Tx_ReadAssignment
//		claim the next transmit assignment posted by the tx_scheduler
//	
//		input/output:	global_msg_id	shared by all tx_fill contexts, points to next assignment;
//										copied to captured_id, then advanced for the next tfill
//		outputs:
//						tfifo_entry		one of transmit fifo elements (0-15), mailbox bits 11:8
//						queue			output queue = low byte of (mailbox <<1)
//										original note: bits 7:3 = output port number
//										NOTE(review): the other macros in this file take the port
//										from queue >>4 or (queue >>3) AND 0x1e - confirm bit range
//
//		registers used without being parameters:
//						const_1, task_msg_base, $task_assign_mb, $tfifo_ctl_wd0, prev_entry,
//						signal sdram_sig; captured_id is written as scratch state
//
// tx_scheduler places this assignment in the longword scratch location 
//		XMIT_TASK_MSG_BASE + msg_id
//
//		The tx task assignment mailbox
//		+------+---------+----------+---------------+
//		|valid |  unused |  Element |    queue      |
//		| 31   |  30:12  |   11:8   |     7:0       |
//		+------+---------+----------+---------------+
//		valid			1 = assign is valid
//		Element			identifies the tfifo element
//		Queue			identifies the queue
//
//		The macro does not exit until a mailbox with the valid bit set has been read.
//		While polling, if sdram_sig is found asserted, it writes the status word and
//		the transmit validate for prev_entry (Tx_ValidateNoArb) before polling again.


#macro Tx_ReadAssignment[tfifo_entry, queue, global_msg_id]

	alu[captured_id, --, B, global_msg_id]									; this thread's mailbox index
	alu[global_msg_id, const_1, +4, global_msg_id]							; increment message for next tfill
																			; NOTE(review): "+4" op presumably keeps the id within 4 bits - confirm
read_again#:
	// test-and-clear on the mailbox; StartUp preloads the write side of
	// $task_assign_mb with 0x80000000 "for test and clear". The thread swaps out
	// until the reference completes.
	scratch[bit_wr, $task_assign_mb, captured_id, task_msg_base, test_and_clear_bits], ctx_swap
	br_bset[$task_assign_mb, 31, got_assign#], defer[2]						; valid bit set: assignment taken. next 2 insns fill the defer slots
																			; (original note: can't put mem op after a p3 branch)
	alu_shf[tfifo_entry, 0xf, AND, $task_assign_mb, >>8]					; defer slot 1: element number from bits 11:8
	ld_field_w_clr[queue, 0001, $task_assign_mb, <<1]						; defer slot 2: quadword align

	// not valid: poll the same mailbox again
	br_!signal[sdram_sig, read_again#]										; sdram signal not asserted yet: just poll again
	Tx_ValidateNoArb[$tfifo_ctl_wd0, prev_entry]							; write status and transmit validate
	br[read_again#]
	
got_assign#:
#endm



// Tx_RestorePortInfo							; 7 insns								
//	check for long packet continuation: copy the selected @portN_in_prog
//	register into port_info
//
//	inputs:	queue		8:4 port number 
//						NOTE(review): the code selects the entry with (queue >>3) AND 0x1e,
//						i.e. 2 x queue bits 7:4 - confirm the documented bit range
//	output: port_info	31:26 status_byte 
//						23:19 elements remaining
//						18:16 freelist	
//						15:0 buf_offset 	
//
//	writes temp as scratch.
//
//	The table below must keep exactly two instructions per port (branch plus the
//	copy in its defer slot) because the jump offset is port*2. The port 15 entry is
//	the last one, so it is a single instruction that falls through to end#.
//
#macro Tx_RestorePortInfo[port_info, queue]	
	alu[temp, 0x1e, AND, queue, >>3]			; each target is offset by port*2
	jump[temp, restore_array#]					; dispatch into the table
restore_array#:
	br[end#], defer[1]							; port 0 (offset 0)
	alu[port_info, --, B, @port0_in_prog]
	br[end#], defer[1]							; port 1 (offset 2)
	alu[port_info, --, B, @port1_in_prog]
	br[end#], defer[1]							; port 2
	alu[port_info, --, B, @port2_in_prog]
	br[end#], defer[1]							; port 3
	alu[port_info, --, B, @port3_in_prog]
	br[end#], defer[1]							; port 4
	alu[port_info, --, B, @port4_in_prog]
	br[end#], defer[1]							; port 5
	alu[port_info, --, B, @port5_in_prog]
	br[end#], defer[1]							; port 6
	alu[port_info, --, B, @port6_in_prog]
	br[end#], defer[1]							; port 7
	alu[port_info, --, B, @port7_in_prog]
	br[end#], defer[1]							; port 8
	alu[port_info, --, B, @port8_in_prog]
	br[end#], defer[1]							; port 9
	alu[port_info, --, B, @port9_in_prog]
	br[end#], defer[1]							; port 10
	alu[port_info, --, B, @port10_in_prog]
	br[end#], defer[1]							; port 11
	alu[port_info, --, B, @port11_in_prog]
	br[end#], defer[1]							; port 12
	alu[port_info, --, B, @port12_in_prog]
	br[end#], defer[1]							; port 13
	alu[port_info, --, B, @port13_in_prog]
	br[end#], defer[1]							; port 14
	alu[port_info, --, B, @port14_in_prog]
	alu[port_info, --, B, @port15_in_prog]		; port 15 (offset 30): no branch, falls through
end#:	
#endm



// Tx_SavePortInfo									; 7 insns
//	save elements remaining, status byte and buffer offset in global port in progress info
//
//	inputs
//		status_byte		byte enables for last element			to 31:26
//						(the whole byte is shifted to 31:24; callers in this file pass it
//						masked with 0xfc)
//		ele_remaining	elements left to send in the packet		to 23:19
//						decremented IN PLACE before being packed - the caller's register changes
//		freelist		freelist id								to 18:16
//		buf_offset		elements offset in sdram (elements)		to 15:0
//						ORed in without masking
//		queue			8:4 port number 
//						NOTE(review): the code selects the entry with (queue >>3) AND 0x1e,
//						i.e. 2 x queue bits 7:4 - confirm the documented bit range
//
//	writes temp and port_info as scratch; stores port_info to the selected @portN_in_prog.
//
//	The table below must keep exactly two instructions per port because the jump
//	offset is port*2; the port 15 entry is last and falls through to end#.
//
#macro Tx_SavePortInfo[status_byte, ele_remaining, freelist, buf_offset, queue] 
	alu[temp, 0x1e, AND, queue, >>3]						; each target is offset by port*2
	alu_shf[ele_remaining, ele_remaining, -, 1]				; decrement elements remaining
	jump[temp, restore_array#], defer[3]					; next 3 insns run in the defer slots and build port_info
	alu_shf[port_info, freelist, OR, ele_remaining, <<3]	; 2:0 freelist, ele_remaining from bit 3
	alu_shf[port_info, port_info, OR, status_byte, <<8]		; status_byte from bit 8
	alu_shf[port_info, buf_offset, OR, port_info, <<16]		; move the above to the upper half, buf_offset in the lower half
restore_array#:
	br[end#], defer[1]										; port 0 (offset 0)
	alu[@port0_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 1 (offset 2)
	alu[@port1_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 2
	alu[@port2_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 3
	alu[@port3_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 4
	alu[@port4_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 5
	alu[@port5_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 6
	alu[@port6_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 7
	alu[@port7_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 8
	alu[@port8_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 9
	alu[@port9_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 10
	alu[@port10_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 11
	alu[@port11_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 12
	alu[@port12_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 13
	alu[@port13_in_prog, --, B, port_info]
	br[end#], defer[1]										; port 14
	alu[@port14_in_prog, --, B, port_info]
	alu[@port15_in_prog, --, B, port_info]					; port 15 (offset 30): no branch, falls through
end#:	
#endm



// Tx_UpdatePortInfo									; 7 insns								
//	decrement elements remaining in port_info and save it in global port in progress info
//
//	inputs:	
//			port_info	31:26 status_byte 
//						23:19 elements remaining
//						18:16 freelist	
//						15:0 buf_offset 	
//						modified IN PLACE (1 <<19 is subtracted) before it is stored
//			queue		8:4 port number
//						NOTE(review): the code selects the entry with (queue >>3) AND 0x1e,
//						i.e. 2 x queue bits 7:4 - confirm the documented bit range
//
//	writes temp as scratch; stores port_info to the selected @portN_in_prog.
//
//	The table below must keep exactly two instructions per port because the jump
//	offset is port*2.
//
#macro Tx_UpdatePortInfo[port_info, queue]	
	alu[temp, 0x1e, AND, queue, >>3]				; each target is offset by port*2
	jump[temp, restore_array#], defer[1]			; next insn runs in the defer slot
	alu_shf[port_info, port_info, -, 1, <<19]		; decrement elements remaining
restore_array#:
	br[end#], defer[1]								; port 0 (offset 0)
	alu[@port0_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 1 (offset 2)
	alu[@port1_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 2
	alu[@port2_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 3
	alu[@port3_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 4
	alu[@port4_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 5
	alu[@port5_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 6
	alu[@port6_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 7
	alu[@port7_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 8
	alu[@port8_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 9
	alu[@port9_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 10
	alu[@port10_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 11
	alu[@port11_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 12
	alu[@port12_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 13
	alu[@port13_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 14
	alu[@port14_in_prog, --, B, port_info]
	br[end#], defer[1]								; port 15 (offset 30); NOTE(review): Tx_RestorePortInfo and
													; Tx_SavePortInfo omit this last branch and fall through
	alu[@port15_in_prog, --, B, port_info]
end#:	
#endm



// Tx_ReadLock
//	read_lock the 2-word queue descriptor and extract the queue head pointer.
//	The packet link itself is NOT read here; callers issue that sram read
//	themselves after this macro.
//
//	parameters:
//		head_ptr				(out) queue head pointer = $queue_descriptor0 >>16
//		$queue_descriptor0		(out) first of the 2 transfer registers filled by the read_lock
//		$packet_link0			not referenced in the macro body
//	registers used without being parameters:
//		queue_descriptor_base, queue
//	outputs:
//		$queue_descriptor0		31:16 head relative address, 15:0 tail relative address
//		$queue_descriptor1		packet count
//		head_ptr				queue head pointer
//
//	The thread swaps out (ctx_swap) until the read_lock completes.
//
//	layout of the packet link words that callers read next (original notes):
//		$packet_link0			31:16 NPL, 15:0 element count or multicast pointer
//		$packet_link1			15:0 status with last element byte enables
//		
// packet_link format:
//										$packet_link0
//   		 +---------------------------------------------------------------------------+
//			 |						  NPL (next packet link)						     |
//			 |							     31:0									     |
//			 +---------------------------------------------------------------------------+
//
//										$packet_link1
// from		 +-----+-------+------+------+--------+-------------+---+-----+-------+-------+
// rec_state |FPORT|qselect|inport|F seq |freelist|qw/byte count|eop|qtype|discard|outport|
//			 |     |       |      |      |        |             |   |     |       |  ele# |
//			 | 31  | 30:28 |27:24 |23:20 |  19:16 |     15:10   | 9 |  8  |   7   |  6:0  |
//			 +-----+-------+------+------+--------+-------------+---+-----+-------+-------+
//
// NOTE(review): the two descriptions of $packet_link0 above (NPL in 31:16 vs NPL in 31:0)
// disagree; confirm against the receive-side code that builds the packet link.
//
#macro Tx_ReadLock[head_ptr, $queue_descriptor0, $packet_link0]
#ifdef A1_CHIP
	// A1 chip has optimize_mem hw bug where ref is dropped. 
	sram[read_lock, $queue_descriptor0, queue_descriptor_base, queue, 2], 
					ctx_swap								; read the queue descriptor 2 words
#else
	sram[read_lock, $queue_descriptor0, queue_descriptor_base, queue, 2], 
					optimize_mem, ctx_swap						; read the queue descriptor 2 words
#endif
	alu_shf[head_ptr, --, B, $queue_descriptor0, >>16]		; isolate the head pointer (upper 16 bits of descriptor word 0)

#ifdef CHECK_BAD_ADDR
	// DEBUG, test head ptr for non zero, also that it is not above 0x800
	br=0[bad_head#]											; condition code from the head_ptr shift above
	alu[--, head_ptr, -, 8, <<8]							; head_ptr - 0x800
	br<0[good_head#]
bad_head#:
	// bad head pointer: park this thread on an inter-thread signal
	nop
	nop
	ctx_arb[inter_thread]
good_head#:
#endif
#endm



// Tx_SendLastData
//	start the sdram -> tfifo transfer for the last element of a packet.
//	The quadword count placed in the indirect word comes from the status byte
//	instead of being fixed (compare Tx_SendData).
//
//	inputs: 
//		buf_offset			element offset in packet buffer memory
//		status_byte			last element quadword and byte enables;
//							bits 7:5 are used as the quadword count
//		tfifo_entry			tfifo element 0-15
//	registers used without being parameters:
//		bit20on, pkt_buffer_base
//	registers written:
//		qw_offset, indirect (scratch), send_outstanding (set to 1)
//
//	The transfer is issued with sig_done and is NOT waited for here.

#macro Tx_SendLastData[buf_offset, status_byte, tfifo_entry]
	alu_shf[qw_offset, --, B, buf_offset, <<3]						; element offset -> quadword offset (x8)
	// AND (not B) so the 0x7 mask is actually applied: with op B the A operand was
	// ignored and any status bits above bit 7 would have leaked into the indirect word.
	alu[indirect, 0x7, AND, status_byte, >>5]						; extract quadword count from status
	alu[indirect, bit20on, OR, indirect, <<16]						; place quadword count in 19:16
	alu_shf[--, indirect, OR, tfifo_entry, <<7]						; put element no. in 10:7
    sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref, sig_done	; transfer to tfifo, sig done
	immed[send_outstanding, 1]										; remember a transfer is in flight
// note: ctx_arb[sdram] for this happens in Tx_Validate
#endm



// Tx_SendData
//		transfer 8 quadwords from sdram to tfifo element
//
//	inputs: 
//		buf_offset			element offset in packet buffer memory
//		tfifo_entry			tfifo element 0-15
//	registers used without being parameters:
//		bit20on, pkt_buffer_base
//	registers written:
//		qw_offset, indirect (scratch), send_outstanding (set to 1)
//
//	The transfer is issued with sig_done and is NOT waited for here.

#macro Tx_SendData[buf_offset, tfifo_entry]
	alu_shf[qw_offset, --, B, buf_offset, <<3]						; element offset -> quadword offset (x8)
	alu[indirect, bit20on, OR, 7, <<16]								; place quadword count 7 in 19:16
	alu_shf[--, indirect, OR, tfifo_entry, <<7]						; put element no. in 10:7
    sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref, sig_done	; transfer to tfifo, sig done
	immed[send_outstanding, 1]										; remember a transfer is in flight
#endm



// Tx_ClearPortVector												; 5 insns
//	clear the ports with packets vector bit location to flag a queue going empty
//
//	inputs:
//		queue		8:4 port number
//					3:1 qselect for that port
//					NOTE(review): the code uses queue >>4 as the port number - confirm
//					the documented bit range
//	registers used without being parameters:
//		pwp_addr
//	registers written:
//		portnum (scratch), $xfer3 (the same transfer register as $packet_link1)
//
//	The scratch reference is issued without a signal or context swap.
//			
#macro Tx_ClearPortVector[queue]
	alu_shf[portnum, --, B, queue, >>4]								; remember queue1 was shifted <<1 before
	alu_shf[portnum, portnum, B-A, 1, <<5]							; indirect shift left 32 - portnum
	alu[--, portnum, B, 0]											; no result kept; presents portnum as the A operand so the
																	; next insn's <<indirect can take its shift amount from it
																	; NOTE(review): confirm against the <<indirect definition
	alu_shf[$xfer3, --, B, 1, <<indirect]							; single-bit mask for this port
	scratch [bit_wr, $xfer3, pwp_addr, 0, clear_bits]					; clear bit for this port
#endm



// Tx_WriteUnlock													; 13 insns
//	insert new head pointer and update packet count for a queue, then
//	write_unlock the queue descriptor
//
//	inputs:
//		queue					offset from queue_descriptor_base
//		tail_ptr				15:0 previous tail; its upper half is overwritten IN PLACE
//								with the new head
//		$packet_link0			new head
//		$queue_descriptor1		packet count
//
//	constant input:
//		queue_descriptor_base	array of all queues, each occupies 2 longwords
//
//	registers written:
//		que_packet_count (scratch), $queue_descriptor0, $queue_descriptor1,
//		plus whatever Tx_ClearPortVector writes when the count does not stay above 0
//
//	The write_unlock is issued without a signal or context swap.
//
#macro Tx_WriteUnlock[queue, tail_ptr, $packet_link0, $queue_descriptor1]
	alu[que_packet_count, $queue_descriptor1, -, 1]					; decrement the packet count
	br>0[packets_remaining#]										; still packets queued: leave the port vector alone
		Tx_ClearPortVector[queue]									; 5 insns flag scheduler port has no packets
packets_remaining#:
	ld_field[tail_ptr, 1100, $packet_link0, <<16]					; merge existing tail with new head_ptr
	alu_shf[$queue_descriptor0, --, B, tail_ptr];					; descriptor word 0 = head:tail
	ld_field_w_clr[$queue_descriptor1, 0011, que_packet_count]		; descriptor word 1 = new packet count
	sram[write_unlock, $queue_descriptor0, queue_descriptor_base, queue, 2]
#endm



// Tx_Unlock
//	issue an sram unlock for the queue descriptor at queue_descriptor_base + queue,
//	without writing it
//
//	inputs:
//		queue					offset from queue_descriptor_base
//	registers used without being parameters:
//		queue_descriptor_base
//
//	The reference is issued without a signal or context swap.
//
#macro Tx_Unlock[queue]
	sram[unlock, --, queue_descriptor_base, queue, 1]
#endm



// Tx_Validate														; 8 insns + tfifo_wr ~30 
//	write status and transmit validate
//
//	inputs:
//		$tfifo_ctl_wd0	status word to write for the element; the caller builds it
//						from status_byte (byte enables) and the port taken from queue
//		a_tfifo_entry	tfifo element number being validated
//	registers used without being parameters:
//		bit15on
//	registers written:
//		temp (scratch), send_outstanding (cleared to 0)
//
//	sequence: write the status word (swap out until done), wait for the sdram
//	signal of the outstanding data transfer, then issue the XMIT_VALIDATE fast write.
//
#macro Tx_Validate[$tfifo_ctl_wd0, a_tfifo_entry]								
	alu_shf[temp, --, B, a_tfifo_entry, <<1]						; status address = element no. x 2
	alu_shf[temp, temp, +, 1, <<7]									;	+ 128
    t_fifo_wr [$tfifo_ctl_wd0, temp, 0, 1],  ctx_swap				; write the status word

	ctx_arb[sdram], defer[1]										; wait for packet data to be transferred to tfifo
	immed[send_outstanding, 0]										; defer slot: no transfer pending any more
write_validate#:
	alu_shf[--, bit15on, OR, a_tfifo_entry, <<5]					; setup indirect data from element no.
	fast_wr[0, XMIT_VALIDATE], indirect_ref							; tell fbi to run with that element no.
#endm

// Tx_ValidateNoArb
//	same as Tx_Validate but without the ctx_arb[sdram] wait. In this file it is
//	used from Tx_ReadAssignment after br_!signal has already found sdram_sig asserted.
//
//	inputs:
//		$tfifo_ctl_wd0	status word to write for the element
//		a_tfifo_entry	tfifo element number being validated
//	registers used without being parameters:
//		bit15on
//	registers written:
//		temp (scratch), send_outstanding (cleared to 0)
//
#macro Tx_ValidateNoArb[$tfifo_ctl_wd0, a_tfifo_entry]								
	alu_shf[temp, --, B, a_tfifo_entry, <<1]						; status address = element no. x 2
	alu_shf[temp, temp, +, 1, <<7]									;	+ 128
    t_fifo_wr [$tfifo_ctl_wd0, temp, 0, 1],  ctx_swap, defer[1]		; write the status word
	immed[send_outstanding, 0]										; defer slot: no transfer pending any more

write_validate#:
	alu_shf[--, bit15on, OR, a_tfifo_entry, <<5]					; setup indirect data from element no.
	fast_wr[0, XMIT_VALIDATE], indirect_ref							; tell fbi to run with that element no.
#endm

// Tx_FreeBuf														; 3 insns
//	free the packet buffer: push its descriptor back with an sram push
//
//	inputs:
//		queue_descriptor0	queue descriptor word 0; (word >>16) - 1 is used as the
//							descriptor's relative address
//		freelist			freelist id, merged into the indirect word of the push
//							(not used in the CHECK_BAD_ADDR build, see below)
//	registers used without being parameters:
//		buf_descriptor_base, bit20on
//	registers written:
//		descriptor_ptr (scratch), temp (PROFILE and CHECK_BAD_ADDR builds only)
//
//	build options:
//		PROFILE			also increments the scratch counter TOTAL_TRANSMITS
//		CHECK_BAD_ADDR	kills the thread unless the absolute descriptor address is
//						above 0x500, and pushes without indirect_ref
//
#macro Tx_FreeBuf[queue_descriptor0, freelist]
	alu_shf[descriptor_ptr, 1, B-A, queue_descriptor0, >>16]			; relative address
#ifdef PROFILE
	immed[temp, TOTAL_TRANSMITS]
	scratch[incr, --, temp, 0, 1]									; count this transmit
#endif
#ifdef CHECK_BAD_ADDR
	alu[temp, descriptor_ptr, +, buf_descriptor_base]				; absolute descriptor address
	alu[--, temp, -, 5, <<8]										; temp - 0x500
	br>0[goodptr#]
	nop
	nop
	ctx_arb[kill]													; DEBUG: bad pointer, stop this thread
	nop
	nop
goodptr#:
	sram[push, --, temp, 0, 0]										; NOTE(review): no indirect_ref here, so freelist is ignored in this build
#else
	alu_shf[--, bit20on, OR, freelist, <<16]						; merge ov bit with freelist id
	sram[push, --, descriptor_ptr, buf_descriptor_base, 0], indirect_ref
#endif
#endm



//-------------------------------end macros----------------------------------------

		
// transfer registers, in the order the memory references fill/drain them,
// and the names the macros use for them
.xfer_order $xfer0 $xfer1 $xfer2 $xfer3 $xfer4 $xfer5 $xfer6
.operand_synonym $queue_descriptor0 $xfer0			; word 0 of the queue descriptor: head and tail pointers
.operand_synonym $queue_descriptor1 $xfer1			; word 1 of the queue descriptor: count
.operand_synonym $packet_link0 $xfer2				; packet link word 0
.operand_synonym $packet_link1 $xfer3				; packet link word 1
.operand_synonym $task_assign_mb $xfer4				; task assignment mailbox from tx_scheduler
.operand_synonym $tfifo_ctl_wd0 $xfer5				; tfifo status word 0
.operand_synonym $tfifo_ctl_wd1 $xfer6				; tfifo status word 1


// StartUp: every context runs this once. Everything here is plain register
// initialization; only context 0 additionally clears the shared @msg_id.
//
// registers set up here
//	task_msg_base			XMIT_TASK_MSG_BASE, transmit task assignment mailboxes
//	pwp_addr				XMIT_PWP_VECTOR, ports-with-packets vector in scratch
//	queue_descriptor_base	SRAM_QUEUE_DESCRIPTOR_BASE
//	buf_descriptor_base		SRAM_BUFF_DESCRIPTOR_BASE
//	pkt_buffer_base			SDRAM_PKT_BUFFER_BASE
//	const_1, const_fc		the constants 1 and 0xfc
//	bit15on, bit20on		single-bit constants used when building indirect words
//	@port0..15_in_prog		per-port info for a long packet in progress, 0 = none
StartUp#:
	// address bases
	immed[task_msg_base, XMIT_TASK_MSG_BASE]
	immed[pwp_addr, XMIT_PWP_VECTOR]
	immed[queue_descriptor_base, SRAM_QUEUE_DESCRIPTOR_BASE]
	immed[buf_descriptor_base, SRAM_BUFF_DESCRIPTOR_BASE]
    immed32[pkt_buffer_base, SDRAM_PKT_BUFFER_BASE]

	// constants kept in registers to save a cycle where they are used
	immed[const_fc, 0xfc]
	immed[const_1, 1]
	alu_shf[bit20on, --, B, 1, <<20]
	alu_shf[bit15on, --, B, 1, <<15]

	// transfer register preloads
	immed32[$task_assign_mb, 0x80000000]			; valid-bit mask for the mailbox test and clear
	immed[$tfifo_ctl_wd1, 0]						; second status word always 0

	// no long packet in progress on any port
	immed[@port15_in_prog, 0]
	immed[@port14_in_prog, 0]
	immed[@port13_in_prog, 0]
	immed[@port12_in_prog, 0]
	immed[@port11_in_prog, 0]
	immed[@port10_in_prog, 0]
	immed[@port9_in_prog, 0]
	immed[@port8_in_prog, 0]
	immed[@port7_in_prog, 0]
	immed[@port6_in_prog, 0]
	immed[@port5_in_prog, 0]
	immed[@port4_in_prog, 0]
	immed[@port3_in_prog, 0]
	immed[@port2_in_prog, 0]
	immed[@port1_in_prog, 0]
	immed[@port0_in_prog, 0]

	// only context 0 resets the shared message id; the others go straight to the loop
	br=ctx[0, context_0#]
	br[tx_fill_loop#]
context_0#:
	immed[@msg_id, 0]

// Main transmit fill loop.
//
// The first pass enters at tx_fill_loop#; every later pass re-enters at
// port_info_restored#, because the next assignment is already read and its port
// info restored at the bottom of the loop (tx_sent#) before the previous element
// is validated.
//
// Each pass handles one tfifo element and takes one of four paths:
//		SOP / EOP, SOP / not EOP, not SOP / EOP, not SOP / not EOP
// chosen from "elements remaining" in port_info (0 = new packet) and from
// whether exactly one element is left.
tx_fill_loop#:

	Tx_ReadAssignment[tfifo_entry, queue, @msg_id]						; read the next assignment from the tx_scheduler

took_assignment#:
	Tx_RestorePortInfo[port_info, queue]								; check for long packet continuation

port_info_restored#:
	alu[ele_remaining, 0x1f, AND, port_info, >>19]						; extract elements remaining
	br>0[tx_not_sop#]													; non-zero: continue the packet in progress
tx_sop#:
		// start of packet: lock the queue, then read both packet link words
		Tx_ReadLock[head_ptr, $queue_descriptor0, $packet_link0]		; get head/tail descriptor, packet count
#ifdef A1_CHIP
		// A1 chip has optimize_mem hw bug where ref is dropped. 
		sram[read, $packet_link0, buf_descriptor_base, head_ptr, 2],
					ctx_swap, defer[1]					; read packet_link 2 words get next head, status 
#else
		sram[read, $packet_link0, buf_descriptor_base, head_ptr, 2],
					optimize_mem, ctx_swap, defer[1]					; read packet_link 2 words get next head, status 
#endif
		ld_field_w_clr[tail_ptr, 0011, $queue_descriptor0]				; defer slot: keep the tail (low 16 bits of descriptor word 0)
		ld_field_w_clr[ele_remaining, 0001, $packet_link1]				; low byte of packet link word 1
		alu[freelist, 0x7, AND, $packet_link1, >>16]					; 3-bit freelist id
		alu_shf[buf_offset, 0, +16, $queue_descriptor0, >>13]			; relative ele addr for sop
		alu[status_byte, const_fc, AND, $packet_link1, >>8]				; status byte with its low 2 bits cleared
		.if(ele_remaining == 1)											; if at eop/sop
			Tx_SendLastData[buf_offset, status_byte, tfifo_entry]		; transfer packet data from sdram to tfifo
			Tx_WriteUnlock[queue, tail_ptr, $packet_link0, $queue_descriptor1]		; modify head and packet count
			Tx_FreeBuf[$queue_descriptor0, freelist]					; (3 insns) free the packet buffer				
			alu[status_byte, status_byte, OR, 3]						; eop sop for upcoming validate
		.else															; more than 1 remaining. not eop/ sop							
			Tx_Unlock[queue]											; queue descriptor is left unchanged until eop
			Tx_SendData[buf_offset, tfifo_entry]						; send an mpacket
			Tx_SavePortInfo[status_byte, ele_remaining, freelist, buf_offset, queue] ; save port info (decr ele_remaining)
			alu[status_byte, const_fc, OR, 1]							; not eop/ sop for upcoming validate
		.endif
		br[tx_sent#]
tx_not_sop#:																; not sop
		alu[buf_offset, 1, +16, port_info]								; get next buf_offset
		alu[port_info, 1, +, port_info]									; add 1 element to buf_offset in port_info
		ld_field_w_clr[status_byte, 0001, port_info, >>24]				; saved status byte from the top byte of port_info
		.if(ele_remaining == 1)											; if at eop/ not sop
			Tx_SendLastData[buf_offset, status_byte, tfifo_entry]		; transfer packet data from sdram to tfifo
			Tx_ReadLock[head_ptr, $queue_descriptor0, $packet_link0]	; get head/tail descriptor. packet count
//			(disabled variant of the read below, with optimize_mem)
//			sram[read, $packet_link0, buf_descriptor_base, head_ptr, 1],
//					optimize_mem, ctx_swap, defer[1]						; read packet_link 1 word to get next head
			sram[read, $packet_link0, buf_descriptor_base, head_ptr, 1],
					ctx_swap, defer[1]						; read packet_link 1 word to get next head
			ld_field_w_clr[tail_ptr, 0011, $queue_descriptor0]			; defer slot: keep the tail (low 16 bits of descriptor word 0)
			Tx_WriteUnlock[queue, tail_ptr, $packet_link0, $queue_descriptor1]		; modify head and packet count
			alu_shf[freelist, 0x7, AND, port_info, >>16]				; freelist id saved in port_info 18:16
			Tx_FreeBuf[$queue_descriptor0, freelist]					; (3 insns) free the packet buffer				
			Tx_UpdatePortInfo[port_info, queue]							; decr ele_remaining, at port info
			alu[status_byte, status_byte, OR, 2]						; eop not sop for upcoming validate
		.else															; not eop/ not sop
			Tx_SendData[buf_offset, tfifo_entry]						; send an mpacket
			Tx_UpdatePortInfo[port_info, queue]							; decr ele_remaining, at port info
			alu[status_byte, --, B, const_fc]							; not eop/ not sop for upcoming validate
		.endif
tx_sent#:
	// Build the status word for the element just started, then fetch the next
	// assignment while the data transfer runs. The validate is done either inside
	// Tx_ReadAssignment (when it has to poll and sees sdram_sig) or below.
	alu_shf[temp, --, B, status_byte, <<8]
	alu[$tfifo_ctl_wd0, temp, OR, queue, >>4]							; merge status_byte and queue
	alu[prev_entry, --, B, tfifo_entry]									; tfifo_entry is overwritten by the read below
	Tx_ReadAssignment[tfifo_entry, queue, @msg_id]						; read the next assignment from the tx_scheduler
	Tx_RestorePortInfo[port_info, queue]								; check for long packet continuation
	.if (send_outstanding == 1)											; not already validated inside Tx_ReadAssignment
		Tx_Validate[$tfifo_ctl_wd0, prev_entry]							; last chance write status and transmit validate
	.endif	
transmit_done#:
	br[port_info_restored#]													; iterate
	
