//------------------------------------------------------------------------------------
//                                                                     
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  1998-2000 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                      
//-----------------------------------------------------------------------------------
// tx_fill_f.uc
// transmit filler thread for  16 100M and 1 gigabit output ports
//-----------------------------------------------------------------------------------
//
//
// system: SA1200
// subsystem: transmit microcode
// usage: reference design
// author: dfh 12/08/97
// revisions:
//		dfh		5/20/98		base level 2
//		dfh		7/4/98		remove reference to XMIT_PW2E_ADDR	
//		dfh		12/16/98	BL4 remove include config.h
//
//-----------------------------------------------------------------------------------



// this ucode goes to fbox 3


// sdram, sram, scratch shared addresses
#include "mem_map.h"

// standard macros
#include "stdmac.uc"
#include "msgq.uc"





//-------------------------------macros--------------------------------------------

// Tx_ReadAssignment
//		get next transmit assignment from the tx_scheduler
//	
//		input/output:	global_msg_id	shared by all tx_fill context, points to next assignment
//		outputs:
//						tfifo_entry		one of transmit fifo elements (0-15)
//						queue			output queue, bits 7:3 = output port number
//
// tx_scheduler places this assignment in the longword scratch location 
//		XMIT_TASK_MSG_BASE + msg_id
//
//		The tx slow port task assignment message format
//		+--------+---------+----------+---------------+
//		|invalid |  unused |  Element |    queue      |
//		|   31   |  30:12  |   11:8   |     7:0       |
//		+--------+---------+----------+---------------+
//		invalid			1 = assign is invalid
//		Element			identifies the tfifo element
//		Queue			identifies the queue
//
//		The tx fast port task assignment message format
//
//		+-----+------+---------+------+----------+------+------+
//		|Valid|unused|EleCount |unused|  Element |unused|QueSel|
//		| 31  | 30:24|  23:21  |      |   19:16  |      | 2:0  |
//		+-----+------+---------+------+----------+------+------+
//		Valid			1 = assign is valid
//		EleCount		indicates number of elements assigned (0 = no assignment)
//						bit 12 on = 1 element
//						bit 13 on = 2 elements
//		Element			identifies the tfifo element
//		QueSel			identifies the queue
//

// Tx_TakeMsgId
//		capture the current message id, then advance the shared id for the next tx_fill context
//
//	input/output:	global_msg_id	shared message id (callers pass @msg_id or @fast_msg_id)
//	output:			captured_id		copy of global_msg_id taken before it is advanced
//
//	uses const_1, which StartUp loads with 1
//	NOTE(review): "+4" presumably adds only the low 4 bits of global_msg_id to const_1,
//	so the id would wrap instead of growing without bound - confirm against the ALU op definitions
//
#macro Tx_TakeMsgId[captured_id, global_msg_id]
	alu[captured_id, --, B, global_msg_id]									; id this context will read with
	alu[global_msg_id, const_1, +4, global_msg_id]							; increment message for next tfill
#endm





// Tx_ReadSlowAssignment
//		read one slow port task assignment and decode it
//
//	inputs:
//		captured_id		message id handed to msg_receive
//		success_label	branch target taken when bit 31 of $task_assign_mb is clear
//	outputs:
//		tfifo_entry		($task_assign_mb >> 8) AND 0xf
//		queue			byte 0 of ($task_assign_mb << 1), upper bytes cleared
//
//	uses slow_msg_base and $task_assign_mb from the enclosing scope
//	falls through when bit 31 is set; the caller is expected to branch back and read again
//
#macro Tx_ReadSlowAssignment[tfifo_entry, queue, captured_id, success_label]
	msg_receive[$task_assign_mb, captured_id, slow_msg_base, SYNC]			; see stdmac.uc
	br_bclr[$task_assign_mb, 31, success_label], defer[2]					; bit 31 clear = valid assignment; can't put mem op after a p3 branch
	alu_shf[tfifo_entry, 0xf, AND, $task_assign_mb, >>8]					; defer slot 1: element field 11:8
	ld_field_w_clr[queue, 0001, $task_assign_mb, <<1]						; defer slot 2: quadword align
#endm



// Tx_RestorePortInfo							; 7 insns								
//	check for long packet continuation
//	loads port_info from the @portN_in_prog register selected by the port number in queue
//
//	inputs:	queue		8:4 port number 
//	output: port_info	31:26 status_byte 
//						23:19 elements remaining
//						18:16 freelist	
//						15:0 buf_offset 	
//	clobbers: temp (jump table offset)
//
#macro Tx_RestorePortInfo[port_info, queue]	
	alu[temp, 0x1e, AND, queue, >>3]			; each target is offset by port*2
	jump[temp, restore_array#]					; dispatch into the table below, 2 instructions per port
restore_array#:
	br[end#], defer[1]							; port 0: the load below runs in the branch defer slot
	alu[port_info, --, B, @port0_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port1_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port2_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port3_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port4_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port5_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port6_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port7_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port8_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port9_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port10_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port11_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port12_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port13_in_prog]
	br[end#], defer[1]
	alu[port_info, --, B, @port14_in_prog]
	alu[port_info, --, B, @port15_in_prog]		; port 15: last entry needs no branch, falls through to end#
end#:	
#endm



// Tx_SavePortInfo									; 7 insns
//	save elements remaining, status byte and buffer offset in global port in progress info
//	packs the fields into port_info and stores it in the @portN_in_prog register
//	selected by the port number in queue
//
//	inputs
//		status_byte		byte enables for last element			to 31:26
//		ele_remaining	elements left to send in the packet		to 23:19
//		freelist		freelist id								to 18:16
//		buf_offset		elements offset in sdram (elements)		to 15:0
//		queue			8:4 port number 
//
//	side effects:
//		ele_remaining is decremented in place before it is packed
//		port_info and temp (registers of the enclosing scope) are overwritten
//
#macro Tx_SavePortInfo[status_byte, ele_remaining, freelist, buf_offset, queue] 
	alu[temp, 0x1e, AND, queue, >>3]						; each target is offset by port*2
	alu_shf[ele_remaining, ele_remaining, -, 1]				; decrement elements remaining
	jump[temp, restore_array#], defer[3]					; the 3 packing instructions below fill the defer slots
	alu_shf[port_info, buf_offset, OR, status_byte, <<16]	; status_byte (held in 15:10) lands in 31:26
	alu_shf[port_info, port_info, OR, freelist, <<16]		; freelist to 18:16
	alu_shf[port_info, port_info, OR, ele_remaining, <<19]	; elements remaining to 23:19
restore_array#:
	br[end#], defer[1]										; port 0: the store below runs in the branch defer slot
	alu[@port0_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port1_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port2_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port3_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port4_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port5_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port6_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port7_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port8_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port9_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port10_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port11_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port12_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port13_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port14_in_prog, --, B, port_info]
	alu[@port15_in_prog, --, B, port_info]					; port 15: last entry needs no branch, falls through to end#
end#:	
#endm



// Tx_UpdatePortInfo									; 7 insns								
//	decrement elements remaining in port_info and save it in global port in progress info
//	the updated word is stored in the @portN_in_prog register selected by the port number in queue
//
//	inputs:	
//			port_info	31:26 status_byte 
//						23:19 elements remaining
//						18:16 freelist	
//						15:0 buf_offset 	
//			queue		8:4 port number
//
//	side effects:
//			port_info is modified in place (elements remaining field decremented)
//			temp is overwritten with the jump table offset
//
#macro Tx_UpdatePortInfo[port_info, queue]	
	alu[temp, 0x1e, AND, queue, >>3]				; each target is offset by port*2
	jump[temp, restore_array#], defer[1]			; the decrement below fills the jump defer slot
	alu_shf[port_info, port_info, -, 1, <<19]		; decrement elements remaining
restore_array#:
	br[end#], defer[1]								; port 0: the store below runs in the branch defer slot
	alu[@port0_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port1_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port2_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port3_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port4_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port5_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port6_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port7_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port8_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port9_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port10_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port11_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port12_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port13_in_prog, --, B, port_info]
	br[end#], defer[1]
	alu[@port14_in_prog, --, B, port_info]
	alu[@port15_in_prog, --, B, port_info]			; port 15: last entry falls through to end#, as in Tx_RestorePortInfo/Tx_SavePortInfo
end#:	
#endm



// Tx_ReadLock
//	read_lock the queue descriptor (2 words) and extract the head pointer
//	the packet link words are NOT read here; callers issue their own sram[read] for them
//
//	inputs (taken from the enclosing scope, not macro parameters):
//		queue					offset of the descriptor from queue_descriptor_base
//		queue_descriptor_base	base of the queue descriptor array
//	outputs:
//		$queue_descriptor0		31:16 head relative address, 15:0 tail relative address
//		$queue_descriptor1		packet count (second word of the 2 word read)
//		head_ptr				queue head pointer ($queue_descriptor0 >> 16)
//
//	the $packet_link0 parameter is not referenced in the macro body
//	the instruction swaps context until the sram read completes (ctx_swap)
//		
// packet_link format:
//			+----------------------------------------+-------------------------------+
// word 0	|             NPL (next packet link)     |	 mpacket count OR mcast ptr  |
//			|		           31:16		         |    			15:0			 |
//			+----------------------------------------+-------------------------------+
//			+-----+------+---------+--------+--------+-------------+-------+----+----+
// word 1	|FPORT|unused|  qselect|FASTseq#|freelist|qw/byte count|eop sop|skip|port|
//			| 31  | 30:27|   26:24 |  23:20 |  19:16 |     15:10   |  9:8  | 7  | 6:0|
//			+-----+------+---------+--------+--------+-------------+-------+----+----+
//
#macro Tx_ReadLock[head_ptr, $queue_descriptor0, $packet_link0]
	sram[read_lock, $queue_descriptor0, queue_descriptor_base, queue, 2], 
					optimize_mem, ctx_swap						; read the queue descriptor 2 words
	alu_shf[head_ptr, --, B, $queue_descriptor0, >>16]		; isolate head (upper half of descriptor word 0)
#endm



// Tx_SendLastData
//		transfer the last element of a packet from sdram to a tfifo element,
//		using the quadword count held in status_word
//
//	inputs: 
//		buf_offset			element offset in packet buffer memory
//		status_word			local register word containing quadword count in 15:13
//		tfifo_entry			tfifo element 0-15
//
//	uses pkt_buffer_base and bit20on from the enclosing scope
//	clobbers qw_offset (operand synonym of temp)

#macro Tx_SendLastData[buf_offset, status_word, tfifo_entry]
.local sd_indirect
	alu_shf[qw_offset, --, B, buf_offset, <<3]							; element offset x 8 = quadword offset
	alu[sd_indirect, 0x7, AND, status_word, >>13]						; extract quadword count from status 15:13
	alu[sd_indirect, bit20on, OR, sd_indirect, <<16]						; place quadword count in 19:16
	alu_shf[--, sd_indirect, OR, tfifo_entry, <<7]						; put element no. in 10:7; result not stored, must directly precede the indirect_ref below
    sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref	; transfer to tfifo (no sig_done requested)
//    sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref, sig_done	; transfer to tfifo, sig done
//	immed[send_outstanding, 1]
// NOTE(review): the ctx_arb[sdram] that used to wait for this transfer in Tx_Validate is commented out there
.endlocal
#endm

//#macro Tx_F_SendLastData[buf_offset, status_word, tfifo_entry]
//.local sd_indirect
//	alu_shf[qw_offset, --, B, buf_offset, <<3]
//	alu[sd_indirect, 0x7, AND, status_word, >>13]						; extract quadword count from status 15:13
//	alu[sd_indirect, bit20on, OR, sd_indirect, <<16]						; place quadword count in 19:16
//	alu_shf[--, sd_indirect, OR, tfifo_entry, <<7]						; put element no. in 10:7
//   sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref, sig_done	; transfer to tfifo, sig done
// note: ctx_arb[sdram] for this happens in Tx_Validate
//.endlocal
//#endm

// Tx_SendData
//		transfer 8 quadwords from sdram to tfifo element
//
//	inputs: 
//		buf_offset			element offset in packet buffer memory
//		tfifo_entry			tfifo element 0-15
//
//	uses pkt_buffer_base and bit20on from the enclosing scope
//	clobbers qw_offset (operand synonym of temp)

#macro Tx_SendData[buf_offset, tfifo_entry]
.local sd_indirect
	alu_shf[qw_offset, --, B, buf_offset, <<3]							; element offset x 8 = quadword offset
	alu[sd_indirect, bit20on, OR, 7, <<16]								; place quadword count 7 in 19:16
	alu_shf[--, sd_indirect, OR, tfifo_entry, <<7]						; put element no. in 10:7; result not stored, must directly precede the indirect_ref below
    sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref	; transfer to tfifo (no sig_done requested)
//    sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref, sig_done	; transfer to tfifo, sig done
//	immed[send_outstanding, 1]
.endlocal
#endm

//#macro Tx_F_SendData[buf_offset, tfifo_entry]
//.local sd_indirect
//	alu_shf[qw_offset, --, B, buf_offset, <<3]
//	alu[sd_indirect, bit20on, OR, 7, <<16]								; place quadword count 7 in 19:16
//	alu_shf[--, sd_indirect, OR, tfifo_entry, <<7]						; put element no. in 10:7
//   sdram[t_fifo_wr, --, pkt_buffer_base, qw_offset, 8], indirect_ref, sig_done	; transfer to tfifo, sig done
//.endlocal
//#endm


// Tx_ClearPortVector												; 5 insns
//	clear the ports with packets vector bit location to flag a queue going empty
//
//	inputs:
//		queue		8:4 port number
//					3:1 qselect for that port
//
//	uses pwp_addr from the enclosing scope (scratch address of the vector)
//	clobbers portnum (operand synonym of temp) and the write side of $xfer3
//			
#macro Tx_ClearPortVector[queue]
	alu_shf[portnum, --, B, queue, >>4]								; remember queue1 was shifted <<1 before
	alu_shf[portnum, portnum, B-A, 1, <<5]							; indirect shift left 32 - portnum
	alu[--, portnum, B, 0]											; presumably latches portnum as the shift amount for <<indirect below - confirm
	alu_shf[$xfer3, --, B, 1, <<indirect]								; single bit mask for this port
	scratch [bit_wr, $xfer3, pwp_addr, 0, clear_bits]					; clear bit for this port
#endm



// Tx_WriteUnlock													; 13 insns
//	insert new head pointer and update packet count for a queue,
//	then write both descriptor words back and release the lock (sram write_unlock)
//	when the decremented count reaches 0 the port's bit in the ports with packets vector is cleared
//
//	inputs:
//		queue					offset from queue_descriptor_base
//		tail_ptr				15:0 previous tail
//		$packet_link0			31:16 new head
//		$queue_descriptor1		packet count
//
//	constant input:
//		queue_descriptor_base	array of all queues, each occupies 2 longwords
//
//	side effects:
//		tail_ptr is overwritten with the merged head/tail word
//		queue_ele_count (enclosing scope) holds the decremented count
//
//	NOTE(review): the ld_field below takes bytes 3:2 from ($packet_link0 << 16), i.e. from
//	bits 15:0 of $packet_link0, while the input list above places the new head in 31:16 -
//	confirm which half really carries the next packet link
//
#macro Tx_WriteUnlock[queue, tail_ptr, $packet_link0, $queue_descriptor1]
	alu[queue_ele_count, $queue_descriptor1, -, 1]					; decrement the element count
	.if (queue_ele_count == 0)
		Tx_ClearPortVector[queue]									; 5 insns
	.endif
	ld_field[tail_ptr, 1100, $packet_link0, <<16]					; merge existing tail with new head_ptr
	alu_shf[$queue_descriptor0, --, B, tail_ptr];
	ld_field_w_clr[$queue_descriptor1, 0011, queue_ele_count]		; write back the low 16 bits of the new count
	sram[write_unlock, $queue_descriptor0, queue_descriptor_base, queue, 2]
#endm


// Tx_Unlock
//	issue an sram unlock for the queue descriptor at queue_descriptor_base + queue
//	without writing any data (Tx_DoSlow uses it when the head packet is not finished yet)
//
//	inputs:
//		queue					offset from queue_descriptor_base
//
#macro Tx_Unlock[queue]
	sram[unlock, --, queue_descriptor_base, queue, 1]
#endm


// Tx_WriteEOPStatus													
//	write transmit status word for an element that ends a packet, then validate the element
//
//	inputs:
//		status_byte		15:10 byte count
//		eopsop			eop sop 2 bit encode, OR'ed in at bits 9:8 (callers pass 3 = eop sop, 2 = eop notsop)
//		queue			port and queue select
//		tfifo_entry	tfifo element
//
//	status word written = status_byte OR (eopsop << 8) OR (queue >> 4)
//	clobbers temp; swaps context until the t_fifo_wr completes
//
#macro Tx_WriteEOPStatus[status_byte, eopsop, queue, tfifo_entry]
	alu_shf[temp, status_byte, OR, eopsop, <<8]
	alu[$tfifo_ctl_wd0, temp, OR, queue, >>4]						; merge status_byte and portnum in queue
	alu_shf[temp, --, B, tfifo_entry, <<1]							; status address = element no. x 2
	alu_shf[temp, temp, +, 1, <<7]									;	+ 128
    t_fifo_wr [$tfifo_ctl_wd0, temp, 0, 1], ctx_swap				; write the status word
	Tx_F_Validate[tfifo_entry]										; tell fbi the element is ready
#endm

// Tx_WriteNonEOPStatus													
//	write transmit status word for an element that does not end a packet, then validate the element
//
//	inputs:
//		status_byte		constant 7:2 byte count all ones, 1:0 eop sop
//						(immediate value; callers pass 0xFD = sop noteop, 0xFC = notsop noteop)
//		queue			port and queue select
//		tfifo_entry	tfifo element
//
//	status word written = (status_byte << 8) OR (queue >> 4)
//	clobbers temp; swaps context until the t_fifo_wr completes
//
#macro Tx_WriteNonEOPStatus[status_byte, queue, tfifo_entry]
	immed[temp, status_byte, <<8]									; constant status to 15:8
	alu[$tfifo_ctl_wd0, temp, OR, queue, >>4]						; merge status_byte and portnum in queue
	alu_shf[temp, --, B, tfifo_entry, <<1]							; status address = element no. x 2
	alu_shf[temp, temp, +, 1, <<7]									;	+ 128
    t_fifo_wr [$tfifo_ctl_wd0, temp, 0, 1], ctx_swap				; write the status word
	Tx_F_Validate[tfifo_entry]										; tell fbi the element is ready
#endm

// Tx_Validate														; 8 insns + tfifo_wr ~30 
//	transmit validate a tfifo element
//	the wait for sdram completion is commented out below, so the validate is issued immediately
//	the only invocations visible in this file (in the fast fill loops) are commented out
//
//	inputs:
//		a_tfifo_entry	tfifo element
//
#macro Tx_Validate[a_tfifo_entry]								

//	ctx_arb[sdram], defer[1]										; wait for packet data to be transferred to tfifo
//	immed[send_outstanding, 0]
write_validate#:
	alu_shf[--, --, B, a_tfifo_entry, <<5]							; setup indirect data from element no.; must directly precede the indirect_ref below
	fast_wr[0, XMIT_VALIDATE], indirect_ref							; tell fbi to run with that element no.
#endm


// Tx_F_Validate
//	transmit validate a tfifo element; same as Tx_Validate except that const_x8000
//	(loaded with 0x8000 in StartUp) is OR'ed into the indirect data
//	invoked by Tx_WriteEOPStatus and Tx_WriteNonEOPStatus after the status word is written
//	NOTE(review): the meaning of bit 15 to the fbi is not visible in this file - confirm
//
//	inputs:
//		a_tfifo_entry	tfifo element
//
#macro Tx_F_Validate[a_tfifo_entry]								

//	ctx_arb[sdram]													; wait for packet data to be transferred to tfifo
write_validate#:
	alu_shf[--, const_x8000, OR, a_tfifo_entry, <<5]					; setup indirect data from element no.; must directly precede the indirect_ref below
//	alu_shf[--, --, B, a_tfifo_entry, <<5]							; setup indirect data from element no.
	fast_wr[0, XMIT_VALIDATE], indirect_ref							; tell fbi to run with that element no.
#endm


// Tx_TryValidate													
//	if sdram transfer is complete, transmit validate
//
//	inputs:
//		a_tfifo_entry	tfifo element
//
//#macro Tx_TryValidate[a_tfifo_entry]								
//
//	br_!signal[sdram_sig, end#]
//	immed[send_outstanding, 0]
//
//write_validate#:
//	alu_shf[--, --, B, a_tfifo_entry, <<5]							; setup indirect data from element no.
//	fast_wr[0, XMIT_VALIDATE], indirect_ref							; tell fbi to run with that element no.
//end#:
//#endm



// Tx_FreeBuf														; 3 insns
//	free the packet buffer
//	pushes the buffer descriptor back onto the freelist selected by freelist
//
//	inputs:
//		queue_descriptor0	31:16 head relative address of the buffer being freed
//		freelist			freelist id, merged into the indirect word at bit 16 and up
//
//	uses bit20on and buf_descriptor_base from the enclosing scope
//	clobbers descriptor_ptr
//
#macro Tx_FreeBuf[queue_descriptor0, freelist]
	alu_shf[descriptor_ptr, 1, B-A, queue_descriptor0, >>16]		; relative address = (descriptor word >> 16) - 1
	alu_shf[--, bit20on, OR, freelist, <<16]						; merge ov bit with freelist id; must directly precede the indirect_ref below
	sram[push, --, descriptor_ptr, buf_descriptor_base, 0], indirect_ref
#endm

// Tx_F_FreeBuf
//	free the packet buffer (fast port variant)
//	same push as Tx_FreeBuf, but the descriptor address is derived from buf_offset
//
//	inputs:
//		buf_offset			element offset of the buffer; (buf_offset >> 3) - 1 is used as the relative address
//		freelist			freelist id, merged into the indirect word at bit 16 and up
//
//	uses bit20on and buf_descriptor_base from the enclosing scope
//	clobbers descriptor_ptr
//
#macro Tx_F_FreeBuf[buf_offset, freelist]
	alu_shf[descriptor_ptr, 1, B-A, buf_offset, >>3]				; relative address
	alu_shf[--, bit20on, OR, freelist, <<16]						; merge ov bit with freelist id; must directly precede the indirect_ref below
	sram[push, --, descriptor_ptr, buf_descriptor_base, 0], indirect_ref
#endm


// Tx_DoSlow
//	fill one tfifo element for a slow port assignment
//
//	inputs:
//		prev_entry		not referenced in the macro body
//		tfifo_entry		tfifo element to fill
//		queue			8:4 port number, used as offset from queue_descriptor_base
//
//	flow:
//		restore the per port in progress word; elements remaining (23:19) decides the case
//		elements remaining == 0 : start of packet - lock the queue, read the packet link, then
//				single element packet (eop/sop)	: send last data, dequeue, free buffer, EOP status
//				longer packet (sop/not eop)		: unlock, send data, save port info, non-EOP status
//		elements remaining  > 0 : continuation - next element is at buf_offset + 1, then
//				last element (eop/not sop)		: send last data, lock + dequeue, free buffer, EOP status
//				otherwise (not eop/not sop)		: send data, update port info, non-EOP status
//
//	works on registers of the enclosing scope: port_info, ele_remaining, head_ptr, tail_ptr,
//	freelist, buf_offset, status_byte, temp and the $queue_descriptor / $packet_link transfer registers
//
#macro Tx_DoSlow[prev_entry, tfifo_entry, queue]

	Tx_RestorePortInfo[port_info, queue]								; check for long packet continuation
port_info_restored#:
	alu[ele_remaining, 0x1f, AND, port_info, >>19]						; elements remaining field 23:19
	br>0[tx_not_sop#]													; non zero = packet already in progress
tx_sop#:
		Tx_ReadLock[head_ptr, $queue_descriptor0, $packet_link0]		; get head/tail descriptor, packet count
		sram[read, $packet_link0, buf_descriptor_base, head_ptr, 2],
					optimize_mem, ctx_swap, defer[1]					; read packet_link 2 words get next head, status 
		ld_field_w_clr[tail_ptr, 0011, $queue_descriptor0]				; defer slot: tail = low 16 bits of descriptor word 0
		ld_field_w_clr[ele_remaining, 0001, $packet_link1]				; element count = byte 0 of packet link word 1
		alu[freelist, 0x7, AND, $packet_link1, >>16]					; freelist id from 18:16
		alu_shf[buf_offset, 0, +16, $queue_descriptor0, >>13]			; relative ele addr for sop
		alu[status_byte, $packet_link1, AND, const_fc_x256]				; keep status bits 15:10
		.if(ele_remaining == 1)											; if at eop/sop
			Tx_SendLastData[buf_offset, status_byte, tfifo_entry]		; transfer packet data from sdram to tfifo
			Tx_WriteUnlock[queue, tail_ptr, $packet_link0, $queue_descriptor1]		; modify head and packet count
			Tx_FreeBuf[$queue_descriptor0, freelist]					; (3 insns) free the packet buffer				
			Tx_WriteEOPStatus[status_byte, 3, queue, tfifo_entry]		; write status to fbi (ctx swap) eop sop
		.else															; more than 1 remaining. not eop/ sop							
			Tx_Unlock[queue]											; head stays queued until the last element is sent
			Tx_SendData[buf_offset, tfifo_entry]						; send an mpacket
			Tx_SavePortInfo[status_byte, ele_remaining, freelist, buf_offset, queue] ; save port info (decr ele_remaining)
			Tx_WriteNonEOPStatus[0xFD, queue, tfifo_entry]				; write status to fbi (ctx swap) sop noteop
		.endif
	br[tx_slow_sent#]
tx_not_sop#:																; not sop
		alu[buf_offset, 1, +16, port_info]								; get next buf_offset
		alu[port_info, 1, +, port_info]									; add 1 element to buf_offset in port_info
		ld_field_w_clr[status_byte, 0010, port_info, >>16]				; port_info 31:24 to status_byte 15:8
		.if(ele_remaining == 1)											; if at eop/ not sop
			Tx_SendLastData[buf_offset, status_byte, tfifo_entry]		; transfer packet data from sdram to tfifo
			Tx_ReadLock[head_ptr, $queue_descriptor0, $packet_link0]	; get head/tail descriptor. packet count
			sram[read, $packet_link0, buf_descriptor_base, head_ptr, 1],
					optimize_mem, ctx_swap, defer[1]					; read packet_link 1 word to get next head
			ld_field_w_clr[tail_ptr, 0011, $queue_descriptor0]			; defer slot: tail = low 16 bits of descriptor word 0
			Tx_WriteUnlock[queue, tail_ptr, $packet_link0, $queue_descriptor1]		; modify head and packet count
			alu_shf[freelist, 0x7, AND, port_info, >>16]				; freelist id saved in port_info 18:16
			Tx_FreeBuf[$queue_descriptor0, freelist]					; (3 insns) free the packet buffer				
			Tx_UpdatePortInfo[port_info, queue]							; decr ele_remaining, at port info
			alu[status_byte, const_fc_x256, AND, port_info, >>16]			; 31:26 to 15:10
			Tx_WriteEOPStatus[status_byte, 2, queue, tfifo_entry]			; write status to fbi (ctx swap) eop notsop
		.else															; not eop/ not sop
			Tx_SendData[buf_offset, tfifo_entry]						; send an mpacket
			Tx_UpdatePortInfo[port_info, queue]							; decr ele_remaining, at port info
			Tx_WriteNonEOPStatus[0xFC, queue, tfifo_entry]				; write status to fbi (ctx swap) notsop noteop
		.endif
tx_slow_sent#:

#endm		; end Tx_DoSlow



// @fport_task	from previous transfer
//		31:29	queue
//		28:24	elements remaining
//		23:21	number of elements assigned
//		19:16	tfifo entry
//		15:0	buf_offset
//
// @port_info from first transfer
//
//		18:16	freelist		freelist id		
//		15:8	status_byte		byte enables for last element			


// Tx_F_Restore
//	wait for the inter thread signal, then pick up the task word the other fast thread left in @fport_task
//
//	output:
//		fport_task		copy of @fport_task
//
#macro Tx_F_Restore[fport_task]
	ctx_arb[inter_thread]												; wait for signal
	alu[fport_task, --, B, @fport_task]									; restore task
#endm



// Tx_F_Save
//	publish the task word in @fport_task and signal the other fast thread, which is waiting in Tx_F_Restore
//	the store is issued before the signal
//
//	inputs:
//		fport_task		task word to hand over
//		sig_thread		thread number to signal (ctx 2 passes 15, ctx 3 passes 14)
//
#macro Tx_F_Save[fport_task, sig_thread]
	alu[@fport_task, --, B, fport_task]									; save for other thread to pick up
	fast_wr[sig_thread, inter_thd_sig]									; signal the other thread
#endm


// Tx_F_NewTask
//	merge a new fast port assignment into fport_task, re-reading the mailbox until
//	the message that matches captured_id has arrived
//
// new task is in $task_assign_mb
//		23:21	number of elements assigned
//		19:16	tfifo entry
//		2:0	preferred queue
//
//	the assignment is accepted when ($task_assign_mb >> 24) equals captured_id
//	bytes 2:1 of the message (bits 23:8) are added into fport_task after its
//	tfifo entry field (19:16) has been cleared
//
//	inputs:
//		$task_assign_mb		result of a msg_receive already issued by the caller
//		captured_id			message id the caller read with
//	input/output:
//		fport_task			task word being rebuilt
//
//	uses fast_msg_base from the enclosing scope
//
#macro Tx_F_NewTask[fport_task, $task_assign_mb, captured_id]
get_assignment#:
.local temp_task
	alu[--, captured_id, -, $task_assign_mb, >>24]						; compare id against message bits 31:24
	br=0[got_assignment#], defer[2]
	ld_field_w_clr[temp_task, 0110, $task_assign_mb]					; defer slot 1: keep bits 23:8 of the message
	alu_shf[fport_task, fport_task, AND~, 0xf, <<16]					; clear tfifo entry field

	msg_receive[$task_assign_mb, captured_id, fast_msg_base, SYNC]		; read again if not valid
	br[get_assignment#]
got_assignment#:
	alu[fport_task, fport_task, +, temp_task]							; add in new assignment
.endlocal
#endm


// Tx_F_NextEntry
//	advance the tfifo entry field (19:16) of fport_task by one
//	bit 20 is cleared afterwards so that a carry out of the 4 bit field is discarded
//
//	input/output:
//		fport_task		task word, modified in place
//
#macro Tx_F_NextEntry[fport_task]
	alu_shf[fport_task, fport_task, +, 1, <<16]							; increment tfifo_entry
	alu_shf[fport_task, fport_task, AND~, 1, <<20]						; drop the carry out of 19:16
#endm

// Tx_F_ReadHead
//	read the 2 word packet link at the next head position of a fast port queue
//	exactly 3 instructions: Tx_F_ReadPacketLink relies on this for its 8 instruction table stride
//
//	inputs:
//		abs_head_ptr	head pointer register of the queue (callers pass @head_ptr_qN)
//	outputs:
//		head_ptr		const_1 + abs_head_ptr (8 bit add), used as offset from queue_descriptor_base
//		$packet_link0, $packet_link1	the 2 words read
//		buf_offset		(enclosing scope) derived from $packet_link0 << 3, 16 bit add with 0
//
//	swaps context until the sram read completes
//
#macro Tx_F_ReadHead[head_ptr, abs_head_ptr]
	alu[head_ptr, const_1, +8, abs_head_ptr]							; start at 1
	sram[read, $packet_link0, queue_descriptor_base, head_ptr, 2],
					optimize_mem, ctx_swap								; read packet_link 2 words get next head, status 
	alu_shf[buf_offset, 0, +16, $packet_link0, <<3]
#endm

// Tx_F_WriteHead
//	store the advanced head pointer: abs_head_ptr = const_1 + head_ptr (8 bit add)
//	exactly 4 instructions: the 3 nops pad each Tx_F_ReadPacketLink table entry to 8 instructions
//	(3 from Tx_F_ReadHead + 1 br + 4 here); do not remove them without changing the jump stride
//
//	inputs:
//		head_ptr		value produced by Tx_F_ReadHead
//	output:
//		abs_head_ptr	head pointer register of the queue (callers pass @head_ptr_qN)
//
#macro Tx_F_WriteHead[abs_head_ptr, head_ptr]
	alu[abs_head_ptr, const_1, +8, head_ptr]
	nop
	nop
	nop 
#endm

// Tx_F_ReadPacketLink
//	read the packet link at the head of the fast port queue selected by fport_task 31:29
//	and advance that queue's head pointer register (@head_ptr_q0 .. @head_ptr_q7)
//
//	inputs:
//		fport_task		31:29 queue
//	outputs (set by Tx_F_ReadHead):
//		$packet_link0, $packet_link1, head_ptr, buf_offset
//
//	implemented as an 8 entry jump table with a stride of 8 instructions:
//	Tx_F_ReadHead (3) + br (1) + Tx_F_WriteHead (4, of which 3 are padding nops)
//	the head pointer store sits in the defer slot of each br
//	clobbers temp
//
#macro	Tx_F_ReadPacketLink[buf_offset, fport_task]							; (7)
	alu[temp, const_x38, AND, fport_task, >>26]			; queue == 31:29, jump target is 8 insns
	jump[temp, read_pl#]
read_pl#:
	Tx_F_ReadHead[head_ptr, @head_ptr_q0]				; queue 0
	br[read_pl_end#], defer[1]
	Tx_F_WriteHead[@head_ptr_q0, head_ptr]

	Tx_F_ReadHead[head_ptr, @head_ptr_q1]				; queue 1
	br[read_pl_end#], defer[1]
	Tx_F_WriteHead[@head_ptr_q1, head_ptr]

	Tx_F_ReadHead[head_ptr, @head_ptr_q2]				; queue 2
	br[read_pl_end#], defer[1]
	Tx_F_WriteHead[@head_ptr_q2, head_ptr]

	Tx_F_ReadHead[head_ptr, @head_ptr_q3]				; queue 3
	br[read_pl_end#], defer[1]
	Tx_F_WriteHead[@head_ptr_q3, head_ptr]

	Tx_F_ReadHead[head_ptr, @head_ptr_q4]				; queue 4
	br[read_pl_end#], defer[1]
	Tx_F_WriteHead[@head_ptr_q4, head_ptr]

	Tx_F_ReadHead[head_ptr, @head_ptr_q5]				; queue 5
	br[read_pl_end#], defer[1]
	Tx_F_WriteHead[@head_ptr_q5, head_ptr]

	Tx_F_ReadHead[head_ptr, @head_ptr_q6]				; queue 6
	br[read_pl_end#], defer[1]
	Tx_F_WriteHead[@head_ptr_q6, head_ptr]

	Tx_F_ReadHead[head_ptr, @head_ptr_q7]				; queue 7
	br[read_pl_end#], defer[1]
	Tx_F_WriteHead[@head_ptr_q7, head_ptr]
read_pl_end#:
#endm


// Tx_DoFast
//	fill one tfifo element for the fast port, then hand the task word to the other fast thread
//
//	inputs:
//		prev_entry		only referenced by the commented out line at tx_sent#
//		port_info		register that receives @port_info on the eop/not sop path
//		fport_task		task word register (layout in the @fport_task comment above this macro)
//		tfifo_entry		register that receives the tfifo element from fport_task 19:16
//		queue			register loaded with 0x100 here (qselect still tbd)
//		sig_thread		thread number signalled by Tx_F_Save
//
//	flow:
//		wait for the inter thread signal and restore fport_task
//		elements remaining (28:24) == 0 : start of packet, read the packet link
//				eop_sop			element count byte is 1: send last data, free buffer
//				noteop_sop		save packet link word 1 in @port_info, send data
//		elements remaining  > 0 : continuation at buf_offset + 1
//				eop_notsop		send last data using @port_info, free buffer
//				noteop_notsop	send data
//		in every case the elements assigned field (23:21) is decremented, then
//				ele_assigned_*	more elements assigned: next tfifo entry, save + signal, write status
//				ele_needed_*	none left: take a message id, start a NOSYNC read, write status,
//								wait for the new task in Tx_F_NewTask, save + signal
//
#macro Tx_DoFast[prev_entry, port_info, fport_task, tfifo_entry, queue, sig_thread]
	Tx_F_Restore[fport_task]										; (2)

	alu_shf[ele_remaining, 0x1f, AND, fport_task, >>24]				; elements remaining
	br>0[notsop#], defer[2]											; the next 2 instructions fill the defer slots on both paths

	alu_shf[tfifo_entry, 0xf, AND, fport_task, >>16]				; restore tfifo_entry
	immed[queue, 0x100]												; portnum queue 0 tbd: add qselect
sop#:
		Tx_F_ReadPacketLink[buf_offset, fport_task]					; (7)
		br!=byte[$packet_link1, 0, 1, noteop_sop#], defer[2]		; byte 0 != 1: more than one element
																	; NOTE(review): defer slot 2 is the first instruction of the
																	; Tx_SendLastData expansion (qw_offset load); Tx_SendData
																	; reloads qw_offset on the taken path
eop_sop#:
			alu_shf[status_byte, $packet_link1, AND, const_fc_x256]	; defer slot 1: keep status bits 15:10
			Tx_SendLastData[buf_offset, status_byte, tfifo_entry]	; (6) transfer packet data from sdram to tfifo

			alu_shf[freelist, 0x7, AND, $packet_link1, >>16]
			Tx_F_FreeBuf[buf_offset, freelist]						; (3) free the packet buffer				

			alu_shf[fport_task, fport_task, -, 1, <<21]				; decr elements assigned

			alu_shf[--, fport_task, AND, 0x7, <<21]					; test elements assigned
			br>0[ele_assigned_eop_sop#]
ele_needed_eop_sop#:
				Tx_TakeMsgId[captured_id, @fast_msg_id]				; (2)
				msg_receive[$task_assign_mb, captured_id, fast_msg_base, NOSYNC]	; (1)
				Tx_WriteEOPStatus[status_byte, 3, queue, tfifo_entry]	; (5) write status to fbi (ctx swap) eop sop
				Tx_F_NewTask[fport_task, $task_assign_mb, captured_id]; (5) iterate read until have new fport_task
				Tx_F_Save[fport_task, sig_thread]
				br[tx_sent#]

ele_assigned_eop_sop#:
				Tx_F_NextEntry[fport_task]							; (2) increment tfifo_entry
				Tx_F_Save[fport_task, sig_thread]
				Tx_WriteEOPStatus[status_byte, 3, queue, tfifo_entry]	; (5) write status to fbi (ctx swap) eop sop
				br[tx_sent#]
noteop_sop#:
			alu[@port_info, --, B, $packet_link1]					; save freelist and status_byte

			Tx_SendData[buf_offset, tfifo_entry]					; send an mpacket
			ld_field[fport_task, 1000, $packet_link1, <<24]	;		; insert elements remaining
			ld_field[fport_task, 0011, buf_offset]					; insert buf_offset
			alu_shf[fport_task, fport_task, -, 9, <<21]				; decr elements remaining, elements assigned (9<<21 = bit 24 + bit 21)

			alu_shf[--, fport_task, AND, 0x7, <<21]					; test for more elements assigned
			br>0[ele_assigned_noteop_sop#]
ele_needed_noteop_sop#:
				Tx_TakeMsgId[captured_id, @fast_msg_id]			; (2)
				msg_receive[$task_assign_mb, captured_id, fast_msg_base, NOSYNC]; (1)
				Tx_WriteNonEOPStatus[0xFD, queue, tfifo_entry]		; write status to fbi (ctx swap) sop noteop
				Tx_F_NewTask[fport_task, $task_assign_mb, captured_id]	; (5) iterate read until have new fport_task
				Tx_F_Save[fport_task, sig_thread]
				br[tx_sent#]
ele_assigned_noteop_sop#:
				Tx_F_NextEntry[fport_task]							; (2) increment tfifo_entry
				Tx_F_Save[fport_task, sig_thread]
				Tx_WriteNonEOPStatus[0xFD, queue, tfifo_entry]	; write status to fbi (ctx swap) sop noteop
				br[tx_sent#]

notsop#:															; not sop
		alu[buf_offset, const_1, +16, fport_task]					; buf_offset = 1 + low 16 bits of fport_task
		alu[fport_task, const_1, +, fport_task]						; add 1 element to buf_offset
		.if(ele_remaining == 1)										; if at eop/ not sop
eop_notsop#:
			alu[port_info, --, B, @port_info]						; word saved by the noteop_sop path
			Tx_SendLastData[buf_offset, port_info, tfifo_entry]		; transfer packet data from sdram to tfifo

			alu_shf[freelist, 0x7, AND, port_info, >>16]
			Tx_F_FreeBuf[buf_offset, freelist]						; (3) free the packet buffer				

			alu_shf[fport_task, fport_task, -, 9, <<21]				; decr elements remaining, elements assigned

			alu_shf[--, fport_task, AND, 0x7, <<21]					; test ele assigned
			br>0[ele_assigned_eop_notsop#], defer[1]
			alu_shf[status_byte, port_info, AND, const_fc_x256]			; defer slot: status bits 15:10 (mask 0xfc00)
ele_needed_eop_notsop#:
				Tx_TakeMsgId[captured_id, @fast_msg_id]				; (2)
				msg_receive[$task_assign_mb, captured_id, fast_msg_base, NOSYNC]	; (1)
				Tx_WriteEOPStatus[status_byte, 2, queue, tfifo_entry]	; write status to fbi (ctx swap) eop notsop
				Tx_F_NewTask[fport_task, $task_assign_mb, captured_id]	; (5) iterate read until have new fport_task
				Tx_F_Save[fport_task, sig_thread]
				br[tx_sent#]
ele_assigned_eop_notsop#:
				Tx_F_NextEntry[fport_task]							; (2) increment tfifo_entry
				Tx_F_Save[fport_task, sig_thread]
				Tx_WriteEOPStatus[status_byte, 2, queue, tfifo_entry]	; write status to fbi (ctx swap) eop notsop
				br[tx_sent#]

		.else														; not eop/ not sop
noteop_notsop#:
			Tx_SendData[buf_offset, tfifo_entry]					; send an mpacket
			alu_shf[fport_task, fport_task, -, 9, <<21]				; decr elements remaining, elements assigned
			alu_shf[--, fport_task, AND, 0x7, <<21]					; test ele assigned
			br>0[ele_assigned_noteop_notsop#]
ele_needed_noteop_notsop#:
				Tx_TakeMsgId[captured_id, @fast_msg_id]				; (2)
				msg_receive[$task_assign_mb, captured_id, fast_msg_base, NOSYNC]; (1)
				Tx_WriteNonEOPStatus[0xFC, queue, tfifo_entry]	; write status to fbi (ctx swap) noteop notsop
				Tx_F_NewTask[fport_task, $task_assign_mb, captured_id]	; (5) iterate read until have new fport_task
				Tx_F_Save[fport_task, sig_thread]
				br[tx_sent#]
ele_assigned_noteop_notsop#:
				Tx_F_NextEntry[fport_task]							; (2) increment tfifo_entry
				Tx_F_Save[fport_task, sig_thread]
				Tx_WriteNonEOPStatus[0xFC, queue, tfifo_entry]	; write status to fbi (ctx swap) noteop notsop
		.endif														; last case falls through to tx_sent#
		
tx_sent#:
//	alu[prev_entry, tfifo_entry, OR, 1, <<10]							; save entry with ov for later validate

#endm																	; Tx_DoFast

//-------------------------------end macros----------------------------------------

		
// transfer register layout: the order below fixes which registers a multi word
// memory reference fills, the synonyms give them the names used by the macros
.xfer_order $xfer0 $xfer1 $xfer2 $xfer3 $xfer4 $xfer5 $xfer6 $xfer7
.xfer_order $$xfer0 $$xfer1
.operand_synonym $queue_descriptor0 $xfer0								; queue head and tail pointers from/to queue
.operand_synonym $queue_descriptor1 $xfer1								; queue element count from/to queue
.operand_synonym $packet_link0 $xfer2									; packet links descriptors from queue
.operand_synonym $packet_link1 $xfer3
.operand_synonym $task_assign_mb $xfer4									; new task assignment from tx_scheduler
.operand_synonym $initial_tail $xfer5									; element assignment from tx_scheduler
.operand_synonym $tfifo_ctl_wd0 $xfer6									; status to tfifo
.operand_synonym $tfifo_ctl_wd1 $xfer7									; status 63:32 always 0

// portnum and qw_offset share the temp register, so each clobbers temp
.operand_synonym portnum temp
.operand_synonym qw_offset temp

// StartUp
//		common initialization, then dispatch on context number:
//		contexts 0 and 1 run the slow port fill loop, contexts 2 and 3 the fast port fill loop
//		NOTE(review): the dispatch is at the end, so every context appears to execute the
//		whole block, including the clearing of the shared @portN_in_prog registers - confirm
StartUp#:
    immed32[pkt_buffer_base, SDRAM_PKT_BUFFER_BASE]
	immed[$tfifo_ctl_wd1, 0]										; second status word always 0



// registers to be used
//	queue_descriptor_base	SRAM_QUEUE_DESCRIPTOR_BASE
//	pkt_buffer_base			SDRAM_PKT_BUFFER_BASE
//	buf_descriptor_base		SRAM_buf_descriptor_base
//	temp					local variable
//	queue					queue offset
//	tfifo_entry				tfifo element

	immed[@port0_in_prog, 0]										; for long packet, hold port info
	immed[@port1_in_prog, 0]
	immed[@port2_in_prog, 0]
	immed[@port3_in_prog, 0]
	immed[@port4_in_prog, 0]
	immed[@port5_in_prog, 0]
	immed[@port6_in_prog, 0]
	immed[@port7_in_prog, 0]
	immed[@port8_in_prog, 0]
	immed[@port9_in_prog, 0]
	immed[@port10_in_prog, 0]
	immed[@port11_in_prog, 0]
	immed[@port12_in_prog, 0]
	immed[@port13_in_prog, 0]
	immed[@port14_in_prog, 0]
	immed[@port15_in_prog, 0]

	immed[buf_descriptor_base, SRAM_BUFF_DESCRIPTOR_BASE]
	immed32[$task_assign_mb, 0x80000000]			; for test and set
	immed[const_1, 1]								; increment used by Tx_TakeMsgId and the fast port macros
	immed[const_fc_x256, 0xfc, <<8]					; 0xfc00: mask for status bits 15:10
	alu_shf[bit20on, --, B, 1, <<20]				; setup indirect ov bit to save a cycle
	immed[const_x8000, 0x8000]						; OR'ed into the validate word by Tx_F_Validate
	
	br=ctx[0, tx_fill_slow_start_ctx0#]				; ctx 0: slow loop, also initializes @msg_id
	br=ctx[1, tx_fill_slow_start_ctx1#]				; ctx 1: slow loop
	br=ctx[2, tx_fill_fast_start_ctx2#]				; ctx 2: fast loop, fetches the first fast assignment
	br=ctx[3, tx_fill_fast_start_ctx3#]				; ctx 3: fast loop, signals ctx 2 to start

// registers private to the slow port contexts
.local pwp_addr queue_ele_count slow_msg_base tail_ptr

// context 0 additionally resets the shared slow message id, then falls through
// into the setup that context 1 enters directly
tx_fill_slow_start_ctx0#:
	immed[@msg_id, 0]

tx_fill_slow_start_ctx1#:
	immed[pwp_addr, XMIT_PWP_VECTOR]					; scratch location for ports with packets queued
	immed[queue_descriptor_base, SRAM_QUEUE_DESCRIPTOR_BASE]
	immed[slow_msg_base, XMIT_TASK_MSG_BASE]		; base address of slow port transmit task assignment mailboxes


// tx_fill_slow_loop  
//		contexts 0 and 1
//
//		if slow port assignment, do slow port fill, 
//		tbd: otherwise if fast port assignment, do fast port fill
//
//		each pass takes the next message id once, re-reads that mailbox until the
//		assignment is valid (bit 31 clear), then fills one tfifo element
//
tx_fill_slow_loop#:


	Tx_TakeMsgId[captured_id, @msg_id]
read_assign_s#:
	Tx_ReadSlowAssignment[tfifo_entry, queue, captured_id, do_slow#]		; read the next assignment from the tx_scheduler
//	Tx_TryValidate[prev_entry]
	br[read_assign_s#]													; not valid yet: read the same message id again

// now $task_assign_mb has 
//		<15:12> element count
//		<11:8>	element 
//		<7:0>	queue	<7:3> port
//
do_slow#:
	Tx_DoSlow[prev_entry, tfifo_entry, queue]
transmit_01_done#:
	br[tx_fill_slow_loop#]

.endlocal


// tx_fill_fast_loop 
//		contexts 2 and 3
//
//		if fast port assignment, do fast port fill, 
//		tbd: otherwise if slow port assignment, do slow port fill
//
//		the two contexts alternate: each Tx_DoFast waits for the inter thread signal,
//		fills one element, then passes the task word on through @fport_task and signals the other
//		context (sig_thread 15 from ctx 2, 14 from ctx 3)
//
//		NOTE(review): @fast_msg_id and @head_ptr_q0..7 are read without any initialization
//		visible in this file (compare immed[@msg_id, 0] on the slow side) - confirm they are
//		guaranteed to start at 0
//
.local fport_task const_x38 fast_msg_base 
#define_eval XMIT_FAST_MSG_BASE (XMIT_TASK_MSG_BASE + 18)


tx_fill_fast_start_ctx2#:
	immed[fport_task, 0]
	immed[const_x38, 0x38]									; mask for queue*8 in Tx_F_ReadPacketLink
	immed[fast_msg_base, XMIT_FAST_MSG_BASE]				; fast port assignments
	immed[queue_descriptor_base, XMIT_FPORT_DESCRIPTOR_BASE]
	
	Tx_TakeMsgId[captured_id, @fast_msg_id]
	msg_receive[$task_assign_mb, captured_id, fast_msg_base, SYNC]; (1)
	Tx_F_NewTask[fport_task, $task_assign_mb, captured_id]	; (5) iterate read until have new fport_task
	alu[@fport_task, --, B, fport_task]						; save for next restore to pick up
	br[tx_fill_first2#]

tx_fill_fast_loop2#:
//	Tx_Validate[prev_entry]								; when sdram complete, write transmit validate
tx_fill_first2#:
	Tx_DoFast[prev_entry, port_info, fport_task, tfifo_entry, queue, 15]
transmit_2_done#:
	br[tx_fill_fast_loop2#]


tx_fill_fast_start_ctx3#:
	immed[fport_task, 0]
	immed[const_x38, 0x38]									; mask for queue*8 in Tx_F_ReadPacketLink
	immed[fast_msg_base, XMIT_FAST_MSG_BASE]				; fast port assignments
	immed[queue_descriptor_base, XMIT_FPORT_DESCRIPTOR_BASE]
	fast_wr[14, inter_thd_sig]								; signal ctx 2 so it can restore

	br[tx_fill_first3#]

tx_fill_fast_loop3#:	
//	Tx_Validate[prev_entry]								; when sdram complete, write transmit validate
tx_fill_first3#:
	Tx_DoFast[prev_entry, port_info, fport_task, tfifo_entry, queue, 14]
transmit_3_done#:
	br[tx_fill_fast_loop3#]

.endlocal