////////////////////////////////////////////////////////////////////////////////
//                                                                     
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  2001-2002 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                      
////////////////////////////////////////////////////////////////////////////////
//
//
//      File Name: sphy_mphy4_tx_util.uc
//
//      Purpose: Utility macros for Packet (POS/Ethernet) TX
//
///////////////////////////////////////////////////////////////////////////////

#ifndef __SPHY_MPHY4_TX_UTIL_UC__
#define __SPHY_MPHY4_TX_UTIL_UC__

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_set_16bit_msf_reg()
//
// Description: Use msf[fast_wr...] to set MSF register 
//
// Outputs: 
//								None
//
// Inputs:  
//		in_msf_reg_addr:		MSF register address to set
//		in_msf_reg_val:			MSF register value to set 
//
// Constants:
//								None 
//
// Size: 
//
//		3 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_set_16bit_msf_reg(in_msf_reg_addr, in_msf_reg_val)
.begin
.reg fastwr_addr fastwr_data
	// The fast_wr below has no transfer-register operand ("--"); the address
	// and the value are handed over as its two source operands, so both are
	// staged in GPRs first.
	alu[fastwr_addr, --, b, in_msf_reg_addr]			; copy of the MSF register address
	alu_shf[fastwr_data, --, b, in_msf_reg_val, <<16]	; value moved up into bits 31:16
	msf[fast_wr, --, fastwr_addr, fastwr_data]			; issue the fast write
.end // fastwr_addr fastwr_data
#endm    // _sphy_mphy4_tx_set_16bit_msf_reg()

///////////////////////////////////////////////////////////////////////////////
// 
// _sphy_mphy4_tx_init_gprs()
//
// Description:
// 	
//		Initialize the common globals in context relative gprs to save 
//      instruction cycles
//
// Outputs: 
//      out_indiref_base:		base for indirect reference in moving 
//								payload from dram to tbuf
//      out_l2_table_base:      base to read L2 table entry
//		out_default_sigmask1:	default sig_mask for phase 1	
//		out_default_sigmask2:	default sig_mask for phase 2	
//		
// Inputs:  
//		None
//		
// Constants
//      DRAM_INDIREF_BASE_LW:	base for indirect reference in moving 
//								payload from dram to tbuf

//
// Size: 
//
//		10 instructions
// 
//
///////////////////////////////////////////////////////////////////////////////
#macro	_sphy_mphy4_tx_init_gprs(out_indiref_base, out_l2_table_base, \
								out_default_sigmask1, out_default_sigmask2)

.begin
	// Base value for the address field of the indirect reference used by
	// the dram[tbuf_wr, ...] instruction.
	move(out_indiref_base, DRAM_INDIREF_BASE_LW)

#ifdef ETHERNET_TX
	// L2 table base is only loaded when building for Ethernet TX;
	// otherwise out_l2_table_base is left untouched.
	immed32(out_l2_table_base, L2_TABLE_SRAM_BASE)
#endif

	// ---- phase 1 default signal mask (start of every loop iteration) ----
	alu[out_default_sigmask1, --, b, 0]									; start from an empty mask
	_sphy_mphy4_tx_set_signal(out_default_sigmask1, sig2_next_context)	; + sig2_next_context
#ifndef DISABLE_TX2SCHED_FEEDBACK
	_sphy_mphy4_tx_set_signal(out_default_sigmask1, sig_reflect_write)	; + sig_reflect_write
#endif	// #ifndef DISABLE_TX2SCHED_FEEDBACK
	_sphy_mphy4_tx_set_signal(out_default_sigmask1, sig_msf_access_3)	; + sig_msf_access_3

	// ---- phase 2 default signal mask (start of every loop iteration) ----
	alu[out_default_sigmask2, --, b, 0]									; start from an empty mask
	_sphy_mphy4_tx_set_signal(out_default_sigmask2, sig3_next_context)	; + sig3_next_context
	_sphy_mphy4_tx_set_signal(out_default_sigmask2, sig_dram_access_1)	; + sig_dram_access_1
	_sphy_mphy4_tx_set_push_signal(out_default_sigmask2, sig_dram_access_1)	; + the signal numbered one above it
	_sphy_mphy4_tx_set_signal(out_default_sigmask2, sig_sram_read_1)	; + sig_sram_read_1

.end

#endm  // _sphy_mphy4_tx_init_gprs()

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_set_lmindex0_to_queue_tail()
//
// Description: set local memory index 0 to the tail of the queue entry for 
//				the port and put the packet in tx request in queue tail
//
// Outputs:					
//							None 
//
// Inputs:  
//      in_turnaround:		queue head/tail offset value for reset offset
//
// Constants:
//						 	None 
//
//	Size:
//		5 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_set_lmindex0_to_queue_tail[in_turnaround]

	// Load local memory index 0 with the port's current tail offset, then
	// move the stored tail offset on to the next queue entry, wrapping it
	// back to the queue base once the turnaround offset has been handed out.
.begin 
 
	local_csr_wr[active_lm_addr_0, *l$index1[GLOBAL_TAIL_OFFSET_INDEX]]
	// None of the instructions below use *l$index0, so they run while the
	// local_csr_wr above settles.

	// Was the tail offset that was just handed out the turnaround offset?
	alu[--, *l$index1[GLOBAL_TAIL_OFFSET_INDEX], -, in_turnaround]
	bne[set_lmindex0_to_queue_tail_done#], defer[1]
		// defer slot (runs on both paths): step the tail by one entry
		alu[*l$index1[GLOBAL_TAIL_OFFSET_INDEX], \
			*l$index1[GLOBAL_TAIL_OFFSET_INDEX], +, QUEUE_ENTRY_SIZE]
	// turnaround reached: the stepped value is replaced by the queue base
	alu[*l$index1[GLOBAL_TAIL_OFFSET_INDEX], --, B, \
			*l$index1[GLOBAL_QUEUE_INFO_BASE_INDEX]]
set_lmindex0_to_queue_tail_done#:
.end 
#endm // end of #macro _sphy_mphy4_tx_set_lmindex0_to_queue_tail()


///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_advance_queue_head()
//
// Description: advance queue head after the last mpcket of one packet is sent
//              to the port
//
// Outputs:							
//							None
//
// Inputs:  
//      in_turnaround:		queue head/tail offset value for reset offset
//
// Constants:
//						 	None 
//
// Size:
//		6 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_advance_queue_head(in_turnaround)

	// Retire one packet from the port's queue: decrement the packets-in-queue
	// count, increment the packets-transmitted count and advance the head
	// offset by one entry, wrapping to the queue base at the turnaround offset.
.begin
	// Is the head currently sitting on the turnaround offset?
	alu[--, *l$index1[GLOBAL_HEAD_OFFSET_INDEX], -, in_turnaround]
	bne[advance_queue_head_done#], defer[3]
		// The three defer-slot instructions run on both paths.
		alu[*l$index1[GLOBAL_PKTS_IN_QUEUE_INDEX], \
			*l$index1[GLOBAL_PKTS_IN_QUEUE_INDEX], -, 1]
		alu[*l$index1[GLOBAL_HEAD_OFFSET_INDEX], \
				*l$index1[GLOBAL_HEAD_OFFSET_INDEX], +, QUEUE_ENTRY_SIZE]
		alu[*l$index1[GLOBAL_PKTS_TXED_INDEX], \
				*l$index1[GLOBAL_PKTS_TXED_INDEX], +, 1]
	// turnaround reached: the stepped head is replaced by the queue base
	alu[*l$index1[GLOBAL_HEAD_OFFSET_INDEX], --, B, \
			*l$index1[GLOBAL_QUEUE_INFO_BASE_INDEX]]
advance_queue_head_done#:

.end
#endm // end of macro _sphy_mphy4_tx_advance_queue_head()


 #ifndef DISABLE_TX2SCHED_FEEDBACK

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_notify_scheduler()
//
// Description: notify scheduler that one packet has been transmitted from 
//				this port 
//
// Outputs:							
//      out_txd_port:		total packets transmitted by the port (sram 
//                          transfer register)
//
// Inputs:  
//      in_port:			port to transmit the current mpacket 
//
// Constants:
//						 	None 
//
// Size:
//		2 instructions for SPHY_1X32 mode, and 10 instructions for other modes
//
///////////////////////////////////////////////////////////////////////////////
#macro 	_sphy_mphy4_tx_notify_scheduler(out_txd_port, in_port)
	// Copies the transmitted-packet count from local memory
	// (GLOBAL_PKTS_TXED_INDEX) into out_txd_port and writes it with cap[write]
	// to a register declared ".reg remote" on SCHEDULER_ME. Completion of the
	// write is reported on sig_reflect_write.
	// In SPHY_1_32 mode there is a single destination register and in_port
	// is not used; in the other modes in_port selects one of four registers
	// through a jump table.
#if (TX_PHY_MODE == SPHY_1_32)

#ifdef MICROC_SCHEDULER
	// NOTE(review): unlike the other three variants, this path does not
	// increment @pkt_tx_num_pkts_notified - confirm that is intended.
.reg remote $$_transmit_count
	alu[out_txd_port, --, b, *l$index1[GLOBAL_PKTS_TXED_INDEX]] ; get txed pkts

	// update scheduler with updated counts of packets transmitted for port 0
	cap[write, out_txd_port, SCHEDULER_ME, $$_transmit_count, 0, 1], \
			sig_done[sig_reflect_write]	; reflect write

#else
.reg remote $$txd_p0 
	alu[out_txd_port, --, b, *l$index1[GLOBAL_PKTS_TXED_INDEX]] ; get txed pkts

	// update scheduler with updated counts of packets transmitted for port 0
	cap[write, out_txd_port, SCHEDULER_ME, $$txd_p0, 0, 1], \
			sig_done[sig_reflect_write]	; reflect write

	_sphy_mphy4_tx_debug_incr_counter(@pkt_tx_num_pkts_notified)

#endif

#else // (TX_PHY_MODE == SPHY_4_8) or (TX_PHY_MODE == MPHY_4) 

/* nizhner 8/13/04: added micro-c scheduler dependency for mphy4 */
#ifdef MICROC_SCHEDULER
.reg remote $$_transmit_count0 $$_transmit_count1 $$_transmit_count2 $$_transmit_count3
#else
.reg remote $$txd_p0 $$txd_p1 $$txd_p2 $$txd_p3 
#endif

.begin 
.reg jmp_offset

	_sphy_mphy4_tx_debug_incr_counter(@pkt_tx_num_pkts_notified)

	// Each of the first three table entries below is two instructions
	// (cap + br), hence the offset is the port number times two.
	alu_shf[jmp_offset, --, b, in_port, <<1]    ; get jmp_offset by port times two
	// jump to different port
	// Only the first of the three defer slots does useful work (loading the
	// count); the other two are nops.
	jump[jmp_offset, jmp_tbl#],defer[3], targets[xp0#,xp1#,xp2#,xp3#] 
		alu[out_txd_port, --, b, *l$index1[GLOBAL_PKTS_TXED_INDEX]] ; get txed pkts
		nop
		nop

//	Start of jump table. 
// Layout is significant: do not add or remove instructions inside an entry
// without adjusting the <<1 scaling above. The last entry (xp3#) has no br
// and falls through to notify_schlr_done#.
jmp_tbl#:

#ifdef MICROC_SCHEDULER  
xp0#:	
	// update scheduler with updated counts of packets transmitted for port 0
	cap[write, out_txd_port, SCHEDULER_ME, $$_transmit_count0, 0, 1], \
			sig_done[sig_reflect_write]	; reflect write
  	br[notify_schlr_done#]

xp1#:
	// update scheduler with updated counts of packets transmitted for port 1
	cap[write, out_txd_port, SCHEDULER_ME, $$_transmit_count1, 0, 1], \
			sig_done[sig_reflect_write]	; reflect write
  	br[notify_schlr_done#]

xp2#:
	// update scheduler with updated counts of packets transmitted for port 2
	cap[write, out_txd_port, SCHEDULER_ME, $$_transmit_count2, 0, 1], \
			sig_done[sig_reflect_write]	; reflect write
  	br[notify_schlr_done#]

xp3#:
	// update scheduler with updated counts of packets transmitted for port 3
	cap[write, out_txd_port, SCHEDULER_ME, $$_transmit_count3, 0, 1], \
			sig_done[sig_reflect_write]	; reflect write

#else
xp0#:	
	// update scheduler with updated counts of packets transmitted for port 0
	cap[write, out_txd_port, SCHEDULER_ME, $$txd_p0, 0, 1], \
			sig_done[sig_reflect_write]	; reflect write
  	br[notify_schlr_done#]

xp1#:
	// update scheduler with updated counts of packets transmitted for port 1
	cap[write, out_txd_port, SCHEDULER_ME, $$txd_p1, 0, 1], \
			sig_done[sig_reflect_write]	; reflect write
  	br[notify_schlr_done#]

xp2#:
	// update scheduler with updated counts of packets transmitted for port 2
	cap[write, out_txd_port, SCHEDULER_ME, $$txd_p2, 0, 1], \
			sig_done[sig_reflect_write]	; reflect write
  	br[notify_schlr_done#]

xp3#:
	// update scheduler with updated counts of packets transmitted for port 3
	cap[write, out_txd_port, SCHEDULER_ME, $$txd_p3, 0, 1], \
			sig_done[sig_reflect_write]	; reflect write
#endif 

notify_schlr_done#:
.end // jmp_offset
#endif
#endm // end of macro _sphy_mphy4_tx_notify_scheduler()
#endif	// #ifndef DISABLE_TX2SCHED_FEEDBACK

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_update_next_availble_tbuf_ele()
//
// Description: update next available tbuf element in local memory
//
// Outputs:							
//							None
//
// Inputs:  
//							None
//
// Constants:
//							None
//
// Size:
//		2 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_update_next_availble_tbuf_ele()
.begin
	// Step the element number kept in local memory to the following one ...
	alu[*l$index1[GLOBAL_AVAIL_TBUF_ELEMENT_INDEX], *l$index1[GLOBAL_AVAIL_TBUF_ELEMENT_INDEX], +, 1]
	// ... and keep only the bits selected by TBUF_ELE_BITS_MASK, so the
	// stored value wraps instead of growing.
	alu[*l$index1[GLOBAL_AVAIL_TBUF_ELEMENT_INDEX], *l$index1[GLOBAL_AVAIL_TBUF_ELEMENT_INDEX], AND, TBUF_ELE_BITS_MASK]
.end

#endm // _sphy_mphy4_tx_update_next_availble_tbuf_ele()


///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_get_tbuf_addr()
//
// Description: get tbuf element address for input tbuf element
//
// Outputs:							
//		out_tbuf_addr:		the starting address of the input tbuf
//							element
//
// Inputs:  
//		in_tbufele:			the input tbuf element 
//
// Constants:
//							None
//
// Size:
//		2 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_get_tbuf_addr(out_tbuf_addr, in_tbufele)
.begin
.reg tbuf_ele_offset
	// offset of the element = element number << TBUF_ADDR_SHF
	alu_shf[tbuf_ele_offset, --, b, in_tbufele, <<TBUF_ADDR_SHF]
	// address = offset + tbuf base kept in local memory
	alu[out_tbuf_addr, tbuf_ele_offset, +, *l$index1[GLOBAL_TBUF_BASE_INDEX]]
.end // tbuf_ele_offset

#endm // _sphy_mphy4_tx_get_tbuf_addr()

/////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_move_sop_paylo_to_tbuf() 
//
// Description: move payload of sop or sop_eop mpacket to tbuf
//
// Outputs:							
// 		out_indir:			indirect reference used in dram[tbuf_wr...] 
//							instruction
//		out_dramaddr:		dram address of the payload
//
// Inputs:  
//		in_bd:				meta data handle 
//		in_addrtbuf:		the starting address of tbuf for payload 
//		in_indirbase:		base for  indir
//      in_of_set: 			the starting address of the payload to copy
//		in_paylo:			payload length
//		in_offsetmod8:		payload offset
//		in_sig_dram:		signal number for dram[tbuf_wr...] instruction
//
// Constants:
//							none
//
// Size:
//		8 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro 	_sphy_mphy4_tx_move_sop_paylo_to_tbuf(out_indir, out_dramaddr, in_bd, \
			in_addrtbuf, in_indirbase, in_of_set, in_paylo, in_offsetmod8, \
			in_sig_dram) 
.begin
.reg sop_scratch
	// Indirect reference, first part: the base with the tbuf address
	// OR'ed in at bit 5. This is the value left in out_indir.
	alu[out_indir, in_indirbase, OR, in_addrtbuf, <<5]

	// Buffer handle is the meta data handle shifted left by two.
	alu_shf[sop_scratch, --, B, in_bd, <<2]
	// dl_buf_get_data_from_meta (dispatch_loop.uc) turns the handle into the
	// dram address of the start of the buffer.
	dl_buf_get_data_from_meta[out_dramaddr, sop_scratch]

	// Bytes to fetch into tbuf = in_offsetmod8 + in_paylo.
	alu[sop_scratch, in_offsetmod8, +, in_paylo]

	// Ref count n fetches (n + 1) * 8 bytes (0 -> 8 bytes ... 15 -> 128
	// bytes), so the count is (bytes - 1) >> 3: subtract one here, shift
	// right by three next. That fetches the smallest multiple of 8 bytes
	// covering the data.
	alu[sop_scratch, sop_scratch, -, 1]
	alu_shf[sop_scratch, --, B, sop_scratch, >>3]
	// Place the ref count at bit 21 of the indirect reference. The result is
	// not stored (destination "--"); it feeds the indirect_ref of the dram
	// instruction that follows, so these two must stay adjacent.
	alu[--, out_indir, OR, sop_scratch, <<21]

	// max_16 is a dummy: the amount copied is governed by the ref count in
	// the indirect reference. in_of_set is added to out_dramaddr to form the
	// fetch address; the dram channel ignores the lower 3 address bits.
	dram[tbuf_wr, --, out_dramaddr, in_of_set, max_16], indirect_ref, \
		sig_done[in_sig_dram]
.end // sop_scratch
#endm // _sphy_mphy4_tx_move_sop_paylo_to_tbuf()

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_set_tcw0_with_paylo_len_offset_chnl()
//
// Description: set payload_length, payload_offset, and channel fields in 
//              transmit control word
//
// Outputs:							
//							None
//
// Inputs/Outputs:  
//		io_tcw:				transmit control word
// 
// Inputs:  
//		in_paylo_len:		payload length
//		in_paylo_offset:	payload offset
//		in_chnl:			port number
//
// Constants:
//		TCW0_PAYLOAD_LEN_LSB_LOC:	payload length field location in transmit 
//									control word   
//		TCW0_PAYLOAD_OFFSET_LSB_LOC:	payload offset field location in  
//										transmit contrl word     
//
// Size:
//		3 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro 	_sphy_mphy4_tx_set_tcw0_with_paylo_len_offset_chnl(io_tcw, in_paylo_len, \
													in_paylo_offset, in_chnl)
	// Each field is OR'ed into io_tcw; bits already set in io_tcw are kept.
	alu_shf[io_tcw, io_tcw, OR, in_paylo_len, <<TCW0_PAYLOAD_LEN_LSB_LOC]		; payload length field
	alu_shf[io_tcw, io_tcw, OR, in_paylo_offset, <<TCW0_PAYLOAD_OFFSET_LSB_LOC]	; payload offset field
	alu[io_tcw, io_tcw, OR, in_chnl]											; channel field, unshifted
#endm // _sphy_mphy4_tx_set_tcw0_with_paylo_len_offset_chnl()


///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_set_tcw0_with_paylo_len_offset()
//
// Description: set payload_length and payload_offset fields in the
//              transmit control word (the channel field is not touched)
//
// Outputs:							
//							None
//
// Inputs/Outputs:  
//		io_tcw:				transmit control word
// 
// Inputs:  
//		in_paylo_len:		payload length
//		in_paylo_offset:	payload offset
//
// Constants:
//		TCW0_PAYLOAD_LEN_LSB_LOC:	payload length field location in transmit 
//									control word   
//		TCW0_PAYLOAD_OFFSET_LSB_LOC:	payload offset field location in  
//										transmit contrl word     
//
// Size:
//		2 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro 	_sphy_mphy4_tx_set_tcw0_with_paylo_len_offset(io_tcw, in_paylo_len, \
													in_paylo_offset)
	// Both fields are OR'ed into io_tcw; bits already set in io_tcw are kept.
	alu_shf[io_tcw, io_tcw, OR, in_paylo_len, <<TCW0_PAYLOAD_LEN_LSB_LOC]		; payload length field
	alu_shf[io_tcw, io_tcw, OR, in_paylo_offset, <<TCW0_PAYLOAD_OFFSET_LSB_LOC]	; payload offset field
#endm // _sphy_mphy4_tx_set_tcw0_with_paylo_len_offset()


///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_save_sop_meta_to_lm()
//
// Description: save sop meta data to local memory
//
// Outputs: 
//								none
//
// Inputs:  
//			in_txr_lw0:			tx request from Queue Manager 
//
// Constants:
//								none
//
//	Size: 11 instructions. (worst case)
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_save_sop_meta_to_lm(in_txr_lw0)
	// Fill the queue entry addressed by *l$index0 for a newly received tx
	// request: secondary buffer descriptor (SBD_0), active buffer descriptor
	// with the SOP bit (ABD_0), buffer offset and buffer size (ABD_1), and
	// count the packet in GLOBAL_PKTS_IN_QUEUE_INDEX.
.begin 
.reg v_buf_size whole_dl_meta0

	_sphy_mphy4_tx_debug_incr_counter(@pkt_tx_num_tx_requests_rxed)

	// lower three bytes of the "buffer next" meta field become the sbd
	dl_meta_get_buffer_next[whole_dl_meta0]
	ld_field_w_clr[*l$index0[SBD_0_OFFSET], 0111, whole_dl_meta0]	; get sbd
	alu[--, *l$index0[SBD_0_OFFSET], -, IX_NULL]	; is there no next buffer?
	beq[finish_save_sop_meta_to_lm#], defer[3]
		// The three defer slots run on both paths.
		// NOTE(review): this relies on dl_meta_get_offset expanding to a
		// single instruction - confirm against dispatch_loop.uc.
		ld_field_w_clr[*l$index0[ABD_0_OFFSET], 0111, in_txr_lw0]	; set ab_bd
		alu_shf[*l$index0[ABD_0_OFFSET], *l$index0[ABD_0_OFFSET], or, \
				1, <<SOP_BIT_LOC]
 	  	//update sop buffer offset
		dl_meta_get_offset[*l$index0[ABD_1_OFFSET]]
	 
	// only reached when a next buffer exists
	alu_shf[*l$index0[ABD_0_OFFSET], *l$index0[ABD_0_OFFSET], or, 1, \
			<<RD_NBD_BIT_LOC]	; set ab_rd_nbd_flag
finish_save_sop_meta_to_lm#:
	// buffer size goes into the upper two bytes of ABD_1 (byte mask 1100);
	// ld_field (no _w_clr) keeps the offset written above
	dl_meta_get_buffer_size[v_buf_size]
	ld_field[*l$index0[ABD_1_OFFSET], 1100, v_buf_size, <<PAYLO_RMND_LOC] 
	alu[*l$index1[GLOBAL_PKTS_IN_QUEUE_INDEX], \
				*l$index1[GLOBAL_PKTS_IN_QUEUE_INDEX], +, 1]
#ifdef ADD_L2_HEADER
	dl_meta_get_nexthop_id[*l$index0[NEXTHOPID_OFFSET]]		; get nexthop_id
#endif // ADD_L2_HEADER
.end // v_buf_size whole_dl_meta0

#endm // end of #macro _sphy_mphy4_tx_save_sop_meta_to_lm()

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_read_meta_data_from_sram()  
//
// Description: read the first to third longwords of meta data from sram
//
// Outputs:							
//		out_xmeta:			the sram transfer registers to keep meta data 
//
// Inputs:  
//		in_buf_handle:		buffer handle to get meta data
//
// Constants:
//		none   
//
// Size:
//		4 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_read_meta_data_from_sram(out_xmeta, in_buf_handle)
	// Meta data is fetched with the dispatch_loop macro dl_meta_load_cache,
	// META_DATA_LW_TO_READ longwords into out_xmeta. The completion signal
	// sig_sram_read_1 is not added to any mask here: it already belongs to
	// the default phase 2 signal mask, which saves an instruction in the
	// critical POS minimum-packet case.
	dl_meta_load_cache[in_buf_handle, out_xmeta, sig_sram_read_1, 0, META_DATA_LW_TO_READ]

#endm // _sphy_mphy4_tx_read_meta_data_from_sram()

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_read_sb_meta_data_from_sram()  
//
// Description: read the first to third longwords of meta data from sram
//
// Outputs:							
//		out_xmeta:			the sram transfer registers to keep meta data 
//
// Inputs/Outputs:  
//		io_sigmask:			signal mask to be added with sig_sram_read_1
// 
// Inputs:  
//		in_buf_handle:		buffer handle to get meta data
//
// Constants:
//		none   
//
// Size:
//		4 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_read_sb_meta_data_from_sram(out_xmeta, io_sigmask, \
								in_buf_handle)
	// The caller's mask must wait for the read issued below, so
	// sig_sram_read_1 is added to io_sigmask first.
	_sphy_mphy4_tx_set_signal[io_sigmask, sig_sram_read_1]
	// Fetch 3 longwords of meta data with the dispatch_loop macro.
	dl_meta_load_cache[in_buf_handle, out_xmeta, sig_sram_read_1, 0, 3]

#endm // _sphy_mphy4_tx_read_sb_meta_data_from_sram()

/////////////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_clear_signal()
//
// Description:
//		Clear a signal from a signal mask 
//
// Outputs: 
//		None 
//
// Inputs/Outputs:  
//		io_sigmask:			signal mask to be cleared with in_sig
//
// Inputs:  
//		in_sig:				signal number to clear
//
// Constants
//
//		None
//
// Size: 
//
//		1 instructions
// 
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _sphy_mphy4_tx_clear_signal(io_sigmask, in_sig)

	// Clears the bit for in_sig in io_sigmask (io_sigmask AND NOT (1 << n)).
	// in_sig may be a numeric constant or a signal name.
.begin

#if ( isnum(in_sig) )
	
	// Numeric signal: evaluate the macro argument itself as the shift count.
	// (This used to evaluate the undefined token "sig" instead of in_sig.)
	#define_eval SIG_SHIFT in_sig
	alu_shf[io_sigmask, io_sigmask, AND~, 1, <<SIG_SHIFT ]
	#undef SIG_SHIFT

#else

	// Named signal: &in_sig supplies the signal number as the shift count.
	alu[io_sigmask, io_sigmask, AND~, 1, <<(&in_sig)]

#endif

.end

#endm // end of #macro _sphy_mphy4_tx_clear_signal()

/////////////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_set_signal()
//
// Description:
// 	
//		Set a signal in a signal mask 
//
// Outputs: 
//		None 
//
// Inputs/Outputs:  
//		io_sigmask:			signal mask to be added with in_sig
//
// Inputs:  
//		in_sig:				signal number to set
//
// Constants
//		None
//
// Size: 
//
//		1 instruction
// 
//
////////////////////////////////////////////////////////////////////////////////////

#macro _sphy_mphy4_tx_set_signal(io_sigmask, in_sig)

	// Sets the bit for in_sig in io_sigmask (io_sigmask OR (1 << n)).
	// in_sig may be a numeric constant or a signal name; either way the
	// macro expands to a single instruction.
.begin
#if ( isnum(in_sig) )

	// Numeric signal: the evaluated constant is the shift count.
	// SIG_SHIFT is a scratch preprocessor symbol, undefined again below.
	#define_eval SIG_SHIFT in_sig
	alu_shf[io_sigmask, io_sigmask, OR, 1, <<SIG_SHIFT]
	#undef SIG_SHIFT

#else

	// Named signal: &in_sig supplies the signal number as the shift count.
	alu[io_sigmask, io_sigmask, OR, 1, <<(&in_sig)]

#endif

.end

#endm // end of #macro _sphy_mphy4_tx_set_signal()


/////////////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_set_push_signal()
//
// Description:
// 	
//		Add push signal (i.e. dram access has pull and push signal) in a signal mask 
//
// Outputs:
//		None 
//
// Inputs/Outputs:  
//		io_sigmask:			signal mask to be added with push signal of in_sig
//
// Inputs:  
//		in_sig:				signal number to set
//
// Constants
//		None
//
// Size: 
//
//		1-4 instructions
// 
//
////////////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_set_push_signal(io_sigmask, in_sig)
	// Sets the bit numbered (in_sig + 1) in io_sigmask, i.e. the signal
	// directly above in_sig.
.begin
.reg push_sig_num
#if ( isnum(in_sig) )

	// Numeric signal: the bit position is known at assembly time.
	#define_eval SIG_SHIFT (in_sig +1)
	alu_shf[io_sigmask, io_sigmask, or, 1, <<SIG_SHIFT]
	#undef SIG_SHIFT

#else
	// Named signal: compute (signal number + 1) in a register.
	alu[push_sig_num, --, b, &in_sig]
	alu[push_sig_num, push_sig_num, +, 1]
	// The result of this instruction is discarded; it exists to feed the
	// <<indirect shift of the instruction right after it, so the pair must
	// stay adjacent (per the IXP2xxx indirect-shift convention).
	alu[--, push_sig_num, or, 0]
	alu_shf[io_sigmask, io_sigmask, or, 1, <<indirect]

#endif
.end

#endm // _sphy_mphy4_tx_set_push_signal()

/////////////////////////////////////////////////////////////////////////////////////
// 
// _sphy_mphy4_tx_sop_eop_check_tbuf_full_and_wait_not_full()
//
// Description:
// 	
//		Check whether tbuf is full to avoid overwriting tbuf, also utilizes
//		defer slots to compose tcw0 for sop_eop mpkt
//
// Outputs: 
//		out_tcw_addr	Validation address for this tbuf element
//
// Inputs/Outputs:
//		io_tcw0			tcw0 for this transmit
//
// Inputs:  
//		in_tbufele:		tbuf element number which will be used for tx
//		in_paylo		payload length for tcw0
//
// Constants:
//		None
//
// Size: 
//
//		8 instructions in not full case, otherwise indefinite
// 
//
////////////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_sop_eop_check_tbuf_full_and_wait_not_full(out_tcw_addr, \
			io_tcw0, in_tbufele, in_paylo)
	// Fullness test: ((in_tbufele - sent count) AND TBUF_ELE_BITS_MASK) is
	// compared with TBUF_WAIT_THRESHOLD; a result below the threshold means
	// the tbuf is not full. The first test uses the $mpkts_sent0 value that is
	// already present; only if that reports "full" is the tx sequencer re-read
	// in a loop until the test passes.
	// The defer slots of the first branch OR in_paylo into io_tcw0 and
	// compute out_tcw_addr; they execute on both paths.
.begin	
.reg tmp

	alu[tmp, $mpkts_sent0, and, TBUF_ELE_BITS_MASK ]	; get mpkts actually sent

	alu[tmp, in_tbufele, -, tmp]				; get difference between
												; current tbuf_ele and txed

	alu[tmp, tmp, and, TBUF_ELE_BITS_MASK]		; mask out overflow value 

	alu[tmp, tmp, -, TBUF_WAIT_THRESHOLD]   	; compare the difference with 
												;threshold 

	blt[packet_tx_tbuf_not_full#], defer[3]		; tbuf not full, return

		// defer slot 1: payload length field of tcw0
		alu_shf[io_tcw0, io_tcw0, OR, in_paylo, <<TCW0_PAYLOAD_LEN_LSB_LOC]

		// defer slot 2: element number scaled to an address offset
		// (tmp is free again: the branch decision has already been made).
		// Exactly one of the two alu_shf below is assembled.
#ifdef IXP2800
	
		alu_shf[tmp, --, B, in_tbufele, <<ELE_2_TX_CNTRL_ADDR]

#else // #ifdef IXP2800

		alu_shf[tmp, --, B, in_tbufele, <<3]

#endif // #ifdef IXP2800

		// defer slot 3: offset + base kept in local memory
		alu[out_tcw_addr, tmp, +, *l$index1[GLOBAL_CTW_VD_BASE_INDEX]]

	// when the control goes here, it means tbuf is full

read_tx_sequencer_loop#:

	// read the actual number of mpkts out of tbuf

	msf[read, $mpkts_sent0, addr_tx_seq, 0, 1], sig_done[sig_msf_access_3]	

	// branch-to-self until sig_msf_access_3 is set, i.e. the read has completed
wait_tx_seq_read_done#:
	br_!signal[sig_msf_access_3, wait_tx_seq_read_done#]	

	// same fullness test as at the top, now on the fresh count
	alu[tmp, $mpkts_sent0, and, TBUF_ELE_BITS_MASK ]	; get mpkts actually sent

	alu[tmp, in_tbufele, -, tmp]						; get difference between
														; current tbuf_ele and txed

	alu[tmp, tmp, and, TBUF_ELE_BITS_MASK]				; mask out overflow value

	alu[tmp, tmp, -, TBUF_WAIT_THRESHOLD]   			; compare the difference 
														; with threshold 

	bge[read_tx_sequencer_loop#] 						; loop again

packet_tx_tbuf_not_full#:
.end		
#endm // end of _sphy_mphy4_tx_sop_eop_check_tbuf_full_and_wait_not_full


/////////////////////////////////////////////////////////////////////////////////////
// 
// _sphy_mphy4_tx_not_seop_check_tbuf_full_and_wait_not_full()
//
// Description:
// 	
//		Check whether tbuf is full to avoid overwritting tbuf, if full, wait 
//      till not full in not sop-eop mpkt case
//
// Outputs: 
//		None
//
// Inputs/Outputs:
//		None
//
// Inputs:  
//		in_tbufele:		tbuf element number which will be used for tx
//
// Constants:
//		None
//
// Size: 
//
//		8 instructions in not full case, otherwise indefinite
// 
//
////////////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_not_seop_check_tbuf_full_and_wait_not_full(in_tbufele)
	// Fullness test used twice below:
	//   ((in_tbufele - sent count) AND TBUF_ELE_BITS_MASK) - TBUF_WAIT_THRESHOLD
	// A negative result means the tbuf is not full.
.begin
.reg ele_backlog

	// First attempt: use the sent count already held in $mpkts_sent0.
	alu[ele_backlog, $mpkts_sent0, AND, TBUF_ELE_BITS_MASK]		; mpkts actually sent
	alu[ele_backlog, in_tbufele, -, ele_backlog]				; distance from sent count to this element
	alu[ele_backlog, ele_backlog, AND, TBUF_ELE_BITS_MASK]		; drop the overflow bits
	alu[ele_backlog, ele_backlog, -, TBUF_WAIT_THRESHOLD]		; against the threshold
	blt[tbuf_has_room#]											; not full: done

	// Reaching this point means the tbuf is full.

poll_tx_sequencer#:
	// re-read how many mpkts have actually left the tbuf
	msf[read, $mpkts_sent0, addr_tx_seq, 0, 1], sig_done[sig_msf_access_3]

spin_on_tx_seq_read#:
	// branch-to-self until sig_msf_access_3 is set
	br_!signal[sig_msf_access_3, spin_on_tx_seq_read#]

	// Same test again on the fresh count.
	alu[ele_backlog, $mpkts_sent0, AND, TBUF_ELE_BITS_MASK]		; mpkts actually sent
	alu[ele_backlog, in_tbufele, -, ele_backlog]				; distance from sent count to this element
	alu[ele_backlog, ele_backlog, AND, TBUF_ELE_BITS_MASK]		; drop the overflow bits
	alu[ele_backlog, ele_backlog, -, TBUF_WAIT_THRESHOLD]		; against the threshold
	bge[poll_tx_sequencer#]										; still full: poll again

tbuf_has_room#:
.end
#endm // _sphy_mphy4_tx_not_seop_check_tbuf_full_and_wait_not_full


/////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_update_ab_paylo_rmnd_and_offset_rpaylo()
//
// Description: update ab_paylo_rmnd and ab_offset_rpaylo in queue entry 
//
//
// Outputs:							
//							None
//
// Inputs/Outputs:  
//		io_mpkt_paylo_len:	payload bytes will transmitted by this mpacket from 
// 							the active buffer
//
// Inputs: 
//							None 
//
// Constants:
//		PAYLO_RMND_LOC:		location of the least significant bit of the 
//							remained payload in queue entry 
//
// Size:
//		3 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_update_ab_paylo_rmnd_and_offset_rpaylo(io_mpkt_paylo_len)
.begin
.reg rmnd_scratch
#ifdef AVOID_USING_SKIP_BIT_IN_TCW0
	// Optional pre-step, assembled only with AVOID_USING_SKIP_BIT_IN_TCW0:
	// if fewer than BUS_WIDTH_IN_BYTES bytes would remain in this buffer
	// after this mpkt, shorten this mpkt by BUS_WIDTH_IN_BYTES. Per the
	// original note this keeps the SKIP bit of TCW0 from ever being needed
	// and only happens in SPHY_1x32 mode.
.reg rmnd_after_mpkt
	ld_field_w_clr[rmnd_scratch, 0011, *l$index0[ABD_1_OFFSET], >>PAYLO_RMND_LOC]	; remaining payload
	alu[rmnd_after_mpkt, rmnd_scratch, -, io_mpkt_paylo_len]	; what is left after this mpkt
	alu[--, rmnd_after_mpkt, -, BUS_WIDTH_IN_BYTES]
	bge[mpkt_len_adjust_done#]									; enough left: no adjustment
	alu[io_mpkt_paylo_len, io_mpkt_paylo_len, -, BUS_WIDTH_IN_BYTES]
mpkt_len_adjust_done#:
#endif // #ifdef AVOID_USING_SKIP_BIT_IN_TCW0
	// Common to both builds: add the mpkt length to the low part of ABD_1,
	// then subtract the same length from the field at PAYLO_RMND_LOC.
	alu[*l$index0[ABD_1_OFFSET], *l$index0[ABD_1_OFFSET], +, io_mpkt_paylo_len]
	alu_shf[rmnd_scratch, --, b, io_mpkt_paylo_len, <<PAYLO_RMND_LOC]
	alu[*l$index0[ABD_1_OFFSET], *l$index0[ABD_1_OFFSET], -, rmnd_scratch]
.end // rmnd_scratch (rmnd_after_mpkt)
#endm // _sphy_mphy4_tx_update_ab_paylo_rmnd_and_offset_rpaylo()


///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_save_sbd_meta_to_lm()
//
// Description: save secondary buffer data to local memory
//
// Outputs:							
//							None
//
// Inputs:  
//							None
//
// Constants:
//							None  
//
// Size: 7 instructions. (Worst case cycle count)
//
/////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_save_sbd_meta_to_lm()
.begin
.reg sbd_buf_size next_sbd next_sbd_lw
	// Lower three bytes of the "buffer next" meta field are the descriptor
	// of the buffer after the secondary one; keep them in local memory.
	dl_meta_get_buffer_next[next_sbd_lw]
	ld_field_w_clr[next_sbd, 0111, next_sbd_lw]
	alu[*l$index1[GLOBAL_SBD_NEXT_BD_INDEX], --, B, next_sbd]
	alu[--, *l$index1[GLOBAL_SBD_NEXT_BD_INDEX], -, IX_NULL]	; no further buffer?
	beq[save_sbd_meta_done#], defer[2]
		// Both defer slots run on either path: the secondary buffer size
		// goes into the upper two bytes of SBD_1 and the rest of SBD_1 is
		// cleared (offset always zero).
		dl_meta_get_buffer_size[sbd_buf_size]
		ld_field_w_clr[*l$index0[SBD_1_OFFSET], 1100, sbd_buf_size, <<PAYLO_RMND_LOC]

	// Only reached when a further buffer exists: set the RD_NBD flag in SBD_0.
	alu_shf[*l$index0[SBD_0_OFFSET], *l$index0[SBD_0_OFFSET], OR, 1, <<RD_NBD_BIT_LOC]
save_sbd_meta_done#:

.end // sbd_buf_size next_sbd next_sbd_lw
#endm // _sphy_mphy4_tx_save_sbd_meta_to_lm()

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_secondary_buffer_become_active_buffer[]
//
// Description: replace active buffer data with secondary buffer data in  
//              queue entry
//
// Outputs:							
//							None
//
// Inputs:  
//							None
//
// Constants:
//							None 
//
// Size: 
//		5 instructions. (Worst case cycle count)
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_secondary_buffer_become_active_buffer()
.begin
	// secondary buffer words 0 and 1 become the active buffer words
	alu[*l$index0[ABD_0_OFFSET], --, B, *l$index0[SBD_0_OFFSET]]
	alu[*l$index0[ABD_1_OFFSET], --, B, *l$index0[SBD_1_OFFSET]]
	// the saved "next" descriptor moves up into secondary word 0 ...
	alu[*l$index0[SBD_0_OFFSET], --, B, *l$index1[GLOBAL_SBD_NEXT_BD_INDEX]]
	// ... and the two slots that were emptied are reset
	alu[*l$index0[SBD_1_OFFSET], --, B, 0]
	alu[*l$index1[GLOBAL_SBD_NEXT_BD_INDEX], --, B, IX_NULL]
.end
#endm // _sphy_mphy4_tx_secondary_buffer_become_active_buffer()

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_move_not_sop_paylo_to_tbuf() 
//
// Description: issue the dram[tbuf_wr...] that copies the payload of a
//              not_sop mpacket from dram to tbuf
//
// Outputs:							
// 		out_indir:			tbuf address part of the indirect reference,
//							i.e. in_indirbase | (in_addrtbuf << 5).  The ref
//							count is OR-ed in only in the ALU result that is
//							consumed by the dram instruction; it is not
//							stored in out_indir
//		out_dramaddr:		dram address produced by
//							dl_buf_get_data_from_meta for (in_bd << 2)
//
// Inputs:  
//		in_bd:				meta data handle for active buffer 
//		in_addrtbuf:		the starting address of tbuf for payload 
//		in_indirbase:		base for indir
//      in_offset: 			offset added to out_dramaddr by the dram
//							instruction to reach the payload to copy
//		in_tcw0:			transmit control word which already contains 
//							payload length and payload offset
//		in_sig_dram:		signal number for dram[tbuf_wr...] instruction
//
// Constants:
//							None   
//
// Size:
//		11 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro 	_sphy_mphy4_tx_move_not_sop_paylo_to_tbuf(out_indir, out_dramaddr, in_bd, \
			in_addrtbuf, in_indirbase, in_offset, in_tcw0, in_sig_dram) 
.begin
.reg nsop_len nsop_off nsop_work

	// tbuf address portion of the indirect reference
	alu[out_indir, in_indirbase, or, in_addrtbuf, <<5]

	// extract payload length and payload offset from the control word;
	// the offset is additionally masked with TCW0_PAYLOAD_OFFSET_MASK
	ld_field_w_clr[nsop_len, 0001, in_tcw0, >>TCW0_PAYLOAD_LEN_LSB_LOC]
	ld_field_w_clr[nsop_off, 0001, in_tcw0, >>TCW0_PAYLOAD_OFFSET_LSB_LOC]
	alu[nsop_off, nsop_off, and, TCW0_PAYLOAD_OFFSET_MASK]

	// shift the handle left by two and let the dispatch loop macro
	// dl_buf_get_data_from_meta turn it into the dram address of the buffer
	alu_shf[nsop_work, --, b, in_bd, <<2]
	dl_buf_get_data_from_meta[out_dramaddr, nsop_work]

	// number of bytes that must reach tbuf: payload length + payload offset
	alu[nsop_work, nsop_len, +, nsop_off]

	// Ref count encoding (per the original author): 0 fetches 8 bytes,
	// 1 fetches 16 bytes, ..., 15 fetches 128 bytes.  Hence
	//     ref_cnt = (byte count - 1) >> 3
	// which fetches the smallest multiple of 8 bytes covering the data.
	alu[nsop_work, nsop_work, -, 1]
	alu_shf[nsop_work, --, b, nsop_work, >>3]

	// Place ref_cnt at bit 21 of the indirect reference.  The result goes
	// to "--" on purpose: this ALU operation must stay directly in front of
	// the dram instruction that uses indirect_ref.
	alu[--, out_indir, or, nsop_work, <<21]

	// max_16 is a dummy, the real length comes from the ref count above.
	// in_offset is added to out_dramaddr to form the fetch address; per the
	// original author the lower 3 address bits are ignored by the dram
	// channel.
	dram[tbuf_wr, --, out_dramaddr, in_offset, max_16], indirect_ref, \
													sig_done[in_sig_dram]
.end // nsop_len nsop_off nsop_work
#endm // end of macro _sphy_mphy4_tx_move_not_sop_paylo_to_tbuf()

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_leftover_bytes_alignment()
//
// Description: do the necessary byte alignment with offset, so that the
//              leftover bytes are realigned to start at the first byte of
//              one longword (out_lw0).
//
//              in_offset < 4 : out_lw0 is byte-aligned from in_lw0/in_lw1
//                              with BYTE_INDEX = in_offset
//              in_offset = 4 : out_lw0 is a plain copy of in_lw1
//              in_offset > 4 : out_lw0 is byte-aligned from in_lw1/in_lw2
//                              with BYTE_INDEX = in_offset - 4
//
//              in_offset is NOT modified by this macro.
//
// Outputs:							
//		out_lw0:			leftover longword which will be saved in local
//                          memory
//
// Inputs:  
// 		in_offset:			offset of leftover starting byte in the input four 
//							dram xfer registers which hold the leftover bytes
//							(presumably 0..7 -- TODO confirm against callers)
//		in_lw0:				first dram xfer register which holds the leftover
//							bytes
//		in_lw1:				second dram xfer register which holds the leftover
//							bytes
//		in_lw2:				third dram xfer register which holds the leftover 
//							bytes
//		in_lw3:				fourth dram xfer register which holds the leftover 
//							bytes (not referenced by the macro body; kept so
//							that existing callers need no change)
//
// Constants:
//							None   
//
// Size: 
//		10 instructions on the longest path (branch penalties not counted)
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_leftover_bytes_alignment(out_lw0, in_offset, in_lw0, \
				in_lw1, in_lw2, in_lw3)
.begin
.reg offset_minus_4
	// A single subtraction sets the condition codes used to classify the
	// offset and also yields the byte index for the "greater than four"
	// case.  Writing it to a local keeps the caller's in_offset intact.
	alu[offset_minus_4, in_offset, -, 4]
	beq[offset_is_four_bytes#]
	blt[offset_less_than_four_bytes#]
offset_greater_than_four_bytes#:
	// byte index within the longword is (in_offset - 4), i.e. less than 4
	local_csr_wr[BYTE_INDEX, offset_minus_4]	; need 3 cycles for local_csr_wr to settle
	nop
	nop
	nop
	byte_align_be[--, in_lw1]			; first longword, result discarded
	byte_align_be[out_lw0, in_lw2]		; realigned leftover longword
	br[alignment_done#]
offset_less_than_four_bytes#:
	local_csr_wr[BYTE_INDEX, in_offset]	; need 3 cycles for local_csr_wr to settle
	nop
	nop
	nop
	byte_align_be[--, in_lw0]			; first longword, result discarded
	byte_align_be[out_lw0, in_lw1]		; realigned leftover longword
	br[alignment_done#]
offset_is_four_bytes#:
	// leftover starts exactly at the second longword: no shifting needed
	alu[out_lw0, --, b, in_lw1]
alignment_done#:
.end // offset_minus_4
#endm // end of macro _sphy_mphy4_tx_leftover_bytes_alignment()

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_free_buffer()
//
// Description: free buffer handle by dispatch loop macro dl_buf_free
//
//              Conditional assembly:
//                UNIT_TEST     - when defined, @buf_freed is incremented
//                                by one on every invocation
//                SKIP_FREE_BD  - when defined, dl_buf_free is NOT invoked
//
// Outputs:							
// 		none
//
// Inputs:  
//		in_buf_handle:		buffer handle to free 
//
// Constants:
//		none   
//
// Size:
//		4 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro	_sphy_mphy4_tx_free_buffer(in_buf_handle)
.begin
	// count freed buffers when built for unit test
#ifdef UNIT_TEST
	alu[@buf_freed, @buf_freed, +, 1]
#endif //UNIT_TEST

#ifndef SKIP_FREE_BD
     // use dl_buf_free in dl_buf.uc in dispatch_loop to return the buffer
     // handle to BUF_FREE_LIST0
	dl_buf_free(in_buf_handle, BUF_FREE_LIST0)
#endif //#ifndef SKIP_FREE_BD
.end	

#endm // end of #macro _sphy_mphy4_tx_free_buffer()

#ifdef COUNTERS
///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_update_counters_sop_eop()
//
// Description: update the per-port transmit counters in sram for an mpkt:
//                - the counter at PKT_TRANSMITTED_CNT_IDX is incremented
//                - (prepend length + payload length) taken from in_tcw is
//                  added to the counter at BYTE_TRANSMITTED_CNT_IDX
//              The counter block address is
//              @cntr_base + (port << 4), with port read from
//              *l$index1[GLOBAL_PORT_ID_INDEX].
//
// Outputs:							
//		out_bytecnt			sram xfer register to keep the value to add to 
// 							counters in sram
//
// Inputs:  
// 		in_tcw:				transmit control word which has payload length 
//                          and prepend length
//		in_sram_add_sig:	signal for sram add operation
//
// Constants:
//							None 
//
// Size: 
//		10 instructions.
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_update_counters_sop_eop(out_bytecnt, in_tcw, \
					in_sram_add_sig)
.begin
.reg cnt_port cnt_addr cnt_bytes cnt_paylo

	// locate the counter block of the current port
	alu[cnt_port, --, B, *l$index1[GLOBAL_PORT_ID_INDEX]]
	alu_shf[cnt_addr, --, b, cnt_port, <<4]
	alu[cnt_addr, cnt_addr, +, @cntr_base]

	// NOTE(review): TCWO_PREPEND_LEN_LSB_LOC is spelled with the letter 'O'
	// (not the digit zero) -- confirm it matches the constant's definition
	alu_shf[cnt_bytes, --, b, in_tcw, >>TCWO_PREPEND_LEN_LSB_LOC]

	// one more packet transmitted on this port
	sram[incr, --, cnt_addr, PKT_TRANSMITTED_CNT_IDX]

	// prepend length is limited to its low five bits
	alu[cnt_bytes, cnt_bytes, and, 0x1F]

	// payload length is used unmasked after the shift
	// (presumably the field occupies the top bits of in_tcw -- TODO confirm)
	alu_shf[cnt_paylo, --, b, in_tcw, >>TCW0_PAYLOAD_LEN_LSB_LOC]
	alu[cnt_bytes, cnt_bytes, +, cnt_paylo]

	// hand the byte total to sram through the xfer register
	alu[out_bytecnt, --, B, cnt_bytes]
	sram[add, out_bytecnt, cnt_addr, BYTE_TRANSMITTED_CNT_IDX],\
	     sig_done[in_sram_add_sig]
.end //	cnt_port cnt_addr cnt_bytes cnt_paylo
#endm // end of macro _sphy_mphy4_tx_update_counters_sop_eop()


///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_update_counters_sop_only()
//
// Description: update the bytes transmitted counter in sram for an mpkt:
//              (prepend length + payload length) taken from in_tcw is added
//              to the counter at BYTE_TRANSMITTED_CNT_IDX.  The packet
//              counter is not touched by this macro.
//              The counter block address is
//              @cntr_base + (port << 4), with port read from
//              *l$index1[GLOBAL_PORT_ID_INDEX].
//
// Outputs:							
//		out_bytecnt			sram xfer register to keep the value to add to 
// 							counters in sram
//
// Inputs:  
// 		in_tcw:				transmit control word which has payload length 
//                          and prepend length
//		in_sram_add_sig:	signal for sram add operation
//
// Constants:
//							None 
//
// Size: 
//		9 instructions.
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_update_counters_sop_only(out_bytecnt, in_tcw, \
					in_sram_add_sig)
.begin
.reg cnt_port cnt_addr cnt_bytes cnt_paylo

	// locate the counter block of the current port
	alu[cnt_port, --, B, *l$index1[GLOBAL_PORT_ID_INDEX]]
	alu_shf[cnt_addr, --, b, cnt_port, <<4]
	alu[cnt_addr, cnt_addr, +, @cntr_base]

	// prepend length: shift down, then keep the low five bits
	// NOTE(review): TCWO_PREPEND_LEN_LSB_LOC is spelled with the letter 'O'
	// (not the digit zero) -- confirm it matches the constant's definition
	alu_shf[cnt_bytes, --, b, in_tcw, >>TCWO_PREPEND_LEN_LSB_LOC]
	alu[cnt_bytes, cnt_bytes, and, 0x1F]

	// payload length is used unmasked after the shift
	// (presumably the field occupies the top bits of in_tcw -- TODO confirm)
	alu_shf[cnt_paylo, --, b, in_tcw, >>TCW0_PAYLOAD_LEN_LSB_LOC]
	alu[cnt_bytes, cnt_bytes, +, cnt_paylo]

	// hand the byte total to sram through the xfer register
	alu[out_bytecnt, --, B, cnt_bytes]
	sram[add, out_bytecnt, cnt_addr, BYTE_TRANSMITTED_CNT_IDX],\
	     sig_done[in_sram_add_sig]
.end //	cnt_port cnt_addr cnt_bytes cnt_paylo
#endm // end of macro _sphy_mphy4_tx_update_counters_sop_only()


///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_update_counters_ether_critical()
//
// Description: update the per-port transmit counters in sram for an
//              Ethernet critical packet made of two mpkts:
//                - the counter at PKT_TRANSMITTED_CNT_IDX is incremented
//                - (prepend length of mpkt 1 + payload length of mpkt 1 +
//                  payload length of mpkt 2) is added to the counter at
//                  BYTE_TRANSMITTED_CNT_IDX
//              The counter block address is
//              @cntr_base + (port << 4), with port read from
//              *l$index1[GLOBAL_PORT_ID_INDEX].
//
// Outputs:							
//		out_bytecnt			sram xfer register to keep the value to add to 
// 							counters in sram
//
// Inputs:  
// 		in_tcw_1:			transmit control word for first mpkt in Ethernet
//							critical packet (payload length and prepend
//							length are used)
// 		in_tcw_2:			transmit control word for second mpkt in Ethernet
//							critical packet (only payload length is used)
//		in_sram_add_sig:	signal for sram add operation
//
// Constants:
//							None 
//
// Size: 
//		12 instructions.
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_update_counters_ether_critical(out_bytecnt, in_tcw_1, \
					in_tcw_2, in_sram_add_sig)
.begin
.reg cnt_port cnt_addr cnt_bytes cnt_paylo

	// locate the counter block of the current port
	alu[cnt_port, --, B, *l$index1[GLOBAL_PORT_ID_INDEX]]
	alu_shf[cnt_addr, --, b, cnt_port, <<4]
	alu[cnt_addr, cnt_addr, +, @cntr_base]

	// one more packet transmitted on this port
	sram[incr, --, cnt_addr, PKT_TRANSMITTED_CNT_IDX]

	// prepend length of mpkt 1: shift down, then keep the low five bits
	// NOTE(review): TCWO_PREPEND_LEN_LSB_LOC is spelled with the letter 'O'
	// (not the digit zero) -- confirm it matches the constant's definition
	alu_shf[cnt_bytes, --, b, in_tcw_1, >>TCWO_PREPEND_LEN_LSB_LOC]
	alu[cnt_bytes, cnt_bytes, and, 0x1F]

	// add payload length of mpkt 1 (used unmasked after the shift)
	alu_shf[cnt_paylo, --, b, in_tcw_1, >>TCW0_PAYLOAD_LEN_LSB_LOC]
	alu[cnt_bytes, cnt_bytes, +, cnt_paylo]

	// add payload length of mpkt 2 (used unmasked after the shift)
	alu_shf[cnt_paylo, --, b, in_tcw_2, >>TCW0_PAYLOAD_LEN_LSB_LOC]
	alu[cnt_bytes, cnt_bytes, +, cnt_paylo]

	// hand the byte total to sram through the xfer register
	alu[out_bytecnt, --, B, cnt_bytes]
	sram[add, out_bytecnt, cnt_addr, BYTE_TRANSMITTED_CNT_IDX],\
	     sig_done[in_sram_add_sig]
.end //	cnt_port cnt_addr cnt_bytes cnt_paylo
#endm // end of macro _sphy_mphy4_tx_update_counters_ether_critical()

///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_update_counters_not_sop()
//
// Description: update the bytes transmitted by mpkt without SOP bit set in
//              tcw.
//
//              Paths through the macro:
//                - SKIP_TRANSMIT_FLAG_BIT set in in_tcw: a dummy sram[add]
//                  of 0 is issued against @cntr_base.
//                - otherwise: (prepend length + payload length) is added to
//                  the BYTE_TRANSMITTED_CNT_IDX counter at
//                  @cntr_base + (port << 4); if TCW0_EOP_BIT_LOC is also
//                  set in in_tcw, the PKT_TRANSMITTED_CNT_IDX counter is
//                  incremented first.
//              The sram[add] with sig_done[in_sram_add_sig] is issued on
//              every path.
//
// Outputs:							
//		out_bytecnt			sram xfer register to keep the value to add to 
// 							counters in sram
//
// Inputs:  
// 		in_tcw:				transmit control word which has payload length 
//                          and prepend length
//		in_sram_add_sig:	signal for sram add operation
//
// Constants:
//							None 
//
// Size: 
//		15 instructions. (Worst case cycle count)
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_update_counters_not_sop(out_bytecnt, in_tcw, \
					in_sram_add_sig)
.begin
.reg port bytes_txed paylo cntr_addr

	// SKIP bit set in in_tcw?  The three deferred instructions execute on
	// both paths: the first two prepare the dummy add used by the SKIP path,
	// the third starts the address computation of the normal path.
    br_bset[in_tcw, SKIP_TRANSMIT_FLAG_BIT, bytes_txed_update_done#], defer[3]
		alu[cntr_addr, --, b, @cntr_base]	; dummy address for SKIP case
		alu[out_bytecnt, --, b, 0]				; dummy value (0) for SKIP case
		alu[port, --, B, *l$index1[GLOBAL_PORT_ID_INDEX]]	; get port
	alu_shf[cntr_addr, --, b, port, <<4]			; counter block offset = port << 4
	alu[cntr_addr, cntr_addr, +, @cntr_base]		; add base
 
	// EOP bit clear -> skip the packet counter increment.  The deferred
	// instructions gather prepend and payload length on both paths.
	// NOTE(review): TCWO_PREPEND_LEN_LSB_LOC is spelled with the letter 'O'
	// (not the digit zero) -- confirm it matches the constant's definition
    br_bclr[in_tcw, TCW0_EOP_BIT_LOC, pkt_txed_update_done#], defer[3]
		alu_shf[bytes_txed, --, b, in_tcw, >>TCWO_PREPEND_LEN_LSB_LOC] ; prepend
		alu[bytes_txed, bytes_txed, and, 0x1F]			; keep low five bits
		alu_shf[paylo, --, b, in_tcw, >>TCW0_PAYLOAD_LEN_LSB_LOC] ; payload
	sram[incr, --, cntr_addr, PKT_TRANSMITTED_CNT_IDX]	; increment pkts transmitted
pkt_txed_update_done#:

	alu[bytes_txed, bytes_txed, +, paylo]			; prepend + payload
	alu[out_bytecnt, --, B, bytes_txed]				; get bytes txed
bytes_txed_update_done#:
	// common tail: add out_bytecnt to the byte counter at cntr_addr
	sram[add, out_bytecnt, cntr_addr, BYTE_TRANSMITTED_CNT_IDX],\
	     sig_done[in_sram_add_sig]			 	
.end //	port bytes_txed paylo cntr_addr 	 
#endm // end of macro _sphy_mphy4_tx_update_counters_not_sop()
	
#endif // COUNTERS

#ifdef ADD_L2_HEADER

/////////////////////////////////////////////////////////////////////////////
// 	_sphy_mphy4_tx_move_sop_paylo_to_tbuf_with_l2_hdr_space() 
//
// Description: issue the dram[tbuf_wr...] that copies the payload of a sop
//              or sop_eop mpacket from dram to tbuf, leaving
//              BYTES_RESERVED_FOR_L2_HEADER_IN_TBUF of space at the
//              beginning of tbuf for the L2 header
//
// Outputs:							
// 		out_indir:			tbuf address part of the indirect reference,
//							i.e. in_indirbase | ((in_addrtbuf +
//							BYTES_RESERVED_FOR_L2_HEADER_IN_TBUF) << 5).  The
//							ref count is OR-ed in only in the ALU result that
//							is consumed by the dram instruction; it is not
//							stored in out_indir
//		out_dramaddr:		dram address produced by
//							dl_buf_get_data_from_meta for (in_bd << 2)
//
// Inputs:  
//		in_bd:				meta data handle 
//		in_addrtbuf:		the starting address of tbuf for payload 
//		in_indirbase:		base for indir
//      in_of_set: 			offset added to out_dramaddr by the dram
//							instruction to reach the payload to copy
//		in_paylo:			payload length
//		in_offsetmod8:		payload offset
//		in_sig_dram:		signal number for dram[tbuf_wr...] instruction
//
// Constants:
//							none
//
// Size:
//		8 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro 	_sphy_mphy4_tx_move_sop_paylo_to_tbuf_with_l2_hdr_space(out_indir, \
			out_dramaddr, in_bd, in_addrtbuf, in_indirbase, in_of_set, \
			in_paylo, in_offsetmod8, in_sig_dram) 
.begin
.reg sop_work

	// skip the space reserved for the L2 header, then build the tbuf
	// address portion of the indirect reference
	alu[sop_work, in_addrtbuf, +, BYTES_RESERVED_FOR_L2_HEADER_IN_TBUF]
	alu[out_indir, in_indirbase, or, sop_work, <<5]

	// shift the handle left by two and let the dispatch loop macro
	// dl_buf_get_data_from_meta turn it into the dram address of the buffer
	alu_shf[sop_work, --, b, in_bd, <<2]
	dl_buf_get_data_from_meta[out_dramaddr, sop_work]

	// number of bytes that must reach tbuf: payload offset + payload length
	alu[sop_work, in_offsetmod8, +, in_paylo]

	// Ref count encoding (per the original author): 0 fetches 8 bytes,
	// 1 fetches 16 bytes, ..., 15 fetches 128 bytes.  Hence
	//     ref_cnt = (byte count - 1) >> 3
	// which fetches the smallest multiple of 8 bytes covering the data.
	alu[sop_work, sop_work, -, 1]
	alu_shf[sop_work, --, b, sop_work, >>3]

	// Place ref_cnt at bit 21 of the indirect reference.  The result goes
	// to "--" on purpose: this ALU operation must stay directly in front of
	// the dram instruction that uses indirect_ref.
	alu[--, out_indir, or, sop_work, <<21]

	// max_16 is a dummy, the real length comes from the ref count above.
	// in_of_set is added to out_dramaddr to form the fetch address; per the
	// original author the lower 3 address bits are ignored by the dram
	// channel.
	dram[tbuf_wr, --, out_dramaddr, in_of_set, max_16], indirect_ref, \
		sig_done[in_sig_dram]
.end // sop_work
#endm // end of macro _sphy_mphy4_tx_move_sop_paylo_to_tbuf_with_l2_hdr_space()

#ifdef ETHERNET_TX
///////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_read_l2_table_entry()  
//
// Description: start the sram read of one L2 table entry.
//              L2_TABLE_ENTRY_SIZE_IN_LW longwords are read from
//              in_l2_table_base + (in_nexthop_id << L2_TABLE_ENTRY_SHFT)
//              into the xfer registers starting at $l2_entry_lw0, and
//              sig_sram_read_2 is requested on completion.
//
// Inputs/Outputs:  
//		io_sigmask:			signal mask to which sig_sram_read_2 is added
// 
// Inputs:  
//		in_l2_table_base:	L2 table base
//		in_nexthop_id:		nexthop_id
//
// Constants:
//		none   
//
// Size:
//		3 instructions
//
///////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_read_l2_table_entry(io_sigmask, in_l2_table_base, \
											in_nexthop_id)
.begin
.reg l2_entry_ofs

	// record in io_sigmask that sig_sram_read_2 is now outstanding
	_sphy_mphy4_tx_set_signal(io_sigmask, sig_sram_read_2)

	// offset of the entry inside the table
	alu_shf[l2_entry_ofs, --, B, in_nexthop_id, <<L2_TABLE_ENTRY_SHFT]

	// kick off the read of the entry
	sram[read, $l2_entry_lw0, in_l2_table_base, l2_entry_ofs, \
			L2_TABLE_ENTRY_SIZE_IN_LW], sig_done[sig_sram_read_2]
.end // l2_entry_ofs

#endm // end of macro _sphy_mphy4_tx_read_l2_table_entry()

#endif // #ifdef ETHERNET_TX
#endif // #ifdef ADD_L2_HEADER


/////////////////////////////////////////////////////////////////////////////
// _sphy_mphy4_tx_debug_incr_counter
//	
//	Description:
//		Increment the specified debug counter by 1.
//		The macro expands to nothing unless _DEBUG_COUNTERS_ is defined.
//
//	Outputs:	
//		None (the counter register itself is updated in place)
//		
//	Inputs:
//		counter:	register holding the debug counter to increment
//
//	CONSTANTS:	
//		None
//	
//	Global variables:
//		The debug counter in an absolute register.
//
/////////////////////////////////////////////////////////////////////////////
#macro _sphy_mphy4_tx_debug_incr_counter(counter)

#ifdef	_DEBUG_COUNTERS_

	// counter = counter + 1
	alu[counter,counter,+,1]

#endif

#endm

#endif 		// __SPHY_MPHY4_TX_UTIL_UC__

///////////////////////////////////////////////////////////////////////////////

