///////////////////////////////////////////////////////////////////////////////
//
//                  I N T E L   P R O P R I E T A R Y
//
//     COPYRIGHT [c]  2002 BY  INTEL  CORPORATION.  ALL RIGHTS
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT
//     THE PRIOR WRITTEN PERMISSION OF :
//
//                        INTEL  CORPORATION
//
//                     2200 MISSION COLLEGE BLVD
//
//               SANTA  CLARA,  CALIFORNIA  95052-8119
//
///////////////////////////////////////////////////////////////////////////////
//
//
//      File Name: tx_helper.uc
//
//      Purpose: Tx helper for Egress IXP2800
//
///////////////////////////////////////////////////////////////////////////////

#ifndef __TX_HELPER_UC__
#define __TX_HELPER_UC__

///////////////////////////////////////////////////////////////////////////////
//
// This is a context pipe-stage microblock which receives transmit requests
// from the Queue Manager and passes them to the Packet TX microblock through
// NN ring or scratch rings (MPHY16 mode).
//
// This microblock runs in three phases.
//
// Worst case instruction cycle: 
//
/////////////////////////////////////////////////////////////////////////////////////

// following definition selects sram transfer registers to be used with 
// dl_meta.uc dispatch loop

#define DL_META_DATA_IN_SXFER
	

// include stdmac.uc in IXPblocks Portable library

#include <stdmac.uc>

// include local memory macros in IXPblocks 

#include <localmem.uc>

// definition of hardware register address 

#include <hardware.h>

// global definition of common constants which are shared by all microblocks

#include "dl_system.h"

// definition of constants used

#include "tx_helper.h"

// definition of utilization macros used

#include "tx_helper_util.uc"

// initialization code for Tx helper

#include "tx_helper_init.uc"

// dispatch loop macros

#include "dispatch_loop.uc"


// Debug-only registers, declared only when _DEBUG_COUNTERS_ is defined.
// NOTE(review): neither register is referenced in the visible part of this
// file; presumably they are updated by the helper macros in
// tx_helper_util.uc to count requests received / passed on -- confirm.
#ifdef _DEBUG_COUNTERS_
	.reg @tx_helper_requests_in
	.reg @tx_helper_requests_out
#endif													
///////////////////////////////////////////////////////////////////////////////
//
// tx_helper()
//
// Tx helper main loop. Takes no arguments and never returns: after a one-time
// setup each context runs an endless three-phase loop.
//
//   phase 1: signal the next context, read a tx request
//            (_tx_helper_read_tx_request) and start loading the packet meta
//            data for it (dl_meta_load_cache).
//   phase 2: signal the next context, pass the tx request downstream
//            (_tx_helper_passdown_tx_request), compute the queue structure
//            address from output port and class id, and start the read of
//            the dequeue counter.
//   phase 3: signal the next context, increment the dequeue counter and
//            write it back, then branch back to phase 1.
//
// Every phase ends with a ctx_arb whose wakeup events are loaded from the
// per-phase signal mask (sig_mask_1/2/3). When phase 1 takes the
// no_tx_request_in_phase_1# path, _NO_TX_REQUEST_BIT is set in exe_stat_flag;
// phases 2 and 3 test that bit, skip their work and call
// _tx_helper_clear_signal on their masks for the i/o they did not issue.
//
// Names used but not declared inside this macro:
//   - sig1/sig2/sig3_next_context_gpr, all_queue_structs_base and (in the
//     SPI_4_16PORTS / SPI_4_10PORTS modes) packet_tx_ring_0/1 are declared
//     at main#.
//   - the signals (sig_next_context, sig_sram_read, sig_rd_deq_cntr_done,
//     sig_wr_deq_cntr_done, sig_scratch_write) are not declared in the
//     visible part of this file.
//
// Size:	instructions (count not recorded)
//
///////////////////////////////////////////////////////////////////////////////

#macro tx_helper()

.begin

	.reg sig_mask_1				; wakeup signal mask actually used at end of phase 1
	.reg sig_mask_2				; wakeup signal mask actually used at end of phase 2
	.reg sig_mask_3				; wakeup signal mask actually used at end of phase 3
	.reg default_sigmask_1 		; default value for signal mask for phase 1
	.reg default_sigmask_2 		; default value for signal mask for phase 2
	.reg default_sigmask_3 		; default value for signal mask for phase 3
	.reg exe_stat_flag			; per-iteration status bits; _NO_TX_REQUEST_BIT is the only
								; bit touched in this macro
	.reg volatile tx_request	; tx request obtained in phase 1 and passed down in phase 2

	; NOTE(review): $tx_req is not referenced directly in this macro; presumably
	; it is used by name inside one of the _tx_helper_* helper macros -- confirm
	; before removing.
	.reg $tx_req

	;registers to store packet meta data from SRAM
	.reg $dl_meta0 $dl_meta1 $dl_meta2 $dl_meta3 $dl_meta4 $dl_meta5 $dl_meta6
	.xfer_order $dl_meta0 $dl_meta1 $dl_meta2 $dl_meta3 $dl_meta4 $dl_meta5 \
				$dl_meta6
	
	.reg output_port			; output port from packet meta data
	.reg class_id				; class id from meta data
	.reg queue_struct_addr			; queue structure SRAM address for this class

	.reg read $rd_deq_cntr 		; dequeue counter value read from SRAM
	.reg write $wr_deq_cntr 	; dequeue counter value to write back to SRAM

	// load the default wakeup signal masks for phases 1, 2 and 3

	_tx_helper_init_sigmasks(default_sigmask_1, default_sigmask_2, \
						default_sigmask_3)
						
	#if( (TX_PHY_MODE == SPI_4_16PORTS) || (TX_PHY_MODE == SPI_4_10PORTS) )

	// initialize ring addresses in gpr's (ring number shifted left by 2)
	
	alu[packet_tx_ring_0, --, B,  QM_TO_PACKET_TX_SCR_RING_0, <<2]
	alu[packet_tx_ring_1, --, B,  QM_TO_PACKET_TX_SCR_RING_1, <<2]	 
	#endif	// #if(TX_PHY_MODE == SPI_4_16PORTS or TX_PHY_MODE == SPI_4_10PORTS)

	// The first time through the loop no i/o has been issued yet, so wait
	// for the sig_next_context signal only
	ctx_arb[sig_next_context]

phase_1#:

	// NOTE(review): the original comment here said that the meta data i/o is
	// finished and the dram/sram signals have been received; no
	// .io_completed directive is placed at this point (the one for phase 3
	// precedes the ctx_arb that branches here).

	// wake up next thread in the very beginning of phase 1 to reduce wakeup 
	// latency (1)
	_tx_helper_signal_next[sig1_next_context_gpr]

	// set sig_mask_1 to default_sigmask_1 for phase 1 
	alu[sig_mask_1, --, B, default_sigmask_1]

	alu[exe_stat_flag, --, B, 0]		; reset exe_stat_flag

	// mark these registers as set for the assembler; on the no-tx-request
	// path they are not written before the code of phases 2/3 that names them
	// (presumably this suppresses used-before-set diagnostics -- confirm)
	.set $rd_deq_cntr queue_struct_addr output_port

	// check and get tx request from NN ring (2)
	// the third argument is the label used when there is no request

	_tx_helper_read_tx_request(output_port, tx_request, no_tx_request_in_phase_1#)

	// get meta data by dl_meta_load_cache (3)
	// completion is signalled with sig_sram_read

	dl_meta_load_cache(tx_request, $dl_meta, sig_sram_read, 6, 1)

end_of_phase_1#:
	
	// swap out; the local_csr_wr below occupies the defer slot of ctx_arb, so
	// the wakeup events are set to sig_mask_1 before the context goes to sleep
	ctx_arb[--], defer[1]
	local_csr_wr[active_ctx_wakeup_events, sig_mask_1]		
	
	// the wakeup was programmed through the csr rather than through ctx_arb
	// operands, so tell the assembler which signals have been consumed
	.io_completed sig_next_context sig_sram_read 

phase_2#:

	// mark the meta data transfer registers as set for the assembler
	.set $dl_meta0 $dl_meta1 $dl_meta2 $dl_meta3 $dl_meta4 $dl_meta5 $dl_meta6

	// wake up next thread in the very beginning of phase 2 to reduce wakeup 
	// latency (1)
	_tx_helper_signal_next[sig2_next_context_gpr]

	// set sig_mask_2 to default_sigmask_2 for phase 2 
	alu[sig_mask_2, --, B, default_sigmask_2]

	// nothing to do in this phase if phase 1 found no tx request
	br_bset[exe_stat_flag, _NO_TX_REQUEST_BIT, \
					no_tx_request_in_phase_2#]


	// pass tx_request to downstream microblock (2 for SPHY_1_32, 
	// 7 for SPI_4_16PORTS or SPI_4_10PORTS mode)

	_tx_helper_passdown_tx_request(tx_request, output_port)

	;extract class_id	(1)
	dl_meta_get_class_id(class_id)

	;calculate address of data structure for this queue (2)
	_tx_helper_calc_queue_struct_addr(queue_struct_addr, output_port, \
									class_id, all_queue_structs_base)

	;read the dequeue counter in SRAM (1)
	; NOTE(review): the last argument is the phase_3# label; how the helper
	; uses it is not visible in this file.
	_tx_helper_read_deq_cntr(queue_struct_addr, $rd_deq_cntr, \
					sig_rd_deq_cntr_done, phase_3#)

end_of_phase_2#:

	// swap out with wakeup events = sig_mask_2 (local_csr_wr is in the defer
	// slot of ctx_arb)
	ctx_arb[--], defer[1]
	local_csr_wr[active_ctx_wakeup_events, sig_mask_2]

	.io_completed sig_next_context sig_rd_deq_cntr_done


phase_3#:

	// wake up next thread in the very beginning of phase 3 to reduce wakeup 
	// latency (1)
	_tx_helper_signal_next[sig3_next_context_gpr]

	// set sig_mask_3 to default_sigmask_3 for phase 3 
	alu[sig_mask_3, --, B, default_sigmask_3]
		
	// nothing to do in this phase if phase 1 found no tx request
	br_bset[exe_stat_flag, _NO_TX_REQUEST_BIT, \
					no_tx_request_in_phase_3#]


	;increment and write back class dequeue counter	(2)
	_tx_helper_incr_and_write_deq_cntr(queue_struct_addr, $rd_deq_cntr, \
								$wr_deq_cntr, sig_wr_deq_cntr_done)
	
end_of_phase_3#:

	// unlike phases 1 and 2 the directive precedes the ctx_arb, because the
	// ctx_arb below branches back to phase_1#
	// NOTE(review): sig_scratch_write is listed here unconditionally, while it
	// is only removed from sig_mask_2 (not sig_mask_3) on the no-request path
	// and only in the SPI_4_16PORTS / SPI_4_10PORTS modes -- confirm which
	// phase actually waits on it.
	.io_completed sig_next_context sig_wr_deq_cntr_done sig_scratch_write

	// swap out with wakeup events = sig_mask_3 (local_csr_wr is in the defer
	// slot) and resume at phase_1# on wakeup
	ctx_arb[--] , defer[1], br[phase_1#]
	local_csr_wr[active_ctx_wakeup_events, sig_mask_3]


no_tx_request_in_phase_1#:

	; no meta data read was issued, so remove sig_sram_read from the phase 1
	; wakeup mask
	_tx_helper_clear_signal[sig_mask_1, sig_sram_read]		

	; remember for phases 2 and 3 that this iteration has no tx request
	alu_shf[exe_stat_flag, exe_stat_flag, OR, 1, <<_NO_TX_REQUEST_BIT]

	br[end_of_phase_1#]

no_tx_request_in_phase_2#:
	; clear sig_rd_deq_cntr_done
	_tx_helper_clear_signal[sig_mask_2, sig_rd_deq_cntr_done]

	#if((TX_PHY_MODE == SPI_4_16PORTS) || (TX_PHY_MODE == SPI_4_10PORTS))
		; clear sig_scratch_write as well in these modes
		_tx_helper_clear_signal[sig_mask_2, sig_scratch_write]	
	#endif	// #if(TX_PHY_MODE == SPI_4_16PORTS or SPI_4_10PORTS)

	br[end_of_phase_2#]

no_tx_request_in_phase_3#:
	; clear sig_wr_deq_cntr_done
	_tx_helper_clear_signal[sig_mask_3, sig_wr_deq_cntr_done]

	br[end_of_phase_3#]


.end

#endm	 // end of #macro tx_helper()


///////////////////////////////////////////////////////////////////////////////
// The main code starts here

main#:

// Entry point of the microblock.
//
// The context relative GPRs declared below are preassigned during
// initialization to save instruction cycles in the POS TX process. They are
// declared here, outside the tx_helper() macro scope, and referenced by name
// from inside it.

	// values to write to the SAME_ME_SIGNAL csr to wake up the next thread
	// from phase 1, phase 2 and phase 3 respectively
	.reg sig1_next_context_gpr sig2_next_context_gpr sig3_next_context_gpr

	// base of all class data structures in SRAM; the dequeue counter of each
	// class is the last longword in each class data structure
	.reg all_queue_structs_base

#if( (TX_PHY_MODE == SPI_4_16PORTS) || (TX_PHY_MODE == SPI_4_10PORTS) )
	// input scratch rings for the first and the second Packet TX
	.reg packet_tx_ring_0 packet_tx_ring_1

#ifdef MULTIPORT_TEST
.reg	@mport
#endif	// #ifdef MULTIPORT_TEST
#endif	// #if((TX_PHY_MODE == SPI_4_16PORTS) || (TX_PHY_MODE == SPI_4_10PORTS))

	// one-time setup of the microblock
	tx_helper_init()

	// enter the packet processing loop (tx_helper() branches back to its own
	// phase_1# label at the end of phase 3)
	tx_helper()

///////////////////////////////////////////////////////////////////////////////

#endif 		// __TX_HELPER_UC__

///////////////////////////////////////////////////////////////////////////////


      

