///////////////////////////////////////////////////////////////////////////////
//                                                                     
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  2001-2002 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                      
///////////////////////////////////////////////////////////////////////////////
//
//
//      File Name: packet_rx_common_util.uc
//
//      Purpose: Contains macros used by packet rx that are common to one ME
//               and two ME configurations.
//
//      History:
//
//
//      Date            Comment                         By
//      ---------------------------------------------------------------------
//
//      06/05/2002      Created                         David Meng
//
/////////////////////////////////////////////////////////////////////////////

#ifndef	__PACKET_RX_COMMON_UTIL_UC__
#define	__PACKET_RX_COMMON_UTIL_UC__

#include <dl_system.h>
#include <stdmac.uc>
#include <dispatch_loop.uc>
#include <hardware.h>
#include <packet_rx.h>
#include <packet_rx_context.h>


///////////////////////////////////////////////////////////////////////////////
//
//	OBSOLETE: Use #define_eval a log2(b) instead.
//
// _LN
//
//	 	Description: 
//		
//		Returns Log (input) to base 2. i.e 2 ** result = input. 
//		In other words, given a value (it should be a power of 2) this
//		macro will return the shift amount.
//
//		This is not like normal macros. This does not produce any code
//		and is expected to be used in preprocessing. 
//
//	 	Outputs:
//
//			The result will be available in LN_RESULT (it's a #define)
//
//		Inputs:
//
//			value		: This should be a power of 2.
//
//		Size: 
//
//			0 instructions
//
///////////////////////////////////////////////////////////////////////////////

#macro	_LN(value)

	//	Preprocessor-only helper: every statement below is a preprocessor
	//	directive, so no instructions are emitted. The answer is left in
	//	the #define LN_RESULT, which the caller is expected to consume
	//	(and #undef) itself.

.begin

#define_eval			LN_RESULT		0
#define_eval			LN_VALUE	 	value	

	//	Find the bit position where a '1' (and only one '1') is set
	//	in value.
	//
	//	The loop shifts LN_VALUE right until bit 0 is set, counting the
	//	shifts in LN_RESULT. If value has more than one bit set, the result
	//	is therefore the index of the LOWEST set bit. If value is 0 the
	//	loop body never runs and LN_RESULT stays 0.

#while (LN_VALUE)

	#define_eval		IS_SET		(LN_VALUE & 0x1)

	#if	(IS_SET == 0)

		//	bit 0 clear: count one more shift and keep looking

		#define_eval	LN_RESULT	(LN_RESULT+1)
		#define_eval	LN_VALUE	(LN_VALUE>>1)
	#else

		//	bit 0 set: force the #while condition false to stop the loop

		#define_eval	LN_VALUE	0

	#endif

#endloop

	//	Remove the scratch defines; only LN_RESULT survives this macro.

#undef 	IS_SET
#undef	LN_VALUE

	//  At the end, LN_RESULT will contain the result.

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_get_rand_buf_offset
//
//	 	Description: 
//
//		[Dram Bank Scheduling] DDR in IXP2400 has 4 banks. Each bank is 128
//		bytes. For better performance dram access should be spread uniformly
//		across these 4 banks. 
//
//		Buffer offset refers to where data begins in a buffer (i.e headroom).
//	 	If this is a constant, then the IP header will invariably fall in 
//		the same DRAM bank (and for every packet IP hdr is read from DRAM)
//		causing performance degradation.
//
//		To overcome that, each SOP buffer (i.e buffer containing IP hdr)
//		will have a different offset: 128 or 256 or 384 or 512 bytes.
//
//		This macro returns the offset to be used in the next buffer.
//
//	 	Outputs:
//
//			buf_offset		:	buffer offset to use in next buffer
//
//		Inputs:
//
//
//		Size: 
//
//			3 instructions.
//
///////////////////////////////////////////////////////////////////////////////

#macro	_packet_rx_get_rand_buf_offset(buf_offset)
.begin

	//	Returns in buf_offset the headroom to use for the next SOP buffer.

#ifdef IXP2800
	// For IXP2800 we do not need to round robin amongst buffer offsets
	// to achieve uniform DRAM bank utilization, so a fixed offset is used.
	immed[buf_offset, 384]
#else
	//	Advance the running offset kept in the absolute register
	//	@rand_buf_offset: wrap it with @rand_offset_mask, then step it by one
	//	bank (DDR_BANK_SIZE). The updated value is stored back so that the
	//	next invocation continues from it.
	//	NOTE(review): both absolute registers are initialised outside this
	//	macro; the mask value that gives the 128/256/384/512 cycle described
	//	in the header is not visible here.
	alu[@rand_buf_offset, @rand_buf_offset, AND, @rand_offset_mask]
	alu[@rand_buf_offset, @rand_buf_offset, +, DDR_BANK_SIZE]
	alu[buf_offset, --, B, @rand_buf_offset]	; hand the new offset to the caller
#endif

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_set_first_cell_count
//
//	 	Description: 
//
//			calculate how many cells are required to transmit a given number
//			of bytes. This macro is used to calculate the cells in the first
//			mpacket (i.e SOP mpacket). For subsequent mpacket use 
//			_incr_cell_count.
//
//	 	Outputs:
//
//			cell_count		:	Number of cells in this byte count.
//								( 0==>1 cell, 1==>2 cells etc)
//
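//			remainder		:	bytes left over for the last cell. The loop stops
//								as soon as remainder <= cell size, so an exact
//								multiple of the cell size yields cell size, not 0.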
//
//		Inputs:
//
//			byte_count		:	Number of bytes to use in this cell count
//								calculation.
//		Size: 
//
//			7 instructions at a minimum. (max depends on the loop)
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_set_first_cell_count(cell_count, remainder, byte_count)
.begin

	//	Repeated-subtraction divide of byte_count by CELL_SIZE.
	//	On exit remainder <= CELL_SIZE and cell_count holds the number of
	//	CELL_SIZE chunks that were subtracted (cell_count 0 ==> 1 cell).

	alu[remainder, --, B, byte_count]			; working copy of the byte count
	alu[cell_count, --, B, 0]					; init cell count to 0

	//	While remainder is strictly greater than cell size, iterate.
	//	A remainder equal to CELL_SIZE also leaves the loop (ble), so an
	//	exact multiple ends with remainder == CELL_SIZE rather than 0.

while_remainder_gte_cell_size#:

	alu[--, remainder, -, CELL_SIZE]			; compare only, result is discarded
	ble[done#]									; remainder <= cell size: finished

	//	The two instructions after the branch sit in its defer slots, so
	//	they run on every trip back to the top of the loop.

	br[while_remainder_gte_cell_size#], defer[2]
		alu[cell_count, cell_count, +, 0x1]		; incr cell count
		alu[remainder, remainder, -, CELL_SIZE]	; new remainder -= cell size

done#:

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_incr_cell_count
//
//	 	Description: 
//
//			calculate how many cells are required to transmit a given number
//			of bytes and increment cell count accordingly. 
//
//	 	Outputs:
//
//			cell_count		:	(old) cell count + cells in byte_count
//								( 0==>1 cell, 1==>2 cells etc)
//
//			remainder		:	(old remainder + byte_count) % cell size.
//
//		Inputs:
//
//			cell_count		:	existing cell count to which to increment.
//			
//			remainder		:	existing byte remainder (updated in place)
//
//			byte_count		:	Number of bytes to use in this cell count
//								calculation.
//		Size: 
//
//			6 instructions at a minimum. (max depends on the loop)
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_incr_cell_count(cell_count, remainder, byte_count)
.begin

	//	Adds byte_count to the running remainder and then moves whole
	//	CELL_SIZE chunks out of remainder into cell_count. Both cell_count
	//	and remainder are read-modify-write: they carry state from the
	//	previous mpacket.

	alu[remainder, remainder, +, byte_count]	; accumulate the new bytes

	//	While remainder is strictly greater than cell size, iterate.
	//	A remainder equal to CELL_SIZE also leaves the loop (ble), so the
	//	remainder ends up <= CELL_SIZE.

while_remainder_gte_cell_size#:

	alu[--, remainder, -, CELL_SIZE]			; compare only, result is discarded
	ble[done#]									; is remainder <= cell size

	//	The two instructions after the branch sit in its defer slots, so
	//	they run on every trip back to the top of the loop.

	br[while_remainder_gte_cell_size#], defer[2]
		alu[cell_count, cell_count, +, 0x1]		; incr cell count
		alu[remainder, remainder, -, CELL_SIZE]	; new remainder -= cell size

done#:

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_free_rbuf_element
//
//	 	Description: 
//
//			Free an rbuf element.
//
//	 	Outputs:
//
//		Inputs:
//
//			element_num		: element number to free.
//
//		Size: 
//
//			2 instructions
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_free_rbuf_element(element_num)

.begin

	.reg rbuf_done_data

	//	Return one RBUF element to the hardware with an MSF fast write.
	//
	//	msf[fast_wr...] has no transfer register: the two address operands
	//	are added, the upper 16 bits of the sum are the data (here the
	//	element number) and the lower 16 bits are the CSR address
	//	(RBUF_ELEMENT_DONE).

	alu_shf[rbuf_done_data, --, B, element_num, <<16]		; element number into the upper half
	msf[fast_wr, --, rbuf_done_data, RBUF_ELEMENT_DONE]		; upper 16 = element, lower 16 = CSR

.end

#endm


///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_free_all_rbuf_elements
//
//	 	Description: 
//
//			Free all rbuf elements.
//
//	 	Outputs:
//
//		Inputs:
//
//			TOTAL_ELEMENTS		: total number of elements to free.
//
//		Size: 
//
//			??
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_free_all_rbuf_elements(TOTAL_ELEMENTS)

.begin

	//	Loop counter: the element number currently being freed.
	//	(The original also declared temp_addr, which was never used.)

	.reg element_num

	//	Free rbuf elements one by one. At reset the rbuf element free list
	//	is empty. The software must initialise by placing all elements
	//	in the free list.
	//
	//	Elements 0 .. TOTAL_ELEMENTS-1 are freed in ascending order; nothing
	//	is freed when TOTAL_ELEMENTS is 0.

	immed[element_num, 0]

	.while (element_num < TOTAL_ELEMENTS)				; e.g. 8192 / 128 = 64 elements

		_packet_rx_free_rbuf_element(element_num)		; free rbuf element

		alu[element_num, element_num,  + , 1]			; next element

	.endw

.end

#endm


///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_get_current_me_thread_number
//
//	 	Description: 
//
//			Get the ME and thread number on which this code runs.
//
//	 	Outputs:
//
//			me_num			: ME number. (0-7 for IXP2400, 0-15 for IXP2800)
//
//			thread_num		: Thread number (0-7)
//
//		Inputs:
//
//
//		Size: 
//
//			8 instructions at the max.
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_get_current_me_thread_number(out_me_num, out_thread_num)
	
	//	Read active context status register.
	//	The immed must stay directly behind local_csr_rd: the code below
	//	treats out_me_num as holding the ACTIVE_CTX_STS value from here on.
	//	out_me_num doubles as scratch until the final shift/mask.

	local_csr_rd[active_ctx_sts]
	immed[out_me_num,0] 

	//	Sausalito active_ctx_sts[7:3] are ME number and cluster number, 
	//	and defined as following:
	//	ME number        7  6  5  4  3  This is the latest definition    
	//    0              0  0  0  0  0
	//    1              0  0  0  0  1
	//    2              0  0  0  1  0
	//    3              0  0  0  1  1
	//    4              1  0  0  0  0
	//    5              1  0  0  0  1
	//    6              1  0  0  1  0
	//    7              1  0  0  1  1

	//  [2:0] is thread number	
	
	alu[out_thread_num, 0x07, and, out_me_num]				; [2:0] is thread number
	alu_shf_right(out_me_num, 0x1f, and, out_me_num, 3)		; [7:3] is ME number

#ifdef IXP2400
	
	//	For IXP2400 the cluster bit (CSR bit 7, now bit 4 after the shift)
	//	is set for the MEs of the second cluster. For those, clear the
	//	cluster bit and add 4 so that the result is a flat number 0-7.
	
	br_bclr[out_me_num, 4, first_cluster_processing#] 		; cluster bit clear: ME 0-3,
															; nothing to fix up
	alu[out_me_num, out_me_num, and, 0x0f]					; Mask OFF cluster bit
	alu[out_me_num, out_me_num, +, 0x4]						; add 4 to ME number

	//	Here you should get ME number between 0 and 7.

first_cluster_processing#:

#endif

//	IXP2800 active_ctx_sts[7:3] are ME number and cluster number, 
//	and defined as following:
//	ME number          7  6  5  4  3  This is the latest definition    
//	    0              0  0  0  0  0
//	    1              0  0  0  0  1
//	    2              0  0  0  1  0
//	    3              0  0  0  1  1
//		4              0  0  1  0  0
//		5              0  0  1  0  1
//		6              0  0  1  1  0
//		7              0  0  1  1  1
//		8              1  0  0  0  0
//		9              1  0  0  0  1
//		10             1  0  0  1  0
//		11             1  0  0  1  1
//		12             1  0  1  0  0
//		13             1  0  1  0  1
//		14             1  0  1  1  0
//		15             1  0  1  1  1
//
//	NOTE(review): no fix-up is applied unless IXP2400 is defined, so with
//	the encoding in the table above out_me_num is the raw 5-bit [7:3] field
//	(0x10-0x17 for MEs 8-15), not a flat 0-15 value. Confirm that callers
//	expect the raw field on IXP2800.

#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_config_msf_rx_ctrl
//
//	 	Description: 
//
//			configure MSF.
//
//	 	Outputs:
//
//		Inputs:
//
//		Size: 
//
//			4 instructions
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_config_msf_rx_ctrl() 

.begin

	.reg 	$tmp
	.reg 	msf_rx_addr
	.sig 	sig_msf_wr

	//	Programs MSF_RX_CNTL in two steps: first the RBUF element size on
	//	its own, then the complete control word for the selected chip / PHY
	//	mode. Each write is waited for (ctx_arb) before the macro goes on.
	//
	//	[1:0]	Reserved
	//	[3:2]	01 ==> RBUF element size is 128
	//	[8:4]	Reserved
	//	[9]		0  ==> CSIX Free list - Data and control cframes goto 
	//			different Rx_thread_freelist. (Not used here)
	//	[12:10]	Reserved
	//	[13]	0  ==> Tx Cbus width is 4 bit
	//	[15:14]	Reserved
	//	[16]	0  ==> Tx CBus Mode is simplex mode
	//	[17]	RX Mphy related - Not used here. default value 0
	//	[18]	RX Mphy related - Not used here. default value 0
	//	[19]	0  ==> RX Single Phy Mode 
	//	[21:20]	00 ==> RX width 1 x32
	//	[22]	0  ==> RX Mode is UTOPIA/POS mode
	//	[27:23]	Reserved
	//	[31:28]	0xF ==> Receive enable in all (0,1,2,3) channels.
	
	//	The above definitions are for IXP2400. It may differ for IXP2800. [TBD]

	move(msf_rx_addr, MSF_RX_CNTL)
	
	// initialize the RBUF element size to 128

	move($tmp, 0x4)
	msf[write, $tmp, msf_rx_addr, 0, 1], sig_done[sig_msf_wr]
	ctx_arb[sig_msf_wr]


	// now you can enable receive.
	//	Every value below keeps 0x4 in the low bits, i.e. the element size
	//	written above is preserved.

#ifdef IXP2800

	move($tmp, 0x40010004)

#else
  #if(RX_PHY_MODE == SPHY_1_32)
	move($tmp, 0xf0000004) 						; SPHY 1X32
  #else
    #if(RX_PHY_MODE == SPHY_4_8)
		move($tmp, 0xf0200004) 					; SPHY 4X8
    #else
	  #if (RX_PHY_MODE == MPHY_4)						
			move($tmp, 0xf0090004)				; MPHY 4	  						
	  #else 
	    #if (RX_PHY_MODE == MPHY_16)	  	
				move($tmp, 0x100E0004)			; MPHY 16
	    #else

	//	No known PHY mode: $tmp is not reloaded, so the write below only
	//	repeats the element-size value and receive stays disabled. Flag
	//	this at build time, as the other PHY-mode macros in this file do.

#warning "RX_PHY_MODE is not a supported mode: MSF receive will not be enabled"
	    #endif
	  #endif
	#endif
  #endif
#endif

	msf[write, $tmp, msf_rx_addr, 0, 1], sig_done[sig_msf_wr]
	ctx_arb[sig_msf_wr]

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_set_rx_thread_freelist_timeout
//
//	 	Description: 
//
//			Set timeout value for Receive Thread freelist. If there is no 
//			data to receive, then at the end of this timeout, receive logic 
//			will wakeup the thread with a NULL packet.
//
//	 	Outputs:
//
//		Inputs:
//
//			FREELIST_NUM	: Free list number (0-3)			
//
//			timeout_val		: timeout value in clock cycles. (0-0xFFFF)
//
//		Size: 
//
//			4 instructions
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_set_rx_thread_freelist_timeout(FREELIST_NUM, timeout_val) 

.begin

	.reg 	rxtf_to_data
	.reg 	rxtf_to_csr

	//	Program the timeout of Rx Thread freelist FREELIST_NUM (<= 3) with
	//	an MSF fast write. The timeout is given in clock cycles and must
	//	fit in 16 bits (<= 0xffff).

	//	Data half: the fast write takes its data from the upper 16 bits
	//	of the address sum, so the timeout is moved up by 16.

	move(rxtf_to_data, timeout_val)
	alu_shf[rxtf_to_data, --, B, rxtf_to_data, <<16]	; timeout value in ms16 bits

	//	Address half: FREELIST_NUM is token-pasted onto the CSR name.

	move(rxtf_to_csr, RX_THREAD_FREELIST_TO_/**/FREELIST_NUM)

	msf[fast_wr, --, rxtf_to_data, rxtf_to_csr]			; data + CSR address

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_set_rx_up_ctrl
//
//	 	Description: 
//
//			Some more MSF configuration.
//
//	 	Outputs:
//
//		Inputs:
//
//			RX_UP_CTRL_NUM	: should be 0-3.
//
//		Size: 
//
//			2 instructions
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_set_rx_up_ctrl(RX_UP_CTRL_NUM) 

.begin

	.reg	 tmp
	.reg 	msf_rx_addr

	//	Picks the RX_UP_CTRL_<n> value from compile-time switches
	//	(IXP_SIMULATION, POS_PHY_LEVEL_3, ODD_PARITY) and writes it with an
	//	MSF fast write. Field layout of the value:
	//
	//	[0]		- 1  ==> decode response time = 2 cycles. (XXX Why?)
	//	[1]		- x	 ==> don't care - 0
	//	[3:2]	- 10 ==> single bit odd parity (11 ==> Single bit even parity)
	//	[4]		- 1  ==> packet mode
	//	[5]		- 1  ==> POS-PHY Level 3 mode (0 ==> POS 2 Mode)
	//	[31:6]	- x  ==> reserved

	//	RX_UP_CTRL_NUM is token-pasted onto the CSR name prefix.

	move(msf_rx_addr, RX_UP_CTRL_/**/RX_UP_CTRL_NUM)

#ifdef IXP_SIMULATION

	//	Workbench has some problems with POS2 Mode. 
	//	Until it's fixed we'll use POS 3 Mode.
	//	(0x3d is the same value as the "POS Level 3, even parity" case.)

	alu[tmp,--,B, 0x3d]

#else
	#ifndef POS_PHY_LEVEL_3 
	  #ifndef ODD_PARITY
		alu[tmp, --, B, 0x1d]	; POS Level 2, even parity
	  #else // #ifndef ODD_PARITY
		alu[tmp, --, B, 0x19]	; POS Level 2, odd parity
	  #endif //#ifndef ODD_PARITY
	#else ; POS_PHY_LEVEL_3
	  #ifndef ODD_PARITY
		alu[tmp, --, B, 0x3d]	; POS Level 3, even parity
	  #else ; ODD_PARITY
		alu[tmp, --, B, 0x39]	; POS Level 3, odd parity
	  #endif ; ifndef ODD_PARITY	
	#endif ; POS_PHY_LEVEL_3
#endif

	//	Same fast-write convention as the other MSF CSR writes in this file:
	//	the data travels in the upper 16 bits of the address sum.

	alu[tmp, --, B, tmp, <<16]					; put value in ms16 bits 
	msf[fast_wr, --, tmp, msf_rx_addr]			; tmp + msf_rx_addr and write the 
												; value in ms16 bits of temp.

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_set_freelist_reg
//
//	 	Description: 
//
//			Given a ME, Thread, signal and xfer register number, form
//			the value that can be used in putting that thread to the 
//			thread freelist
//
//	 	Outputs:
//
//			freelist		: a GPR where the result is put.
//
//		Inputs:
//
//			sig				: signal number to wakeup this thread.
//
//			me				: me number
//
//			thread			: thread number
//
//			xfer_n			: xfer register number (use &). 
//							  Two transfer registers  xfer and xfer + 1 
//							  are used.
//	
//		Size: 
//
//			4 instructions
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_set_freelist_reg(freelist, sig, me, thread, xfer_n) 

.begin

	//	Form signal number, me number, thread number and xfer register
	//	numbers in Rx Thread Freelist format. 
	//
	//	freelist is overwritten by the first instruction and then built up
	//	by OR-ing in one field per instruction, so it must not be the same
	//	register as thread, me or sig.
	//	NOTE(review): the bit ranges in the comments below depend on the
	//	RXTF_THREAD / RXTF_ME / RXTF_SIGNAL shift constants, which are
	//	defined outside this file section - confirm against their values.
	
	alu[freelist, --, B, xfer_n]						; [3:0] xfer register
	alu[freelist, freelist, OR, thread, <<RXTF_THREAD]	; [6:4] thread
	alu[freelist, freelist, OR, me, <<RXTF_ME]			; [9:7] me number
	alu[freelist, freelist, OR, sig, <<RXTF_SIGNAL]		; [15:12] signal number

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_signal_next_thread
//
//	 	Description: 
//
//			Signal next thread. It usually needs 3 instruction to do this,
//			of which 2 instructions are used in preparing a register with
//			correct value. Since the signal number is known at compile time
//			we initialise a global register with required value and pass 
//			that register here, there by reducing this macro to just 
//			one instruction. 
//
//			The down side is we end up using one register per thread. 
//			This macro is used twice per iteration, meaning we save 
//			four instructions.
//
//	 	Outputs:
//
//		Inputs:
//
//			sig_reg			: a register containing the next thread signal.
//
//		Size: 
//
//	
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_signal_next_thread(sig_reg) 

	//	Single-instruction signal: sig_reg must already hold the complete
	//	SAME_ME_SIGNAL value (prepared once by the caller), it is written
	//	to the CSR unchanged.

	local_csr_wr[SAME_ME_SIGNAL, sig_reg]					; signal next context

#endm



///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_clear_all_context_desc
//
//	 	Description: 
//
//			Clear all context descriptors (i.e. the reassembly context that
//			is maintained in local memory).
//
//	 	Outputs:
//
//		Inputs:
//
//		Size: 
//
//			~196 instructions
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_clear_all_context_desc() 
.begin
	.reg count, lm_addr

	//	The Reassembly context data per port is kept in Local memory.
	//	Each context is 64 (CONTEXT_SIZE) bytes.
	//	Clear this area of local memory (starting from CONTEXT_DESC_BASE)
	//	to zeros.

	//	PO2_CTX_SIZE_LW = log2(context size in longwords). It is a scratch
	//	#define that is removed again at the end of the macro; warn if a
	//	definition from elsewhere is about to be overwritten.

#ifdef	PO2_CTX_SIZE_LW
#warning "PO2_CTX_SIZE_LW is already defined"
#endif

	_LN(CONTEXT_SIZE/4)								; result in LN_RESULT

#define_eval	PO2_CTX_SIZE_LW		LN_RESULT

#undef			LN_RESULT


	move(lm_addr, CONTEXT_DESC_BASE)				; set local memory to 
	local_csr_wr[active_lm_addr_0, lm_addr]			; context base.

	//	calculate the total number of longwords used for
	//	reassembly context.
	//	These instructions also separate the local_csr_wr above from the
	//	first *l$index0 access in the loop.

	move(count, TOTAL_PORTS)						; total ports
	alu_shf[count,--,B, count, <<PO2_CTX_SIZE_LW] 	; total ports * context size

	//	Zero one longword per iteration. The store sits in the branch defer
	//	slot, so it also executes on the final, not-taken pass: exactly
	//	count longwords are written.
	//	NOTE(review): a count of 0 (TOTAL_PORTS == 0) would wrap on the
	//	first decrement - presumably never the case; confirm.

loop#:
	alu[count, count, -, 0x1] 
	bne[loop#], defer[1]
		alu[*l$index0++, --, B, 0x0]

//	cleanup namespace

#undef	PO2_CTX_SIZE_LW

.end
#endm


///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_rsw_get_port_num
//
//	 	Description: 
//
//			Get port number from Receive Status Word. This will indicate
//			from which port this mpacket was received.
//
//	 	Outputs:
//
//			port_num		: port number the mpacket was received on
//
//		Inputs:
//
//			rsw1			: Receive Status Word 1 (1 of 2)
//
//		Size: 
//
//			1 instruction
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_rsw_get_port_num(port_num, rsw1) 
.begin

	//	The port number is the low-order field of rsw1; only the width of
	//	the mask differs between the two chips.

#ifdef IXP2800	
	alu[port_num, rsw1, AND, 0xFF] 					; [7:0] is port number
#else
	alu[port_num, rsw1, AND, 0xF] 					; [3:0] is port number
#endif

.end
#endm


///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_rsw_get_element_num
//
//	 	Description: 
//
//			Get rbuf element number from Receive Status Word.
//
//	 	Outputs:
//
//			element_num		: element number
//
//		Inputs:
//
//			rsw1			: Receive Status Word 1 (1 of 2)
//
//		Size: 
//
//			1 instruction
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_rsw_get_element_num(element_num, rsw1) 
.begin

	//	Shift rsw1 right by 24 and keep 7 bits; bit 31 of rsw1 is masked off.

	alu_shf[element_num, 0x7F, AND, rsw1, >>24]		; [30:24] is element number

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_rsw_get_byte_count
//
//	 	Description: 
//
//			Get number of bytes received from Receive Status Word.
//
//	 	Outputs:
//
//			byte_count		: number of bytes received.
//
//		Inputs:
//
//			rsw1			: Receive Status Word 1 (1 of 2)
//
//		Size: 
//
//			1 instruction
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_rsw_get_byte_count(byte_count, rsw1) 
.begin

	//	Shift rsw1 right by 16 and keep the low 8 bits, so the extracted
	//	count is in the range 0-255.

	alu[byte_count, 0xff, AND, rsw1, >>16]			; [23:16] is byte count

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_get_machine_state
//
//	 	Description: 
//
//			Get the receive machine state. The receive machine has two states.
//			START and PROCessing. A given mpacket can have SOP or not. It
//			can have EOP or not. This gives us a total of 8 states.
//
//	 	Outputs:
//
//			mach_state		: one of the 8 states (0-7)
//
//		Inputs:
//
//			rsw1			: Receive Status Word 1 (1 of 2). This carries
//							  the SOP and EOP bits.
//
//		Size: 
//
//			2 instructions
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_get_machine_state(mach_state, rsw1) 
.begin

	//	The receive machine state is composed as follows.
	//	[0]	- START or PROC STATE. (This is kept in reassembly context)
	//	[1] - EOP or not, in received mpacket ( 0 ==> !EOP ). From rsw1
	//	[2]	- SOP or not, in received mpacket ( 0 ==> !SOP ). From rsw1
	//
	//	lm_state is not a macro parameter; it is resolved from the scope
	//	the macro is expanded in.
	//	NOTE(review): the OR assumes lm_state only ever holds 0 or 1 -
	//	confirm against the definition of lm_state.

	alu_shf[mach_state, 0x6, AND, rsw1, >>13] 		; [15:14] SOP:EOP, put it in [2:1]
	alu[mach_state, mach_state, OR, lm_state]		; START/PROC from context to [0].

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_set_state
//
//	 	Description: 
//
//			Set the receive machine state (in reassembly context in local
//			memory). There are two states. START and PROCessing.
//
//	 	Outputs:
//
//		Inputs:
//
//			RX_STATE		: Receive (Machine) State. (START or PROCessing)
//
//		Size: 
//
//			1 instruction
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_set_state(RX_STATE) 
.begin

	//	Set state for the Receive State Machine
	//	Either START (0) or PROC (1).
	//
	//	lm_state is not a macro parameter; it is resolved from the scope
	//	the macro is expanded in and is overwritten with RX_STATE as is.

	alu[lm_state, --, B, RX_STATE]					; START(0) / PROC(1)

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_move_rbuf_to_pkt_buf
//
//	 	Description: 
//
//			move the data in rbuf to packet buffer in DRAM.
//
//	 	Outputs:
//
//		Inputs:
//
//			dram_addr		: DRAM addr where data is to be moved.
//
//			element_addr	: rbuf element addr. (this element num * rbuf size)
//
//			ref_cnt			: number of bytes to move
//
//			rbuf_base		: The base address where rbuf begins.
//
//			req_sig			: signal use in I/O (dram[rbuf_rd..])
//
//		Size: 
//
//			6 instructions
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_move_rbuf_to_pkt_buf(dram_addr, element_addr, ref_cnt, rbuf_base, req_sig) 

.begin

	.reg 	tmp
	.reg	rbuf_addr

	//	Issues one dram[rbuf_rd] with an indirect reference. The macro only
	//	starts the transfer (sig_done); the caller has to wait on req_sig.
	//
	//	We over ride ref_cnt [25] and rbuf byte address [4] in the
	//	indirect format for rbuf_rd.
	//	NOTE(review): dram_rbuf_ov is expected to hold those two override
	//	bits; it is defined outside this macro - confirm.

	//	compute how many bytes should be moved from rbuf to dram
	//	In Indirect format, a ref_cnt of 0 means 1 Qword, 1 => 2 Qword 
	//	etc. Hence we subtract 1(byte) and then compute the number
	//	of Qwords.
	//	ref_cnt must be at least 1: a value of 0 would wrap in the subtract
	//	below and spill into the neighbouring fields.

	alu[tmp, ref_cnt, -, 0x1]						; in bytes. 
	alu[tmp, --, B, tmp, >>3]						; convert to Quad Word (8 byte)
	alu[tmp, dram_rbuf_ov, OR, tmp, <<21]			; [24:21] ref count in Quad words
													; OV [25] ref_cnt, [4] rbuf addr 
	//	calculate rbuf byte address the data is 
	//	to be read from

	alu[rbuf_addr, element_addr, +, rbuf_base]		; base + (element * ele_size)

	//	This ALU result is not stored (destination "--"); it is consumed by
	//	the indirect_ref of the dram instruction, which therefore has to
	//	follow immediately.

	alu[--, tmp, OR, rbuf_addr, <<5]				; [18:5] rbuf byte address
													; [7:5] are ignored, 8B boundary
	//	move data from rbuf to the dram

	dram[rbuf_rd, --, dram_addr, 0, max_16], indirect_ref, sig_done[req_sig]

.end
#endm

///////////////////////////////////////////////////////////////////////////////
// _packet_rx_incr_counter
//
//	 	Description: 
//
//			Increment the Packet Rx counters.
//
//			This particular macro is used to increment number of packets
//			received, number of packets dropped & number of packets marked as
//			exception.			
//			This macro is called only on EOP cases
//
//		Outputs:
//
//		Inputs:
//
//			port_num		: the port number for which the counter needs to 
//							  be incremented
//	
//			COUNTER			: Valid values for COUNTER are -
//				
//									PACKET_PKTS_RECEIVED	
//									PACKET_PKTS_DROPPED  	
//									PACKET_PKTS_EXCEPTION	
//									PACKET_BYTES_RECEIVED	
//		Size: 
//
//			3 instructions
//
//		XXX - Note: Here we can save couple of instructions if we move the offset
//					calculation outside of this macro. This is because this
//					offset calculation is common to both _packet_rx_incr_counter and
//					_packet_rx_bytes_add_counter macros. REVISIT it later...
/////////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_incr_counter(port_num, COUNTER)
.begin

	//	Expands to nothing unless RFC_PPP_COUNTERS is defined.

#ifdef	RFC_PPP_COUNTERS

	.reg	port_ctr_offset

	//	Offset of the wanted counter relative to counter_base:
	//	(port_num * 16) + COUNTER. counter_base is not a parameter, it is
	//	taken from the scope the macro is expanded in.

	alu_shf[port_ctr_offset, --, B, port_num, <<4]			; x16; 16 counters per port
	alu[port_ctr_offset, port_ctr_offset, +, COUNTER]		; select the counter

	//	The increment is done by the SRAM unit; no transfer register and no
	//	completion signal are used.

	sram[incr, --, counter_base, port_ctr_offset]			; incr counter for this port

#endif


.end
#endm

///////////////////////////////////////////////////////////////////////////////
// _packet_rx_bytes_add_counter
//
//	 	Description: 
//
//			Add the number of bytes received by Packet Rx.
//
//			This macro is called only on EOP cases
//
//		Outputs:
//
//		Inputs:
//
//			port_num		: the port number for which the counter needs to 
//							  be incremented
//	
//			num_bytes		: number of bytes you want to add to the Byte counter
//		
//			sram_wxfer		: name prefix of the SRAM write xfer register; the
//							  macro writes the register <sram_wxfer>0
//			
//			COUNTER			: Valid value for COUNTER is PACKET_BYTES_RECEIVED	
//
//			sram_sig		: The signal you want to wait on for sram[add,..] 
//							  in the main calling macro.
//		Size: 
//
//			4 instructions
//
//		XXX - Note: Here we can save couple of instructions if we move the offset
//					calculation outside of this macro. This is because this
//					offset calculation is common to both _packet_rx_incr_counter and
//					_packet_rx_bytes_add_counter macros. REVISIT it later...
///////////////////////////////////////////////////////////////////////////////
#macro _packet_rx_bytes_add_counter(port_num, num_bytes, sram_wxfer, COUNTER, sram_sig)
.begin

	//	Expands to nothing unless RFC_PPP_COUNTERS is defined.

#ifdef	RFC_PPP_COUNTERS

	.reg	port_ctr_base

	//	Value to add goes into the write transfer register <sram_wxfer>0
	//	(the 0 is token-pasted onto the name passed in).

	alu[sram_wxfer/**/0, --, b, num_bytes]

	//	Address of this port's counter group: counter_base + port_num * 16.
	//	counter_base is taken from the scope the macro is expanded in.

	alu_shf[port_ctr_base, --, B, port_num, <<4]			; x16; 16 counters per port
	alu[port_ctr_base, port_ctr_base, +, counter_base]

	//	Start the add; COUNTER selects the counter inside the group. The
	//	macro does not wait - the caller waits on sram_sig.

	sram[add, sram_wxfer/**/0, port_ctr_base, COUNTER], sig_done[sram_sig]	

#endif

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_drop_current_pkt
//
//	 	Description: 
//
//			Drop the so far reassembled packet.
//
//			XXX - This macro should be made capable of receiving lm as 
//			parameters
//
//	 	Outputs:
//
//		Inputs:
//
//			sop_handle		: SOP buffer handle 
//
//			curr_handle		: For partially assembled packets, curr_handle 
//							  will be the last buffer in the buffer chain 
//							  (linked list). For small packets (i.e no buffer
//							  chain) curr_handle = sop_handle.
//
//		Size: 
//
//			11 instructions for large packets
//			~9-11 instructions for small packets.
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_drop_current_pkt(sop_handle, curr_handle)

.begin

	//	There are two conditions to handle
	//	1. Only a single buffer needs to be dropped.
	//	2. A buffer chain needs to be dropped. (large packet)
	//
	//	In case 1, sop_handle = curr_handle and both will have the 
	//	SOP bit set. (This is a little peculiar but it makes life
	//	easier elsewhere)
	//
	//	In case 2, sop_handle != curr_handle and only sop_handle
	//	will have the SOP bit set.
	//
	//	The test below uses bit 30 of curr_handle as that SOP bit.
	//	The error counter update sits in the branch defer slot, so it is
	//	executed for both cases. lm_pkt_err is not a parameter; it is
	//	resolved from the scope the macro is expanded in.

	br_bclr[curr_handle, 30, dop_buffer_chain#], defer[1]	; single ?
		alu[lm_pkt_err, 1, +, lm_pkt_err] 					; update statistics.

	//	Single buffer: drop it and skip the chain handling.

	dl_buf_drop(sop_handle)									; drop single buffer
	br[drop_current_pkt_done#]

dop_buffer_chain#:

	#ifdef POTS
		.reg seq_num

		// store sequence number in variable to be used in dl_drop_buffer_chain()
		// NOTE(review): seq_num is not referenced again in this macro; it is
		// presumably picked up by name inside dl_buf_drop_chain - confirm.
		alu[seq_num, --, b, @global_seq_num]

		// mask msw to make global_seq_num a 16 bit counter
		alu[@global_seq_num, 1, +16, @global_seq_num]
	#endif

	dl_buf_drop_chain(sop_handle, curr_handle)				; drop a large packet.

drop_current_pkt_done#:

.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_set_port_header_type
//
//	 	Description: 
//
//			Set port header type in the local memory 
//
//	 	Outputs:
//			The port header type is set up 
//		Inputs:
//			port_num: the port number
//		Size: 
//
//			
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_set_port_header_type(port_num) 
.begin
	.reg lm_addr

	//	Points local memory index 0 at the reassembly context of port_num
	//	and stores that port's PORT_n_TYPE constant in lm_header_type.
	//	lm_header_type is not a parameter; it is resolved from the scope the
	//	macro is expanded in.

	//	PO2_CTX_SIZE_LW is a scratch #define that is removed again at the
	//	end of the macro. Despite the _LW suffix it holds log2 of
	//	CONTEXT_SIZE itself here, i.e. the shift that turns a port number
	//	into an offset in CONTEXT_SIZE units.

#ifdef	PO2_CTX_SIZE_LW
#warning "PO2_CTX_SIZE_LW is already defined"
#endif

	_LN(CONTEXT_SIZE)								; result in LN_RESULT

#define_eval	PO2_CTX_SIZE_LW		LN_RESULT

#undef			LN_RESULT


	alu[lm_addr, --, B, port_num, <<PO2_CTX_SIZE_LW]; port_num * CONTEXT_SIZE
	alu[lm_addr, lm_addr, +, CONTEXT_DESC_BASE]		; set local memory to 
	local_csr_wr[active_lm_addr_0, lm_addr]			; context base

	//	Padding between the CSR write and the local memory accesses below.

	nop
	nop
	nop

	//	The number of ports, and therefore the length of the compare chain,
	//	is fixed at build time by RX_PHY_MODE. A port_num that matches none
	//	of the PORT_n_IDX values leaves lm_header_type unchanged.

#if(RX_PHY_MODE == SPHY_1_32)
		//	single port: the type of port 0 is stored whatever port_num is
		alu[lm_header_type, --, B, PORT_0_TYPE]
#else
#if ( (RX_PHY_MODE == SPHY_4_8) || (RX_PHY_MODE == MPHY_4) )
	//	four ports
	.if(port_num == PORT_0_IDX)
		alu[lm_header_type, --, B, PORT_0_TYPE]
	.elif(port_num == PORT_1_IDX)
		alu[lm_header_type, --, B, PORT_1_TYPE]
	.elif(port_num == PORT_2_IDX)
		alu[lm_header_type, --, B, PORT_2_TYPE]
	.elif(port_num == PORT_3_IDX)
		alu[lm_header_type, --, B, PORT_3_TYPE]
	.endif
#else	
#if (RX_PHY_MODE ==	MPHY_16) 
	//	sixteen ports
	.if(port_num == PORT_0_IDX)
		alu[lm_header_type, --, B, PORT_0_TYPE]
	.elif(port_num == PORT_1_IDX)
		alu[lm_header_type, --, B, PORT_1_TYPE]
	.elif(port_num == PORT_2_IDX)
		alu[lm_header_type, --, B, PORT_2_TYPE]
	.elif(port_num == PORT_3_IDX)
		alu[lm_header_type, --, B, PORT_3_TYPE]
	.elif(port_num == PORT_4_IDX)
		alu[lm_header_type, --, B, PORT_4_TYPE]
	.elif(port_num == PORT_5_IDX)
		alu[lm_header_type, --, B, PORT_5_TYPE]
	.elif(port_num == PORT_6_IDX)
		alu[lm_header_type, --, B, PORT_6_TYPE]
	.elif(port_num == PORT_7_IDX)
		alu[lm_header_type, --, B, PORT_7_TYPE]
	.elif(port_num == PORT_8_IDX)
		alu[lm_header_type, --, B, PORT_8_TYPE]
	.elif(port_num == PORT_9_IDX)
		alu[lm_header_type, --, B, PORT_9_TYPE]
	.elif(port_num == PORT_10_IDX)
		alu[lm_header_type, --, B, PORT_10_TYPE]
	.elif(port_num == PORT_11_IDX)
		alu[lm_header_type, --, B, PORT_11_TYPE]
	.elif(port_num == PORT_12_IDX)
		alu[lm_header_type, --, B, PORT_12_TYPE]
	.elif(port_num == PORT_13_IDX)
		alu[lm_header_type, --, B, PORT_13_TYPE]
	.elif(port_num == PORT_14_IDX)
		alu[lm_header_type, --, B, PORT_14_TYPE]
	.elif(port_num == PORT_15_IDX)
		alu[lm_header_type, --, B, PORT_15_TYPE]
	.endif
#else
#warning	NO Mode is defined
#endif
#endif
#endif

#undef	PO2_CTX_SIZE_LW
.end
#endm

///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_set_all_port_header_type
//
//	 	Description: 
//
//			Set the header type of every port in local memory by invoking
//			_packet_rx_set_port_header_type once per port index. The set of
//			port indices is chosen at assembly time from RX_PHY_MODE:
//
//				SPHY_1_32			:	port 0 only (single invocation)
//				SPHY_4_8, MPHY_4	:	ports 0..3  (run-time loop)
//				MPHY_16				:	ports 0..15 (run-time loop)
//
//			Any other RX_PHY_MODE value raises an assembly-time warning and
//			no instructions are emitted.
//
//	 	Outputs:
//			The header type of every port handled in the selected mode is
//			set up.
//		Inputs:
//			None (the macro takes no arguments).
//		Constants:
//			RX_PHY_MODE, compared against SPHY_1_32, SPHY_4_8, MPHY_4 and
//			MPHY_16.
//		Size: 
//			Not recorded.
//
///////////////////////////////////////////////////////////////////////////////

#macro _packet_rx_set_all_port_header_type() 

.begin

#if (RX_PHY_MODE == SPHY_1_32)

	; Only port 0 is initialised in this mode, so no loop counter is
	; declared or loaded here.
	_packet_rx_set_port_header_type(0)

#else
#if ( (RX_PHY_MODE == SPHY_4_8) || (RX_PHY_MODE == MPHY_4) )

	; The loop counter doubles as the port index handed to the per-port macro.
	.reg count
	alu[count, --, B, 0x0]

	.while(count < 4)
		_packet_rx_set_port_header_type(count)
		alu[count, count, +, 0x1]
	.endw

#else
#if (RX_PHY_MODE == MPHY_16)

	; The loop counter doubles as the port index handed to the per-port macro.
	.reg count
	alu[count, --, B, 0x0]

	.while(count < 16)
		_packet_rx_set_port_header_type(count)
		alu[count, count, +, 0x1]
	.endw

#else
#warning NO PHY MODE is defined  
#endif
#endif
#endif

.end
#endm


/////////////////////////////////////////////////////////////////////////////
// _packet_rx_debug_incr_counter
//	
//	Description:
//		Add 1 to the specified debug counter. The increment is assembled
//		only when _DEBUG_COUNTERS_ is defined; without it the macro expands
//		to no instructions.
//
//	Outputs:	
//		counter		:	Updated in place (counter = counter + 1).
//		
//	Inputs:
//		counter		:	Register holding the debug counter to increment.
//
//	CONSTANTS:	
//		_DEBUG_COUNTERS_	:	Assembly-time switch enabling the increment.
//	
//	Global variables:
//		The debug counter in an absolute register.
//
/////////////////////////////////////////////////////////////////////////////
#macro _packet_rx_debug_incr_counter(counter)
#ifdef	_DEBUG_COUNTERS_
	alu[counter, counter, +, 1]
#endif
#endm


///////////////////////////////////////////////////////////////////////////////
//
// _packet_rx_buf_alloc
//
//	 	Description: 
//
//			Allocates a buffer either from the nn ring or from sram. This
//			macro should be used only when the CSIX TX microblock puts free
//			buffers on this ME's nn ring. Currently supported only when the
//			TWO_ME_PACKET_RX switch is used.
//
//			When sig_action is SIG_NONE the nn ring is tried first and sram
//			is used only if the ring is empty. For any other sig_action the
//			buffer is always requested from sram.
//
//			The result is left in a split form so the consumer needs no
//			branch: the real handle is always
//				prefetch_buf_handle_gpr xor $prefetch_buf_handle
//			  - nn ring : prefetch_buf_handle_gpr = handle xor $prefetch_buf_handle
//			  - sram    : prefetch_buf_handle_gpr = 0
//
//	 	Outputs:
//			Global registers $prefetch_buf_handle and
//			prefetch_buf_handle_gpr.
//			sig_mask gets bit &req_sig OR-ed in, but only on the SIG_NONE
//			path that falls back to sram.
//		Inputs:
//			free_list	:	Specifies the pool from which buffers are to be allocated.
//			req_sig		:	Signal to use in the I/O operation.
//			sig_action	:	What to do with I/O operation - 
//							- wait for signal(s) as specified by sig_action
//							- do not wait for signal, just return (SIG_NONE)
//			sig_mask	:	Signal mask to which 'req_sig' is added if necessary.
//		Size: 
//			Not recorded.
//
///////////////////////////////////////////////////////////////////////////////
#macro _packet_rx_buf_alloc(free_list, req_sig, sig_action, sig_mask)
.begin

#if (streq('sig_action', 'SIG_NONE'))

	; Try the nn ring first: a buffer freed by the CSIX TX microblock and
	; queued there spares a trip to sram. Ring empty -> take the sram path.
	br_inp_state[NN_EMPTY, alloc_from_sram#]

	; Ring has an entry: pop the handle into the gpr ...
	alu[prefetch_buf_handle_gpr, --, b, *n$index++]

	; ... and fold in the current contents of the transfer register. The
	; consumer always computes gpr xor $prefetch_buf_handle, which undoes this
	; step and yields the nn-ring handle without testing where it came from.
	; NOTE(review): presumes $prefetch_buf_handle is not rewritten before the
	; handle is consumed - confirm against the caller.
	alu[prefetch_buf_handle_gpr, prefetch_buf_handle_gpr, xor, $prefetch_buf_handle]
	br[buf_alloc_done#]

alloc_from_sram#:

	; Nothing on the nn ring: request the buffer from sram and record the
	; outstanding signal in the caller's mask.
	dl_buf_alloc($prefetch_buf_handle, free_list, req_sig, sig_action)
	alu[sig_mask, sig_mask, or, 1, <<&req_sig]

	; Zero the gpr so that the consumer's xor with $prefetch_buf_handle
	; ensures the transfer register value itself is used as the handle.
	immed[prefetch_buf_handle_gpr, 0]

buf_alloc_done#:

#else

	; sig_action is not SIG_NONE: always allocate from sram. The gpr is
	; cleared so that gpr xor $prefetch_buf_handle gives the transfer
	; register value.
	dl_buf_alloc($prefetch_buf_handle, free_list, req_sig, sig_action)
	immed[prefetch_buf_handle_gpr, 0]

#endif

.end
#endm

#endif	//	__PACKET_RX_COMMON_UTIL_UC__