//------------------------------------------------------------------------------------
//                                                                      
//                   I N T E L   P R O P R I E T A R Y                   
//                                                                       
//      COPYRIGHT (c)  1998-1999 BY  INTEL  CORPORATION.  ALL RIGHTS          
//      RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//      BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//      RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//      LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//      MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//      THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                       
//                         INTEL  CORPORATION                            
//                                                                      
//                      2200 MISSION COLLEGE BLVD                        
//                                                                       
//                SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                       
//------------------------------------------------------------------------------------
// rec_enqueue.uc
// receive packet enqueue following bridge or route decision
//
// Version = 1.0.NoBldNum
//
// system: SA1200
// subsystem: receive microcode
// usage: enqueue the packet
// author: dfh 2/20/98
// revisions:
//	dfh		July 28, 1998	FAST PORT ucode supports 1 fast port, 16 100M ports
//	dfh		Aug 5, 1998		implement Non-SOP
//	dfh		Aug 11, 1998	implement enqueue seq check (moved from rec_lmatch) and fast_wr[INCR_SOP1]
//  mff     Sept 10, 1999   ifdef A1_CHIP - workaround optimize_mem hw bug
//
// ---------------------------SA1200 microcode--------------------------


//-------------------------------macros--------------------------------------------


// Rx_WriteDescriptor
//	write 2 descriptor words; zero in first word, rec_state and mpacket count in second word
//
//  inputs:
//		buf_handle				relative address of sram buffer (bits 31:16) and mpacket count
//		rec_state				receive state information
//		descriptor_base			base address for sram buffer descriptors
//	outputs:
//		relative_buf_addr		relative address of new descriptor (buf_handle >> 16)
//	side effects:
//		buf_handle<31:8>		replaced with the reformatted rec_state fields (byte 0 is kept)
//		$xfer6, $xfer7			loaded with the two words that are written
//		the macro ends with an sram write issued with ctx_swap
//
//	second word layout as built below:
//		<31>	 rec_state<30>		fast_port source
//		<23:20>	 rec_state<23:20>	SOP seq #
//		<16>	 rec_state<29>		packet_freelist
//		<15:8>	 rec_state<7:0>		byte cnt, eop, sop
//		<7:0>	 buf_handle<7:0>	unchanged
//
//	tempa and holder are declared macro-local (same declaration as Rx_F_WriteDescriptor)
//	so the scratch values built here do not overwrite the caller's tempa.
//
#macro Rx_WriteDescriptor[relative_buf_addr, buf_handle, rec_state, descriptor_base]
.local tempa holder
	immed[$xfer6, 0]								; first descriptor word is written as zero
	alu_shf[relative_buf_addr, --, B, buf_handle, >>16]						; extract relative buffer address

	ld_field_w_clr[holder, 0010, rec_state, <<8]	; move byte cnt, eop, sop from 7:0 to 15:8
	alu[tempa, 1, AND, rec_state, >>29]				; peel off packet_freelist
	alu[holder, holder, or, tempa, <<16]			; insert packet_freelist at position 16
	alu[tempa, 0xf, AND, rec_state, >>20]			; peel off SOP seq #
	alu[holder, holder, or, tempa, <<20]			; insert SOP seq # in at 23:20
	alu[tempa, 1, AND, rec_state, >>30]				; peel off fast_port source
	alu[holder, holder, or, tempa, <<31]			; insert fast_port source at 31
	ld_field[buf_handle, 1110, holder]				; insert rec_state 31:8 (buf_handle byte 0 is preserved)
	alu_shf[$xfer7, --, B, buf_handle]										; move to transfer reg
	sram [write, $xfer6, descriptor_base, relative_buf_addr, 2], ctx_swap	; write the packet_link
.endlocal									
#endm


// Rx_F_WriteDescriptor
//	write fast port descriptor: 2 words written to sram at descriptor_base + index
//
//	inputs:
//		index					offset of the descriptor entry to write (used as the sram address offset)
//		buf_handle				relative buffer address in bits 31:16; byte 0 is carried through unchanged
//		rec_state				receive state information
//		descriptor_base			base address for the sram write
//	side effects:
//		buf_handle<31:8>		replaced with the reformatted rec_state fields
//		$xfer6					loaded with buf_handle >> 16 (first word written)
//		$xfer7					loaded with the reformatted buf_handle (second word written)
//		the macro ends with an sram write issued with ctx_swap
//
//	second word layout as built below:
//		<31>	 rec_state<30>		fast_port source
//		<23:20>	 rec_state<23:20>	SOP seq #
//		<16>	 rec_state<29>		packet_freelist
//		<15:8>	 rec_state<7:0>		byte cnt, eop, sop
//		<7:0>	 buf_handle<7:0>	unchanged
//
#macro Rx_F_WriteDescriptor[index, buf_handle, rec_state, descriptor_base]
.local tempa holder
	//format the data for the transmit side (- transmit should be made compatible with rx format)
	alu_shf[$xfer6, --, B, buf_handle, >>16]		; extract relative buffer address
	ld_field_w_clr[holder, 0010, rec_state, <<8]	; move byte cnt, eop, sop from 7:0 to 15:8
	alu[tempa, 1, AND, rec_state, >>29]				; peel off packet_freelist
	alu[holder, holder, or, tempa, <<16]			; insert packet_freelist at position 16
	alu[tempa, 0xf, AND, rec_state, >>20]			; peel off SOP seq #
	alu[holder, holder, or, tempa, <<20]			; insert SOP seq # in at 23:20
	alu[tempa, 1, AND, rec_state, >>30]				; peel off fast_port source
	alu[holder, holder, or, tempa, <<31]			; insert fast_port source at 31
	ld_field[buf_handle, 1110, holder]				; insert rec_state 31:8
	alu_shf[$xfer7, --, B, buf_handle]				; move to transfer reg
	sram [write, $xfer6, descriptor_base, index, 2], ctx_swap	; write the packet_link - need ctx_swap
.endlocal
#endm



// FastPort_UpdateCount
//	update number of elements stored for fast output port
//
//	inputs:
//		gig_count_addr			scratch address (immediate) of the queued element counter
//	side effects:
//		tempb is overwritten with gig_count_addr (tempb is not declared local to this macro)
//		no data is returned and no signal/ctx_swap is requested on the scratch operation
// 
#macro FastPort_UpdateCount[gig_count_addr]
	immed[tempb, gig_count_addr]				; address of elements-queued-to-fast-port counter
	scratch [incr, --, tempb, 0, 1]				; increment the count in scratch (no value is read back)
#endm



// tag_packet
// for testing under ZERO_PROTOCOL
// insert the receive control and status<31:0> to packet address + 4 (bytes 32-39 of packet)
//
//	registers read from the surrounding code (not macro arguments):
//		rfifo_entry				receive fifo element (ele << 3)
//		buf_handle				buffer handle used to form the sdram packet address
//		$xfer7					receive control word
//	side effects:
//		$xfer0, $$xfer0, $$xfer1 are overwritten; two ctx_swap operations occur
//	note:
//		$$xfer0 is only loaded when ZERO_PROTOCOL is 1 or 2; for any other value the
//		sdram write below sends whatever $$xfer0 already holds
#macro tag_packet
.local tempa tempb

	alu_shf[tempa, 1, +, rfifo_entry, >>2]				; (rfifo_entry is ele<<3) => ele no * 2 + 1
	immed[tempb, 0x80]
	r_fifo_rd[$xfer0, tempa, tempb, 1], ctx_swap		; read 1 word of receive fifo into $xfer0 (used as status when ZERO_PROTOCOL == 2)

	immed32[tempa, SDRAM_PKT_BUFFER_BASE]
	alu_shf[tempa, tempa, +, buf_handle, >>8]			; 2k byte chunks would be <<11, *8 makes it <<8
	alu[$$xfer1, --, B, $xfer7]							; receive control
#if (ZERO_PROTOCOL == 1)
	immed32[tempb, 0x32333435]							; known key
	alu[$$xfer0, --, B, tempb]
#elif (ZERO_PROTOCOL == 2)
	alu[$$xfer0, --, B, $xfer0]							; status <31:0>
#endif
	sdram[write, $$xfer0, tempa, 4, 1], ctx_swap		; write $$xfer0/$$xfer1 at packet address + 4
.endlocal
#endm

//----------------------------end macros-------------------------------------------




// registers local to the enqueue / store-data code below (scope ends at the matching .endlocal):
//	queue_descriptor_addr	sram address of the queue descriptor being updated
//	descriptor_base			base address used for the packet link / buffer descriptor writes
//	queue_packet_count		slow port queue packet count after adding this packet
//	next_ptr				new buffer address shifted into the head position (31:16)
//	head_ptr, tail_ptr		head (31:16) and tail (15:0) fields of queue descriptor word 0
.local queue_descriptor_addr descriptor_base queue_packet_count next_ptr head_ptr tail_ptr



// ---------------------------------------------------------------------
//	register usage at this point
//
//	buf_handle				address offset of the SOP sram descriptor, mpacket count
//	rec_state				contains byte count, eop, sop, fast port info, dest port
//	packet_buf_addr			address of buffer in sdram





// we will get to done# in about 175 cycles

 
// now update the packet queue / packet link descriptors
//
// format the new packet link descriptor
//
// packet_link format:
//										$packet_link0
//   		 +---------------------------------------------------------------------------+
//			 |						  NPL (next packet link)						     |
//			 |							     31:0									     |
//			 +---------------------------------------------------------------------------+
//
//										$packet_link1
// from		 +-----+-------+------+------+--------+-------------+---+-----+-------+-------+
// rec_state |FPORT|qselect|inport|F seq |freelist|qw/byte count|eop|qtype|discard|outport|
//			 |     |       |      |      |        |             |   |     |       |  ele# |
//			 | 31  | 30:28 |27:24 |23:20 |  19:16 |     15:10   | 9 |  8  |   7   |  6:0  |
//			 +-----+-------+------+------+--------+-------------+---+-----+-------+-------+

// 
//	FPORT			1 if source port is fast port
//	qselect			selected q
//	inport			slow input port number
//	F seq		    fast port packet sequence number if SOP, mpacket sequence number if not SOP		
//	freelist		buffer freelist
//	qw byte count	3 bits quadword count 3 bits byte count of last element
//	qtype			destination or core queue (note: up until qselect, sop is held here)
//  discard			if =1, packet is not to be enqueued
//	outport/ele#	up until enqueue, output port.	number of elements in packet

// There is a choice of queues the packet can be placed on:
//
//		qtype						count/description			handled by	
//		1=destination queues		8 per port x 32 ports		transmit microcode
//									qselect per port, 
//										0 = hi priority
//										7 = lo priority
//								
//		0=slow path queue			1							core receive application
//		others unused				filter, trap, etc
//
									
// There are 8 free lists for descriptors. The dtype field picks which one.
//	dtype
//	0 = normal packet data:		sram and sdram allocated, 8 bytes sram, 2k byte sdram chunks
//	1-7	unused

// this is fast path
// dtype		= 0 normal packet
// qtype		= 1 destination queue
// qselect	= 0 hi priority

// header_stored: entered once the header mpacket has been stored.
// If this mpacket is not EOP (rec_state bit 1 clear) the packet is not enqueued yet;
// state is saved for the thread that will handle the following mpacket and control
// leaves this section. If it is EOP, execution falls through to rec_enqueue#.
header_stored#:

	.if (bit(rec_state, 1) == 0)							; if not EOP don't enqueue yet
#ifdef FAST_PORT_ENABLED
sop_save#:
		.if (bit(rec_state, 30) ==1)						; if fast port
			alu[tempb, 0xf, AND, rec_state, >>20]			; get SOP seq number
			state_save[$xfer5, output_port, RX_FPORT1_OUTPORT, tempb]	; save output_port for EOP thread to pick up
			br[next_packet#]
		.endif
#endif

#ifdef REC_STATE_SAVE
slow_save1#:
		Rx_SaveSlowState[buf_handle, rec_state, output_port]	; save slow port state for the next mpacket
		br[next_packet#]
#endif
		br[next_mpacket#]									; only reached when REC_STATE_SAVE is not defined
	.endif



// rec_enqueue: enqueue a completed (EOP) packet.
//	fast output port (output_port == FAST_PORT1, FAST_PORT_ENABLED builds only):
//		lock the queue descriptor, advance the packet link index, unlock,
//		write the packet link at the old index, bump the fast port element count
//	any other output port:
//		write the buffer descriptor, lock the queue descriptor, link the buffer
//		at the tail (or as head and tail if the queue was empty), write back and unlock
// both paths finish at enqueue_done# and branch to next_packet#
rec_enqueue#:


// queue_descriptor array has space for up to 8 queues per port
// 2 words per queue descriptor = 16 words per port
// FAST PORT ref design has 17 ports
//
rec_queue_addr#: 
	immed[queue_descriptor_addr, SRAM_QUEUE_DESCRIPTOR_BASE]

// test for multicast - multicast not supported
//	alu_shf[tempb, 0x7, AND, rec_state, >>16]				; get freelist choice
//	alu[--, 0x3, -, tempb]
//	br=0[rec_enqueue_multicast#]						; dtype == multicast


rec_enqueue_unicast#:

// transfer register aliases used by the enqueue code
.operand_synonym $new_tail $xfer4
.operand_synonym $queue_descriptor0	$xfer2
.operand_synonym $queue_descriptor1	$xfer3



#ifdef FAST_PORT_ENABLED
.operand_synonym $gig_count $xfer3

// if fast input port, enqueue in order
//
	.if (bit(rec_state, 30) == 1)
fast_seq_check#:												; FAST input PORT
		alu_shf[tempa, 0xf, AND, rec_state, >>20]				; extract seq number
		#ifdef FAST_PORT1
			FastPort_SeqCheck[ENQUEUE_SEQ1, tempa]				; wait for my seq no. to come up
			fast_wr[3, INCR_ENQ_NUM1]							; increment fast port 1 seq number
		#else
		#ifdef FAST_PORT2
			FastPort_SeqCheck[ENQUEUE_SEQ2, tempa]				; wait for my seq no. to come up
			fast_wr[3, INCR_ENQ_NUM2]							; increment fast port 2 seq number
		#endif ; FAST_PORT2
		#endif ; FAST_PORT1
	.endif

// test fast or slow output port
// the branch is deferred by 2, so the qselect setup in the two instructions after it
// is shared by the fast and the slow output port paths
	alu[--, output_port, -, FAST_PORT1]
	br!=0[not_fast_output_port#], defer[2]
//	alu[tempb, 0x7, AND, rec_state, >>28]										; get q select
	alu[tempb, --, b, 0]														; hard code qselect to 0
	alu[queue_descriptor_addr, queue_descriptor_addr, +4, tempb]				; add qselect
	alu[queue_descriptor_addr, queue_descriptor_addr, +, 1, <<8]		; @single fast output port

fast_enqueue#:
	immed[descriptor_base, XMIT_FPORT_DESCRIPTOR_BASE]

// for fast port, $queue_descriptor0 will contain index to packet link array
//	the head is stored in transmit fill internal registers

// note: fast port queue descriptors are initialized to point to initial tails XMIT_FPORT_TAILS
// transmit fill will read these to get started

		sram [read_lock, $queue_descriptor0, queue_descriptor_addr, 0, 1],	
					ctx_swap								; read current packet link index from queue

		alu[tempa, 2, +, $queue_descriptor0]				; increment to next packet link index
		alu[$new_tail, 0, +8, tempa]						; truncate so wrap is = transmit wrap
		sram[write_unlock, $new_tail, queue_descriptor_addr, 0, 1], priority		; increment index

		// the packet link is written at the index that was read, i.e. before the increment
		Rx_F_WriteDescriptor[$queue_descriptor0, buf_handle, rec_state, descriptor_base] ; write packet link

fport_update_count#:
#ifdef FAST_PORT1
		FastPort_UpdateCount[XMIT_FPORT1_ELE_COUNT]						; update queued element count
#else
#ifdef FAST_PORT2
		FastPort_UpdateCount[XMIT_FPORT2_ELE_COUNT]						; update queued element count
#endif ;FAST_PORT2
#endif ;FAST_PORT1

		br[enqueue_done#]
not_fast_output_port#:
#else

	alu[tempb, --, b, 0]														; hard code q select
	alu[queue_descriptor_addr, queue_descriptor_addr, +4, tempb]				; add qselect
#endif	// FAST_PORT_ENABLED
	alu[queue_descriptor_addr, queue_descriptor_addr, +, output_port, <<4]		; add output port

slow_enqueue#:
// order of sram accesses for queue update for slow output port
//	1. read the queue_descriptor and lock the linked list(old head/tail pointer = queue descriptor word 0)
//	2. get the current tail
//	3. write new queue tail
//	4. write the new queue_descriptor and unlock the linked list
// (the new buffer's own descriptor is written first, before the lock is taken)


write_buf_descriptor#:
	immed[descriptor_base, SRAM_BUFF_DESCRIPTOR_BASE]
	Rx_WriteDescriptor[relative_buf_addr, buf_handle, rec_state, descriptor_base]	; also sets relative_buf_addr


// the queue descriptor format for slow ports is
//		word 0		<31:16>		head packet link pointer
//		word 0		<15:0>		tail packet link pointer
//		word 1					packet count
//
	sram [read_lock, $queue_descriptor0, queue_descriptor_addr, 0, 2], 
				optimize_mem, ctx_swap								; read the queue descriptor 2 words
	alu_shf[queue_packet_count, 1, +16, $queue_descriptor1]			; update queue element count
	.if (queue_packet_count == 1)									; if queue was empty before
		scratch_bset_ind[$xfer5, output_port, XMIT_PWP_VECTOR, NOSYNC]	; set packet queued bit for output port
		alu_shf [next_ptr, 0, b, relative_buf_addr, <<16]			; new buffer address in head position
		alu [$queue_descriptor0, next_ptr, or, relative_buf_addr]	; set head pointer and tail pointer to new buffer descr
	.else															; if queue had one or more packet
		ld_field_w_clr[tail_ptr, 0011, $queue_descriptor0]			; current tail = word 0 <15:0>
		ld_field_w_clr[head_ptr, 1100, $queue_descriptor0]			; current head = word 0 <31:16>, kept in place
		alu [$queue_descriptor0, head_ptr, or, relative_buf_addr]	; OR in the new tail pointer for queue descr
		alu [$new_tail, --, B, relative_buf_addr]					; relative_buf_addr is the new tail
		sram [write, $new_tail, descriptor_base, tail_ptr, 1], ctx_swap	; rewrite the current (now old) tail
	.endif
	ld_field_w_clr[$queue_descriptor1, 0111, queue_packet_count]	; word 1 = low 3 bytes of the updated count
	sram [write_unlock, $queue_descriptor0, queue_descriptor_addr, 0, 2], priority, ctx_swap ; write the updated queue descriptor and release the lock - ctx_swap required

#ifdef REC_STATE_SAVE
	Rx_EOPUnBlockSlowPort[rec_state]					; unblock the port enabling next SOP for this port
#endif

enqueue_done#:

#ifdef PROFILE
	immed[tempa, TOTAL_RECEIVES]							; total enqueues
	scratch[incr, --, tempa, 0, 1]							; increment the counter in scratch
#endif
	br [next_packet#]


#ifdef FAST_PORT_ENABLED
// not sop case from rec_nextpac, fast port only
//
// $xfer7 holds the receive control word for this mpacket. The state saved by the
// previous mpacket's thread is restored, any buffer held in $new_descriptor is pushed
// back to the freelist, and control continues at store_data#.
//
fast_not_sop#:
	alu[--, $xfer7, AND, 1, <<1]										; test EOP
	br=0[fast_wait_for_pop#], defer[1]									; if not EOP, state will be saved later
	alu[tempb, 0x1e, AND, $xfer7, >>13]									; get mseq num (deferred slot: runs on both paths)
	Rx_ValidateNextFastState[tempb]										; validate the next slot

fast_wait_for_pop#:
		ctx_arb[sram]														; wait for pop to complete

fast_rest#:
	Rx_RestoreFastState[buf_handle, rec_state]							; get state from previous mpacket

not_using_buffer#:
	alu[--, --, b, $new_descriptor]										; test $new_descriptor for zero
	br=0[skip_push_no_buf#]												; zero: nothing to push
		sram[push, --, $new_descriptor, 0, PACKET_FREELIST]					; push buffer back to freelist	
skip_push_no_buf#:
	ld_field[rec_state, 0001, $xfer7]									; merge byte count, eop, sop from rcv_cntl 7:0 to rec_state 7:0

// calculate packet buffer address, update mpacket count in buf_handle
//
	Rx_SetPktBufAddr[packet_buf_addr, buf_handle, rec_state]						; defined in rec_nextpac
	br[store_data#]
#endif	// FAST_PORT_ENABLED


//  if thread stays with the slow port it comes here after forwarding decision or store data
//
// next_mpacket: fetch the receive control for the next mpacket of the current packet.
// An mpacket flagged SOP here is unexpected and goes to wrong_sop#. Otherwise the
// low byte of rec_state is refreshed from the receive control and execution falls
// through slow_not_sop# into store_data#.
next_mpacket#:
#ifdef RECEIVE16
// RECEIVE16 builds: poll RCV_RDY_LO until the tested bit is set, then issue the receive request
check_port#:
	csr[read, $xfer0, RCV_RDY_LO], defer[1], ctx_swap				; get port ready
	alu[tempa, --, B, rfifo_entry, >>3]								; tempa = rfifo_entry >> 3
	alu[--, tempa, B, 0]											; presumably sets the shift amount for >>indirect below - confirm
	alu[--, 1, AND, $xfer0, >>indirect]								; test one bit of the ready flags
	br=0[check_port#]												; bit clear: poll again

port_rdy1#:
send_req1#:
	sem_wait[@req_inflight]
	sem_flip[@req_inflight]								; set to block other contexts from sending rec_req

got_rr_avail#:
	move[$xfer0, receive_req]
	csr[write, $xfer0, rcv_req],ctx_swap				; send req to FBI
next_packet_data#:


#endif
	Rx_Receive[$xfer7, mpacket_cancel#, mpacket_rxfail#]				; get receive control for next mpacket
#ifdef RECEIVE16
#endif
	// branch is deferred by 2: the two instructions after it run whether or not it is taken
	br_bset [$xfer7, 0, wrong_sop#], defer[2]							; check for continuation packet
	immed[exception, MAC_EXP_NOT_SOP]									; expected non-SOP from the mac device
	ld_field[rec_state, 0001, $xfer7]									; merge byte count, eop, sop from rcv_cntl 7:0 to rec_state 7:0


// from rec_nextpac or not EOP just finished lookup
//
slow_not_sop#:


#ifdef REC_STATE_SAVE
		ctx_arb[sram]													; wait for pop to complete
slow_restore#:
		Rx_RestoreSlowState[output_port, buf_handle, rec_state, $xfer7]
		alu[--, --, b, $new_descriptor]									; if no buf, then don't push
		br=0[skip_not_sop_push#]
		sram[push, --, $new_descriptor, 0, PACKET_FREELIST]				; push buffer back to freelist	
skip_not_sop_push#:
		ld_field[rec_state, 0001, $xfer7]								; merge byte count, eop, sop from rcv_cntl 7:0 to rec_state 7:0
#endif


// calculate packet buffer address, update mpacket count in buf_handle
//
	Rx_SetPktBufAddr[packet_buf_addr, buf_handle, rec_state]						; defined in rec_nextpac


// store_data: move the current (non-SOP) mpacket from the receive fifo to sdram.
//	EOP (rec_state bit 1 set):
//		discard (rec_state bit 19 set)	signal THREAD_DONE and go to next_packet# without enqueueing
//		otherwise						store synchronously, then branch to rec_enqueue#
//	not EOP:
//		store asynchronously unless discard is set, save state for the next mpacket,
//		wait for the sdram signal if a store was issued, signal THREAD_DONE
store_data#:


// move the packet data from receive fifo element to sdram (macro defined in rec_nextpac.uc)
//
	.if (bit(rec_state, 1) == 1)										; if EOP
		.if (bit(rec_state, 19) == 1)									; check for non eop discard
			// mike - push buffer being used back onto freelist
			// NOTE(review): no freelist push is issued on this path in the code visible here - confirm
			fast_wr[3, THREAD_DONE]										; Beta4 Fix
			br[next_packet#]											; if discard, don't enqueue
		.endif

	    sdram_r_fifo_rd[packet_buf_addr, rfifo_entry, 0, 8, SYNC]		; move mpacket to sdram, ctx_swap


#ifdef ZERO_PROTOCOL
#if (ZERO_PROTOCOL > 0)
tag_the_packet#:
		tag_packet							; insert receive control and status in bytes 32-39
#endif
#endif

last_mpacket_stored#:
#ifdef FAST_PORT_ENABLED
		alu[--, FAST_PORT1, -, $xfer7, >>24]							; compare $xfer7<31:24> with FAST_PORT1
		br!=0[slow_last_mpacket_stored#]								; not equal: take the slow port EOP path
			fast_wr[2, THREAD_DONE]										; write fast port done
			alu[tempb, 0xf, AND, rec_state, >>20]						; get SOP seq number
			state_restore[$xfer5, output_port, RX_FPORT1_OUTPORT, tempb]	; fetch the output_port stored by state_save at sop_save#
			br[rec_enqueue#]											; write descriptor and enqueue 
#endif
// else slow port EOP
slow_last_mpacket_stored#:
#ifndef RECEIVE16
		fast_wr[3, THREAD_DONE]											; else write slow port EOP done
#endif
		br[rec_enqueue#]									; write descriptor and enqueue 
	.else

// not EOP
		.if (bit(rec_state, 19) == 0)									; if no discard
			sdram_r_fifo_rd[packet_buf_addr, rfifo_entry, 0, 8, ASYNC]	; move mpacket to sdram, sig_done
		.endif
mid_mpacket_stored#:													; not SOP not EOP
#ifdef FAST_PORT_ENABLED
		alu[--, FAST_PORT1, -, $xfer7, >>24]							; compare $xfer7<31:24> with FAST_PORT1
		br!=0[slow_mid_mpacket_stored#]									; not equal: take the slow port path
			alu[tempb, 0x1e, AND, $xfer7, >>13]							; get mseq num
fast_save2#:
#ifdef ZERO_PROTOCOL
			.if (bit(rec_state, 0) == 0)								; if not SOP and ZERO_PROTOCOL
#endif
				Rx_SaveFastState[buf_handle, rec_state, tempb]			; save state to next mpacket
#ifdef ZERO_PROTOCOL
			.endif
#endif
			alu_shf[--, rec_state, AND, 1, <<19]						; test discard bit
			br>0[no_sdram_pending1#]									; if discard there will be no sdram sig
			ctx_arb[sdram]												; wait for the ASYNC sdram store to complete
no_sdram_pending1#:

			#ifndef RECEIVE16
				fast_wr[2, THREAD_DONE]									; notify receive scheduler fast port done
			#endif
			br[next_packet#]											; get next slow packet or fast mpacket
#endif

// else slow port not EOP
slow_mid_mpacket_stored#:
		alu_shf[--, rec_state, AND, 1, <<19]							; test discard bit
		br>0[no_sdram_pending2#]										; if discard there will be no sdram sig
		ctx_arb[sdram]													; wait for the ASYNC sdram store to complete
no_sdram_pending2#:

		fast_wr[1, THREAD_DONE]											; notify receive scheduler non-EOP done
#ifdef REC_STATE_SAVE
slow_save3#:
		Rx_SaveSlowState[buf_handle, rec_state, output_port]
		br[next_packet#]												; get next mpacket
#endif
		br[next_mpacket#]												; get next mpacket (only reached when REC_STATE_SAVE is not defined)

	.endif



mpacket_cancel#:
// packet is dropped due to cancel. just in case this happened on slow port, skip and proceed to next mpacket
//
// entered from Rx_Receive (passed as its cancel target in next_mpacket#).
// bumps the per-exception counter and the total discard counter, signals THREAD_DONE,
// then goes back for the next mpacket.
// NOTE(review): the value of `exception` used below is not set in this section - confirm where it is loaded
//
	immed[tempa, EXCEPTION_COUNTERS]					; 11 contexts, 16 locations each
	alu[tempa, tempa, +, rfifo_entry, <<1]				; add context id * 16 (rfifo_entry << 1)
	scratch[incr, --, tempa, exception, 1]				; increment counter at tempa + exception

	immed[tempa, TOTAL_DISCARDS]
	scratch[incr, --, tempa, 0, 1]						; increment total discards

	fast_wr[2, THREAD_DONE]
	br[next_mpacket#]

mpacket_rxfail#:
// packet is dropped due to rxfail. bump a counter and treat it like EOP
//
// entered from Rx_Receive (passed as its rxfail target in next_mpacket#).
// same counter updates as mpacket_cancel#, but finishes like a slow port EOP:
// THREAD_DONE is signalled (non-RECEIVE16 builds only) and control goes to next_packet#.
// NOTE(review): the value of `exception` used below is not set in this section - confirm where it is loaded
//
	immed[tempa, EXCEPTION_COUNTERS]					; 11 contexts, 16 locations each
	alu[tempa, tempa, +, rfifo_entry, <<1]				; add context id * 16 (rfifo_entry << 1)
	scratch[incr, --, tempa, exception, 1]				; increment counter at tempa + exception

	immed[tempa, TOTAL_DISCARDS]
	scratch[incr, --, tempa, 0, 1]						; increment total discards

#ifndef RECEIVE16
		fast_wr[3, THREAD_DONE]											; else write slow port EOP done
#endif

	br[next_packet#]


// wrong_sop: reached from next_mpacket# when a continuation mpacket has its SOP bit set.
// the context spins here forever (branch to self); there is no recovery path.
wrong_sop#:
	br[wrong_sop#]

// rec_enqueue_multicast: placeholder, multicast enqueue is not implemented.
// the only reference in this section is commented out; if reached, the context spins here forever.
rec_enqueue_multicast#:
; *tbd*
	br[rec_enqueue_multicast#]

// end of the register scope opened by the .local directive ahead of header_stored#
.endlocal

