//------------------------------------------------------------------------------------
//                                                                      
//                   I N T E L   P R O P R I E T A R Y                   
//                                                                       
//      COPYRIGHT (c)  1998-2000 BY  INTEL  CORPORATION.  ALL RIGHTS          
//      RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//      BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//      RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//      LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//      MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//      THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                       
//                         INTEL  CORPORATION                            
//                                                                      
//                      2200 MISSION COLLEGE BLVD                        
//                                                                       
//                SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                       
//------------------------------------------------------------------------------------
// rec_scheduler2_fbust.uc
// receive scheduler 16 100Mbit ports, configurable for test purposes
//
//
// system: SA1200
// subsystem: receive microcode
// usage: example
// author: dfh 3/30/99
// revisions:
//

// Assembly time DEFINE switches
//		BUFFER_COUNT				how many 2KB packet buffers to allocate
//		FETCH9					if 1, set bit 29 on in receive request
//		REAL_RDYBUS_PROG			if defined, call macro to set next 3 CSRs
//		DEF_RDYBUS_TEMPLATE_CTL			32 bit value for RDYBUS_TEMPLATE_CTL
//		DEF_RDYBUS_TEMPLATE_PROG1		32 bit value for RDYBUS_TEMPLATE_PROG1
//		DEF_RDYBUS_SYNCH_COUNT_DEFAULT		value for RDYBUS_SYNCH_COUNT_DEFAULT



// ---------------------------SA1200 microcode--------------------------

// FBOX microcode assignment
//
//    1. the receive threads are threads 0-11
//    2. receive scheduler is thread 16

// general design flow:

// 1. initialize memory freelists
// 2. initialize registers used in the main loop
// 3. major loop:
//
//  a) get thd_done and calculate rec_rdy_true
//          because there is a delay on receiving rec_ready from the fbi,
//          we will delay making a new assignment to a port if it was assigned on previous iteration
//     assume:
//     CSR Rec_rdycnt is pushed to $xfer0
//     CSR Rec_rdybits are pushed to $xfer1
// 
//	For an iteration of the major loop, we expect to amortize these 30 cycles in the minor loop,
//	which picks up two thread dones per iteration.
//
//  b) minor loop: find_next_threads
//     
//      1)  find first bit set in thread_done_copy 21:0 and 23:22
//       
//		2) process two thread dones, alternating between the fast port and any slow port   


// key registers:
//
// thread_done_copy			copy of thd_done_reg0 CSR.
// current_thread			current thread index
// current_port				current port index
// rec_rdy_true				calc from rec_rdy AND !port_mask0 AND !port_mask1
// this_rr					receive request while being assembled

//---------------------------------definitions-------------------------------
#include "mem_map.h"
// standard macros
#include "stdmac.uc"
#include "mem.uc"			// memory allocation macros
#include "refdes_macros.uc"


//----------------------------------startup----------------------------------
// Entry point shared by the contexts of this microengine.
// Context 0 branches to the receive scheduler, context 1 branches to the
// thread-done / ready-bit reader, and any context that takes neither branch
// falls through to ctx_arb[kill].
// NOTE(review): .xfer_order presumably fixes the relative allocation order of
// $xfer0..$xfer2 so that multi-register CSR pushes land where the
// .operand_synonym names further down expect them -- confirm against the assembler manual.
.xfer_order $xfer0 $xfer1 $xfer2


	br=ctx[0, rec_scheduler#]		; receive scheduler
	br=ctx[1, tdone_rdybit_reader#]	; context to read thread done and rdy bits
	ctx_arb[kill]					; kill contexts 2-3


rec_scheduler#:

// setup receive ready control to identify which thread fbi will auto_push ready bits to
//
// +--------+----------+-------+----+---------+-----+----------+
// |1thread |push count|enab ap|when| pushcmd | sig |thread id |
// |  14    |  12:10   |   9   |  8 |  7:6    |  5  |   4:0    |
// +--------+----------+-------+----+---------+-----+----------+
// field		value
// 1thread		0	for fast port put the thread of receive request to receive control
// push count	3	delay timer for both receive and xmit
// enab ap		0	enable autopush
// when			0   after rec_rdycnt has incremented	
// pushcmd		0	push nothing
// sig			0	signal
// thread_id	16	rec_scheduler
//
// Resulting register value: (push count 3 << 10) | (thread id 16) = 0xc00 | 0x10 = 0xc10;
// every other field in the table above is 0.

setup_ctl#:
	immed[$xfer2, 0xc10]		; stage the control word in the write transfer register
	csr [write, $xfer2, rcv_rdy_ctl], ctx_swap 	; write it to rcv_rdy_ctl and swap out until the write completes


// program fbi bus mode and rdybus sequencer program for transmit/receive autopush and rdy bit polling
// note: Core can do this by writing to the FBI CSRs
// prerequisite: define constants as described in 
//		Fbus_SetupRdyProg,  refdes_macros.uc
//
// Assembled only when REAL_RDYBUS_PROG is defined: expands the
// Fbus_SetupRdyProg macro (see the prerequisite note above; the macro body
// lives outside this file). When the switch is not defined, nothing is
// emitted here and execution continues straight on to the freelist setup.
#ifdef REAL_RDYBUS_PROG
setup_rdyprg#:
	Fbus_SetupRdyProg
#endif


// call	#macro freelist_create[freelist_id, base_addr, stride, count]
//		initialize freelist 0 for BUFFER_COUNT 2KB_packet_descriptors
//		see also: mem_map.h, stdmac.uc, stdfunc.uc
//
// BUFFER_COUNT may be supplied as an assembly-time switch; when it is not,
// it defaults to FREELIST_BTYPE_SRAM8_SDRAM2K_COUNT.
#ifndef BUFFER_COUNT
	#define BUFFER_COUNT FREELIST_BTYPE_SRAM8_SDRAM2K_COUNT
#endif
//
// Arguments, in the order given by the macro signature quoted above:
//   freelist_id = 0
//   base_addr   = SRAM_BUFF_DESCRIPTOR_BASE
//   stride      = 4
//   count       = BUFFER_COUNT
freelist_create[0, SRAM_BUFF_DESCRIPTOR_BASE, 4, BUFFER_COUNT]

// Label marking the point at which the freelist macro has finished expanding.
freelist_created#:

// signal receive threads to go
// Three inter-thread signals are written, with values 0, 4 and 8.
// NOTE(review): presumably these are the thread ids of the first receive
// context on each of three microengines -- confirm against the thread map.
fast_wr[0, inter_thd_sig]
fast_wr[4, inter_thd_sig]
fast_wr[8, inter_thd_sig]


// One-time register setup for context 0, done before entering the scheduler loops.
	immed[fbi_req_outstanding, 0]								; initialize no fbi receive request in progress

// const_0x101 is the AND~ mask the minor loop applies to each find_bset result.
	immed[const_0x101, 0x101]
// port_mask_fairness is not referenced again in this file.
// NOTE(review): presumably consumed by name inside the RxSched_* macros -- confirm.
	immed[port_mask_fairness, 0xffff]

// context 0: job is to select thread/port  and write receive requests
//
// Startup gate: yield, then test the two absolute registers that context 1
// fills in. The scheduler loops back here until BOTH @thread_done_capture and
// @rec_rdy are non-zero, and only then falls through into the major loop.
rx_sched_first#:
	ctx_arb[voluntary]											; allow other contexts to run
	alu[--, --, B, @thread_done_capture]						; set condition codes from the captured thread-done bits
	br=0[rx_sched_first#]										; nothing captured yet: keep waiting
	alu[--, --, B, @rec_rdy]									; set condition codes from the captured ready bits
	br=0[rx_sched_first#]										; no port ready yet: keep waiting

// Top of the major loop (context 0): yield once, then rebuild the working set
// of thread-done bits that the minor loop scans.
//   thread_done_copy |= @thread_done_capture   (bits accumulated by context 1)
//   thread_done_copy |= thread_done_skips      (threads skipped because their port was not ready)
// Each source is zeroed right after it has been merged; there is no context
// swap between a merge and its clear in this block.
// NOTE(review): thread_done_copy and thread_done_skips are read here, but no
// initialization of either register is visible in this file's startup code --
// confirm they are zeroed elsewhere (a macro, the loader, or reset state).
rx_sched_major#:
	ctx_arb[voluntary]
	alu[thread_done_copy, thread_done_copy, OR, @thread_done_capture]
	immed[@thread_done_capture, 0]									; clear captured bits
	alu[thread_done_copy, thread_done_copy, OR, thread_done_skips]	; recover skipped threads due to port not ready
	immed[thread_done_skips, 0]										; clear skip bits

//--------------------------rec scheduler loop -------------------------

// Minor loop: each pass issues two find_bset scans of thread_done_copy (the
// register itself and the register shifted right by 16), then services up to
// two thread-done hits, "a" from the first scan and "b" from the second.
//
// Control flow:
//   a found              -> request for a, then fall into rx_service_thread_b#
//   a found, b not found -> back to rx_find_next_thread# (full rescan)
//   a found, b found     -> request for b, then rx_find_next_thread2#
//   a not found, b found -> request for b, then rx_find_next_thread2#
//   neither found        -> back to rx_sched_major# to pick up new captures
//
// Every branch in this block that carries defer[1] executes the instruction
// immediately following it before the branch takes effect.
// NOTE(review): thread_done_copy is never modified in the visible code of this
// loop, so the RxSched_RequestSlowPort macro presumably clears (or moves to
// thread_done_skips) the bits of the thread it handled -- confirm in refdes_macros.uc.
rx_find_next_thread#:
	find_bset[thread_done_copy], clr_results					; find first bit set in 15:0
// Secondary entry point: used when the first find_bset was already issued in
// the defer slot of the branch that jumps here.
rx_find_next_thread2#:
	find_bset[thread_done_copy, >>16]							; find first bit set in 31:16
// NOTE(review): this macro sits between the find_bset pair and the result
// loads; presumably it also covers the find_bset result latency -- confirm.
	RxSched_UpdatePortMask[port_mask_prev, port_mask_current]

// Fetch both scan results. The b result is loaded in the branch's defer slot,
// so current_threadx2b is valid on both the taken and the fall-through path.
rx_service_thread_a#:
	load_bset_result1[current_threadx2a]
	br=0[rx_service_thread_b_no_a#], defer[1]					; no thread done set in 15:0
	load_bset_result2[current_threadx2b]
	alu_shf[current_threadx2, current_threadx2a, AND~, const_0x101]	; clear the valid bit and low order bit
	RxSched_RequestSlowPort[current_threadx2]					; ~27 insns, schedule receive request for slow thread

// Reached after a request was made for result a. The mask/clear in the defer
// slot runs whether or not the branch is taken; its result is only used on
// the fall-through path.
rx_service_thread_b#:
	alu[--, --, B, current_threadx2b]	
	br=0[rx_find_next_thread#], defer[1]						; no thread done in 23:16
	alu_shf[current_threadx2, current_threadx2b, AND~, const_0x101]	; clear the valid bit and low order bit
// The port mask is updated again here because a request was just made for
// result a (the _no_a path below has no such call).
	RxSched_UpdatePortMask[port_mask_prev, port_mask_current]
	RxSched_RequestSlowPort[current_threadx2]					; ~27 insns, schedule receive request for slow thread
	br[rx_find_next_thread2#], defer[1]							; iterate
	find_bset[thread_done_copy], clr_results					; find first bit set in 15:0

// Reached when the first scan found nothing. If the second scan is empty as
// well, the whole working set is exhausted and control returns to the major loop.
rx_service_thread_b_no_a#:
	alu[--, --, B, current_threadx2b]	
	br=0[rx_sched_major#], defer[1]						; no thread done in 23:16
	alu_shf[current_threadx2, current_threadx2b, AND~, const_0x101]	; clear the valid bit and low order bit
	RxSched_RequestSlowPort[current_threadx2]					; ~27 insns, schedule receive request for slow thread
	br[rx_find_next_thread2#], defer[1]							; iterate
	find_bset[thread_done_copy], clr_results					; find first bit set in 15:0

//---------------------end rec scheduler loop --------------------------



//---------------------context 1 --------------------------
// context 1: job is to read the THREAD_DONE_REG0 CSR to get receive thread done status
//				and to read ready bits and ready count
//
// Results are published to context 0 through absolute registers:
//   @thread_done_capture   thread-done bits, OR-accumulated here (context 0 clears them)
//   @rec_rdy_count         overwritten on every pass from RCV_RDY_CNT
//   @rec_rdy               overwritten on every pass from RCV_RDY_LO
//
// Names for the transfer registers that receive the two ready-bus CSR reads.
.operand_synonym $rec_rdy_count $xfer0
.operand_synonym $rec_rdy $xfer1

tdone_rdybit_reader#:
	immed[const0, 0]											; zero A operand for the +8 / +16 extractions below
tdone_rdybit_reader_loop#:													; 7 instr overhead
	csr[read, $thread_done_reg0, THREAD_DONE_REG0], ctx_swap	; thread done two bits per thread
	alu[$thread_done_reg0, --, B, $thread_done_reg0]			; write back to thread done to clear captured bits
// The OR below sits in the defer slot of the CSR write, so it executes before
// this context swaps out.
	csr[write, $thread_done_reg0, THREAD_DONE_REG0], ctx_swap, defer[1]
 	alu[@thread_done_capture, @thread_done_capture, OR, $thread_done_reg0]	; take thread done snapshot
	ctx_arb[voluntary]
// must read these two together
// The first read is issued without a context swap and the second follows
// immediately; the context swaps out only on the second.
	csr[read, $rec_rdy_count, RCV_RDY_CNT]
	csr[read, $rec_rdy, RCV_RDY_LO], ctx_swap
// NOTE(review): +8 / +16 are taken to add only the low 8 / 16 bits of the B
// operand; with const0 = 0 as the A operand this keeps just the low byte of
// the count and the low 16 ready bits -- confirm against the ALU op definitions.
	alu[@rec_rdy_count, const0, +8, $rec_rdy_count]
	br[tdone_rdybit_reader_loop#], defer[1]
	alu[@rec_rdy, const0, +16, $rec_rdy]						; defer slot: runs before the branch back to the loop top


