//------------------------------------------------------------------------------------
//                                                                      
//                   I N T E L   P R O P R I E T A R Y                   
//                                                                       
//      COPYRIGHT (c)  1998-2000 BY  INTEL  CORPORATION.  ALL RIGHTS          
//      RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//      BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//      RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//      LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//      MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//      THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                       
//                         INTEL  CORPORATION                            
//                                                                      
//                      2200 MISSION COLLEGE BLVD                        
//                                                                       
//                SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                       
//------------------------------------------------------------------------------------
// rec_scheduler_f.uc
// receive scheduler for 8 100M ports (or up to 16 slower ports) plus 1 Gig (fast) port
//
//
// system: SA1200
// subsystem: receive microcode
// usage: example
// author: dfh 4/27/98
// revisions:
//		dfh		7/4/8		FAST PORT version
//		dfh		7/23/8		support all 16 slow ports, bookkeep, fast port mpacket sequencing
//		dfh		12/16/98	BL4 remove include config.h
//		dfh		1/20/98		slow ports threads now save and restore, remove bindings, no bookkeeping
//      mff     10/6/99     Bug fix.  Remove defer(1) from csr instruction  "csr[read, $rec_rdy, RCV_RDY_LO], ctx_swap, defer(1)"  
//
// ---------------------------SA1200 microcode--------------------------

// FBOX microcode assignment
//
//    1. the receive threads are threads 0-11
//    2. receive scheduler is thread 16


// general design flow:

// context 0 (thread 16)
//
// 1. initialize memory freelists and fast port mutex location on behalf of receive threads
// 2. initialize registers used in the main loop
// 3. main loop: find_next_threads
//
//     
//      1) find first bit set in thread_done_copy 15:0, representing receive threads 0-7
//       
//      2) find first bit set in thread_done_copy 23:16, representing receive threads 8-11
//
//		3) choose a port based on ready flags, attempting to alternate between slow and fast ports
//
//		4) assemble and issue receive request for each receive thread done


// context 1 (thread 17)
//
// polls (reads) the following registers:
//	rec ready count		(RCV_RDY_CNT)
//	rec_ready_lo		(RCV_RDY_LO)
//	thread done			(THREAD_DONE_REG0)

// key registers:
//
// thread_done_copy			copy of thd_done_reg0 CSR.
// current_thread			current thread index
// rec_rdy_true				calc from rec_rdy AND !prev_port_mask
// current_port				current port index
// other_fields					receive request while being assembled



//---------------------------------definitions-------------------------------
#include "mem_map.h"
#include "stdmac.uc"
#include "mem.uc"			// memory allocation macros
#include "refdes_macros.uc"

// FAST_PORT_ENABLED is defined unconditionally. It is not tested in the visible part of
// this file; presumably one of the included files consumes it -- TODO confirm.
#define FAST_PORT_ENABLED
// FAST_PORT1 is the fast (gigabit) port number. It defaults to 16 unless the build has
// already defined it. Its low bit (odd/even) selects the sequence field, the
// INCR_ENQ_NUMx register and the ready bit (FAST_PORT1_RDYBIT) used further down.
#ifndef FAST_PORT1
#define FAST_PORT1 16
#endif



//-----------------------------------macros----------------------------------



// RxSched_RequestFastPort
//		schedule the receive request for a gigabit port				; 14 insns
//		we believe it is ready, and can do a speculative assign (without blocking the port)
//
//		parameters:
//			const_portnum		port number, passed unchanged to RxSched_SendExtendedReceiveRequest
//			current_threadx2	find_bset result for the thread to schedule; both callers in this
//								file first clear the valid bit and low order bit (AND~ const_0x101),
//								leaving the bit position of the thread's 2-bit field in thread_done_copy
//
//		registers written (these are not macro parameters, so they are shared with the caller):
//			current_thread			current_threadx2 >> 1
//			current_inv_threadx2	scratch value used to set up the indirect shift
//			thread_done_copy		the thread's 2-bit field is cleared
//			other_fields			receive request being assembled (seq field + sequence number)
//			seq_num					incremented and wrapped to 4 bits
//			last_was_slow			set to 0, recording that the last request was for the fast port
//
#macro RxSched_RequestFastPort[const_portnum, current_threadx2]

	alu_shf[current_thread, --, B, current_threadx2, >>1]				; current threadx2 / 2
update_tdone_copy#:
// update thread_done_copy
// The next three instructions must stay together and in this order: the first computes
// (1 << 5) - current_threadx2, the second presents that value as its A operand, and the
// third uses <<indirect.  NOTE(review): this relies on the indirect shift amount being
// taken from the previous instruction's A operand -- confirm against the ISA reference.
	alu_shf[current_inv_threadx2, current_threadx2, B-A, 1, <<5]	; setup left indirect shift: 32 - bit position
	alu[--, current_inv_threadx2, B, 0]								; clear the wait encode 2 bits of 
	alu[thread_done_copy, thread_done_copy, AND~, 0x3, <<indirect]	; the thread_done_copy
// insert sequence number for fast mpackets
// seq field at bit 16 is 2 for an odd FAST_PORT1 and 1 for an even FAST_PORT1
#if (FAST_PORT1 & 1)									; if fast port num is odd
	alu_shf[other_fields, --, B, 2, <<16]				; insert seq_field (fast port) into this receive request
#else
	alu_shf[other_fields, --, B, 1, <<16]				; insert seq_field (fast port) into this receive request
#endif
	alu[seq_num, seq_num, +, 1]							; insert mpacket sequence number
	alu[seq_num, seq_num, AND, 0xf]						; wrap sequence number to 4 bits
	alu[other_fields, other_fields, OR, seq_num, <<22]	; sequence number goes in starting at bit 22
	immed[last_was_slow, 0]								; last request was fast: next decision will pick a slow port
// send request
req_send#:
	RxSched_SendExtendedReceiveRequest[const_portnum, current_thread, other_fields]		; 8 insns
#endm




//----------------------------------startup----------------------------------
	// startup dispatch: context 0 branches to the receive scheduler and context 1 to the
	// thread-done / ready-bit reader; any other context falls through and is killed below
	br=ctx[0, rec_scheduler#]					; receive scheduler
	br=ctx[1, tdone_rdybit_reader#]				; context to read thread done and rdy bits


// in hardware pass 0 these nops are needed instead of just ctx_arb[kill]to prevent a hang
kill_unused_contexts#:
	nop
	nop
	nop
	ctx_arb[kill]								; kill contexts 2-3


// $xfer0..$xfer2 are declared in this order; context 1 aliases them as
// $rec_rdy_count, $rec_rdy and $thread_done_reg0 via .operand_synonym further down
.xfer_order $xfer0 $xfer1 $xfer2


// context 0 entry point: one-time initialization, then the scheduler loop
rec_scheduler#:
.local temp
	//initialize pwp to 0
	immed[temp, XMIT_PWP_VECTOR]
	alu[$xfer0, --, b, 0]						; $xfer0 = 0, the value written by both scratch writes
	scratch[write, $xfer0, 0, temp, 1], ctx_swap
	//initialize gig_ele_count to 0
	immed[temp, XMIT_FPORT1_ELE_COUNT]
	scratch[write, $xfer0, 0, temp, 1], ctx_swap	; $xfer0 is not reloaded: it still holds 0 from above
.endlocal

// setup receive ready control to identify which thread fbi will auto_push ready bits to
//
// +--------+----------+-------+----+---------+-----+----------+
// |1thread |push count|enab ap|when| pushcmd | sig |thread id |
// |  14    |  12:10   |   9   |  8 |  7:6    |  5  |   4:0    |
// +--------+----------+-------+----+---------+-----+----------+
// field			value
// 1thread		for fast port put the thread of receive request to receive control(explicit thread mode)
// push count	delay timer for both receive and xmit
// enab ap		enable autopush
// when			after rec_rdycnt has incremented	
// pushcmd		rec_rdybits_lo<31:0> to $xfer1, rec_rdycnt to $xfer0
// sig			signal
// thread_id	rec_scheduler
//
// The value written below, 0x4000, sets only bit 14 (1thread).  All other fields,
// including enab ap (bit 9), are zero, so autopush is left disabled and context 1
// polls the ready registers instead.

setup_ctl#:
	immed[$xfer2, 0x4000]					; no autopush, explicit thread mode			
	csr [write, $xfer2, rcv_rdy_ctl], ctx_swap 


// one-time fast write to the enqueue sequence number register for the fast port:
// INCR_ENQ_NUM2 when FAST_PORT1 is odd, INCR_ENQ_NUM1 when it is even.
// NOTE(review): the meaning of the immediate value 3 is not visible here -- confirm
// against the fast_wr / INCR_ENQ_NUMx documentation.
#if (FAST_PORT1 & 1)						; if fast port num is odd
		fast_wr[3, INCR_ENQ_NUM2]								; increment fast port 1 seq number
#else
		fast_wr[3, INCR_ENQ_NUM1]								; increment fast port 1 seq number
#endif

// FAST_PORT1_RDYBIT is 24 for an even FAST_PORT1 and 25 for an odd one.  It is the bit
// position used to build last_was_slow, which the scheduler loop ANDs with
// @rec_rdy_count_reg to test whether the fast port is ready.
#define_eval FAST_PORT1_RDYBIT ((FAST_PORT1&1) + 24)

// call	#macro freelist_create[freelist_id, base_addr, stride, count]
//		initialize freelist 0 for BUFFER_COUNT 2KB_packet_descriptors
//		see also: mem_map.h, stdmac.uc, stdfunc.uc
//
// build options:
//		XMIT_INIT		no freelist is created here; this context instead waits for an
//						inter-thread signal (per the comment below, from xmit init)
//		BUFFER_COUNT	number of descriptors; defaults to FREELIST_BTYPE_SRAM8_SDRAM2K_COUNT
//		ALT_BANKS		split the descriptors into two freelists (0 and 1) of
//						HALF_BUFFER_COUNT entries each, the second starting right after the first
//
#ifdef XMIT_INIT
	ctx_arb[inter_thread]					; wait for signal from xmit init
#else	// not XMIT_INIT
#ifndef BUFFER_COUNT
	#define BUFFER_COUNT FREELIST_BTYPE_SRAM8_SDRAM2K_COUNT
#endif

#ifdef ALT_BANKS
// create freelists 0 and 1 for odd and even banks
// NOTE(review): if BUFFER_COUNT is odd and the preprocessor divides as integers, the
// last descriptor ends up on neither list -- confirm BUFFER_COUNT is always even.
#define_eval HALF_BUFFER_COUNT (BUFFER_COUNT / 2)
freelist_create[0, SRAM_BUFF_DESCRIPTOR_BASE, 4, HALF_BUFFER_COUNT]
// second list starts HALF_BUFFER_COUNT * 4 (count * stride) past the first list's base
#define_eval NEXT_BUFF_DESCRIPTOR_BASE (SRAM_BUFF_DESCRIPTOR_BASE + (HALF_BUFFER_COUNT * 4))
freelist_create[1, NEXT_BUFF_DESCRIPTOR_BASE, 4, HALF_BUFFER_COUNT]

#else	// not ALT_BANKS
freelist_create[0, SRAM_BUFF_DESCRIPTOR_BASE, 4, BUFFER_COUNT]
#endif	// not ALT_BANKS
#endif	// not XMIT_INIT

freelist_created#:
// set the autopush synch rate for 8_1f port version
// writes 90 (when SYNCH90 is defined) or 100 to rdybus_synch_count_default
// NOTE(review): $xfer7 is not listed in the .xfer_order declaration visible in this
// file ($xfer0 $xfer1 $xfer2) -- confirm its allocation does not conflict.
#ifdef SYNCH90
	immed[$xfer7, 90]
#else
	immed[$xfer7, 100]
#endif
	csr[write, $xfer7, rdybus_synch_count_default], ctx_swap


// signal receive threads to go
// inter_thd_sig is written with the values 0, 4 and 8; presumably these are thread ids
// selecting the first context of each receive microengine (receive threads are 0-11
// per the file header) -- TODO confirm
fast_wr[0, inter_thd_sig]
fast_wr[4, inter_thd_sig]
fast_wr[8, inter_thd_sig]

// program fbi bus mode and rdybus sequencer program for transmit/receive autopush and rdy bit polling
// note: Core can do this by writing to the FBI CSRs
// prerequisite: define constants as described in 
//		Fbus_SetupRdyProg,  refdes_macros.uc
//
// only done from microcode when REAL_RDYBUS_PROG is defined
#ifdef REAL_RDYBUS_PROG
setup_rdyprg#:
	Fbus_SetupRdyProg
#endif

// initialize registers used by the scheduler loop
// NOTE(review): thread_done_copy, thread_done_skips, last_was_slow, seq_num and
// @thread_done_capture are read by the loop and the fast port macro but are not
// initialized anywhere in the visible code -- confirm they are cleared elsewhere
// or guaranteed zero at start.
	immed[fbi_req_outstanding, 0]								; initialize no fbi receive request in progress

	immed[const_0x101, 0x101]									; mask for find_bset result: valid bit (0x100) and low order bit (0x1)
	immed[port_mask_fairness, 0xffff]							; not referenced in the visible code; presumably used by the slow port macros

// context 0: job is to select thread/port  and write receive requests
//
// rx_sched_first#: initial wait.  Yield to the other contexts until context 1 has
// captured at least one thread-done bit in @thread_done_capture.
rx_sched_first#:
	ctx_arb[voluntary]											; allow other contexts to run
	alu[--, --, B, @thread_done_capture]						; test only: sets condition codes, no destination
	br=0[rx_sched_first#]

// rx_sched_major#: outer loop.  Merge newly captured done bits and previously skipped
// threads into thread_done_copy, clear both sources, and fall into the scheduler loop
// when at least one bit is set.
// The br=0 below tests the result of the OR with thread_done_skips.  With defer[2], the
// two instructions that follow it execute before the branch takes effect: the immed that
// clears thread_done_skips and the first find_bset at rx_find_next_thread#.
rx_sched_major#:
	ctx_arb[voluntary]
	alu[thread_done_copy, thread_done_copy, OR, @thread_done_capture]
	immed[@thread_done_capture, 0]									; clear captured bits
	alu[thread_done_copy, thread_done_copy, OR, thread_done_skips]	; recover skipped threads due to port not ready
	br=0[rx_sched_major#], defer[2]									; if no threads then don't go anywhere.  wait for one.
	immed[thread_done_skips, 0]										; clear skip bits

//--------------------------rec scheduler loop -------------------------------
//
// Each pass issues two find_bset operations on thread_done_copy: one on the value as is
// (result 1 -> current_threadx2a) and one on the value shifted right by 16
// (result 2 -> current_threadx2b).  Each half that reported a set bit is then serviced.
//
// For every serviced thread the fast port is chosen when
// (last_was_slow AND @rec_rdy_count_reg) is > 0; otherwise a slow port is requested.
// last_was_slow is 0 after a fast port request (set inside RxSched_RequestFastPort) and
// 1 << FAST_PORT1_RDYBIT after a slow port request, so the test passes only when the
// previous request was slow and the fast port ready bit is set.
//
// Statement order matters throughout: several instructions sit in branch defer slots.

rx_find_next_thread#:
	find_bset[thread_done_copy], clr_results					; find first bit set in 15:0
rx_find_next_thread2#:
	find_bset[thread_done_copy, >>16]							; find first bit set in 31:16
	RxSched_UpdatePortMask[port_mask_prev, port_mask_current]

rx_service_thread_a#:
	load_bset_result1[current_threadx2a]
	br!=0[schedule_away#], defer[1]								; result 1 non-zero: service the low half first
	load_bset_result2[current_threadx2b]						; defer slot: loaded on both paths
	br!=0[schedule_b_away#], guess_branch						; low half empty, result 2 non-zero: service the high half
	br[rx_sched_major#]											; nothing found in either half: back to the outer loop
schedule_away#:
	alu_shf[current_threadx2a, current_threadx2a, AND~, const_0x101]	; clear the valid bit and low order bit
// slow or fast port decision
	alu_shf[--, last_was_slow, AND, @rec_rdy_count_reg]							; fast port's turn and fast port is ready
	br>0[rx_do_fast_a#]													; if last was slow, fast rec ready, not slow continue
// do slow port
	RxSched_RequestSlowPort[current_threadx2a]							; ~27 insns, schedule receive request for slow thread
	br[rx_service_thread_b#], defer[1]
	alu_shf[last_was_slow, --, B, 1, <<FAST_PORT1_RDYBIT]				; defer slot: record that the last request was slow
rx_do_fast_a#:		
	RxSched_RequestFastPort[FAST_PORT1, current_threadx2a]				; ~14 insns, schedule receive request for fast thread
	// falls through to rx_service_thread_b#


rx_service_thread_b#:
	alu[--, --, B, current_threadx2b]									; test 23:16
	br=0[rx_find_next_thread#], defer[1]								; no thread done in 23:16
// the AND~ below is the defer slot of the br=0 above, so it also executes when the
// branch is taken
schedule_b_away#:
	alu_shf[current_threadx2b, current_threadx2b, AND~, const_0x101]	; clear the valid bit and low order bit
// slow or fast port decision
	alu_shf[--, last_was_slow, AND, @rec_rdy_count_reg]							; fast port's turn and fast port is ready
	br>0[rx_do_fast_b#]													; if last was slow, fast rec ready, not slow continue
// do slow port
	RxSched_UpdatePortMask[port_mask_prev, port_mask_current]
	RxSched_RequestSlowPort[current_threadx2b]							; ~27 insns, schedule receive request for slow thread
	find_bset[thread_done_copy], clr_results							; restart the search on the low half before re-entering at rx_find_next_thread2#
	br[rx_find_next_thread2#], defer[1]
	alu_shf[last_was_slow, --, B, 1, <<FAST_PORT1_RDYBIT]				; defer slot: record that the last request was slow
rx_do_fast_b#:		
	RxSched_RequestFastPort[FAST_PORT1, current_threadx2b]						; ~14 insns, schedule receive request for fast thread
	br[rx_find_next_thread2#], defer[1]									; iterate
	find_bset[thread_done_copy], clr_results							; find first bit set in 15:0

//---------------------end minor loop --------------------------



//---------------------context 1 --------------------------
// context 1: job is to read the THREAD_DONE_REG0 register to get receive thread done status
//				and to read ready bits and ready count
//
// results are published to context 0 through absolute registers:
//		@thread_done_capture	done bits accumulated (ORed in) since context 0 last cleared it
//		@rec_rdy_count_reg		RCV_RDY_CNT value as read
//		@rec_rdy_count			low 8 bits of RCV_RDY_CNT (via the +8 add with const0 = 0)
//		@rec_rdy				low 16 bits of RCV_RDY_LO when FAST_PORT1 is 16, else low 8 bits
// NOTE(review): the +8 / +16 descriptions assume those ALU ops add only the low 8 / 16
// bits of the B operand -- confirm against the ISA reference.
//
.operand_synonym $rec_rdy_count $xfer0
.operand_synonym $rec_rdy $xfer1
.operand_synonym $thread_done_reg0 $xfer2

tdone_rdybit_reader#:
	immed[const0, 0]
tdone_rdybit_reader_loop#:													; 7 instr overhead
	csr[read, $thread_done_reg0, THREAD_DONE_REG0], ctx_swap	; thread done two bits per thread
	alu[$thread_done_reg0, --, B, $thread_done_reg0]			; write back to thread done to clear captured bits
	csr[write, $thread_done_reg0, THREAD_DONE_REG0], ctx_swap, defer[1]
 	alu[@thread_done_capture, @thread_done_capture, OR, $thread_done_reg0]	; take thread done snapshot (defer slot of the csr write)
// must read these two together
// the first read is issued without a context swap; only the second one swaps out
	csr[read, $rec_rdy_count, RCV_RDY_CNT]
	csr[read, $rec_rdy, RCV_RDY_LO], ctx_swap
	alu[@rec_rdy_count, const0, +8, $rec_rdy_count]
	alu_shf[@rec_rdy_count_reg, --, B, $rec_rdy_count]
	br[tdone_rdybit_reader_loop#], defer[1]
// whichever alu is selected below fills the defer slot of the branch above
// NOTE(review): the condition uses a single '=' -- confirm the assembler preprocessor
// treats it as an equality test.
#if (FAST_PORT1 = 16)
	alu[@rec_rdy, const0, +16, $rec_rdy]
#else
	alu[@rec_rdy, const0, +8, $rec_rdy]
#endif

