//------------------------------------------------------------------------------------
//                                                                     
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  1998-2000 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                      
//-----------------------------------------------------------------------------------
// tx_scheduler.uc
// transmit scheduler
//-----------------------------------------------------------------------------------
//
//
// system: SA1200
// subsystem: transmit microcode
// usage: reference design
// author: dfh 12/08/97
// revisions:
//		dfh		5/20/98		base level 2	
//		dfh		12/16/98	BL4 remove include config.h
//      mff     9/10/99     ifdef A1_CHIP - workaround optimize_mem hw bug
//
//-----------------------------------------------------------------------------------

// design:
//
// 1. The arbiter decides, for each port, which priority queue to use.
//		It is assumed a port can have n priorities (for example n=8).
//		The selection is placed in a 32x4 array shared as a global GPR with the Scheduler.
//
// 2. Scheduler assigns ports and T-FIFO Elements to TFill threads
//		the task assignment message consists of
//		+------+---------+----------+---------------+
//		|valid |  unused |  Element |    queue      |
//		| 31   |  30:12  |   11:8   |     7:0       |
//		+------+---------+----------+---------------+
//		valid			1 = assign is valid
//		Element			identifies the tfifo element
//		Queue			identifies the queue
//
// 3. Four TFill threads will be used to copy packet data in SDRAM to T-FIFO
//		TFill can calculate the packet location given port, queue and 
//		queue_descriptor_base
//
// 4. The Scheduler writes predetermined task assignment mailboxes for
//	assignments to the TFills. A 16 word block is used as a fifo for these assignments.
//
// 7. Each TFill thread reads the task assignment mailboxes to pick up a task assignment.
//	If the assignment valid bit is on, the thread clears it, increments the global
//	message id, and signals the next TFill thread to go.
//
// 8. TFill runs with the assignment, reading packet descriptor from sram and transferring 
//	packet data from SDRAM to T-FIFO.
// 
// 9. When TFill has updated the packet queue linked list and freed the used descriptor, 
//	it writes status to FBI, then loops back to do the next assignment



// sdram, sram, scratch shared addresses
#include "mem_map.h"

// standard macros
#include "stdmac.uc"
#include "mem.uc"			// memory allocation macros



//--------------------------------macros---------------------------------------

// Ele_GetBinding									(6-8 cycles)
//      Look up the port bound to a T-FIFO element.
//      Bindings are stored as one 4-bit port number per element:
//      elements 0-7 in bindings_7_0 and elements 8-15 in bindings_15_8
//		(up to 16 ports).
//  input:  ele_outptr	element number; bit 3 selects the register,
//						bits 2:0 select the nibble within it
//	output: binding		port bound to this element
//  uses:   shift as a scratch register; reads bindings_7_0 / bindings_15_8
//
#macro Ele_GetBinding[binding, ele_outptr]
	alu[--, 1, AND, ele_outptr, >>3]										; test bit 3: zero means element 0-7
	br=0[ele_get_7_0#], defer[1]
	alu_shf[shift, 0x1c, AND, ele_outptr, <<2]								; defer slot, needed on both paths: shift = (element & 7) x4
	alu[--, shift, B, 0]													; set up the count for the indirect shift that follows
	alu_shf[binding, 0xf, AND, bindings_15_8, >>indirect]					; elements 8-15: extract the nibble
	br[ele_get_binding_done#]
ele_get_7_0#:
	alu[--, shift, B, 0]													; set up the count for the indirect shift that follows
	alu_shf[binding, 0xf, AND, bindings_7_0, >>indirect]					; elements 0-7: extract the nibble
ele_get_binding_done#:
#endm



// Ele_SetBinding		
//		Bind a port to a T-FIFO element and start building the task assignment.
//		Up to 16 ports / 16 elements; one 4-bit port number is stored per element,
//		elements 0-7 in bindings_7_0 and elements 8-15 in bindings_15_8.
//  inputs:  ele_inptr		element to bind (only the low 4 bits are used)
//  outputs: current_port	loaded from the find_bset result, with bit 8 cleared
//			 this_assign	single_assign with the element number inserted at bit 8
//  uses:    offset (scratch), const_3C, single_assign
//
//  Implementation: a 16-entry jump table indexed by offset = element x4.
//  Every entry is exactly 4 instructions (clear nibble, br, OR in port, nop);
//  the trailing nop pads the entry so the x4 index lands on the next entry.
//  Do not add or remove instructions inside the table.
//
#macro Ele_SetBinding[current_port, this_assign, ele_inptr]							
	alu[offset, const_3C, AND, ele_inptr, <<2]							; adjust for array element size
    jump[offset, ele_set0binding#], defer[3]							; the next three instructions execute before the jump lands
	load_bset_result1[current_port]										; get next ready port
	alu_shf[current_port, current_port, AND~, 1, <<8]					; clear the valid bit
	alu[this_assign, single_assign, OR, offset, <<6]					; insert element in assignment
ele_set0binding#:
// ---- elements 0-7: nibbles of bindings_7_0 ----
	alu[bindings_7_0, bindings_7_0, AND~, 0xf]							; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu[bindings_7_0, bindings_7_0, OR, current_port]					; defer slot: store port for element 0
	nop																	; pad entry to 4 instructions
	alu_shf[bindings_7_0, bindings_7_0, AND~, 0xf, <<4]					; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_7_0, bindings_7_0, OR, current_port, <<4]					; 
	nop
	alu_shf[bindings_7_0, bindings_7_0, AND~, 0xf, <<8]					; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_7_0, bindings_7_0, OR, current_port, <<8]
	nop
	alu_shf[bindings_7_0, bindings_7_0, AND~, 0xf, <<12]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_7_0, bindings_7_0, OR, current_port, <<12]
	nop
	alu_shf[bindings_7_0, bindings_7_0, AND~, 0xf, <<16]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_7_0, bindings_7_0, OR, current_port, <<16]
	nop
	alu_shf[bindings_7_0, bindings_7_0, AND~, 0xf, <<20]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_7_0, bindings_7_0, OR, current_port, <<20]
	nop
	alu_shf[bindings_7_0, bindings_7_0, AND~, 0xf, <<24]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_7_0, bindings_7_0, OR, current_port, <<24]
	nop
	alu_shf[bindings_7_0, bindings_7_0, AND~, 0xf, <<28]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_7_0, bindings_7_0, OR, current_port, <<28]
	nop
// ---- elements 8-15: nibbles of bindings_15_8 ----
	alu[bindings_15_8, bindings_15_8, AND~, 0xf]						; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu[bindings_15_8, bindings_15_8, OR, current_port]
	nop
	alu_shf[bindings_15_8, bindings_15_8, AND~, 0xf, <<4]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_15_8, bindings_15_8, OR, current_port, <<4]
	nop
	alu_shf[bindings_15_8, bindings_15_8, AND~, 0xf, <<8]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_15_8, bindings_15_8, OR, current_port, <<8]
	nop
	alu_shf[bindings_15_8, bindings_15_8, AND~, 0xf, <<12]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_15_8, bindings_15_8, OR, current_port, <<12]
	nop
	alu_shf[bindings_15_8, bindings_15_8, AND~, 0xf, <<16]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_15_8, bindings_15_8, OR, current_port, <<16]
	nop
	alu_shf[bindings_15_8, bindings_15_8, AND~, 0xf, <<20]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_15_8, bindings_15_8, OR, current_port, <<20]
	nop
	alu_shf[bindings_15_8, bindings_15_8, AND~, 0xf, <<24]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_15_8, bindings_15_8, OR, current_port, <<24]
	nop
	alu_shf[bindings_15_8, bindings_15_8, AND~, 0xf, <<28]				; clear the nibble
    br[ele_set_binding_done#], defer[1]
    alu_shf[bindings_15_8, bindings_15_8, OR, current_port, <<28]
	nop
ele_set_binding_done#:
#endm



// Sched_SetQueue
// Insert the queue selected for current_port into the assignment and mark
// the port as occupying a T-FIFO element.
// This macro assumes up to 16 ports.
//  inputs:  current_port	port number (0-15), used as the jump-table index
//  in/out:  this_assign	ORed with the per-port queue select @queue_<port>
//			 ports_in_tfifo	bit <current_port> is set to block the port
//  uses:    temp (scratch)
//
// Implementation: a 16-entry jump table indexed by current_port x2.
// Every entry is exactly 2 instructions (br + its defer slot); keep it that way.
// The last entry needs no padding because it falls through to got_queue#.
//
#macro Sched_SetQueue[ports_in_tfifo, this_assign, current_port]
	alu_shf[temp, --, B, current_port, <<1]			; shift for 2 words per jump target
	jump[temp, queue_array#], defer[3]				; the next three instructions execute before the jump lands
	alu[temp, current_port, B-A, 1, <<5]						; setup left indirect
	alu[--, temp, B, 0]											; set up the count for the indirect shift that follows
	alu_shf[ports_in_tfifo, ports_in_tfifo, OR, 1, <<indirect]	; set bit to block port	
queue_array#:
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_0]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_1]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_2]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_3]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_4]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_5]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_6]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_7]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_8]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_9]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_10]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_11]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_12]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_13]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_14]	; insert queue in assignment
	br[got_queue#], defer[1]
	alu[this_assign, this_assign, OR, @queue_15]	; insert queue in assignment
got_queue#:
#endm


//  TFIFO_BindElement    
//						set port in binding array (Ele_SetBinding)
//						set bit for this port in ports_in_tfifo and insert the
//						port's queue select into this_assign (Sched_SetQueue)
// 						increment ele_inptr
//  outputs: current_port	port taken from the find_bset result
//			 this_assign	assignment word holding valid bit, element and queue select
//  in/out:  ele_inptr		incremented by 1; it is not masked here, Ele_SetBinding
//							masks it to 4 bits when it is next used
//  note:    ele_outptr is accepted as a parameter but is not used by this macro.
// prereq: verify that elements_busy_count < 16
//		   (this macro does not perform that check itself)
#macro TFIFO_BindElement[current_port, this_assign, ele_inptr, ele_outptr]
	Ele_SetBinding[current_port, this_assign, ele_inptr]		; set binding
	Sched_SetQueue[ports_in_tfifo, this_assign, current_port]	; block port, insert queue select
	alu[ele_inptr, 1, +4, ele_inptr]							; increment inptr
#endm


//  TFIFO_FreeElement	
//						release the element at the output pointer:
//						clear bit for the bound port in ports_in_tfifo
//						increment element output pointer
//  input:   binding	port number bound to the element being freed
//						(the caller obtains it with Ele_GetBinding)
//  globals: ports_in_tfifo and ele_outptr are modified; temp is used as scratch.
//			 None of these are macro parameters.
//  note:    ele_outptr is not masked here; the callers mask it with 0xf afterwards.
//
#macro TFIFO_FreeElement[binding]
	alu[temp, binding, B-A, 1, <<5]					; setup left indirect
	alu[--, temp, B, 0]												; set up the count for the indirect shift that follows
	alu_shf[ports_in_tfifo, ports_in_tfifo, AND~, 1, <<indirect]	; clear bit	
	alu[ele_outptr, 1, +4, ele_outptr]								; increment ptr
#endm



// TFIFO_TryFreeElement
//		Free at most one T-FIFO element per invocation, if the hardware output
//		pointer shows that the element at ele_outptr has been transmitted.
//  inputs:  latest_tfifo_outptr	hardware out pointer, already masked to 4 bits by the caller
//			 ele_outptr				software out pointer (next element to free)
//  effects: on a free, ports_in_tfifo and ele_outptr are updated by TFIFO_FreeElement;
//			 current_binding, shift, temp (and diff when A1_CHIP is defined) are used as scratch.
//  build options:
//		RECORD_OUTPTRS	log every change of the hardware out pointer to sram
//		A1_CHIP			reject implausible out pointer values (hardware bug workaround)
//
#macro TFIFO_TryFreeElement[latest_tfifo_outptr, ele_outptr]
#ifdef RECORD_OUTPTRS
// TEST CAPTURE OUTPTR
// NOTE(review): prev_tfifo_outptr and jou_addr are not initialized anywhere in the
// visible source (only jou_base is) - confirm before relying on this log.
	alu[--, prev_tfifo_outptr, -, latest_tfifo_outptr]
	br=0[end_capture#]								; unchanged since last capture, nothing to record
// if it has changed, record it
	move[$tfifo_outptr, latest_tfifo_outptr]
	sram[write, $tfifo_outptr, jou_addr, jou_base, 1]
	alu[jou_addr, 1, +16, jou_addr]					; advance the journal write position
	move[prev_tfifo_outptr, latest_tfifo_outptr]
end_capture#:
// end TEST CAPTURE OUTPTR
#endif

// HARDWARE bug workaround
// the A series fbi occasionally returns a bogus outptr
// diff = distance from ele_outptr forward to latest_tfifo_outptr, modulo 16.
// When the two pointers are equal the .else arm yields 16, so "no change"
// is rejected by the same test as a bogus value.
#ifdef A1_CHIP
	.if (ele_outptr < latest_tfifo_outptr)
		alu[diff, latest_tfifo_outptr, -, ele_outptr]
	.else
		alu[diff, latest_tfifo_outptr, +, 0x10]		; add 16 to account for wrap-around
		alu[diff, diff, -, ele_outptr]
	.endif
	alu[--, 6, -, diff]					; if diff > 6, outptr is bogus or not changed
	br<0[try_free_done#]				; therefore skip
// end HARDWARE bug workaround
#else
	alu[--, latest_tfifo_outptr, -, ele_outptr]
	br=0[try_free_done#] 								; cannot advance yet
#endif

// otherwise we can free this element
free_element#:
	Ele_GetBinding[current_binding, ele_outptr]			; 6-8 cycles
	TFIFO_FreeElement[current_binding]					; 4 cycles free current_element, increment ele_outptr
try_free_done#:
#endm

//------------------------------end macros-------------------------------------


// transfer register aliases
// $xfer0/$xfer1 receive the values pushed by the fbi (see the xmit_rdy_ctl setup in
// tx_scheduler); $xfer4 carries the task assignment written to scratch.
.operand_synonym $tx_rdy_copy $xfer1
.operand_synonym $tfifo_outptr $xfer0
.operand_synonym $task_assignment1 $xfer4
.operand_synonym $task_assignment2 $xfer5		; not referenced in the visible source

.xfer_order $xfer0 $xfer1 $xfer2 $xfer3 $xfer4

// startup
// every context enters here and is dispatched by context number
	br=ctx[0, tx_scheduler#]		; context 0: transmit scheduler
	br=ctx[1, tx_arb#]				; context 1: transmit arbiter

// in hardware pass 0 these nops are needed, instead of just ctx_arb[kill], to prevent a hang
kill_unused_contexts#:
	nop
	nop
	ctx_arb[kill]					; kill contexts 2-3





// on startup, branch here if this context is the transmit scheduler
//
// One-time initialization for the scheduler context:
//   - element in/out pointers, message id and constants
//   - per-port queue selects @queue_0..@queue_15
//   - fbi transmit ready control (autopush target)
//   - when RECEIVE16 is defined: receive ready control, buffer freelists,
//     and the go signal to the receive threads
//   - a dummy scratch write so the main loop's first ctx_arb[fbi] has a signal to wait on
// then enters tsched_loop#.
//
// NOTE(review): ports_in_tfifo, bindings_7_0 and bindings_15_8 are not written
// here or anywhere else before their first read-modify-write in the visible
// source. This presumably relies on the registers being zero at start - confirm.
//
tx_scheduler#:
//
// initialize free element pointer
    immed[ele_inptr, 0]								; next element to bind
    immed[ele_outptr, 0]							; next element to free
	immed[msg_id, 0]								; index of the next task assignment mailbox
	immed[const_3c, 0x3C]							; mask for element x4 (jump-table index in Ele_SetBinding)
	immed[task_msg_addr, XMIT_TASK_MSG_BASE]		; base address of the task assignment mailboxes
	immed32[single_assign, 0x80001000]				; assignment template: bit 31 (valid) and bit 12 set
	immed[@queue_0, 0]								; assign qselect = 0
	immed[@queue_1, 0]								; assign qselect = 0
	immed[@queue_2, 0]								; assign qselect = 0
	immed[@queue_3, 0]								; assign qselect = 0
	immed[@queue_4, 0]								; assign qselect = 0
	immed[@queue_5, 0]								; assign qselect = 0
	immed[@queue_6, 0]								; assign qselect = 0
	immed[@queue_7, 0]								; assign qselect = 0
	immed[@queue_8, 0]								; assign qselect = 0
	immed[@queue_9, 0]								; assign qselect = 0
	immed[@queue_10, 0]							; assign qselect = 0
	immed[@queue_11, 0]							; assign qselect = 0
	immed[@queue_12, 0]							; assign qselect = 0
	immed[@queue_13, 0]							; assign qselect = 0
	immed[@queue_14, 0]							; assign qselect = 0
	immed[@queue_15, 0]							; assign qselect = 0
#ifdef RECORD_OUTPTRS
// TEST CAPTURE OUTPTR
	immed32[jou_base, SRAM_PROTOCOL_BASE]		; this area is safe for route-only cases (non-bridge)
// end TEST CAPTURE OUTPTR
#endif


// setup transmit ready control to identify which thread fbi will auto_push ready bits to
// xmit_rdy_ctl 
// +--------+--------+-------+----+---------+-----+----------+
// |tfifo v |reserved|enab ap|when| pushcmd | sig |thread id |
// | 31:16  |  15:10 |   9   |  8 |  7:6    |  5  |   4:0    |
// +--------+--------+-------+----+---------+-----+----------+
// field			value
// enab ap		1	enable autopush
// when			0   after xmit_rdybits lo/hi have been assembled
// pushcmd		2	xmit_rdybits_lo<31:0> to $xfer1, xmit_ptrs to $xfer0
// sig			1	signal
// thread_id		20	tx_scheduler
//
// The fields above encode to 0x2b4. The value actually written, 0xab4, additionally
// sets bit 11, which lies in the range the table labels "reserved".

.operand_synonym ii temp					; ii is not referenced in the visible source
//	immed[$xfer2, 0x2b4]
	immed[$xfer2, 0xab4]		; test, enable xmit validate upon sdram t_fifo_wr completion
tsched_write_rdy_ctl#:
	csr [write, $xfer2, xmit_rdy_ctl], ctx_swap

#ifdef RECEIVE16
// setup receive ready control to identify which thread fbi will auto_push ready bits to
//
// +--------+----------+-------+----+---------+-----+----------+
// |1thread |push count|enab ap|when| pushcmd | sig |thread id |
// |  14    |  12:10   |   9   |  8 |  7:6    |  5  |   4:0    |
// +--------+----------+-------+----+---------+-----+----------+
// field		value
// 1thread		0	for fast port put the thread of receive request to receive control
// push count	3	delay timer for both receive and xmit
// enab ap		0	autopush not enabled
// when			0   after rec_rdycnt has incremented	
// pushcmd		0	push nothing
// sig			0	no signal
// thread_id	16	not receive thread, (pushing nothing anyway)
//
// The fields above encode to 0xc10.

setup_ctl#:
	immed[$xfer2, 0xc10]		
	csr [write, $xfer2, rcv_rdy_ctl], ctx_swap 


// call	#macro freelist_create[freelist_id, base_addr, stride, count]
//		initialize freelist 0 for BUFFER_COUNT 2KB_packet_descriptors
//		see also: mem_map.h, stdmac.uc, stdfunc.uc
//
#ifndef BUFFER_COUNT
	#define BUFFER_COUNT FREELIST_BTYPE_SRAM8_SDRAM2K_COUNT
#endif
// create freelists 0 and 1 for odd and even banks
// each list gets half of the buffers; list 1 starts right after list 0's
// descriptors (HALF_BUFFER_COUNT entries at a stride of 4)
#define_eval HALF_BUFFER_COUNT (BUFFER_COUNT / 2)
freelist_create[0, SRAM_BUFF_DESCRIPTOR_BASE, 4, HALF_BUFFER_COUNT]
#define_eval NEXT_BUFF_DESCRIPTOR_BASE (SRAM_BUFF_DESCRIPTOR_BASE + (HALF_BUFFER_COUNT * 4))
freelist_create[1, NEXT_BUFF_DESCRIPTOR_BASE, 4, HALF_BUFFER_COUNT]

freelist_created#:

// signal receive threads to go
// (presumably the values 0, 4, 8, 12 are the target thread ids - confirm)
fast_wr[0, inter_thd_sig]
fast_wr[4, inter_thd_sig]
fast_wr[8, inter_thd_sig]
fast_wr[12, inter_thd_sig]

#endif

// This is a dummy write. Inside the loop, the code always does ctx_arb[fbi] on the
// previous write to scratch, so one write must be outstanding before the loop starts.
// The word written is 0, i.e. the valid bit is clear.
	immed[$task_assignment1, 0]
	scratch[write, $task_assignment1, task_msg_addr, msg_id, 1], sig_done
	br[tsched_loop#]

// initializations done
// come here if no ports ready to process
// Idle path: nothing can be assigned, so only try to free one element, yield to
// the other contexts twice, then fall through into tsched_loop#.
tsched_skip#:
	br_inp_state[push_protect, tsched_skip#], defer[1]			; spin while push_protect is asserted, so the pushed values are not read mid-update
	alu[latest_tfifo_outptr, 0xF, AND, $tfifo_outptr]				; mask so as not to get gig lo water indicator
	ctx_arb[voluntary]											; let tx arb run
	TFIFO_TryFreeElement[latest_tfifo_outptr, ele_outptr]		; 10-12 cycles try to free elements
	ctx_arb[voluntary], defer[1]								; let tx arb run
	alu[ele_outptr, ele_outptr, AND, 0xf]						; mask off the carry


//---------------------begin the main loop --------------------------
// Per iteration:
//   1. sample the autopushed ready bits and T-FIFO out pointer
//   2. candidates = ready ports, minus ports already in the T-FIFO,
//      restricted to ports with new packets (published by tx_arb)
//   3. if there are none, go to tsched_skip#
//   4. try to free one element, bind the first candidate port to the element
//      at ele_inptr, and write the task assignment to the next mailbox
//
// NOTE(review): TFIFO_BindElement lists "elements_busy_count < 16" as a
// prerequisite, but no such check is visible in this loop - confirm that the
// ready-bit / ports_in_tfifo masking is sufficient to guarantee a free element.

tsched_loop#:
autopush_tx_rdy1#:
	br_inp_state[push_protect, autopush_tx_rdy1#], defer[2]			; spin while push_protect is asserted; both defer slots re-execute on each pass
	alu[ports_rdy_to_proc, $tx_rdy_copy, AND~, ports_in_tfifo]			; ready ports that are not already in the tfifo
	alu[latest_tfifo_outptr, 0xF, AND, $tfifo_outptr]				; mask so as not to get gig lo water indicator

	ctx_arb[voluntary]													; let tx arb run
// determine which port to work on
	alu[ports_rdy_to_proc, @ports_with_new_packets, AND, ports_rdy_to_proc]
	br=0[tsched_skip#], defer[1]									; if ports blocked, do major loop
	find_bset[ports_rdy_to_proc], clr_results							; find first bit set in 15:0
	TFIFO_TryFreeElement[latest_tfifo_outptr, ele_outptr]


// assign a task to tfill
//
//		The tx task assignment mailbox
//+------+---------+----------+---------------+
//|valid	| unused  |  Element |    queue      |
//| 31   |  30:12  |   11:8   |     7:0       |
//+------+---------+----------+---------------+
//		valid			1 = assign is valid
//		Element			identifies the tfifo element
//		Queue			identifies the queue


tsched_assign1#:
	TFIFO_BindElement[current_port, this_assign, ele_inptr, ele_outptr]							; bind port to element, inc inptr

tsched_write_assign1#:
	ctx_arb[fbi], defer[1]										; wait for the previous scratch write (sig_done) to complete
	alu[ele_outptr, 0xf, AND, ele_outptr]						; now is a good time to mask off the carry bit
	alu_shf[$task_assignment1, this_assign, OR, current_port, <<3]	; insert portx8
	scratch[write, $task_assignment1, task_msg_addr, msg_id, 1], sig_done
	br[tsched_loop#], defer[1]
// NOTE(review): the comment below says 256 addresses, while the design notes at the top
// of the file describe a 16 word assignment block and the add used here is +4 - confirm
// which wrap length is intended.
	alu[msg_id, 1, +4, msg_id]									; 256 addresses then wrap






// on startup, branch here if this context is the transmit arbiter
//
// The arbiter polls the ports-with-new-packets vector in scratch memory and
// publishes it to the scheduler through the absolute register
// @ports_with_new_packets. It loops forever.

tx_arb#:

// get ports with new packets and ports with empty queues
.operand_synonym $pwnp $xfer0		; ports with new packets	(set by receive)

	immed[pwp_addr, XMIT_PWP_VECTOR]		; address of the pwnp vector; one word is read per poll

tx_arb_loop#:
// for priority select, this would decide which one of 8 for each port
// note: this version uses 1 of the 8 queues per port
// for this version, there is 1 to 1 correspondence between pwnp and pweq
//
// with multiple queues per port it would update 8 bit vector for the port
// and then if all are 0, clear the bit at PWNP
tx_arb_got_signal#:
	scratch[read, $pwnp, pwp_addr, 0, 1], ctx_swap			; swap out until the read completes
	alu[@ports_with_new_packets, --, B, $pwnp]				; local pwnp to tx_scheduler	
	br=0[tx_arb_got_signal#]								; no port has new packets: poll again

// note: this version uses 1 of the 8 queues per port, so it always sets qselect 0
// Only @queue_0 and @queue_8 are rewritten here; the other @queue_N registers
// keep the 0 written by the tx_scheduler initialization.

	immed[@queue_0, 0]								; assign qselect = 0
	immed[@queue_8, 0]								; assign qselect = 0

	br[tx_arb_loop#]
