/////////////////////////////////////////////////////////////////////////////////////////
//                                                                      
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  2001 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119 
//
/////////////////////////////////////////////////////////////////////////////////////////
// 		
//		Change History
// 		--------------
//
// Date			Description											Whom
// ---------------------------------------------------------------------------------
//
// 7/14/03    	Round Robin Port Scheduler for Egress		 		urn             
//                                                                  
/**********************************************************************************/


#ifndef __PORT_RR_SCHEDULER_UC__
#define __PORT_RR_SCHEDULER_UC__

/**********************************************************************************/

#include "port_rr_scheduler.h"
#include "stdmac.uc"
#include "xbuf.uc"
#include "dl_buf.uc"

/**********************************************************************************/
		
// Transfer registers used for reflect write.
//
// Sixteen registers, $$txd_p0 .. $$txd_p15, declared "visible global" and
// listed in ascending order in the .xfer_order directive below.  The scheduler
// zeroes them at init time and later reads them through *$$index as the
// per-port "packets transmitted" counts (see port_rr_schedule).
// NOTE(review): the block that performs the reflect writes is not part of
// this file - confirm the writer and the register-to-port mapping there.

.reg visible global 			$$txd_p0 $$txd_p1 $$txd_p2 $$txd_p3 \
								$$txd_p4 $$txd_p5 $$txd_p6 $$txd_p7 \
								$$txd_p8 $$txd_p9 $$txd_p10 $$txd_p11 \
								$$txd_p12 $$txd_p13 $$txd_p14 $$txd_p15

.xfer_order						$$txd_p0 $$txd_p1 $$txd_p2 $$txd_p3 \
								$$txd_p4 $$txd_p5 $$txd_p6 $$txd_p7 \
								$$txd_p8 $$txd_p9 $$txd_p10 $$txd_p11 \
								$$txd_p12 $$txd_p13 $$txd_p14 $$txd_p15


//	Maximum number of buffers that can be enqueued for one port at a given time.
//	Beyond this threshold packets are dropped for this port (tail drop).
//	Computed as the total buffer count (BUF_SDRAM_SIZE/BUFFER_SIZE) split
//	evenly across NUMBER_OF_PORTS.

#define	MAX_ENQ_PER_PORT	((BUF_SDRAM_SIZE/BUFFER_SIZE)/NUMBER_OF_PORTS)

// Port 0 is an invalid port for the QM.  The scheduler therefore adds 1 to
// every real port number before sending it, and uses 0 to mean
// "no enqueue" / "no dequeue" in a message.

#define INVALID_PORT_NUMBER 	0

/**********************************************************************************/

#macro port_rr_scheduler_init()

	// Only context 0 runs the scheduler.  Every other context kills itself
	// here; context 0 performs the one-time initialization.

	.if (ctx() != 0)
		ctx_arb[kill]
	.else
		thread_0_init()
	.endif

#endm

/**********************************************************************************/

// thread_0_init
//
// One-time initialization executed by context 0:
//   1. programs the NN-ring empty threshold in CTX_ENABLES and resets the
//      NN get/put pointers,
//   2. zeroes the packets-transmitted transfer registers,
//   3. zeroes the per-port counters in local memory,
//   4. sleeps until system_init_sig is delivered.
// Takes no arguments and returns nothing.

#macro thread_0_init()
.begin

	.reg 	ctx_enable_data 
	.reg	nn_ring_empty_val

	// Initialize the NN ring 

	// Read the current CTX_ENABLES value.  The immed must directly follow
	// the local_csr_rd: it is the instruction that receives the CSR data.

	local_csr_rd[ctx_enables]
	immed[ctx_enable_data, 0]

	// Bits [19:18] control the threshold at which NN_Empty is asserted.
	// Set [19:18] to 1:0 to specify that the message on NN-ring is 3 longwords

	move(nn_ring_empty_val, 0x80000)

	// Clear the whole two-bit field first so that a bit 18 already set in
	// the value read back cannot turn the intended 1:0 into 1:1.

	alu_shf[ctx_enable_data, ctx_enable_data, AND~, 0x3, <<18]
	alu[ctx_enable_data, ctx_enable_data, OR, nn_ring_empty_val]
	local_csr_wr[ctx_enables, ctx_enable_data]

	// Start with both NN ring pointers at entry 0.

	local_csr_wr[nn_get, 0]
	local_csr_wr[nn_put, 0]

	// Initialize the packets transmitted registers and port structures in LM 

	init_packets_transmitted_xfer_regs()
	init_per_port_data_structs_in_lm()


	// Thread 0 waits for the signal that all blocks wait on; it indicates
	// that system initialization is done.
	ctx_arb[system_init_sig]

.end
#endm

/**********************************************************************************/

#macro init_per_port_data_structs_in_lm()

.begin

	.reg port_idx
	.reg lm_offset

	// Visit every port structure in local memory and zero both of its
	// packet counters.

	immed[port_idx, 0]
	.while (port_idx < NUMBER_OF_PORTS)

		// Local-memory offset of this port's structure:
		// port index shifted left by PORT_STRUCTURE_SIZE.

		alu_shf[lm_offset, --, B, port_idx, <<PORT_STRUCTURE_SIZE]
		local_csr_wr[active_lm_addr_0, lm_offset]

		// active_lm_addr_0 cannot be used for 3 cycles after the write.
		// Fill the gap with the loop increment and two nops.

		alu[port_idx, port_idx, +, 1]
		nop
		nop

		alu[lm0_port_pkts_scheduled, --, B, 0]
		alu[lm0_port_pkts_enqueued, --, B, 0]

	.endw

.end
#endm

/************************************************************************************/

// init_packets_transmitted_xfer_regs
//
// Zeroes the packets-transmitted transfer registers.  The macro fills a
// temporary 16-register write buffer with zeros, writes it to the SRAM
// location SRAM_ZERO_BLOCK eight longwords at a time, and reads each
// eight-longword group back into the $$txd_p registers.
// Takes no arguments; waits on sig_init_done after every SRAM operation.

#macro init_packets_transmitted_xfer_regs()
.begin

	.reg 	loop_cnt 
	.reg	temp
	.sig 	sig_init_done

	 xbuf_alloc($sr_write, 16, write)
	
	// The registers are written through *$index++ below rather than by
	// name, so they are listed in a .set directive here.
	// NOTE(review): presumably this keeps the assembler from flagging them
	// as used before being set - confirm against the assembler manual.

	.set $sr_write[0] $sr_write[1] $sr_write[2] $sr_write[3] \
		 $sr_write[4] $sr_write[5] $sr_write[6] $sr_write[7] \
		 $sr_write[8] $sr_write[9] $sr_write[10] $sr_write[11] \
		 $sr_write[12] $sr_write[13] $sr_write[14] $sr_write[15]

	// Point T_INDEX at the first write register.  The nop and the two
	// setup instructions that follow separate this write from the first
	// *$index++ access.

	local_csr_wr[T_INDEX, &$sr_write[0]]
	nop
		
	// temp holds the SRAM address used as scratch space for the zero block

	move(temp, SRAM_ZERO_BLOCK)

	// init the array of transfer registers that keep track of the packets 
	// transmitted per port
		
	immed[loop_cnt, 0]
	
	// Write zero into each of the 16 write transfer registers in turn.

	.while (loop_cnt < 16)
		immed[*$index++, 0]
		alu[loop_cnt, loop_cnt, +, 1]
	.endw

	// First half: write 8 zero longwords to SRAM, then read them back into
	// the transfer registers starting at $$txd_p[0].
	// NOTE(review): the registers are declared in this file as $$txd_p0 ..
	// $$txd_p15, while $$txd_p[0] / $$txd_p[8] are used here - confirm how
	// the indexed names are resolved.

	sram[write, $sr_write[0], temp, 0, 8], sig_done[sig_init_done]
	ctx_arb[sig_init_done]
	sram[read, $$txd_p[0], temp, 0, 8], sig_done[sig_init_done]
	ctx_arb[sig_init_done]

	// Second half: same SRAM location, registers 8..15.

	sram[write, $sr_write[8], temp, 0, 8], sig_done[sig_init_done]
	ctx_arb[sig_init_done]		
	sram[read, $$txd_p[8], temp, 0, 8], sig_done[sig_init_done]
	ctx_arb[sig_init_done]

	xbuf_free($sr_write)

.end
#endm


/**********************************************************************************/

// port_rr_schedule
//
// Round-robin port scheduler main loop.  The macro never falls through:
// every path branches back to handle_enqueue# or handle_dequeue#.
//
// Each iteration:
//   1. handle_enqueue#  - if the incoming NN ring is not empty, read a
//      3-longword enqueue message (packet size, sop handle, port number),
//      apply tail drop, count the packet against its port and mark the
//      port as having data.
//   2. handle_dequeue#  - pick the next port with data in round-robin
//      order and schedule one packet from it if the port's packets in
//      flight are below MAX_IN_FLIGHT.
//   3. send_message_to_qm# - send a 3-longword message to the outgoing NN ring:
//        word 0: enq port number | (deq port number << 16)
//        word 1: enq sop handle
//        word 2: 0xff
//      Port numbers in the message are real port + 1; INVALID_PORT_NUMBER
//      (0) means "no enqueue" or "no dequeue".  Nothing is sent when there
//      is neither an enqueue nor a dequeue.
//
// Takes no arguments.  Uses active_lm_addr_0 and T_INDEX.

#macro port_rr_schedule()

.begin

	.reg	enq_packet_size
	.reg	enq_sop_handle
	.reg	enq_port_number 
	.reg	enq_port_offset
	.reg	ports_with_data
	.reg	deq_port_number
	.reg	deq_port_offset
	.reg	port_rr_mask 
	.reg	port_mask 
	.reg 	packets_in_flight 
	.reg	xfer_byte_offset 
	.reg	minus_two
	.reg	@buf_size
	.reg 	@max_enq_per_port
	.reg 	@sched_pkt_drop

	// Mask for doing round robin through ports.  It starts as all ones
	// (every port eligible); no port has data yet.  minus_two is the
	// constant that gets shifted to build the next round-robin mask.

	alu[port_rr_mask, --, ~B, 0]
	alu[ports_with_data, --, B, 0]
	move[minus_two, 0xfffffffe]

	//	Some constant for tail dropping.

	immed32[@max_enq_per_port, MAX_ENQ_PER_PORT]

	#ifdef _DEBUG_COUNTERS_
	immed[@sched_pkt_drop, 0]
	#endif

	immed[@buf_size, BUFFER_SIZE]

handle_enqueue#:

	ctx_arb[voluntary]		; Will help WorkBench stop this ME while debugging.

	// Check if the ring from statistics is empty 

	br_inp_state[NN_EMPTY, no_enq_msg#]

	// Read the packet size, sop handle and port number from the enqueue message 

	alu[enq_packet_size, --, B, *n$index++]
	alu[enq_sop_handle,  --, B, *n$index++]
	alu[enq_port_number, --, B, *n$index++, >>PORT_BITS_START]

	// Calculate offset in local memory for port structure 
	
	alu_shf[enq_port_offset, --, B, enq_port_number, <<PORT_STRUCTURE_SIZE]
	
	// Need to wait 3 cycles before active_lm_addr_0 can be used

	local_csr_wr[active_lm_addr_0, enq_port_offset]

	nop
	nop
	nop

	//	Tail Drop: When a port is flow controlled (packets in flight exceeds
	// 	threshold) the scheduler/queue_manager continues to enqueue packets for 
	//	that port/queue. If that particular port takes a long time to recover
	//	or never recovers we end up queueing all buffers to that port, starving
	//	other ports of buffers. To avoid this we implement a tail drop for 
	//	flow controlled	ports.

	//	One problem in implementing tail drop is that for a large packet we
	//	need the eop handle. It's not available for now. This section will be
	//	rewritten in the next release.

	alu[--, enq_packet_size, -, @buf_size]	; packets larger than BUFFER_SIZE are never tail dropped
	bgt[continue_enqueue#]

	// Drop when the port already holds more than MAX_ENQ_PER_PORT packets

	alu[--, lm0_port_pkts_enqueued, -, @max_enq_per_port]
	bgt[drop_packet#]

continue_enqueue#:

	alu[lm0_port_pkts_enqueued, lm0_port_pkts_enqueued, +, 1]

	// Set bit mask to indicate port has packets.  The first instruction
	// only presents the port number as the shift count for <<indirect.

	alu[--, enq_port_number, OR, 0]
	alu_shf[ports_with_data, ports_with_data , OR, 1, <<indirect]

	// increment enq_port_number since port 0 is invalid for QM

	alu[enq_port_number, enq_port_number, +, 1]

handle_dequeue#:

	// find a port with data among the ports not yet served in this round

	alu[port_mask, ports_with_data, AND, port_rr_mask]
	ffs[deq_port_number, port_mask]
	bne [found_port#]

	// if round robin round is over, ignore the mask and search all ports;
	// the mask itself is rebuilt at found_port#

	ffs[deq_port_number, ports_with_data]
	bne [found_port#]

	// otherwise no port has data. Check if there is an enq, If not loop to start
	
	alu[--, enq_port_number, -, INVALID_PORT_NUMBER]
	beq[handle_enqueue#]

	// Otherwise there is an enq: send it to the qm with an invalid deq port # 

	alu[deq_port_number, --, B, INVALID_PORT_NUMBER]
	br[send_message_to_qm#]

found_port#:

	// Calculate offset in local memory for port structure 
	
	alu_shf[deq_port_offset, --, B, deq_port_number, <<PORT_STRUCTURE_SIZE]

	// Set up t_index register to read packets transmitted for port - 3 cycle wait

	alu_shf[xfer_byte_offset, --, B, deq_port_offset, >>PORT_OFFSET_TO_T_INDEX]
	local_csr_wr[T_INDEX, xfer_byte_offset]

	// Need to wait 3 cycles before active_lm_addr_0 can be used.  The two
	// mask instructions and the nop below fill that wait.

	local_csr_wr[active_lm_addr_0, deq_port_offset]

	// update the mask: 0xfffffffe shifted left by the port number, so this
	// port and all lower-numbered ports are excluded for the rest of the round

	alu[--, deq_port_number, OR, 0]
	alu_shf[port_rr_mask , --, B, minus_two, <<indirect]

	nop
	
	// Get the packets in flight = packets scheduled - packets transmitted 

	alu[packets_in_flight, lm0_port_pkts_scheduled, -, *$$index]

	// The port may be served only while MAX_IN_FLIGHT > packets in flight

	alu[--, MAX_IN_FLIGHT, -, packets_in_flight]
	bgt[packets_in_flight_check_passed#]

	// Limit reached. Check if there is an enq, If not loop to start
	
	alu[--, enq_port_number, -, INVALID_PORT_NUMBER]
	beq[handle_enqueue#]

	// Otherwise there is an enq: send it to the qm with an invalid deq port # 

	alu[deq_port_number, --, B, INVALID_PORT_NUMBER]
	br[send_message_to_qm#]


packets_in_flight_check_passed#:

	// decrement packets in the queue and increment packets scheduled

	alu[lm0_port_pkts_scheduled, lm0_port_pkts_scheduled, +, 1]
	alu[lm0_port_pkts_enqueued, lm0_port_pkts_enqueued, -, 1]

	// check if there are zero packets in the port (condition codes come
	// from the decrement above)

	bne[increment_deq_port#]

	// clear bit mask to indicate port has no packets 

	alu[--, deq_port_number, OR, 0]
	alu_shf[ports_with_data, ports_with_data , AND~, 1, <<indirect]

increment_deq_port#:

	// port 0 is invalid for QM, so the port number sent is real port + 1

	alu[deq_port_number, deq_port_number, +, 1]

send_message_to_qm#:

	// Spin until the outgoing NN ring has room

	br_inp_state[NN_FULL, send_message_to_qm#]

	alu[*n$index++, enq_port_number, OR, deq_port_number, <<16]
	alu[*n$index++, --, B, enq_sop_handle]
	alu[*n$index++, --, B, 0xff]

	br[handle_enqueue#]

no_enq_msg#:

	// Set the port number to invalid port and branch to dequeue path 

	alu[enq_port_number, --, B, INVALID_PORT_NUMBER]
	alu[enq_sop_handle, --, B, 0]
	br[handle_dequeue#]

drop_packet#:

	//	Later add support for dropping large buffers.

	dl_buf_drop[enq_sop_handle]

	#ifdef _DEBUG_COUNTERS_
	alu[@sched_pkt_drop, @sched_pkt_drop, +, 1]
	#endif

	// Reset the enq port# and handle as this packet has been dropped.  The
	// handle is cleared, as in no_enq_msg#, so that a dequeue-only message
	// does not carry the handle of a buffer that was just freed.

	alu[enq_port_number, --, B, INVALID_PORT_NUMBER]
	alu[enq_sop_handle, --, B, 0]
	br[handle_dequeue#]

.end

#endm


/////////////////////////////////////////////////////////////////////////////////////////

// Microengine entry point.
//
// port_rr_scheduler_init() kills every context except context 0 and
// performs the one-time initialization on context 0.  Context 0 then runs
// the scheduler; port_rr_schedule() loops internally and does not return.

main#:

.begin						 
						 
	port_rr_scheduler_init()

	.if (ctx() == 0)
		.while (1)
			port_rr_schedule()
		.endw

	.endif

	nop; to avoid warning 5133 
	
.end

/////////////////////////////////////////////////////////////////////////////////////////

#endif // __PORT_RR_SCHEDULER_UC__