/////////////////////////////////////////////////////////////////////////////////////
//                                                                      
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  2001 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119 
//
//		
//		Change History
// 		--------------
//
// Date			Description											Whom
// ------------------------------------------------------------------------------------
//
// 11/11/01    	Ingress Scheduler for IXP2400  					Uday Naik            
// 10/06/01		Modified for the IXP2800						Prashant Chandra     
//                                                                      
/////////////////////////////////////////////////////////////////////////////////////

#ifndef __CSIX_SCHEDULER_UC__
#define __CSIX_SCHEDULER_UC__

/////////////////////////////////////////////////////////////////////////////////////

// header file with system constants

#include "dl_system.h"

// header file with constants for algorithm

#include "wrr_flat_scheduler.h"

// include stdmac.uc in IXPblocks Portable library

#include "stdmac.uc"

// include localmem.uc in IXPblocks Portable library for read/write local memory

#include "localmem.uc"

// include the flow control code 

#include "wrr_flat_scheduler_fc.uc"

// include the code for initializing scratch and next neighbor rings

#include "wrr_flat_scheduler_init.uc"

// run in 4 context mode
.num_contexts 4

/////////////////////////////////////////////////////////////////////////////////////

#define	PACKETS_IN_FLIGHT_PROCESSING


//////////////////////////////////////////////////////////////////////////////////////
// 
//  Scheduler performance analysis
//
//                                 Code Path                                        Cycle Count
// -----------------------------------------------------------------------------------------------
// 1. Add enq cell count + deq                                                         40
// 2. Add enq cell count + deq + reload credit                                         46
// 3. Add enq cell count + deq + remove Q from active list                             40
// 4. Add enq cell count + deq + all Qs inactive                                       38
// 5. Add enq cell count + add Q to active list + deq                                  45
// 6. Add enq cell count + add Q to active list + deq + reload credit                  51
// 7. Add enq cell count + add Q to active list + deq + remove Q from active list      45
// 8. Add enq cell count + add Q to active list + deq + all Qs inactive                43
// 9. Add enq cell count + 1st active Q + deq                                          50
// 10. Add enq cell count + 1st active Q + deq + reload credit                         56
// 11. Add enq cell count + 1st active Q + deq + remove Q from active list             50
// 12. Add enq cell count + 1st active Q + deq + all Qs inactive                       48
//
//////////////////////////////////////////////////////////////////////////////////////

// _scheduler_get_queue(out_queue_id, active)
//
// Chooses the queue to dequeue from in this beat and updates the active list.
//
// Parameters:
//   out_queue_id - register whose high 16 bits (ld_field mask 1100) receive the
//                  selected queue address shifted left by 13, or 0 when nothing
//                  is scheduled. The low 16 bits are not modified.
//   active       - active list state, read and updated. As used below:
//                    low 16 bits   = LM address of the queue to serve next
//                    high 16 bits  = LM address of the queue whose link byte
//                                    points at it (loaded into active_lm_addr_1)
//                    LIST_INACTIVE_BIT set = no queue is in the list
//
// Names used but not declared in this macro (must exist at the expansion site):
//   packets_transmitted, max_pkts_in_flight, cell_count_minus_one,
//   @packets_in_flight, @packets_scheduled
//
// Local memory entry layout, as accessed through *l$index0 in this macro:
//   word 0: low byte    = link to the next queue (queue address >> 3)
//           bits above  = cell count; QUEUE_FC_BIT = flow control flag
//   word 1: low 16 bits = remaining credit, high 16 bits = credit reload value
//
// Condition codes on exit: SCHEDULE_NONE# loads them from the cleared
// out_queue_id field (load_cc). On every other path they are left by the last
// ALU instruction executed.
// NOTE(review): the caller at SCHEDULE_NO_ENQUEUE# tests beq right after this
// macro; confirm the FLOW_CONTROL_ON# exits give the intended result there.
#macro _scheduler_get_queue(out_queue_id, active)
.begin
	// NOTE(review): only queue, prev_queue, next_queue and temp are referenced
	// below; the remaining names appear to be unused declarations.
	.reg port queue prev_port prev_queue
	.reg port_addr queue_addr qdesc_addr
	.reg cell_count next_queue temp

#ifdef PACKETS_IN_FLIGHT_PROCESSING

	// First check the packets in flight. if that exceeds a certain limit we 
	// will not schedule anything for this beat

	// Subtract packets xmited from packets scheduled 
	alu[@packets_in_flight, @packets_scheduled, - , packets_transmitted]

	// Mask off all but the lowest byte to account for wrap around on 256 
	alu[@packets_in_flight, @packets_in_flight, AND, 0xff]	; defer 1

	// Compare against the limit; the result is discarded, only the
	// condition codes are used by the bge below.
	alu[--, @packets_in_flight, -, max_pkts_in_flight]

	// In flight count >= limit: schedule nothing in this beat
	bge[SCHEDULE_NONE#]

#endif

	// get next queue
	// (queue = low 16 bits of active, then point *l$index0 at that queue's entry)
	alu[queue, 0, +16, active]
	local_csr_wr[active_lm_addr_0, queue]
	// Presumably fills the slot while the LM address write takes effect - confirm
	// the required write-to-use spacing in the programmer's reference.
	nop

	// schedule nothing if linked list doesn't have any queue
	br_bset[active, LIST_INACTIVE_BIT, SCHEDULE_NONE#]

	// queue contains active queue address which was already <<3,
	// so need to <<13 to move value to high 16 bits.
	ld_field[out_queue_id, 1100, queue, <<13]

	// check for flow control
	br_bset[*l$index0, QUEUE_FC_BIT, FLOW_CONTROL_ON#]

	// increment packets scheduled count
	alu[@packets_scheduled, @packets_scheduled, + , 1]

	// decrement the cell count
	// (cell_count_minus_one is set outside this file; presumably it is one cell
	// expressed at the count field's bit position - TODO confirm)
	alu[*l$index0, *l$index0, -, cell_count_minus_one]
	// Test everything above the link byte; zero means no cells remain
	alu_shf[--, --, B, *l$index0, >>8]
	// remove the queue if cell count becomes zero
	// The three deferred instructions run whether or not the branch is taken:
	// they load prev_queue from the high half of active, point *l$index1 at it,
	// and compare queue with prev_queue for the beq at REMOVE_QUEUE_FROM_LIST#.
	beq[REMOVE_QUEUE_FROM_LIST#], defer[3]
	ld_field_w_clr[prev_queue, 0011, active, >>16]
	local_csr_wr[active_lm_addr_1, prev_queue]
	alu[--, queue, -, prev_queue]

	// decrement credit
	alu[*l$index0[1], *l$index0[1], -, 1]
	// Test the low 16 bits of the credit word; zero means credit is used up
	alu_shf[--, 0, +16, *l$index0[1]]
	beq[RELOAD_CREDIT#]

	// Credit remains: leave active unchanged so this queue is served again
	br[DONE#]
	

RELOAD_CREDIT#:
	// Copy the reload value (high 16 bits of word 1) into the credit field
	// (low 16 bits of word 1).
	ld_field_w_clr[temp, 0011, *l$index0[1], >>16]
	ld_field[*l$index0[1], 0011, temp]

	// advance to next queue
	// next_queue = link byte of the current entry. In the two defer slots the
	// current queue moves to the high half of active and next_queue << 3
	// becomes the new low half.
	ld_field_w_clr[next_queue, 0001, *l$index0]
	br[DONE#], defer[2]
	alu_shf[active, --, B, active, <<16]
	ld_field[active, 0011, next_queue, <<3]

FLOW_CONTROL_ON#:
	// don't schedule for this queue if its flow control is on
	ld_field[out_queue_id, 1100, 0]

	// update linked list
	// Same three steps as the defer slots above, then fall through to unlink
	// the flow-controlled queue from the list.
	ld_field_w_clr[prev_queue, 0011, active, >>16]
	local_csr_wr[active_lm_addr_1, prev_queue]
	alu[--, queue, -, prev_queue]

REMOVE_QUEUE_FROM_LIST#:

	// Entered with condition codes from (queue - prev_queue). Equal means this
	// was the only queue in the list, so the list becomes inactive. The
	// deferred write to active executes on both outcomes; the not-taken path
	// overwrites active again below.
	beq[DONE#], defer[1]
	alu_shf[active, --, B, 1, <<LIST_INACTIVE_BIT]

	// Unlink: copy the removed queue's link byte into the previous queue's
	// link byte, then rebuild active as (prev_queue << 16) | (next_queue << 3).
	ld_field_w_clr[next_queue, 0001, *l$index0]
	ld_field[*l$index1, 0001, next_queue]
	br[DONE#], defer[2]
	alu_shf[next_queue, --, B, next_queue, <<3]
	alu_shf[active, next_queue, OR, prev_queue, <<16]

SCHEDULE_NONE#:
	// Clear the dequeue field and load condition codes from the result
	ld_field[out_queue_id, 1100, 0], load_cc


DONE#:
.end
#endm






/////////////////////////////////////////////////////////////////////////////////////////////////// 
//
// Scheduler main loop. This runs in one thread and handles the schedules. 
//
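// Each invocation consumes at most one enqueue message and makes at most one
// dequeue decision. If there is nothing to enqueue and nothing to dequeue, the
// macro exits without writing to the NN ring; it only loops while the outgoing
// NN ring is full.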
//
//
// Instructions: Takes 43 instructions in the worst case
// 
///////////////////////////////////////////////////////////////////////////////////////////////////

#macro scheduler()

.begin

// One scheduling beat: optionally consume one 3-word enqueue message from the
// incoming NN ring, update that queue's local memory entry and the active list,
// pick a queue to dequeue with _scheduler_get_queue, and write a 3-word message
// to the outgoing NN ring.
//
// Message word 0 as used here:
//   on input : low 16 bits  = enqueue queue field (<< 3 gives the LM address)
//              high 16 bits = cell count of the enqueued packet
//   on output: low 16 bits  = unchanged from input (0 when there was no enqueue)
//              high 16 bits = dequeue field written by _scheduler_get_queue
//                             (0 when nothing is dequeued)
// Words 1 and 2 are passed through unchanged, or are 0 when there was no enqueue.
//
// Names used but not declared in this macro (must exist at the expansion site):
//   active, fc_count_mask, and @sched_out_nn_full under _DEBUG_COUNTERS_,
//   plus everything _scheduler_get_queue requires.
//
// NOTE(review): enq_port_addr, temp, enq_port, next_index and enq_queue_num
// are declared below but not referenced in this macro.

.reg sched_msg_word0, sched_msg_word1, sched_msg_word2
.reg enq_queue_addr, enq_port_addr, enq_cell_count, cur_cell_count, prev_queue_addr
.reg temp, enq_port, next_index, enq_queue_num fc_bit

// NOTE(review): nothing in this macro branches to START_SCHEDULE#.
START_SCHEDULE#:

	// If the incoming NN ring is empty there is no enqueue message this beat
	br_inp_state[NN_EMPTY, SCHEDULE_NO_ENQUEUE#]

	// Read the enqueue message from packet processing
	// (word 0 is read here without advancing, then consumed by the next line)
	alu[sched_msg_word0, --, B, *n$index]
	// Calculate enqueue queue address
	ld_field_w_clr[enq_queue_addr, 0011, *n$index++, <<3]
	// Point *l$index0 at the enqueue queue's entry. The next three instructions
	// do not touch local memory (presumably covering the write-to-use spacing
	// of the LM address CSR - confirm).
	local_csr_wr[active_lm_addr_0, enq_queue_addr]
	alu[sched_msg_word1, --, B, *n$index++]
	alu[sched_msg_word2, --, B, *n$index++]
	// Get packet's cell count
	ld_field_w_clr[enq_cell_count, 0011, sched_msg_word0, >>16]

	// Get current cell count
	// fc_bit = bit 31 of entry word 0; cur_cell_count = (word 0 >> 8) masked
	// with fc_count_mask (set outside this file).
	alu_shf[fc_bit, --, B, *l$index0, >>31]
	alu_shf[cur_cell_count, fc_count_mask, AND, *l$index0, >>8]

	// Current cell count = 0 means the queue has not yet been in the linked list
	// The three deferred instructions run on both outcomes, so the new count is
	// stored before either path continues.
	beq[ADD_QUEUE_TO_LIST#], defer[3]

	// Add enqueue cell count to current cell count
	alu[cur_cell_count, cur_cell_count, +, enq_cell_count]
	// Put fc_bit back at bit 23 so that the << 8 below restores it to bit 31
	alu[cur_cell_count, cur_cell_count, OR, fc_bit, <<23]
	// Write the upper three bytes of entry word 0; the link byte is preserved
	ld_field[*l$index0, 1110, cur_cell_count, <<8]

	// Get dequeue queue id
	_scheduler_get_queue(sched_msg_word0, active)

WRITE_NN#:
	// Spin through WRITE_NN_FULL# until the outgoing NN ring has room
	br_inp_state[NN_FULL, WRITE_NN_FULL#]

	// Take care of both enqueue and dequeue in this beat
	// (words 1 and 2 are written in the branch defer slots)
	alu[*n$index++, --, B, sched_msg_word0]
	br[SCHEDULE_END#], defer[2]
	alu[*n$index++, --, B, sched_msg_word1]
	alu[*n$index++, --, B, sched_msg_word2]

WRITE_NN_FULL#:

	// Debug builds count each retry caused by a full ring
	#ifdef _DEBUG_COUNTERS_
	alu[@sched_out_nn_full, @sched_out_nn_full, +, 1]
	#endif

	br[WRITE_NN#]

ADD_QUEUE_TO_LIST#:

	// load the prev queue addr to LM ADDR1
	ld_field_w_clr[prev_queue_addr, 0011, active, >>16]
	local_csr_wr[active_lm_addr_1, prev_queue_addr]

	// check if this is the first active queue
	br_bset[active, LIST_INACTIVE_BIT, FIRST_ACTIVE_QUEUE#]

	// Add the queue to linked list
	// The new queue is inserted between the queue held in the high half of
	// active and the one in the low half:
	//   new entry's link byte      = low half of active >> 3
	//   high half of active        = new queue address
	//   previous entry's link byte = new queue address >> 3
	ld_field[*l$index0, 0001, active, >>3]
	ld_field[active, 1100, enq_queue_addr, <<16]
	ld_field[*l$index1, 0001, enq_queue_addr, >>3]

	// Get dequeue queue id
	_scheduler_get_queue(sched_msg_word0, active)

WRITE_NN2#:
	// Same output sequence as WRITE_NN#
	br_inp_state[NN_FULL, WRITE_NN2_FULL#]

	// Take care of both enqueue and dequeue in this beat
	alu[*n$index++, --, B, sched_msg_word0]
	br[SCHEDULE_END#], defer[2]
	alu[*n$index++, --, B, sched_msg_word1]
	alu[*n$index++, --, B, sched_msg_word2]

WRITE_NN2_FULL#:

	#ifdef _DEBUG_COUNTERS_
	alu[@sched_out_nn_full, @sched_out_nn_full, +, 1]
	#endif

	br[WRITE_NN2#]

FIRST_ACTIVE_QUEUE#:
	// Add the very first queue to linked list
	// Both halves of active become the new queue address (this also clears the
	// inactive state), and the entry's link byte points back at itself.
	ld_field_w_clr[active, 0011, enq_queue_addr]
	ld_field[active, 1100, enq_queue_addr, <<16]
	ld_field[*l$index0, 0001, enq_queue_addr, >>3]

	// Get dequeue queue id
	_scheduler_get_queue(sched_msg_word0, active)

WRITE_NN3#:
	// Same output sequence as WRITE_NN#
	br_inp_state[NN_FULL, WRITE_NN3_FULL#]

	// Take care of both enqueue and dequeue in this beat
	alu[*n$index++, --, B, sched_msg_word0]
	br[SCHEDULE_END#], defer[2]
	alu[*n$index++, --, B, sched_msg_word1]
	alu[*n$index++, --, B, sched_msg_word2]

WRITE_NN3_FULL#:

	#ifdef _DEBUG_COUNTERS_
	alu[@sched_out_nn_full, @sched_out_nn_full, +, 1]
	#endif

	br[WRITE_NN3#]

SCHEDULE_NO_ENQUEUE#:
	// No enqueue request. See if dequeue request can be made.
	immed[sched_msg_word1, 0]

	// Dequeue doesn't need sop/eop handles
	immed[sched_msg_word2, 0]
	immed[sched_msg_word0, 0]

	// get the dequeue queue id
	_scheduler_get_queue(sched_msg_word0, active)

	// Output from the above macro:
	//	if sched_msg_word0 = 0: there's no dequeue, skip writing to NN ring
	//	if sched_msg_word0 != 0: there is dequeue
	// The beq relies on the condition codes left by the macro. Only its
	// SCHEDULE_NONE# exit loads them from the cleared field.
	// NOTE(review): confirm the macro's other zero-result exit behaves as intended.
	beq[SCHEDULE_END#]

WRITE_NN4#:
	// Same output sequence as WRITE_NN#; words 1 and 2 are zero on this path
	br_inp_state[NN_FULL, WRITE_NN4_FULL#]

	alu[*n$index++, --, B, sched_msg_word0]

	br[SCHEDULE_END#], defer[2]

	alu[*n$index++, --, B, sched_msg_word1]
	alu[*n$index++, --, B, sched_msg_word2]

WRITE_NN4_FULL#:

	#ifdef _DEBUG_COUNTERS_
	alu[@sched_out_nn_full, @sched_out_nn_full, +, 1]
	#endif
	
	br[WRITE_NN4#]

SCHEDULE_END#:
.end
#endm


/////////////////////////////////////////////////////////////////////////////////////
//
// Scheduler Main Entry Point
//
/////////////////////////////////////////////////////////////////////////////////////

// Program entry. Context 0 runs the scheduler loop forever; every other
// context kills itself immediately.
MAIN#:

br=ctx[ 0, SCHEDULER#]

// All other threads should simply abort

ctx_arb[kill]			

SCHEDULER#:
	.begin		
	// Registers shared with the scheduler macros expanded below.
	// fc_count_mask, cell_count_minus_one, active and max_pkts_in_flight are not
	// assigned in this block; presumably scheduler_init() sets them - confirm in
	// wrr_flat_scheduler_init.uc.
	.reg fc_count_mask cell_count_minus_one active max_pkts_in_flight tx_sequence
	// Read transfer register that receives the TX sequence value from the MSF
	.reg $tx_seq
	.sig MSF_SIGNAL
	// Low byte of the most recently read TX sequence value
	.reg packets_transmitted

	// Debug-only counter of retries caused by a full outgoing NN ring
	#ifdef _DEBUG_COUNTERS_
	.reg @sched_out_nn_full
	immed[@sched_out_nn_full, 0]
	#endif

		// Execute in thread 0 in an infinite loop

		scheduler_init()
		
		immed[tx_sequence, TX_SEQUENCE_0]
		alu[packets_transmitted, --, B, 0]

		// First TX sequence read: swap out until it completes so that $tx_seq
		// is valid on the first pass through the loop.
		msf[read, $tx_seq, tx_sequence, 0, 1], ctx_swap [MSF_SIGNAL]

SCHEDULE_LOOP#:

		// Ctx arbing in the loop allows this ME to stop running which
		// is useful when debugging on hardware.
		#ifdef _DEBUG_COUNTERS_
		ctx_arb[voluntary]
		#endif
		
		// Take the transmit count from the previous read, then start the next
		// read without waiting for it (sig_done). The loop body does not wait on
		// MSF_SIGNAL; .io_completed presumably tells the assembler to treat the
		// outstanding read as finished at this point - confirm in the assembler
		// documentation.
		.io_completed MSF_SIGNAL
		alu[packets_transmitted, $tx_seq, AND, 0xff]
		msf[read, $tx_seq, tx_sequence, 0, 1], sig_done [MSF_SIGNAL]
	
		//	Check if the FCIFIFO has data
		br_inp_state[FCI_NOT_EMPTY, FLOW_CONTROL#]	

		// Do 8 schedules before handling flow control and packets in flight
		// (the transmit count is refreshed after every second schedule)
		scheduler()
		scheduler()

		.io_completed MSF_SIGNAL
		alu[packets_transmitted, $tx_seq, AND, 0xff]
		msf[read, $tx_seq, tx_sequence, 0, 1], sig_done [MSF_SIGNAL]

		scheduler()
		scheduler()

		.io_completed MSF_SIGNAL
		alu[packets_transmitted, $tx_seq, AND, 0xff]
		msf[read, $tx_seq, tx_sequence, 0, 1], sig_done [MSF_SIGNAL]

		scheduler()
		scheduler()

		.io_completed MSF_SIGNAL
		alu[packets_transmitted, $tx_seq, AND, 0xff]
		msf[read, $tx_seq, tx_sequence, 0, 1], sig_done [MSF_SIGNAL]

		scheduler()
		scheduler()
   
		br[SCHEDULE_LOOP#]

FLOW_CONTROL#:

		// Flow control handling is defined outside this file
		// (wrr_flat_scheduler_fc.uc is included above)
		_scheduler_handle_flow_control()

WAIT_FOR_MSF#:
		// Wait for the TX sequence read issued at the top of the loop, so that
		// the signal is consumed before the loop issues the next read.
		ctx_arb [MSF_SIGNAL]

		// The packets transmitted are in the first byte 
		alu[packets_transmitted, $tx_seq, AND, 0xff]

		br[SCHEDULE_LOOP#]

.end


/////////////////////////////////////////////////////////////////////////////////////

#endif 		// __CSIX_SCHEDULER_UC__

/////////////////////////////////////////////////////////////////////////////////////

