/////////////////////////////////////////////////////////////////////////////////////
//                                                                      
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  2001 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119 
//
//		
//		Change History
// 		--------------
//
// Date			Description											Whom
// ------------------------------------------------------------------------------------
//
// 11/11/01    	Ingress Scheduler for IXP2400  					Uday Naik                 
//                                                                      
/////////////////////////////////////////////////////////////////////////////////////

#ifndef __CSIX_SCHEDULER_UC__
#define __CSIX_SCHEDULER_UC__

/////////////////////////////////////////////////////////////////////////////////////

// header file with system constants

#include "dl_system.h"

// header file with constants for algorithm

#include "scheduler.h"

// include stdmac.uc in IXPblocks Portable library

#include "stdmac.uc"

// include localmem.uc in IXPblocks Portable library for read/write local memory

#include "localmem.uc"

// include the queue manager message handling code 

#include "scheduler_qm.uc"

// include the flow control code 

#include "scheduler_fc.uc"

// include the code for initializing scratch and next neighbor rings

#include "scheduler_init.uc"


/////////////////////////////////////////////////////////////////////////////////////

#define	PACKETS_IN_FLIGHT_PROCESSING

/////////////////////////////////////////////////////////////////////////////////////
//
//  _scheduler_setup_queue_group_struct
//
//  Aims local memory handle LM_HANDLE_0 at the structure that belongs to the 
//  requested queue group, so that later *l$index0[...] accesses hit that group.
//
//  Inputs:		group_number	:	queue group number from 0..31
// 
//  Note:		the new address is not usable immediately. The scheduler macro in
//				this file issues three further instructions before its first
//				*l$index0 access.
//
//  Instruction estimate: 2 cycles
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_setup_queue_group_struct(group_number)

.begin

	.reg	group_struct_addr

	// Group structures are laid out back to back from local memory offset 0,
	// one every 16 bytes, hence address = group_number * 16

	alu_shf[group_struct_addr, --, B, group_number, <<4]
	localmem_set_address(0, group_struct_addr, LM_HANDLE_0)

.end

#endm


/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_get_credit
//
// Get the credit from the credit word. One credit word is shared by an even
// queue and an odd queue; the even queue's credit is byte 0 and the odd queue's 
// credit is byte 2.
//
// Inputs:
//			credit_word:	credit word from local memory
//
// Constants:
//
// 			ODD_OR_EVEN:	ODD_QUEUE (1) if the queue number is odd, anything else
//							is treated as even. Resolved at assembly time.
//			
// Outputs:
//			credit:			credit extracted from credit word (8 bit value)
//
// Size:
//			1 instruction
//
/////////////////////////////////////////////////////////////////////////////////////

// Values accepted for the ODD_OR_EVEN macro argument

#define ODD_QUEUE   1
#define EVEN_QUEUE  0

#macro _scheduler_get_credit(credit, credit_word, ODD_OR_EVEN)

#if (ODD_OR_EVEN == ODD_QUEUE)

	// odd queue: shift byte 2 of the word down to bits 7..0 and mask it

	alu_shf[credit, 0xff, AND, credit_word, >>16]

#else

	// even queue: credit already sits in byte 0, only the mask is needed

	alu[credit, 0xff, AND, credit_word]

#endif

#endm

/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_set_credit
//
// Set the credit in the credit word. This is the inverse of 
// _scheduler_get_credit: byte 0 is written for an even queue, byte 2 for an
// odd queue.
//
// Inputs:
//			credit:			credit for queue. Only one byte of it is stored.
//
// Constants:
//
// 			ODD_OR_EVEN:	ODD_QUEUE (1) if the queue number is odd, anything else
//							is treated as even. Resolved at assembly time.
//			
// Outputs:
//			credit_word:	credit word into which to insert credit 
//
// Size:
//			1 instruction
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_set_credit(credit_word, credit, ODD_OR_EVEN)

#if (ODD_OR_EVEN == ODD_QUEUE)

	// odd queue: byte mask 0100 selects byte 2, so the credit is shifted up
	// by 16 to line up with it

	ld_field[credit_word, 0100, credit, <<16]

#else

	// even queue: byte mask 0001 selects byte 0, no shift needed

	ld_field[credit_word, 0001, credit]

#endif

#endm

/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_get_credit_quantum
//
// Get the credit quantum from the credit word. The quantum is stored in the
// byte directly above the queue's credit: byte 1 for an even queue, byte 3 for
// an odd queue.
//
// Inputs:
//			credit_word:	credit word from local memory
//
// Constants:
// 
// 			ODD_OR_EVEN:	ODD_QUEUE (1) if the queue number is odd, anything else
//							is treated as even. Resolved at assembly time.
//
// Outputs:
//			credit_quantum:	credit quantum extracted from credit word (8 bit value)
//
// Size:
//			1 instruction
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_get_credit_quantum(credit_quantum, credit_word, ODD_OR_EVEN)

#if (ODD_OR_EVEN == ODD_QUEUE)

	// odd queue: credit quantum is in byte 3 of the word

	alu_shf[credit_quantum, 0xff, AND, credit_word, >>24]

#else

	// even queue: credit quantum is in byte 1 of the word

	alu_shf[credit_quantum, 0xff, AND, credit_word, >>8]

#endif

#endm

/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_adjust_credit
//
// This macro adjusts the WRR credit for a queue. If the credit goes to zero it 
// resets the credit to the credit quantum and updates the queue mask of the group 
// to move to the next queue. The context is swapped out before the macro ends.
//
// Inputs:   	queue_number		:	queue number 
//				minus_two			:	0xfffffffe in a register
// 				credit_word			:	current credit word, as loaded by the 
//										caller from *l$index1. Modified here.
//				WAIT_SIGNAL			:	signal to wait on 
//
// Constants:	ODD_OR_EVEN			:   To differentiate odd or even queues
//										(ODD_QUEUE / EVEN_QUEUE)
//
// Side effects: This macro assumes that l$index0 points to group structure and
// 				 l$index1 points to queue structure and updates both:
//				   - the group's LM_GROUP_MASK_INDEX word is rewritten when the
//				     credit reaches zero
//				   - the credit word is always written back through l$index1
//				 The macro ends in ctx_arb[WAIT_SIGNAL], so the thread swaps out.
//
// Instruction Count: 10 instructions in the worst case  
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_adjust_credit(queue_number, minus_two, credit_word, ODD_OR_EVEN, \
								WAIT_SIGNAL)
.begin

	.reg 	credit
	.reg	queue_mask

	// extract the credit for this queue 

	_scheduler_get_credit(credit, credit_word, ODD_OR_EVEN)

	// decrement the credit. The condition codes of this subtraction feed the
	// branch below, so nothing may be inserted between the two.

	alu[credit, credit, -, 1]

	// Credit still non-zero: keep serving this queue, only store the new credit

	bne[ADJUST_CREDIT_DONE#] 
			
	// if we get here the credit for the queue is 0. So reset credit to 
	// credit quantum

	_scheduler_get_credit_quantum(credit, credit_word, ODD_OR_EVEN)
		
	// compute the new queue mask: minus_two shifted left by queue_number. The
	// ALU op without destination only supplies queue_number as the amount for
	// the indirect shift that follows. With minus_two = 0xfffffffe this leaves
	// set only the bits above queue_number.

	alu[--, queue_number, OR, 0]
	alu_shf[queue_mask, --, B, minus_two, <<indirect]

	// write it out to local memory 

	alu[*l$index0[LM_GROUP_MASK_INDEX], --, B, queue_mask]

ADJUST_CREDIT_DONE#:

	// swap out. The two instructions below run in the defer slots, i.e. before
	// the context actually gives up the engine.

	ctx_arb[WAIT_SIGNAL] , defer[2]

	// merge the new credit into the credit word

	_scheduler_set_credit(credit_word, credit, ODD_OR_EVEN)			; defer 1
	
	// write the credit word back to the word it was loaded from. The caller 
	// reads it as *l$index1, so the write-back uses the same addressing rather
	// than a group structure index that only matches when it happens to be 0.

	alu[*l$index1, --, B, credit_word]	; defer 2

.end
#endm

/////////////////////////////////////////////////////////////////////////////////////////////////// 
//
// Scheduler main loop. This runs in one thread and makes one scheduling decision
// per invocation: it picks a queue group, then a queue inside that group, and 
// puts a dequeue request for that queue on the scratch ring.
//
// When we exit this macro, we will always have scheduled a packet. If no data
// is available on any port, we will loop inside this same macro. This will ensure
// that every time we leave the macro, we are guaranteed to get a signal (DEQ_SIGNAL)
//
// Inputs:		ring			:	address operand of the scratch ring put
//				deq_message		:	transfer register that receives the dequeue 
//									request (queue id with bit 31 set)
//				minus_two		:	0xfffffffe in a register, source for the 
//									indirect shifts that build the masks
//				queue_lm_base	:	added to (queue id << 1) to form the local 
//									memory address of the queue's credit word
//				group_mask		:	mask over the queue groups. Read and updated.
//				DEQ_SIGNAL		:	signal raised when the scratch put is done. 
//									Not waited on in this macro.
//				WAIT_SIGNAL		:	signal handed to the ctx_arb that ends the 
//									credit adjustment
//
// Globals:		@packets_in_flight, @root_empty_vector, @root_flow_control_vector 
//				are read, @packets_scheduled is incremented.
//
// Side effects: reprograms local memory handles 0 (group structure) and 1 (queue 
//				 structure) and swaps the context out at least once.
//
// Instructions: Takes 43 instructions in the worst case		
// 
///////////////////////////////////////////////////////////////////////////////////////////////////

#macro scheduler(ring, deq_message, minus_two, queue_lm_base,  group_mask, \
				 DEQ_SIGNAL, WAIT_SIGNAL)

.begin

	.reg 	group_number
	.reg 	queue_number
	.reg 	master_vector
	.reg 	masked_master_vector
	.reg	queue_vector
	.reg	masked_queue_vector
	.reg	queue_id
	.reg	lm_queue_offset
	.reg	credit_word

START_SCHEDULE#:

#ifdef PACKETS_IN_FLIGHT_PROCESSING

	// First check the packets in flight. If that has reached MAX_IN_FLIGHT we 
	// will not schedule anything for this beat

	alu[--, @packets_in_flight, -, MAX_IN_FLIGHT]

	bge[NO_SCHEDULE#]

#endif

	// compute master bit vector with data AND flowcontrol

	alu[master_vector, @root_empty_vector, AND, @root_flow_control_vector]

	// AND the mask in

	alu[masked_master_vector, master_vector, AND, group_mask]

	// find the eligible queue group

	ffs[group_number, masked_master_vector]

	// Check if any bit was set. If a bit was set, we are done

	bne[FOUND_GROUP#] 

	// If we get here either the group mask needs to be reset or the port
	// vector is empty or totally flow controlled. First try and find the 
	// group again without the mask, which is the same as resetting the mask 
	// to all 1's. 

	ffs[group_number, master_vector]

	// If we still cant find a group then swap out. 

	beq[NO_SCHEDULE#] 

FOUND_GROUP#:

	// Set up the local memory offset to point to structure for queue group.
	// 3 cycle latency before this takes effect

	_scheduler_setup_queue_group_struct(group_number) 

	// compute new group mask: minus_two shifted left by group_number. The first
	// instruction only supplies group_number as the indirect shift amount.
			
	alu[--, group_number, OR, 0]
	alu_shf[group_mask, --, B, minus_two, <<indirect]	

	// 3 cycle latency for localmem csr write needs to finish. The two mask 
	// instructions above cover two cycles, this nop covers the third.

	nop

	// Now read the queue group parameters. The local memory offset set up 
	// above is in effect by now.
			
	alu[queue_vector, --, B, *l$index0[LM_GROUP_EMPTY_VECTOR_INDEX]] 	

	// AND the flow control and empty vectors. 

	alu[queue_vector,  queue_vector, AND, *l$index0[LM_GROUP_FC_VECTOR_INDEX]]
		
	// AND the queue mask in

	alu[masked_queue_vector, *l$index0[LM_GROUP_MASK_INDEX], AND, queue_vector]

	// find the eligible queue

	ffs[queue_number, masked_queue_vector]

	// check if no bit is set, in that case we need to recompute the ffs. 

	bne[FOUND_QUEUE#] 
	
	// If we get here, then ffs failed. So the queue mask must have removed
	// every candidate, since queue_vector itself cannot be zero or we would 
	// not have scheduled the group. Recompute ffs straight from the unmasked 
	// queue_vector. We are guaranteed to find a queue 

	ffs[queue_number, queue_vector]

FOUND_QUEUE#:
	
	// Build the queue id from group and queue number and turn it into the 
	// local memory offset of the queue's credit word (2 bytes per queue)

	alu_shf[queue_id, queue_number, OR, group_number, <<NUMBER_OF_BITS_FOR_GROUP]
	alu_shf[lm_queue_offset, --, B, queue_id, <<1] 
	alu[lm_queue_offset, lm_queue_offset, +, queue_lm_base]

CHECK_RING_FULL#:

	// check if the ring is full. The two deferred instructions execute whether
	// or not the branch is taken, and again on every retry from RING_IS_FULL#.

	br_inp_state[RING_FULL_VALUE, RING_IS_FULL#] , defer[2] 

	// Set up the local memory offset for the queue structure. 3 cycle latency

	localmem_set_address(0, lm_queue_offset, LM_HANDLE_1)		; defer 1

	// create the message which is basically just the queue id with MSB bit set

	alu_shf[deq_message, queue_id, OR, 1,  <<31]				; defer 2

	// test the lsb of the queue number. No result is stored, only the condition
	// codes are set for the beq below.

	alu[--, queue_number, AND, 1] 

	// write the data on the scratch ring. Inserted in middle to reduce branch penalty

	scratch[put, deq_message, 0, ring, 1] , sig_done[DEQ_SIGNAL]
	
	beq[EVEN_QUEUE_NUMBER#], defer[1]

	// Read the credit word. Runs in the defer slot, so it is done for both the
	// odd and the even path.

	alu[credit_word, --, B, *l$index1]	; defer 1

    // odd queue: adjust the credit and swap out in this macro 

	_scheduler_adjust_credit(queue_number, minus_two, credit_word, ODD_QUEUE, \
							 WAIT_SIGNAL)

	// Exit from the macro 

	br [SCHEDULE_DONE#] , defer[1]

	// update the count of packets scheduled

	alu[@packets_scheduled, @packets_scheduled, + , 1]	; defer 1

RING_IS_FULL#:

	// ring is full. So we swap out and wait for the ring to have space

	ctx_arb[voluntary] , br[CHECK_RING_FULL#]

NO_SCHEDULE#:

	// If we get here then no queue is ready to send or in flight count is exceeded 
	// so we swap out and then loop to the top of the macro

	ctx_arb[voluntary] 	, br [START_SCHEDULE#] 
	
EVEN_QUEUE_NUMBER#:

 	// even queue: adjust the credit and swap out in this macro 

	_scheduler_adjust_credit(queue_number, minus_two, credit_word, EVEN_QUEUE, \
							 WAIT_SIGNAL)

	// update the count of packets scheduled

	alu[@packets_scheduled, @packets_scheduled, + , 1]
	

SCHEDULE_DONE#:

.end
#endm

/////////////////////////////////////////////////////////////////////////////////////
//
//
// _scheduler_read_tbufs_transmitted
//
//	  Read the number of c-frames or TBUFS transmitted from the MSF in an 
//	  infinite loop and derive the number of packets in flight from it:
//
//	  	@packets_in_flight = (@packets_scheduled - transmitted) AND 0xff
//
//	  The macro takes no arguments and never exits.
//
//	  Globals:	@packets_scheduled is read, @packets_in_flight is written.
//  
//  Instruction estimate: 7 cycles in loop
// 
/////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_read_tbufs_transmitted()

.begin

	.sig			MSF_SIGNAL			// signal for msf read
	.reg 			$tx_seq 			// transfer register for reading tx sequence # register
	.reg			tx_sequence			// stores offset into MSF registers for Tx sequence reg
	.reg			packets_transmitted	// number of packets transmitted

	// load the MSF offset of the TX sequence number register

	immed[tx_sequence, TX_SEQUENCE_0]

	// first time it swap outs immediately

	ctx_arb[voluntary]


READ_SEQUENCE#:
	
	// Read the TX sequence number register and swap out until the read is done

	msf[read, $tx_seq, tx_sequence, 0, 1], ctx_swap [MSF_SIGNAL], defer [1]
		
	// Mask off all but the lowest byte to account for wrap around on 256.
	// This sits in the defer slot of the read, so it is applied to the 
	// difference computed at the bottom of the previous pass before this 
	// thread swaps out.

	alu[@packets_in_flight, @packets_in_flight, AND, 0xff]	; defer 1


	// start the loop again. Both instructions below execute in the defer slots
	// of the branch.

	br[READ_SEQUENCE#] , defer [2]
		
	// The packets transmitted are in the first byte 

	alu[packets_transmitted, $tx_seq, AND, 0xff]	; defer 1

	// Subtract packets xmited from packets scheduled. The result is masked at
	// the top of the loop.

	alu[@packets_in_flight, @packets_scheduled, - , packets_transmitted];  defer 2
		
.end
#endm

/////////////////////////////////////////////////////////////////////////////////////
//
// Scheduler Main Entry Point
//
// Every context starts here and is dispatched on its context number:
//
//		context 0	:	SCHEDULER#
//		context 1	:	QM_HANDLER#
//		context 2	:	FLOW_CONTROL#
//		context 3	:	PACKETS_IN_FLIGHT#
//
/////////////////////////////////////////////////////////////////////////////////////

MAIN#:

br=ctx[ 0, SCHEDULER#]
br=ctx[ 1, QM_HANDLER#]
br=ctx[ 2, FLOW_CONTROL#]
br=ctx[ 3, PACKETS_IN_FLIGHT#]

// All other threads should simply abort. A context that matched none of the 
// branches above falls through to here.

ctx_arb[kill]			

SCHEDULER#:

	// Thread 0: initializes the scheduler state once and then schedules 
	// packets forever. Control never leaves this block.

	.begin 

		.reg	minus_two
		.reg	group_mask
		.reg	ring
		.reg	queue_lm_base
		.reg	$deq1, $deq2, $deq3, $deq4
		.sig    DEQ_SIG1, DEQ_SIG2, DEQ_SIG3, DEQ_SIG4
		

		// Execute in thread 0 in an infinite loop

		scheduler_init[ring, minus_two, queue_lm_base, group_mask]
		
		// The Scheduler Macro does a scratch write. We dont want to wait for this
		// scratch write to finish. We exit the macro right away. The problem with
		// this is that the assembler has no way of knowing when the transfer 
		// register is safe to reuse. To work around that, we call the scheduler
		// macro with a different transfer register and signal each time. We also
		// pass it a signal to use in the ctx_arb. The first three times, we dont
		// sleep on any signal. Then every time, we sleep on the signal for the 
		// scratch write from 3 iterations ago.
		//
		// The signals are referenced with the same spelling as in their .sig 
		// declaration above.


SCHEDULE_START#:		

		// Warm up: the first three calls have no earlier scratch write to wait
		// for, so they only swap out voluntarily.

		scheduler(ring, $deq1, minus_two, queue_lm_base,  group_mask, DEQ_SIG1, \
				  voluntary)

		scheduler(ring, $deq2, minus_two, queue_lm_base,  group_mask, DEQ_SIG2, \
				  voluntary)

		scheduler(ring, $deq3, minus_two, queue_lm_base,  group_mask, DEQ_SIG3, \
				  voluntary)

		// The fourth call waits for the first write ($deq1 / DEQ_SIG1) so that
		// $deq1 is free again when the loop below reuses it.

		scheduler(ring, $deq4, minus_two, queue_lm_base,  group_mask, DEQ_SIG4, \
				  DEQ_SIG1)

SCHEDULE_LOOP#:

		// Steady state: each call waits for the write issued three calls 
		// earlier, which is the one whose transfer register is reused next.

		scheduler(ring, $deq1, minus_two, queue_lm_base,  group_mask, DEQ_SIG1, \
				  DEQ_SIG2)

		scheduler(ring, $deq2, minus_two, queue_lm_base,  group_mask, DEQ_SIG2, \
				  DEQ_SIG3)

		scheduler(ring, $deq3, minus_two, queue_lm_base,  group_mask, DEQ_SIG3, \
				  DEQ_SIG4)

		scheduler(ring, $deq4, minus_two, queue_lm_base,  group_mask, DEQ_SIG4, \
				  DEQ_SIG1)

SCHEDULE_LOOP_END#:

		br[SCHEDULE_LOOP#]

	.end

// Entry points of threads 1..3. There is no branch between the three labels, so
// each macro has to keep its thread forever. _scheduler_read_tbufs_transmitted 
// in this file does; the other two are defined in the included files and are 
// expected to behave the same way - NOTE(review): confirm in scheduler_qm.uc 
// and scheduler_fc.uc.

QM_HANDLER#:

	// Execute in thread 1 in an infinite loop

	_scheduler_qm_message_handler()

FLOW_CONTROL#:

	// Execute thread 2 in an infinite loop

	_scheduler_handle_flow_control()

PACKETS_IN_FLIGHT#:

	// Thread 3: read Tbufs transmitted in an infinite loop and keep 
	// @packets_in_flight up to date

	_scheduler_read_tbufs_transmitted()

/////////////////////////////////////////////////////////////////////////////////////

#endif 		// __CSIX_SCHEDULER_UC__

/////////////////////////////////////////////////////////////////////////////////////

