////////////////////////////////////////////////////////////////////////////////////////////
//                                                                      
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  2001 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119 
//
//		
//		Change History
// 		--------------
//
// Date			Description											Whom
// ------------------------------------------------------------------------------------
//
// 11/11/01    	Ingress Scheduler 								Uday Naik                 
//                                                                      
////////////////////////////////////////////////////////////////////////////////////////////

#include <ixp.h>

// Redefine `extern` to nothing before scheduler.h is included, so the
// preprocessor strips every `extern` keyword from that header in this file.
// NOTE(review): presumably this makes this file the single place where the
// scheduler globals declared in scheduler.h are actually defined -- confirm
// against scheduler.h.

#define extern 

// header file with constants for algorithm

#include "scheduler.h"

////////////////////////////////////////////////////////////////////////////////////////////

// When defined, Schedule() compares globalPacketsInFlight against MAX_IN_FLIGHT
// at the top of every pass and skips the beat if the limit has been reached.

#define	PACKETS_IN_FLIGHT_PROCESSING

////////////////////////////////////////////////////////////////////////////////////////////
//
// GET_CREDIT(queueId)
//
// Credit quantum for a given queue. This should be read from a SRAM control block.
// For now the value is hard coded.
//
// Normal build     : 1 for odd queue ids, 2 for even queue ids.
//
// WORKAROUND build : queueId is ignored and the macro yields one 32 bit word that
//                    packs the credits of an even/odd pair of queues:
//
//                      byte 0 : current credit of the even queue (2)
//                      byte 1 : credit quantum of the even queue (2)
//                      byte 2 : current credit of the odd queue  (1)
//                      byte 3 : credit quantum of the odd queue  (1)
//
////////////////////////////////////////////////////////////////////////////////////////////

#ifdef WORKAROUND

#define GET_CREDIT(queueId)	0x01010202

#else

// The argument is parenthesized so that an expression such as (a | b) is tested
// as a whole rather than being split up by operator precedence.

#define GET_CREDIT(queueId)	(((queueId) & 1) ? 1 : 2)

#endif

////////////////////////////////////////////////////////////////////////////////////////////

#ifdef UNIT_TEST

void InitScratchRing()				
{																
																		
	SIGNAL	sig_cw1, sig_cw2, sig_cw3;												
																		
	__declspec(sram_write_reg) uint32_t base;						
	__declspec(sram_write_reg) uint32_t head;						
	__declspec(sram_write_reg) uint32_t tail;						
																		
	// set head and tail to 0											
																		
	head = tail = 0;													
																		
	// set base and size. size is in bits 30 and 31						
																		
	base = INGRESS_SCHEDULER_DEQUEUE_RING_BASE | (INGRESS_SCHEDULER_DEQUEUE_RING_SIZE << 30);								
																		
	cap_write(&base, INGRESS_SCHEDULER_DEQUEUE_RING_BASE_ID, 1, sig_initiator, sig_done, &sig_cw1);	
	cap_write(&head, INGRESS_SCHEDULER_DEQUEUE_RING_HEAD_ID, 1, sig_initiator, sig_done, &sig_cw2);	
	cap_write(&tail, INGRESS_SCHEDULER_DEQUEUE_RING_TAIL_ID, 1, sig_initiator, sig_done, &sig_cw3);	
																		
	// wait for all signals												
																		
	wait_for_all(&sig_cw1, &sig_cw2, &sig_cw3);										
																		
}													

#endif

////////////////////////////////////////////////////////////////////////////////////////////
//
// InitRings()
//
// Set up the rings used by the scheduler:
//
//   - in UNIT_TEST builds, initialize the scratch ring used to send dequeue
//     requests to the Queue Manager
//   - program the CTX_ENABLES local csr for the next neighbor ring used to
//     receive QM messages from the Queue Manager
//
////////////////////////////////////////////////////////////////////////////////////////////

INLINE void InitRings(void)
{

	//
	// Value for the CTX_ENABLES local csr
	//
	// bit 20        NN_MODE = 0       : next neighbor registers are written
	//                                   by the previous ME
	// bits [19:18]  NN_RING_EMPTY = 0 : NN_EMPTY asserts when NN_PUT == NN_GET
	//                                   (default)
	// bits [15:8]   0xff              : CTX enables for contexts 0 to 7
	//

	uint32_t	ctxEnables = (0xff << 8);

#ifdef UNIT_TEST

	// the unit test has to set up the scratch ring itself

	InitScratchRing();

#endif

	local_csr_write(local_csr_ctx_enables, ctxEnables);

}

////////////////////////////////////////////////////////////////////////////////////////////
//
// SendSignal()
//
// Send a signal to a specific thread (context) by writing the SAME_ME_SIGNAL
// local csr.
//
//   context      : number of the context to signal
//   signalNumber : number of the signal to raise in that context
//
////////////////////////////////////////////////////////////////////////////////////////////

INLINE void SendSignal(int context, uint32_t signalNumber)
{

	// The csr value carries the context in bits 0..2 and the signal in bits 3..6.
	// Writing it into the local csr is what sends the signal.

	local_csr_write(local_csr_same_me_signal, context | (signalNumber << 3));

}

//////////////////////////////////////////////////////////////////////////////////////////// 
//
// Initialization routine for Scheduler
//
////////////////////////////////////////////////////////////////////////////////////////////


INLINE void _schedule_init(void)
{

	int 										i;
	__declspec(local_mem shared aligned(16)) 	queue_group_t*  pQueueGroup;
	__declspec(local_mem shared) 				queue_t* 		pQueue;

	// Set up the registers for next neighbor and scratch rings

	InitRings();

	// set packets scheduled , packets in flight and root empty vector to 0

	globalPacketsInFlight = globalPacketsScheduled = globalRootEmptyVector = 0;

	// Initialize the root flow control vector to all 1's. Flow control is off
	// on every queue group

	globalRootFlowControlVector = 0xffffffff;

	// initialize the queue group structures

	for (i = 0; i < NUMBER_OF_QUEUE_GROUPS; i++)
	{
		
		// get a pointer to the queue group structure 

		pQueueGroup = &globalQueueGroups[i];

		// initialize the structure 

		pQueueGroup->emptyVector 		 = 0;
		pQueueGroup->flowControlVector = 0xffffffff;
		pQueueGroup->mask				 = 0xffffffff;
		
	}

#ifndef WORKAROUND

	// initialize the queue structures 

	for (i = 0; i < (NUMBER_OF_QUEUE_GROUPS * NUMBER_OF_QUEUES_PER_GROUP); i++)
	{

		pQueue = &globalQueues[i];
		pQueue->currentCredit = pQueue->creditIncrement = GET_CREDIT(i);

	}

#else

	// initialize the queue structures 

	for (i = 0; i < (NUMBER_OF_QUEUE_GROUPS * NUMBER_OF_QUEUES_PER_GROUP) / 2; i++)
	{
		globalQueues[i] = GET_CREDIT(i);
	}

#endif


}



////////////////////////////////////////////////////////////////////////////////////////////
//
//
// ReadTbufsTransmitted
//
//	  Read the number of c-frames or TBUFS transmitted from the MSF in an infinite loop
// 
//
////////////////////////////////////////////////////////////////////////////////////////////

INLINE void SchedulerReadTbufsTransmitted(void)
{

	__declspec(sram_read_reg)  uint32_t 	tx_sequence;
	SIGNAL										msf_signal;
	unsigned	int								packets_transmitted;

	// first time it swap outs immediately waiting for the scheduler thread to finish
	// initialization

	wait_for_all(&sig_initDone);

	// In an infinite loop compute the packets in flight

	while (1)
	{

		// define the start address for the ingress flow control fifo 

		#define	TX_SEQUENCE_OFFSET		0x60
	
		// Read the TX sequence number register

		msf_read(&tx_sequence, (volatile void*) TX_SEQUENCE_OFFSET, 1, ctx_swap, &msf_signal);

		// The packets transmitted are in the first byte 

		packets_transmitted =  tx_sequence & 0xff;

		// Subtract packets xmited from packets scheduled. Mask all upper bytes to
		// account for wrap around on 256

		globalPacketsInFlight = (globalPacketsScheduled - packets_transmitted) & 0xff;

	}

}


/////////////////////////////////////////////////////////////////////////////////////////////
//
// Schedule()
//
// Scheduler main loop. This runs in one thread and handles the schedules. It
// never returns.
//
// After initializing the scheduler and signalling contexts 1, 2 and 3 that
// initialization is done, every pass of the loop
//
//   1. swaps out without scheduling if too many packets are in flight
//      (PACKETS_IN_FLIGHT_PROCESSING builds only)
//   2. picks an eligible queue group round robin, swapping out if there is none
//   3. picks an eligible queue inside that group round robin
//   4. puts a dequeue request for the queue on the scratch ring
//   5. charges one weighted round robin credit to the queue; when the credit is
//      used up it is reloaded from the quantum and the group's queue mask moves
//      on to the next queue
//   6. counts the packet as scheduled and swaps out
//
// NOTE(review): despite their names, a set bit in the "empty" vectors appears to
// mean that the group/queue HAS data (they are initialized to 0 and ANDed with
// the flow control vectors to find work) -- confirm against the QM message
// handler.
//
// NOTE(review): the code assumes ffs() returns the 0-based index of the lowest
// set bit -- confirm against the compiler intrinsic documentation.
//
/////////////////////////////////////////////////////////////////////////////////////////////

INLINE void Schedule()
{

	uint32_t	queueGroupNumber;
	uint32_t 	queueNumber;
	uint32_t 	masterVector;
	uint32_t 	maskedMasterVector;
	uint32_t 	maskedQueueVector;
	uint32_t 	queueVector;
	uint32_t 	queueId;
	uint32_t 	groupMask;
	uint32_t 	creditWord;
	uint32_t	credit;
	SIGNAL		sig_scratch;

	__declspec(sram_write_reg)  uint32_t 		deqMessage;
	__declspec(local_mem shared aligned(16)) 	queue_group_t* pQueueGroup;
	__declspec(local_mem shared) 				queue_t* 		pQueue;

	// set the mask for round robin scheduling to all 1's

	groupMask = 0xffffffff;

	// initialize the scheduler

	_schedule_init();

	// Tell the other threads (contexts 1 to 3) that initialization is done

	SendSignal(1, __signal_number(&sig_initDone));
	SendSignal(2, __signal_number(&sig_initDone));
	SendSignal(3, __signal_number(&sig_initDone));

	// In an infinite loop schedule cframes

	while (1)
	{

#ifdef PACKETS_IN_FLIGHT_PROCESSING

		// First check the packets in flight. if that exceeds a certain limit we
		// will not schedule anything for this beat

		if (globalPacketsInFlight >= MAX_IN_FLIGHT) {

			ctx_swap();
			continue;

		}

#endif

		// compute master bit vector with data AND flowcontrol

		masterVector = globalRootEmptyVector & globalRootFlowControlVector;

		// AND the round robin mask in

		maskedMasterVector = masterVector & groupMask;

		// find the eligible queue group

		if (maskedMasterVector)
			queueGroupNumber = ffs(maskedMasterVector);
		else {

			// If we get here either the group mask needs to be reset or the port
			// vector is empty or totally flow controlled. First try and find the
			// group again as if the mask had been reset to all 1's.

			if (masterVector)
				queueGroupNumber = ffs(masterVector);
			else {

				// nothing to schedule at all : give the other threads a chance

				ctx_swap();
				continue;

			}

		}

		// get a pointer to the queue group structure

		pQueueGroup = &globalQueueGroups[queueGroupNumber];

		// compute new group mask : only groups above the one just picked stay
		// eligible for the next pass

		groupMask = (0xfffffffe << queueGroupNumber);

		// Now read the queue group parameters.

		queueVector = (pQueueGroup->emptyVector) & (pQueueGroup->flowControlVector);

		// AND the queue mask in

		maskedQueueVector = queueVector & pQueueGroup->mask;

		if (maskedQueueVector)
			queueNumber = ffs(maskedQueueVector);
		else {

			// If we get here, then the masked vector was 0. So the queue mask must
			// have excluded every eligible queue, since none of the others can be
			// zero or we would not have scheduled the group. Recompute ffs straight
			// from the unmasked vector. We are guaranteed to find a queue

			queueNumber = ffs(queueVector);

		}

		// get the actual queue id for the dequeue request

		queueId = queueNumber | (queueGroupNumber <<NUMBER_OF_BITS_FOR_GROUP);


		// check if the dequeue request scratch ring is full

		while (inp_state_test(INGRESS_SCHEDULER_DEQUEUE_RING_FULL_VALUE))
			ctx_swap();

		// create the message which is basically just the queue id with MSB bit set

		deqMessage = queueId | (1 << 31);

		// write the data on the scratch ring. We dont wait for it to finish

		scratch_put_ring( &deqMessage,
						  (volatile __declspec(scratch) void*) (INGRESS_SCHEDULER_DEQUEUE_RING_ID << 2),
						  1, sig_done, &sig_scratch);

#ifndef WORKAROUND

		// get a pointer to the queue structure

		pQueue = &globalQueues[queueId];

		// decrement the wrr credit

		pQueue->currentCredit--;

		// check if credit is 0

		if (pQueue->currentCredit == 0)	{

			// if we get here the credit for the queue is 0. So reset credit to
			// credit quantum and move on to the next queue by shifting the mask

			pQueue->currentCredit = pQueue->creditIncrement;

			// compute and update the new queue mask in local memory

			pQueueGroup->mask = (0xfffffffe << queueNumber);

		}

#else

		// Credits are packed two queues to a word :
		//
		//   byte 0 : current credit of the even queue
		//   byte 1 : credit quantum of the even queue
		//   byte 2 : current credit of the odd queue
		//   byte 3 : credit quantum of the odd queue

		creditWord = globalQueues[queueId/2];

		// check if the queue id is odd or even

		if ((queueId & 0x1) == 0)
		{

			// get the credit (byte 0) and decrement it

			credit = (creditWord & 0xff) - 1;

			// check if the credit has gone to zero

			if (credit == 0)
			{

				// reset credit to credit quantum (byte 1)

				credit = (creditWord >> 8) & 0xff;

				// compute and update the new queue mask in local memory

				pQueueGroup->mask = (0xfffffffe << queueNumber);

			}

			// set the credit into byte 0 of the credit word

			globalQueues[queueId/2] = (creditWord & ~0xff) | credit;

		}
		else
		{

			// get the credit (byte 2) and decrement it

			credit = ((creditWord >> 16) & 0xff) - 1;

			// check if the credit has gone to zero

			if (credit == 0)
			{

				// reset credit to credit quantum (byte 3)

				credit = (creditWord >> 24) & 0xff;

				// compute and update the new queue mask in local memory

				pQueueGroup->mask = (0xfffffffe << queueNumber);

			}

			// set the credit into byte 2 of the credit word. Only that byte is
			// replaced : the odd queue's quantum in byte 3 and the even queue's
			// bytes 0 and 1 must be left untouched

			globalQueues[queueId/2] = (creditWord & 0xff00ffff) | (credit << 16);

		}

#endif

		// update the globalPacketsScheduled

		globalPacketsScheduled++;

		// swap out

		ctx_wait(voluntary);

	}	// end while (1)

}


////////////////////////////////////////////////////////////////////////////////////////////
//
// main program for the scheduler
//
// Every context of the microengine enters here. The context number decides which
// role the thread takes on:
//
//   context 0    : schedule cframes
//   context 1    : handle queue manager messages
//   context 2    : handle flow control cframes
//   context 3    : read the tbufs transmitted for packets in flight processing
//   all others   : kill themselves
//
////////////////////////////////////////////////////////////////////////////////////////////

void main(void )
{

	uint32_t 	ctx;		// the thread number

	// The current context is in the low three bits of the ACTIVE_CTX_STS csr

	ctx = (local_csr_read(local_csr_active_ctx_sts)) & 0x7;

	// Dispatch on the thread number. Each function is an infinite loop

	if (ctx == 0) {

		Schedule();

	}
	else if (ctx == 1) {

		SchedulerQmMessageHandler();

	}
	else if (ctx == 2) {

		SchedulerHandleFlowControl();

	}
	else if (ctx == 3) {

		SchedulerReadTbufsTransmitted();

	}
	else {

		// all other threads simply abort

		ctx_wait(kill);

	}

}

/////////////////////////////////////////////////////////////////////////////////////////////


