/********************************************************************************************
 *                                                                      
 *                  I N T E L   P R O P R I E T A R Y                   
 *                                                                      
 *     COPYRIGHT (c)  2001 BY  INTEL  CORPORATION.  ALL RIGHTS          
 *    RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
 *    BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
 *    RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
 *    LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
 *    MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
 *    THE PRIOR WRITTEN PERMISSION OF :                                
 *                                                                     
 *                       INTEL  CORPORATION                            
 *                                                                    
 *                    2200 MISSION COLLEGE BLVD                        
 *                                                                     
 *              SANTA  CLARA,  CALIFORNIA  95052-8119                  
 *                                                                
 *
 *******************************************************************************************/

#ifndef __SCHEDULER_H__
#define __SCHEDULER_H__

#include <dl_system.h>

/******************************************************************************************/
/*
 *	ISO C99: 7.18 Integer types <stdint.h> : For now define them here
 */

#ifndef _STDINT_H
#define _STDINT_H   1

/*
 * Minimal stand-in for the C99 <stdint.h> exact-width integer types.
 *
 * The section is skipped whenever _STDINT_H is already defined (for example by
 * a system <stdint.h> that uses the same guard name), and it defines the guard
 * itself so the typedefs are emitted at most once.
 *
 * NOTE(review): the typedefs assume char/short/int/long long are
 * 8/16/32/64 bits wide on the target compiler - confirm for any new toolchain.
 */

/* Exact integral types.  */
/* Signed.  */

typedef signed char     int8_t;
typedef short int       int16_t;
typedef int             int32_t;
typedef long long int   int64_t;

/* Unsigned.  */

typedef unsigned char           uint8_t;
typedef unsigned short int      uint16_t;
typedef unsigned int            uint32_t;
typedef unsigned long long int  uint64_t;

#endif /* _STDINT_H */

/*
 * INLINE: qualifier placed on functions that should always be inlined. The
 * build may supply its own definition; otherwise it defaults to the
 * compiler's __forceinline keyword.
 */
#if !defined(INLINE)
#define INLINE __forceinline
#endif

/*
 * WORKAROUND: when defined, the per-queue credit storage declared later in
 * this header uses the word-packed queue_t representation instead of the
 * byte-structure one.
 */
#define WORKAROUND

/*******************************************************************************************
 *
 * Constants for the algorithm. These should be replaced with import vars to make the code
 * more configurable
 *
 *******************************************************************************************/

/* Number of queue groups (first level of the hierarchical bit vector). */
#define NUMBER_OF_QUEUE_GROUPS          32

/* Number of queues contained in each queue group. */
#define NUMBER_OF_QUEUES_PER_GROUP      32

/*
 * A queue id encodes both the queue group and the queue within that group:
 * the least-significant NUMBER_OF_BITS_FOR_QUEUE bits select the queue and the
 * next NUMBER_OF_BITS_FOR_GROUP bits select the queue group.
 */
#define NUMBER_OF_BITS_FOR_QUEUE        5
#define NUMBER_OF_BITS_FOR_GROUP        5

/*
 * Local-memory location of the per-queue data structures. They are stored
 * after the per-queue-group bit vectors, which occupy 16 bytes per queue group.
 */
#define QUEUE_LM_BASE                   (NUMBER_OF_QUEUE_GROUPS * 16)

/* Maximum number of packets (C-Frames actually) in flight. */
#define MAX_IN_FLIGHT                   32


/********************************************************************************************
 *
 * Global Data Structures
 *
 *******************************************************************************************/

/*
 *	globalRootEmptyVector		: Root vector with 1 bit per queue group
 *								  (NUMBER_OF_QUEUE_GROUPS = 32 bits, one uint32_t).
 *								  If that bit is 1, then the queue group has data;
 *								  despite the name, a set bit means "not empty".
 *
 *	Declaration only (extern); the object itself is defined in another file.
 */

extern __declspec(shared) uint32_t globalRootEmptyVector;		

/*
 *	globalRootFlowControlVector	: Root vector with 1 bit per queue group. If that bit
 *								  is 1, then the queue group is not flow controlled.
 *
 *	Declaration only (extern); the object itself is defined in another file.
 */

extern __declspec(shared) uint32_t globalRootFlowControlVector;	
				

/*
 *	globalPacketsInFlight		: Number of c-frames in flight (scheduled but not
 *								  transmitted).
 *								  NOTE(review): presumably kept at or below
 *								  MAX_IN_FLIGHT by the scheduler - confirm in the
 *								  implementation.
 */

extern __declspec(shared) uint32_t globalPacketsInFlight;

/*
 *	globalPacketsScheduled		: Number of c-frames scheduled by the scheduler.
 */

extern __declspec(shared) uint32_t globalPacketsScheduled;	

/*
 *	Signal for initial synchronization between threads.
 *
 *	NOTE(review): unlike the other globals in this header, this object is not
 *	declared `extern`, so every source file that includes the header gets its own
 *	(tentative) definition of sig_initDone. Confirm that this is what the toolchain
 *	expects for SIGNAL objects; if not, define it in exactly one source file and
 *	declare it `extern` here.
 */

SIGNAL sig_initDone;		

/********************************************************************************************
 *
 *	Per-queue-group state kept in local memory: four 32-bit words for each queue
 *	group (32 groups * 4 words = 128 words in total).
 *
 *******************************************************************************************/

typedef struct __queue_group_struct {
	uint32_t emptyVector;        /* bit vector: which queues have data */
	uint32_t flowControlVector;  /* bit vector: which queues are not flow controlled */
	uint32_t mask;               /* mask used to Round Robin among the queues */
	uint32_t reserved;           /* reserved */
} queue_group_t;

/*
 * Array of queue groups in local memory: one queue_group_t (four 32-bit words,
 * 16 bytes) per queue group, NUMBER_OF_QUEUE_GROUPS entries, declared with
 * 16-byte alignment. Declaration only (extern); the array is defined elsewhere.
 *
 * NOTE(review): QUEUE_LM_BASE is computed as 16 bytes per queue group, which
 * presumably assumes this array sits at the start of local memory - confirm.
 */

extern __declspec(local_mem shared aligned(16)) queue_group_t 
												globalQueueGroups[NUMBER_OF_QUEUE_GROUPS];

/*******************************************************************************************
 *
 *	For each queue there are 2 bytes in local memory (1024 queues * 2 bytes = 512 words)
 *
 *	queueCurrentCredit			: Current Credit for the queue  (1 byte)
 *
 *	queueCreditIncrement        : The credit increment to be given to a queue at the end of
 *                                every round (1 byte)
 *
 *	Since we are doing WRR, the credit is in # of c-frames
 *
 *******************************************************************************************/

#ifdef WORKAROUND

/*
 * Word-packed representation. Since the compiler does not do well with byte
 * structures, the credits are kept in 32-bit words: each word holds the credit
 * bytes (current credit and credit increment) of two queues, so the array has
 * half as many words as there are queues. The byte accesses are done by hand
 * in the scheduler code.
 */

typedef uint32_t queue_t;

extern __declspec(local_mem shared) queue_t
			globalQueues[(NUMBER_OF_QUEUE_GROUPS * NUMBER_OF_QUEUES_PER_GROUP) / 2];

#else

/* Byte-structure representation: one two-byte record per queue. */

typedef struct __queue_struct {
	uint8_t currentCredit;    /* current credit for the queue, in c-frames */
	uint8_t creditIncrement;  /* credit given to the queue at the end of every round */
} queue_t;

/* Array of queue structures in local memory, one entry per queue. */

extern __declspec(local_mem shared) queue_t
			globalQueues[NUMBER_OF_QUEUE_GROUPS * NUMBER_OF_QUEUES_PER_GROUP];

#endif /* WORKAROUND */

/********************************************************************************************
 *
 * There are 64 ports and 16 classes per port. Ideally we would do Round Robin among ports 
 * and WRR on the queues in a port. This would require us to keep 2 32 bit parent vectors 
 * each with 32 16-bit vectors at the next level. 
 *
 * We have flattened this out. We keep one 32 bit vector for the parent and  32 32-bit 
 * vectors at the next level. This means that in each 32 bit vector on the leaf level, there
 * are queues from 2 ports. To keep the scheduling property the same as the one stated above
 * we will do the following. The sum of the credit quantums for the 16 queues on each of the
 * 2 ports must be the same. For e.g. if port 1 and 2 share the 32 bit vector, then the sum 
 * of the total credit for port 1 must equal the sum of total credit for port 2. This ensures 
 * that we are doing round robin among port 1 and port 2
 *
 ********************************************************************************************/ 

 /* function prototypes */

/********************************************************************************************/

/*
 * Scheduler entry points. Both are implemented outside this header, take no
 * arguments and return no value.
 *
 * NOTE(review): judging by the names only, SchedulerHandleFlowControl processes
 * flow-control updates and SchedulerQmMessageHandler processes messages from
 * the queue manager - confirm against the implementation.
 */
void SchedulerHandleFlowControl(void);
void SchedulerQmMessageHandler(void);

/********************************************************************************************/

#endif

/********************************************************************************************/

