/////////////////////////////////////////////////////////////////////////////////////
//                                                                      
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  2001 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                      
/////////////////////////////////////////////////////////////////////////////////////
//
// 		File Name: scheduler_qm.uc
// 
// 		Purpose:	  Macros for Egress QM message handling 
//
/////////////////////////////////////////////////////////////////////////////////////
//
// 		History:
//
// 		Date			Comment										By
//		---------------------------------------------------------------------------
//
//		03/17/02		Created										Uday Naik
//
/////////////////////////////////////////////////////////////////////////////////////


#ifndef __EGRESS_SCHEDULER_QM_UC__
#define __EGRESS_SCHEDULER_QM_UC__


/////////////////////////////////////////////////////////////////////////////////////
//
//
// The QM sends messages either on a scratch ring or a NN ring. Each message is 2 
// words. First word is for enqueue, the next for dequeue 
//
// Each word is as follows
//
//						--	Bit 31 : Valid bit. If 0, then dont use this word
//						--	Bit 30 : Enqueue/Dequeue Transition bit
//						--	Bit 29 : Invalid dequeue 
//						--	Bits 27..20 : packet length in CHUNK_SIZE units
//						--  Bits 19..0  : Queue Id
//
// For each beat, there may be an enqueue transition AND a dequeue transition in the
// worst case. 
//
//
/////////////////////////////////////////////////////////////////////////////////////

// Bit number of the transition flag in a QM message word. The message handler
// tests this bit in both the enqueue word and the dequeue word.

#define QM_TRANSITION_BIT           30

// Bit number for detecting valid messages - if set, the message word is valid

#define QM_VALID_MESSAGE_BIT	    31

// Bit number flagging an invalid dequeue notification

#define QM_INVALID_DEQUEUE_BIT		29

/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_qm_get_packet_length()
//
// Description:
// 	
//		Get the packet length from the Queue Manager Message, bits 27..20 
//
// Outputs: 
//
//		out_packet_length:		return length of the packet
//
// Inputs:  
//
//		in_message:				QM Message
//
//
// Size: 
//
//		1 instruction
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_qm_get_packet_length(out_packet_length, in_message)

	// Shift the message right by 20 and keep the low 8 bits (mask 0xff),
	// i.e. message bits 27..20.

	alu_shf[out_packet_length, 0xff, AND, in_message, >>20]

#endm

/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_qm_get_port_number()
//
// Description:
// 	
//		Get the port number from the Queue Manager Message, bits 4..7 
//
// Outputs: 
//
//		out_port_number:		port number from 0..15
//
// Inputs:  
//
//		in_message:				QM Message
//
//
// Size: 
//
//		1 instruction
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_qm_get_port_number(out_port_number, in_message)

	// Shift the message right by 4 and keep the low 4 bits (mask 0xf),
	// i.e. message bits 7..4.

	alu_shf[out_port_number, 0xf, AND, in_message, >>4]

#endm

/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_qm_get_queue_number()
//
// Description:
// 	
//		Get the queue number from the Queue Manager Message, bits 0..3 
//
// Outputs: 
//
//		out_queue_number:		queue number from 0..15 
//
// Inputs:  
//
//		in_message:				QM Message
//
// Size: 
//
//		1 instruction
//
/////////////////////////////////////////////////////////////////////////////////////


#macro _scheduler_qm_get_queue_number(out_queue_number, in_message)

	// Keep the low 4 bits of the message (mask 0xf), i.e. message bits 3..0.

	alu[out_queue_number, 0xf, AND, in_message]

#endm

/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_qm_get_port_lm_offset()
//
// Description:
// 	
//		Get the local memory offset for the port in the QM message
//
// Outputs: 
//
//		out_port_lm_offset:		local memory offset for port in QM message
//
// Inputs:  
//
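//		in_message:				QM Message
//		in_port_lm_base			Base in local memory for port data structures
//								(only used when PORT_LM_BASE_NOT_ZERO is defined)
//		in_port_lm_mask			Mask used to extract offset from QM message 
//
// Size: 
//
//		1 instruction (2 when PORT_LM_BASE_NOT_ZERO is defined)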
//
/////////////////////////////////////////////////////////////////////////////////////


#macro _scheduler_qm_get_port_lm_offset(out_port_lm_offset, in_message, in_port_lm_base, in_port_lm_mask) 

 	// The port number is bits 4..7. Each entry is 32 bytes (left shift of 5). 
	// Combining the two gives us a left shift of 1 and a mask of 0x1e0 for 
	// zeroing out other bits. The mask itself is supplied by the caller in
	// in_port_lm_mask.

	alu_shf[out_port_lm_offset, in_port_lm_mask, AND, in_message, <<1 ]
#ifdef PORT_LM_BASE_NOT_ZERO	
	// Add the local memory base. This second instruction, and any use of
	// in_port_lm_base, exists only when PORT_LM_BASE_NOT_ZERO is defined, so
	// the macro is 1 or 2 instructions long depending on the build.
	alu[out_port_lm_offset, out_port_lm_offset, +, in_port_lm_base]
#endif

#endm

/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_qm_get_queue_lm_offset[]
//
// Description:
// 	
//		Get the local memory offset for the queue in the QM message
//
// Outputs: 
//
//		out_queue_lm_offset:		local memory offset for queue in QM message
//
// Inputs:  
//
//		in_message:					QM Message
//		in_queue_lm_mask			Mask used to extract offset from QM message 
//		in_queue_lm_base			Base in local memory for queue data structure
//
// Size: 
//
//		2 instruction
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_qm_get_queue_lm_offset(out_queue_lm_offset, in_message, \
										in_queue_lm_base, in_queue_lm_offset_mask)

	// Compute the queue local memory offset. We use the queue id (0..255) and
	// multiply it by the size of each entry (8 bytes) to compute the local memory 
	// offset. The queue id is in the low 8 bits of the message. We left shift by 3
	// for the 8 byte multiply, which puts the lm offset in the low 11 bits; the
	// caller-supplied mask zeroes out everything else.

	alu_shf[out_queue_lm_offset, in_queue_lm_offset_mask, AND, in_message, <<3]

	// Add the local memory base (always done, unlike the port offset macro)

	alu[out_queue_lm_offset, out_queue_lm_offset, +, in_queue_lm_base] 	

#endm


/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_qm_handle_enq_transition()
//
// Description:
// 	
//		Handle an enq transition message from the QM
//
// Inputs:  
//
//		in_message:				QM Message
//		in_port_lm_mask			Mask used to extract port lm offset from QM message 
//
// Size: 
//
//		10 instruction
//
/////////////////////////////////////////////////////////////////////////////////////


#macro _scheduler_qm_handle_enq_transition[in_message, in_port_lm_mask]

.begin

	.reg 	port_lm_offset		// offset in local memory for port data structure
	.reg	queue_number		// queue number in enq transition message
	.reg	queue_bitmask		// temporary variable storing 1 << queue_number
	.reg	port_number			// port number in enq transition message
	.reg	port_bitmask		// temporary variable storing 1 << port_number

	// Compute queue group/port structure local memory offset.
	// NOTE: port_lm_base is not a parameter of this macro; it is taken from
	// whatever register of that name is in scope where the macro is expanded
	// (the message handler in this file declares one).

	_scheduler_qm_get_port_lm_offset(port_lm_offset, in_message, port_lm_base, in_port_lm_mask) 

	// Set up port offset index into local memory. 3 cycle latency for this to take
	// effect. *l$index1 is first used further down, after the bitmask computations.

	localmem_set_address(0, port_lm_offset, LM_HANDLE_1)

	// compute the queue number within the port/queue group

	_scheduler_qm_get_queue_number(queue_number, in_message)

	// Compute 1 << queue_number. The first instruction discards its result and
	// only presents queue_number for the <<indirect shift that follows.

	alu[--, queue_number, OR, 0]
	alu_shf[queue_bitmask, -- , B, 1, <<indirect]

	// compute the queue group (port) number; the extraction mask is 0xf, so 0..15

	_scheduler_qm_get_port_number(port_number, in_message)

	// Compute 1 << port_number. Can't use a global in the alu_shf instruction,
	// so the bitmask is built in a local register first.

	alu[--, port_number, OR, 0]
	alu_shf[port_bitmask, -- , B, 1, <<indirect]

	// Set the bit for this queue in the queue empty vector for the group 

	alu[*l$index1[PORT_QUEUE_EMPTY_VECTOR_INDEX], \
		*l$index1[PORT_QUEUE_EMPTY_VECTOR_INDEX] , OR , queue_bitmask]

	// Also if the bit for this group was 0 in the parent vector, we need to set it.
	// Rather than check for the bit being 0, we will simply set the bit.
	// @port_empty_vector is not declared in this file section.
	
	alu[ @port_empty_vector, @port_empty_vector, OR, port_bitmask]

.end		
#endm

/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_qm_handle_deq_transition[]
//
// Description:
// 	
//		Handle a deq transition message from the QM
//
// Inputs:  
//
//		in_message:					QM Message
//		in_queue_bitmask			1 << queue_number 
//
// Size: 
//
//		6 instruction
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_qm_handle_deq_transition[in_message, in_queue_bitmask]

.begin

	.reg port_number		// port number for deq transition in qm message
	.reg port_bitmask 		// 1 << port_number

	// Clear this queue's bit in the port's queue empty vector. *l$index1 must
	// already address the port structure; this macro does not set it up.

	alu[*l$index1[PORT_QUEUE_EMPTY_VECTOR_INDEX], \
		*l$index1[PORT_QUEUE_EMPTY_VECTOR_INDEX], AND~, in_queue_bitmask]

	// If the vector is still non-zero after the clear, other bits remain set for
	// this port and the parent vector is left alone. Only when the vector is now
	// zero do we fall through and clear the port's bit in the parent.

	bne[HANDLE_DEQUEUE_DONE#] 
	
	// compute the port/queue group number 

	_scheduler_qm_get_port_number(port_number, in_message)

	// Compute 1 << port_number. The first instruction discards its result and
	// only presents port_number for the <<indirect shift that follows.

	alu[--, port_number, OR, 0]
	alu_shf[port_bitmask, --, B, 1, <<indirect]

	// Now clear the bit in the global empty vector.

	alu[@port_empty_vector, @port_empty_vector, AND~, port_bitmask]

HANDLE_DEQUEUE_DONE#:

.end
#endm


/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_qm_adjust_credit[]
//
// Description:
// 	
//		Adjusts the DRR credit for a queue. If queue is negative it marks it as non 
//		schedulable and gives it another round of credit. If all queues are not 
//		schedulable, the DRR round is over and all queues are again schedulable
//
// Outputs: 
//
//								None
// Inputs:  
//
//		in_packet_length:			length of the current packet 
//		in_queue_bitmask			1 << queue_number 
//		in_q_mask					all 1s stored in a register
//
// Constants
//
//		EXIT_LABEL 					Label to branch to 
//
// Size: 
//
//		8 instruction 
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_qm_adjust_credit(in_packet_length, in_queue_bitmask, \
								   in_q_mask, EXIT_LABEL)

.begin 

	// Charges in_packet_length against the queue's current credit. If the credit
	// is no longer positive, the queue's bit is cleared in the port schedule
	// vector and one credit increment is added back. Either way the macro ends
	// by swapping out and branching to EXIT_LABEL.
	//
	// Expects *l$index0 to address the queue structure and *l$index1 to address
	// the port structure; neither is set up here.
	//
	// NOTE(review): in_q_mask is accepted for interface compatibility but is not
	// referenced in this macro, so no end-of-round reset of the schedule vector
	// happens here - confirm whether that is handled elsewhere.

	.reg	credit_increment		// credit increment for queue

	// decrement the credit. 

	alu[*l$index0[QUEUE_CURRENT_CREDIT_INDEX],*l$index0[QUEUE_CURRENT_CREDIT_INDEX],\
		-, in_packet_length]

	// If credit is still positive, we are done 

	bgt[ADJUST_CREDIT_DONE#] 

	// if credit is not positive, we need to mark the queue as non schedulable and 
	// give it another round of credit 

	// mark queue as non schedulable

	alu[*l$index1[PORT_SCHEDULE_VECTOR_INDEX],*l$index1[PORT_SCHEDULE_VECTOR_INDEX], \
			AND~, in_queue_bitmask]

	// read credit increment into GPR 

	alu[credit_increment, --, B, *l$index0[QUEUE_CREDIT_INCREMENT_INDEX]]

	// Give a round of credit 	

	alu[*l$index0[QUEUE_CURRENT_CREDIT_INDEX],*l$index0[QUEUE_CURRENT_CREDIT_INDEX],\
			+, credit_increment]


ADJUST_CREDIT_DONE#:

	// Swap out voluntarily and continue at the caller-supplied label; control
	// never falls out of the bottom of this macro.

	ctx_arb[voluntary] , br [EXIT_LABEL]

.end

#endm  
	
///////////////////////////////////////////////////////////////////////////////////// 
//
// 	drain_nn_ring: Remove any junk data that may be present in the nn ring.
//
//	Problem: When using Core Components (xscale) the application fails to work
//	when the system is "powered ON" for the first time. (it works fine for subsequent
//	"reset").
//
//	This is caused by initialisation sequence in CC where there is considerable delay
//	between loading Microcode in "each" microengine. For some reason this causes the 
//	next neighbour ring between QM and scheduler to be non-empty (junk data) causing 
//	scheduler to read this data. Depending on this junk data, scheduler may go thro'
//	"invalid dequeue" path where it decrements "packets_scheduled", which was initialised
// 	to 0, which makes it negative. This leads to packets_in_flight crossing its threshold 
//	preventing scheduler from transmitting any packets.
//
//	Until the problem is resolved in CC (or resource manager) the workaround is to 
//	simply drain the nn ring of any junk data.
//
//	Note: This problem doesn't arise when downloading this app using Workbench or when
//	running in simulation mode.
/////////////////////////////////////////////////////////////////////////////////////

#macro drain_nn_ring[]
.begin

	// Expands to nothing unless USE_IMPORT_VAR is defined.

#ifdef	USE_IMPORT_VAR

	//	This fix is only when using Core Components on IXDP2400.

loop#:

	// Leave the loop as soon as the NN ring reports empty

	br_inp_state[NN_EMPTY, end#]

	// Read one entry and discard it, then yield before checking again

	alu[--, --, B, *n$index++]			; drain nn one LW at a time
	ctx_arb[voluntary]					; this helps drain the nn completely.

	br[loop#]

end#:

#endif	//	USE_IMPORT_VAR

.end
#endm
		
/////////////////////////////////////////////////////////////////////////////////////
//
// _scheduler_qm_message_handler()
//
// Description:
// 	
//		Thread that handles QM messages
//
// Size: 
//
//		40 instruction 
//
/////////////////////////////////////////////////////////////////////////////////////


#macro _scheduler_qm_message_handler()

.begin

	// Endless loop: read a two-word QM message from the NN ring (enqueue word
	// first, dequeue word second), update the port/queue vectors for any
	// transition, and adjust the queue's credit for a valid dequeue.

	.reg  	message					// QM  message read from ring
	.reg	port_lm_mask			// Mask to get lm offset for port struct
  	.reg	port_lm_base			// Base in local mem for port data structures
	.reg	queue_lm_mask			// Mask to get local mem offset for queue struct
	.reg	queue_lm_base			// Base in local mem for queue data structures
	.reg	packet_length			// Packet Length
	.reg	queue_bitmask			// (1 << queueNumber)
	.reg	port_lm_offset			// offset into local mem for port data structure
	.reg	queue_lm_offset 		// offset into local mem for queue data structure
	.reg	queue_number			// queue number 
	.reg	q_mask					// constant that stores \
									// (1 << number_of_queues_per_port) - 1

	// Compute port local memory mask. Set this up in a register so we dont waste 
	// cycles computing it 

	immed[port_lm_mask, 0x1e0]
	immed[port_lm_base, PORT_LM_BASE_OFFSET]

	// Queue Local Memory Mask and base. Constants stored in registers to save 
	// instructions

	immed[queue_lm_mask, 0x7ff]
	immed[queue_lm_base, QUEUE_LM_BASE_OFFSET]

	// register that stores (1 << number_of_queues_per_port) - 1 

	immed32[q_mask, QUEUE_MASK]

	drain_nn_ring[]					// Workaround: See the macro for details

QM_SWAP_OUT#:

	// Swap out. The first time the thread runs it will swap out immediately

	ctx_arb[voluntary]

	// First Read the QM message from a scratch or NN ring. In this case we 
	// use a NN ring

CHECK_NN_EMPTY#:

	// Check if the ring is empty. If it is, branch back to swap out; otherwise
	// fall through and read a message

	br_inp_state[NN_EMPTY, QM_SWAP_OUT#]

	// Read the enqueue message

	alu[message, --, B, *n$index++] 

	// Check if it is an enqueue transition. Only the transition bit is tested for
	// this word; the valid bit is not.
	// NOTE(review): presumably the QM leaves the transition bit clear in an
	// invalid enqueue word - confirm.

	br_bclr[message, QM_TRANSITION_BIT, CHECK_INVALID_DEQUEUE#]

	// Handle the enqueue transition. The macro also reads this scope's
	// port_lm_base register by name.

	_scheduler_qm_handle_enq_transition(message, port_lm_mask)


CHECK_INVALID_DEQUEUE#:

	// Read the dequeue message 

	alu[message, --, B, *n$index++] 

	// Check if it is a valid message - if not go back to the top of the loop

	br_bclr[message, QM_VALID_MESSAGE_BIT, CHECK_NN_EMPTY#] 

	// compute queue structure local memory offset. 2 instructions

	_scheduler_qm_get_queue_lm_offset(queue_lm_offset, message, queue_lm_base, \
									  queue_lm_mask)

	// Check whether this is an invalid dequeue notification. The three macro
	// calls that follow are intended to fill the three defer slots.
	// NOTE(review): _scheduler_qm_get_port_lm_offset expands to two instructions
	// when PORT_LM_BASE_NOT_ZERO is defined, which would not leave room for both
	// localmem_set_address calls in the defer slots - confirm that configuration.

	br_bset[message, QM_INVALID_DEQUEUE_BIT, HANDLE_INVALID_DEQUEUE#] , defer[3]

	// defer 1 - compute port local memory offset

	_scheduler_qm_get_port_lm_offset(port_lm_offset, message, port_lm_base, port_lm_mask)

	// defer 2 - Set up port offset index into local memory. 3 cycle latency for this to 
	// take effect. 

	localmem_set_address(0, port_lm_offset, LM_HANDLE_1)

	// defer 3 - Set up queue offset index into local memory. 3 cycle latency for this to
	// take effect. 

	localmem_set_address(0, queue_lm_offset, LM_HANDLE_0)

	// Check if it is a dequeue transition. If not go straight to adjust credit.
	// queue_bitmask is computed in the defer slots, so it is available on both
	// the branch-taken and fall-through paths.

	br_bclr[message, QM_TRANSITION_BIT, ADJUST_CREDIT#] , defer[3]

	// defer 1 - Compute queue number
	 
	_scheduler_qm_get_queue_number(queue_number, message)

	// defer 2 and 3 - Compute (1 << queueNumber)

	alu[--, queue_number, OR, 0]
	alu_shf[queue_bitmask, --, B, 1, <<indirect]
	
	// Now handle the dequeue transition 

	_scheduler_qm_handle_deq_transition(message, queue_bitmask)

ADJUST_CREDIT#:

	// get the packet length from the message 

	_scheduler_qm_get_packet_length(packet_length, message)

	// Adjust the DRR credit. The macro ends with a swap-out and an unconditional
	// branch to CHECK_NN_EMPTY#, so control does not fall through to the label
	// below.

	_scheduler_qm_adjust_credit(packet_length, queue_bitmask, q_mask, CHECK_NN_EMPTY#)


HANDLE_INVALID_DEQUEUE#:

	// This message is sent in response to the scheduler issuing a schedule request 
	// on an empty queue. Decrement packets scheduled for this port. We assume that 
	// we should have already got a deq transition message. Then swap out and go to 
	// checking if ring is empty

	ctx_arb[voluntary] , br [CHECK_NN_EMPTY#] , defer[1]
	
	// defer 1 - decrement packets scheduled 

	alu[*l$index1[PORT_PACKETS_SCHEDULED_INDEX], \
		*l$index1[PORT_PACKETS_SCHEDULED_INDEX], -, 1]


.end

#endm
	
/////////////////////////////////////////////////////////////////////////////////////
	

#endif 		// __EGRESS_SCHEDULER_QM_UC__


/////////////////////////////////////////////////////////////////////////////////////
