///////////////////////////////////////////////////////////////////////
//                                                                   
//                  I N T E L   P R O P R I E T A R Y                
//                                                                   
//     COPYRIGHT (c)  2001-2002 BY  INTEL  CORPORATION.  ALL RIGHTS  
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY   
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A 
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER 
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL, 
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT 
//     THE PRIOR WRITTEN PERMISSION OF :                             
//                                                                   
//                        INTEL  CORPORATION                         
//                                                                   
//                     2200 MISSION COLLEGE BLVD                     
//                                                                   
//               SANTA  CLARA,  CALIFORNIA  95052-8119               
//                                                                   
///////////////////////////////////////////////////////////////////////
//                                                                   
//                                                                   
//      Filename: rr_scheduler_util.uc
//                                                                   
//      Description: This is a round robin scheduler used by the IPv4
//                   AAL5 Egress OC48 and quad OC12 applications.
//                   For a TM 4.1 scheduler, refer to the ATM diffserv
//                   application.
//
//      History:
//
//      07/22/2003  Created                               asv
//
///////////////////////////////////////////////////////////////////////

#ifndef _RR_SCHEDULER_UTIL_UC_
#define _RR_SCHEDULER_UTIL_UC_

#include <stdmac.uc>
#include <dl_system.h>
#include <rr_scheduler.h>

//------------------------------------------------------------------
// _rr_scheduler_init()
//
//    Description: One-time setup of the scheduler state.
//                 - Zeroes the cell count of all 128 queue structures
//                   in local memory.
//                 - Resets all 4 group structures in local memory
//                   (group vector = 0, group mask = all 1's).
//                 - Programs the NN ring: sets bits 19:18 of
//                   ctx_enables to 1 and zeroes nn_get / nn_put.
//
//    Parameters:  None.
//
//    Uses:        QUEUE_STRUCTURE_LM_BASE, _group_structure_lm_base,
//                 LM_QUEUE_CELL_COUNT, LM_GROUP_VECTOR, LM_GROUP_MASK
//                 (all defined outside this macro; structure formats
//                 are described in rr_scheduler.h).
//
//    Modifies:    active_lm_addr_0, ctx_enables, nn_get, nn_put.
//
//------------------------------------------------------------------
#macro _rr_scheduler_init()
.begin

	.reg entry_idx lm_byte_addr
	.reg ctx_enables_val

	; ---- Queue structures ---------------------------------------
	; Walk all 128 queue structures. Entries are 4 bytes apart
	; (index << 2) starting at QUEUE_STRUCTURE_LM_BASE.

	immed[entry_idx, 0]

	.while(entry_idx < 128)

		alu[lm_byte_addr, QUEUE_STRUCTURE_LM_BASE, or, entry_idx, <<2]
		local_csr_wr[active_lm_addr_0, lm_byte_addr]

		; Three filler instructions: *l$index0 only points at the new
		; address 3 instructions after the CSR write.

		alu[--, --, b, 0]
		alu[--, --, b, 0]
		alu[--, --, b, 0]

		; Every queue starts with no cells.

		alu[*l$index0[LM_QUEUE_CELL_COUNT], --, b, 0]

		alu[entry_idx, entry_idx, +, 1]

	.endw

	; ---- Group structures ---------------------------------------
	; Walk all 4 group structures. Entries are 8 bytes apart
	; (index << 3) starting at _group_structure_lm_base.

	immed[entry_idx, 0]

	.while(entry_idx < 4)

		alu[lm_byte_addr, _group_structure_lm_base, or, entry_idx, <<3]
		local_csr_wr[active_lm_addr_0, lm_byte_addr]

		; Same 3-instruction wait as above before touching *l$index0.

		alu[--, --, b, 0]
		alu[--, --, b, 0]
		alu[--, --, b, 0]

		; No queue in the group is non-empty yet.

		alu[*l$index0[LM_GROUP_VECTOR], --, b, 0]

		; Start with a mask that lets every queue of the group through.

		alu[*l$index0[LM_GROUP_MASK], --, ~b, 0]

		alu[entry_idx, entry_idx, +, 1]

	.endw


	; ---- NN ring ------------------------------------------------
	; Read-modify-write ctx_enables: clear the 2-bit field at bits
	; 19:18 and set it to 1. This is the NN_RING_EMPTY threshold of
	; 1 LW (or less), chosen because every incoming message is 2 LW's.
	; The immed[] directly after local_csr_rd[] is the instruction
	; that receives the CSR value; keep the two adjacent.

	local_csr_rd[ctx_enables]
	immed[ctx_enables_val, 0]
	alu_shf[ctx_enables_val, ctx_enables_val, AND~, 3, <<18]
	alu_shf[ctx_enables_val, ctx_enables_val, OR, 1, <<18]
	local_csr_wr[ctx_enables, ctx_enables_val]

	; Reset both NN ring pointers.

	local_csr_wr[nn_get, 0]
	local_csr_wr[nn_put, 0]

	; Give the NN ring CSR writes 3 cycles to settle before returning.

	alu[--, --, b, 0]
	alu[--, --, b, 0]
	alu[--, --, b, 0]

.end
#endm


//------------------------------------------------------------------
// _rr_scheduler_read_nn_ring()
//
//    Description: Read one incoming request (two longwords) from the
//                 NN ring, or branch away when the ring is reported
//                 empty.
//
//    Parameters:
//      out_enqueue_message - receives the first longword read.
//      out_dequeue_message - receives the second longword read.
//      NN_EMPTY_LABEL      - branch target taken when the NN_EMPTY
//                            input state is asserted; neither output
//                            is written in that case.
//
//------------------------------------------------------------------
#macro _rr_scheduler_read_nn_ring(out_enqueue_message, out_dequeue_message, NN_EMPTY_LABEL)
.begin

	; Nothing to read: leave through the caller-supplied label.

	br_inp_state[NN_EMPTY, NN_EMPTY_LABEL]

	; Pop the request. The order of these two reads defines which
	; longword is the enqueue message and which is the dequeue message;
	; each read advances the NN ring read index (*n$index++).

	alu[out_enqueue_message, --, b, *n$index++]
	alu[out_dequeue_message, --, b, *n$index++]

.end
#endm


//------------------------------------------------------------------
// _rr_scheduler_handle_lbr_enqueue()
//
//    Description: Low bit rate enqueue handling. When the enqueue
//                 message has both of its top two bits (31:30, the
//                 enqueue valid and enqueue transition bits) set, a
//                 schedule request for the queue is put on the
//                 outgoing scratch ring. Otherwise nothing is done.
//
//    Parameters:
//      in_enqueue_message           - enqueue message to examine.
//      inout_schedule_queue_sigmask - signal mask; the bit for
//                                     schedule_queue_sig_1 is ORed in
//                                     when a request is issued.
//
//    Uses:        $schedule_queue_1, schedule_queue_sig_1,
//                 out_scr_ring and Ox8003FFFF, all declared outside
//                 this macro.
//
//------------------------------------------------------------------
#macro _rr_scheduler_handle_lbr_enqueue(in_enqueue_message, inout_schedule_queue_sigmask)
.begin

	.reg enq_flags

	; Isolate bits 31:30 and require both to be set (value 0x3).
	; Anything else means there is nothing to schedule.

	alu[enq_flags, --, b, in_enqueue_message, >>30]
	alu[--, 0x3, -, enq_flags]
	bne[nothing_to_schedule#]

	; Build the ring entry directly from the message: keep bit 31
	; (valid), bits 17:16 (output port number) and bits 15:0 (queue
	; number), drop everything else.
	; NOTE(review): Ox8003FFFF is spelled with the letter 'O', so it is
	; a symbol rather than a literal - presumably a register declared
	; elsewhere that holds 0x8003FFFF; confirm against its definition.

	alu[$schedule_queue_1, Ox8003FFFF, and, in_enqueue_message]

	; Spin until the outgoing scratch ring has room, then put one
	; longword on it.

wait_for_ring_space#:

	br_inp_state[OUT_SCR_RING_FULL, wait_for_ring_space#]

	scratch[put, $schedule_queue_1, out_scr_ring, 0, 1], sig_done[schedule_queue_sig_1]

	; Record the completion signal of the put in the caller's mask.

	alu[inout_schedule_queue_sigmask, inout_schedule_queue_sigmask, or, 1, <<&schedule_queue_sig_1]

nothing_to_schedule#:

.end
#endm


//------------------------------------------------------------------
// _rr_scheduler_handle_lbr_dequeue()
//
//    Description: Handle low bit rate dequeue case. When bits 31:29
//                 of the dequeue message are 100b (valid bit set,
//                 dequeue transition bit clear, invalid dequeue bit
//                 clear), a schedule request for the queue is put on
//                 the outgoing scratch ring. Otherwise nothing is done.
//
//    Parameters:
//      in_dequeue_queue_number      - queue number the message is for.
//      in_dequeue_message           - dequeue message to examine.
//      inout_schedule_queue_sigmask - signal mask; the bit for
//                                     schedule_queue_sig_2 is ORed in
//                                     when a request is issued.
//
//    Uses:        $schedule_queue_2, schedule_queue_sig_2,
//                 out_scr_ring and TX_PHY_MODE, all declared outside
//                 this macro.
//
//    Assembly fails with #error if TX_PHY_MODE is not one of
//    SPHY_4_8, MPHY_4 or SPHY_1_32, because no output port number
//    would be generated for any other mode.
//
//------------------------------------------------------------------
#macro _rr_scheduler_handle_lbr_dequeue(in_dequeue_queue_number, in_dequeue_message, inout_schedule_queue_sigmask)
.begin

	.reg output_port_number
	.reg schedule_queue_2

	; Check if valid bit is set, dequeue transition bit is not set and
	; invalid dequeue bit is not set (bits 31:29 == 100b).

	.reg tmp
	alu[tmp, --, b, in_dequeue_message, >>29]
	alu[--, 0x4, -, tmp]
	bne[end#]

	; Valid bit is set, dequeue transition bit is not set and invalid dequeue bit is not set;
	; schedule a dequeue request for this queue.


	#if ((TX_PHY_MODE == SPHY_4_8) || (TX_PHY_MODE == MPHY_4))

	; Set output port number using bits 1:0 of queue number.
	; Refer to rr_scheduler.h for details.

	alu[output_port_number, 0x3, and, in_dequeue_queue_number]

	#else
	#if (TX_PHY_MODE == SPHY_1_32)

	; Set output port number to 0.

	alu[output_port_number, --, b, 0]

	#else

	; Any other mode would leave output_port_number unwritten and OR
	; an undefined value into the ring entry below; refuse to assemble.

	#error "_rr_scheduler_handle_lbr_dequeue: unsupported TX_PHY_MODE"

	#endif
	#endif

	; Consolidate queue_number and output port number: the port number
	; is placed at bit 16, above the queue number.

	alu[schedule_queue_2, in_dequeue_queue_number, or, output_port_number, <<16]

	; $schedule_queue_2 has valid bit 31 set, bits 17:16 contain the output port number
	; and bits 15:0 contain the queue number.

	alu[$schedule_queue_2, schedule_queue_2, or, 1, <<31]

	; Write request to outgoing scratch ring; spin while the ring is full.

write_request#:

	br_inp_state[OUT_SCR_RING_FULL, write_request#]

	scratch[put, $schedule_queue_2, out_scr_ring, 0, 1], sig_done[schedule_queue_sig_2]

	; Add scratch signal to signal mask.

	alu[inout_schedule_queue_sigmask, inout_schedule_queue_sigmask, or, 1, <<&schedule_queue_sig_2]

end#:

.end
#endm


//------------------------------------------------------------------
// _rr_scheduler_set_queue_status()
//
//    Description: Set a queue to either empty or non-empty by
//                 updating its bit in the group vector (local memory)
//                 and, when required, the group's bit in @root_vector.
//
//    Parameters:
//      STATUS          - assembly-time token, NON_EMPTY or EMPTY.
//                        Any other value stops assembly with #error.
//      in_queue_number - queue number; bits 4:0 select the bit inside
//                        the group vector, bits 6:5 select the group.
//
//    Behaviour:
//      NON_EMPTY - sets the queue bit in the group vector and sets the
//                  group bit in @root_vector.
//      EMPTY     - clears the queue bit in the group vector; clears
//                  the group bit in @root_vector only if the group
//                  vector became 0.
//
//    Modifies:    active_lm_addr_0, @root_vector, condition codes.
//
//------------------------------------------------------------------
#macro _rr_scheduler_set_queue_status(STATUS, in_queue_number)
.begin

	; Select the bit operation from STATUS at assembly time. STATUS is
	; validated explicitly: without the check, an unexpected value would
	; pick 'and~' but skip the "whole group empty" test further down
	; (that test is only assembled for EMPTY) and would therefore clear
	; the group's bit in @root_vector unconditionally.

	#if (streq('STATUS', 'NON_EMPTY'))
	#define ACTION				or 			// Used to set a bit in a bit vector.
	#else
	#if (streq('STATUS', 'EMPTY'))
	#define ACTION 				and~		// Used to clear a bit in a bit vector.
	#else
	#error "_rr_scheduler_set_queue_status: STATUS must be NON_EMPTY or EMPTY"
	#endif
	#endif

	.reg group_number group_number_bit group_vector_lm_addr
	.reg queue_number queue_number_bit

	; Get the group number for this queue (bits 6:5 of the queue number).

	alu[group_number, 0x3, and, in_queue_number, >>5]

	; Find the group vector in local memory (group structures are 8
	; bytes apart).

	alu[group_vector_lm_addr, _group_structure_lm_base, or, group_number, <<3]
	local_csr_wr[active_lm_addr_0, group_vector_lm_addr]

	; After 3 instructions, *l$index0 will point to the group vector in local memory.
	; The next three alu[] instructions fill exactly that gap - do not
	; remove or move them below the first *l$index0 access.

	; Build a one-hot mask for this queue within its group (bits 4:0 of
	; the queue number). The first alu[] of the pair only supplies the
	; shift count for the '<<indirect' in the second one.

	alu[queue_number, 0x1f, and, in_queue_number]

	alu[--, queue_number, or, 0]
	alu[queue_number_bit, -- , b, 1, <<indirect]

	; In the group vector, apply ACTION to the bit for this queue.
	; If ACTION is 'or' the bit is set, indicating that the queue is non-empty.
	; If ACTION is 'and~' the bit is cleared, indicating that the queue is empty.
	; The condition codes of this instruction are consumed by the bne[]
	; below in the EMPTY case.

	alu[*l$index0[LM_GROUP_VECTOR], *l$index0[LM_GROUP_VECTOR], ACTION, queue_number_bit]

	; Next in the @root_vector update the bit for this group.
	; If STATUS is NON_EMPTY, the queue changed from empty to non-empty so we can simply
	; go ahead and set the bit for its group in @root_vector indicating that this group
	; is now non-empty.
	; If STATUS is EMPTY, the queue changed from non-empty to empty. In this case we
	; should clear the bit for its group in @root_vector only if the entire group turned
	; empty. The bne[] skips the update when the group vector is still non-zero.

	#if (streq('STATUS', 'EMPTY'))
	bne[end#]
	#endif

	; Build a one-hot mask for the group, again using the
	; "shift count from previous instruction" pair.

	alu[--, group_number, or, 0]
	alu[group_number_bit, -- , b, 1, <<indirect]

	; In the @root_vector, apply ACTION to the bit for this group.
	; If ACTION is 'or' the bit is set, indicating that the group is non-empty.
	; If ACTION is 'and~' the bit is cleared, indicating that the group is empty.

	alu[@root_vector, @root_vector, ACTION, group_number_bit]

end#:

	#undef ACTION

.end
#endm


//------------------------------------------------------------------
// _rr_scheduler_handle_hbr_enqueue()
//
//    Description: Handle high bit rate enqueue case. If bit 31
//                 (valid) of the enqueue message is set, the cell
//                 count carried in the message is added to the
//                 queue's cell count in local memory and the queue is
//                 marked non-empty. Otherwise the queue state is left
//                 unchanged.
//
//    Parameters:
//      in_enqueue_queue_number - queue number the message is for.
//      in_enqueue_message      - enqueue message to examine.
//
//    Modifies:    active_lm_addr_0 (always, even when the message is
//                 not valid); queue cell count, group vector and
//                 @root_vector when the message is valid.
//
//------------------------------------------------------------------
#macro _rr_scheduler_handle_hbr_enqueue(in_enqueue_queue_number, in_enqueue_message)
.begin

	.reg queue_structure_lm_addr cell_count

	; Find the queue structure in local memory (queue structures are
	; 4 bytes apart).

	alu[queue_structure_lm_addr, QUEUE_STRUCTURE_LM_BASE, or, in_enqueue_queue_number, <<2]
	local_csr_wr[active_lm_addr_0, queue_structure_lm_addr]

	; After 3 instructions, *l$index0 will point to the queue structure in local memory.
	; The nop, the br_bclr[] and the cell count extraction below are
	; those three instructions - do not remove or reorder them.

	nop

	; Check if enqueue message valid bit (bit 31) is set; skip the
	; update entirely if it is clear.

	br_bclr[in_enqueue_message, 31, end#]

	; Valid bit is set; add cell count to current cell count of this queue in local memory.
	; The cell count is taken from the message shifted right by 18 and
	; masked with Ox7FF.
	; NOTE(review): Ox7FF is spelled with the letter 'O', so it is a
	; symbol rather than a literal - presumably a register declared
	; elsewhere that holds 0x7FF; confirm against its definition.

	alu[cell_count, Ox7FF, and, in_enqueue_message, >>18]

	alu[*l$index0[LM_QUEUE_CELL_COUNT], *l$index0[LM_QUEUE_CELL_COUNT], +, cell_count]

	; Set this queue to non-empty.

	_rr_scheduler_set_queue_status(NON_EMPTY, in_enqueue_queue_number)

end#:

.end
#endm


//------------------------------------------------------------------
// _rr_scheduler_handle_hbr_dequeue()
//
//    Description: High bit rate dequeue handling. When bits 31:29 of
//                 the dequeue message are 110b (valid bit set,
//                 dequeue transition bit set, invalid dequeue bit
//                 clear) the queue is marked empty. Any other
//                 combination leaves the scheduler state untouched.
//
//    Parameters:
//      in_dequeue_queue_number - queue number the message is for.
//      in_dequeue_message      - dequeue message to examine.
//
//------------------------------------------------------------------
#macro 	_rr_scheduler_handle_hbr_dequeue(in_dequeue_queue_number, in_dequeue_message)
.begin

	.reg deq_flags

	; Isolate the three flag bits (31:29) and compare them with 0x6.

	alu[deq_flags, --, b, in_dequeue_message, >>29]
	alu[--, 0x6, -, deq_flags]
	bne[leave_queue_state#]

	; The queue made a valid transition on dequeue: mark it (and, if
	; it was the last non-empty queue of its group, the group) empty.

	_rr_scheduler_set_queue_status(EMPTY, in_dequeue_queue_number)

leave_queue_state#:

.end
#endm


//------------------------------------------------------------------
// _rr_scheduler_schedule_hbr_queue()
//
//    Description: Schedule a high bit rate queue. Picks the next
//                 non-empty group from @root_vector and the next
//                 non-empty queue from that group's vector, puts a
//                 schedule request for it on the outgoing scratch
//                 ring, advances the root and group masks, and
//                 decrements the queue's cell count. If the count
//                 reaches 0 the queue is marked empty. If no group is
//                 non-empty, nothing is scheduled.
//
//    Parameters:
//      inout_schedule_queue_sigmask - signal mask; the bit for
//                                     schedule_queue_sig_3 is ORed in
//                                     when a request is issued.
//
//    Uses:        @root_vector, @root_mask, minus_two,
//                 $schedule_queue_3, schedule_queue_sig_3,
//                 out_scr_ring and TX_PHY_MODE, all declared outside
//                 this macro.
//
//    Modifies:    active_lm_addr_0, active_lm_addr_1, @root_mask,
//                 group mask and queue cell count in local memory,
//                 and (via _rr_scheduler_set_queue_status) the group
//                 vector and @root_vector.
//
//    Assembly fails with #error if TX_PHY_MODE is not one of
//    SPHY_4_8, MPHY_4 or SPHY_1_32, because no ring entry would be
//    generated for any other mode.
//
//------------------------------------------------------------------
#macro _rr_scheduler_schedule_hbr_queue(inout_schedule_queue_sigmask)
.begin

	.reg masked_root_vector group_number
	.reg group_structure_lm_addr
	.reg group_vector group_mask masked_group_vector

	.reg queue_number
	.reg queue_structure_lm_addr
	.reg schedule_queue_number

	; Apply a mask to select the next set of groups.

	alu[masked_root_vector, @root_vector, and, @root_mask]

	; Find the eligible group.

	ffs[group_number, masked_root_vector]

	; Check if any bit was set.
	; If a bit was set, we have found a non-empty group.

	bne[found_group#] 

	; If we get here, we could not find a non-empty group using 
	; masked_root_vector. So try finding a non-empty group without using
	; masked_root_vector. This is ok since masked_root_vector is used to ensure
	; fairness across groups.

	ffs[group_number, @root_vector]

	; If we still can't find a group, do not schedule.

	beq[end#]


found_group#:

	; Find the group structure in local memory (group structures are
	; 8 bytes apart).

	alu[group_structure_lm_addr, _group_structure_lm_base, or, group_number, <<3]
	local_csr_wr[active_lm_addr_0, group_structure_lm_addr]

	; After 3 instructions, *l$index0 will point to the group structure in local memory.
	; The three alu[] instructions that compute @root_mask fill exactly
	; that gap - do not remove them or move a *l$index0 access above them.

	; Compute the new @root_mask. This will be used the next time to schedule
	; from a different group. The mask is minus_two shifted left by
	; group_number; the first alu[] of the pair only supplies the shift
	; count for the '<<indirect' in the second one.

	.reg tmp
	alu[--, group_number, or, 0]
	alu[tmp, --, b, minus_two, <<indirect]	
	alu[@root_mask, --, b, tmp]


	; Now use the group vector and the group vector mask from local memory
	; to select an eligible queue.
			
	alu[group_vector, --, b, *l$index0[LM_GROUP_VECTOR]] 	

	alu[masked_group_vector, *l$index0[LM_GROUP_MASK], and, group_vector]

	; Find the eligible queue.

	ffs[queue_number, masked_group_vector]

	; Check if any bit was set.
	; If a bit was set, we have found a non-empty queue.

	bne[found_queue#] 
	
	; If we get here, we could not find a non-empty queue using 
	; masked_group_vector. So we will find a non-empty queue without using
	; masked_group_vector. This is ok since masked_group_vector is used to ensure
	; fairness across queues. We are guaranteed to find a non-empty queue since
	; otherwise this group would not have been scheduled.

	ffs[queue_number, group_vector]


found_queue#:

	; Schedule the queue.

	.reg schedule_queue_3

	; Consolidate queue_number and group_number to get a 7 bit (0..127) queue number.

	alu[schedule_queue_number, queue_number, or, group_number, <<5]

	; The group number can also be used as the output port number.
	; Refer to rr_scheduler.h for details.

	#if ((TX_PHY_MODE == SPHY_4_8) || (TX_PHY_MODE == MPHY_4))

	alu[schedule_queue_3, schedule_queue_number, or, group_number, <<16]

	#else
	#if (TX_PHY_MODE == SPHY_1_32)

	; Output port number is 0 in this mode.

	alu[schedule_queue_3, schedule_queue_number, or, 0, <<16]

	#else

	; Any other mode would leave schedule_queue_3 unwritten and put an
	; undefined value on the ring below; refuse to assemble.

	#error "_rr_scheduler_schedule_hbr_queue: unsupported TX_PHY_MODE"

	#endif
	#endif

	; $schedule_queue_3 has valid bit 31 set, bits 17:16 contain the output port number
	; and bits 15:0 contain the queue number.

	alu[$schedule_queue_3, schedule_queue_3, or, 0x1, <<31]

	; Write request to outgoing scratch ring; spin while the ring is full.

write_request#:

	br_inp_state[OUT_SCR_RING_FULL, write_request#]

	scratch[put, $schedule_queue_3, out_scr_ring, 0, 1], sig_done[schedule_queue_sig_3]

	; Add scratch signal to signal mask.

	alu[inout_schedule_queue_sigmask, inout_schedule_queue_sigmask, or, 1, <<&schedule_queue_sig_3]


	; Finally update queue and group information.

	; Find the queue structure in local memory (queue structures are
	; 4 bytes apart). A second LM index is used so that *l$index0 keeps
	; pointing at the group structure.

	alu[queue_structure_lm_addr, QUEUE_STRUCTURE_LM_BASE, or, schedule_queue_number, <<2]
	local_csr_wr[active_lm_addr_1, queue_structure_lm_addr]

	; After 3 instructions, *l$index1 will point to the queue structure in local memory.
	; The group mask computation and store below are those three
	; instructions - do not move the *l$index1 access above them.

	; Compute a new group_mask. This will be used the next time to schedule
	; from a different queue within the group. Same "shift count from
	; previous instruction" pair as for @root_mask.

	alu[--, queue_number, or, 0]
	alu[group_mask, --, b, minus_two, <<indirect]

	; Store the new group mask in local memory.

	alu[*l$index0[LM_GROUP_MASK], --, b, group_mask]

	; Decrement cell count for this queue by 1. The condition codes of
	; this instruction are consumed by the bne[] that follows.

	alu[*l$index1[LM_QUEUE_CELL_COUNT], *l$index1[LM_QUEUE_CELL_COUNT], -, 1]

	bne[end#]

	; If we get here, the queue cell count has become 0. This queue (and possibly 
	; the group) should be marked as empty.

	_rr_scheduler_set_queue_status(EMPTY, schedule_queue_number)

end#:

.end
#endm

#endif // _RR_SCHEDULER_UTIL_UC_