#ifndef _ATM_TM_SCHEDULER_UTIL_UC_
#define _ATM_TM_SCHEDULER_UTIL_UC_

/*******************************************************************************
                             Intel Proprietary

 Copyright (c) 1998-2002 By Intel Corporation.  All rights reserved.
 No part of this program or publication may be reproduced, transmited,
 transcribed, stored in a retrieval system, or translated into any language
 or computer language in any form or by any means, electronic, mechanical,
 magnetic, optical, chemical, manual, or otherwise, without the prior
 written permission of:
                         Intel Corporation
                         2200 Mission College Blvd.
                         Santa Clara, CA  95052-8119
*******************************************************************************/

/*
 *      File Name: atm_tm_scheduler_util.uc                                         
 *                                                                   
 *      Description: This file contains the actual scheduler and write-out
 *					 microcode macros. 
 *
 *      History: ver 1.0                                             
 *
 */                            

#include <sram.uc>
#include <dram.uc>
#include <atm_tm.h>


//------------------------------------------------------------------
// atm_tm_write_out(in_rtn_reg)
//
//    Description: Write-out half of the scheduler. Consumes one
//                 Shaper -> Scheduler message (two longwords, see the
//                 format diagram inside the macro) and, depending on the
//                 S bit and the CODE field of the second longword:
//                   - HBR VC            : adds cell_count to the 16-bit
//                                         per-VCQ counter in Local Memory;
//                   - UBR w/priority VC : adds cell_count to one of the
//                                         eight UBRwPRI entries of the port
//                                         and sets its bit in the cache byte;
//                   - other LBR VCs     : invokes _write_lbr() to place
//                                         the request in a time queue.
//                 If there is no message, only the inter-thread
//                 signalling is performed.
//
//    Parameters:
//		Inputs: 
//			in_rtn_reg - register holding the address to return to;
//			             every exit path visible in this macro ends
//			             with rtn[in_rtn_reg] (it is also handed to
//			             _write_lbr()).
//		Outputs:
//			None.
//
//    Context: The macro only declares curtq_wo, rtqnum, nrtqnum and
//             ubrtqnum. All other registers and signals it touches
//             (vcq_reg, ti, code, tqnum, temp, temp2, cell_count,
//             sig_mask, lm_offset, hbr_count, $vcq_wo, $wo_msg,
//             next_thread_sig, ...) must already be declared in the
//             scope where the macro is expanded.
//             The entry label write_out# is commented out below, so it
//             is not defined by this macro.
//             NOTE(review): presumably the caller places write_out#
//             immediately before the macro invocation - confirm.
//------------------------------------------------------------------
#macro atm_tm_write_out(in_rtn_reg)
.begin
.reg curtq_wo
;--------------------------- WriteOut loop --------------------------------------
/* 
	Shaper -> Scheduler message format:

	1) UBR w/priority VCs

	    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 
        1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
       +---------------+---------------+---------------+---------------+
    LW0| R |        Port #       |R|R|               VCQ#              |
       +---------------+---------------+---------------+---------------+	
    LW1|S|CODE |         Reserved		   | PRI |    cell_count       |
       +---------------+---------------+---------------+---------------+

	2) Other LBR VCs

	    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 
        1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
       +---------------+---------------+---------------+---------------+
    LW0| R |        Port #       |R|R|               VCQ#              |
       +---------------+---------------+---------------+---------------+	
    LW1|S|CODE |   Reserved    	 |         t1						   |
       +---------------+---------------+---------------+---------------+

	3) HBR VCs

	    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 
        1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
       +---------------+---------------+---------------+---------------+
    LW0| R |        Port #       |R|R|               VCQ#              |
       +---------------+---------------+---------------+---------------+	
    LW1|S|CODE |         Reserved		         |    cell_count       |
       +---------------+---------------+---------------+---------------+

	R - Reserved
	S - Speed (0 - LBR, 1 - HBR)
*/
//The writeout functionality begins here.
//write_out#:
	; pass the inter-thread signal on to the next thread before doing any work
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
#if defined(FIRST_SCHEDULER_ME) || defined(SECOND_SCHEDULER_ME)
	; message comes from the $wo_msg transfer registers;
	; the ALU op sets the condition codes, so LW0 == 0 means "no message"
	alu[vcq_reg, --, B, $wo_msg[0]]
	beq[no_write#]
	alu[$vcq_wo, --, b, vcq_reg]		; copy LW0 to the transfer register passed to _write_lbr()
	alu[ti, --, B, $wo_msg[1]]			; ti = LW1 of the message

#else
	; if NN is empty, there is nothing to writeout
	br_inp_state[nn_empty, no_write#]
	; read both longwords of the message from NN (post-incrementing the NN index)
	alu[vcq_reg, --, b, *n$index++]
	alu[$vcq_wo, --, b, vcq_reg]		; copy LW0 to the transfer register passed to _write_lbr()
	alu[ti, --, B, *n$index++]			; ti = LW1 of the message

#endif


///////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED

//Check if the VC is HBR or LBR: bit 31 of LW1 is the S (speed) bit,
//S = 1 selects the HBR path.
	br_bset[ti, 31, hbr_ubr_wo#]

#endif	//HBR_EXCLUDED
///////////////////////////////////////////////////////////////////////

	; extract port#: LW0 >> 19, then masked with mask_upper21
	; (presumably the mask clears the upper 21 bits, keeping the port field - confirm)
	alu[temp2, mask_upper21, AND, vcq_reg, >>19]	; get Port#

	; load PortInfo for given port
	; NOTE(review): _get_port_entry_wo() is defined elsewhere; the code below
	; assumes it leaves *l$index0 pointing at the port's PORTINFO entry - confirm
	_get_port_entry_wo(temp2)
	; curtq_wo = lower two bytes of (PORTINFO LW0 >> 8), upper bytes cleared,
	; i.e. the CurTQ field of the layout shown below
    ld_field_w_clr[curtq_wo, 0011, *l$index0[0], >>8]
/*

	LM_index0 points PORTINFO Table

	    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 
        1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
       +---------------+---------------+---------------+---------------+
    LW0| RT_for_LBR_TQ |             CurTQ			   |  Reserved  | F|
       +---------------+---------------+---------------+---------------+	
    LW1|            RTQnum			   |             NRTQnum		   |
       +---------------+---------------+---------------+---------------+
    LW2|             DQRTlen	       |            DQNRTlen           |
       +---------------+---------------+---------------+---------------+
    LW3|    RTQlen	   |    NRTQlen    | Working_RTQlen|Working_NRTQlen|
       +---------------+---------------+---------------+---------------+
    LW4|    Schcount   |     MaxTQ             | Reserved              |
       +---------------+---------------+---------------+---------------+
    LW5|            UBRTQnum		   |             DQUBRTlen		   |
       +---------------+---------------+---------------+---------------+
    LW6|             TQ_offset		   |   UBRTQlen    |Working_UBRTQle|
       +---------------+---------------+---------------+---------------+
    LW7|Rtdq_p |Rtdq_c |Nrtdq_p|Nrtdq_c|Ubrdq_p|Ubrdq_c| UBRwPRIcache  |
       +---------------+---------------+---------------+---------------+

 
 R - Reserved
 F - Flow Control
 Rtdq_p - Rtdq_lm_producer
 Rtdq_c - Rtdq_lm_consumer
 Nrtdq_p - Nrtdq_lm_producer
 Nrtdq_c - Nrtdq_lm_consumer
 Ubrdq_p - Ubrdq_lm_producer
 Ubrdq_c - Ubrdq_lm_consumer

  --------------------------------------------------------------------------------

	LM_index1 points UBRwPRI table

	NOTE(review): the write-out code below addresses these eight entries as
	*l$index0[8] .. *l$index0[15], i.e. through LM index 0 directly behind the
	eight PORTINFO longwords - confirm which index register this heading
	refers to.

	    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 
        1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
       +---------------+---------------+---------------+---------------+
    LW0|              UBR_1vcq           |        UBR_1len             |
       +---------------+---------------+---------------+---------------+	
    LW1|              UBR_2vcq           |        UBR_2len             |
       +---------------+---------------+---------------+---------------+
    LW2|              UBR_3vcq           |        UBR_3len             |
       +---------------+---------------+---------------+---------------+
    LW3|              UBR_4vcq           |        UBR_4len             |
       +---------------+---------------+---------------+---------------+
    LW4|              UBR_5vcq           |        UBR_5len             |
       +---------------+---------------+---------------+---------------+
    LW5|              UBR_6vcq           |        UBR_6len             |
       +---------------+---------------+---------------+---------------+
    LW6|              UBR_7vcq           |        UBR_7len             |
       +---------------+---------------+---------------+---------------+
    LW7|              UBR_8vcq           |        UBR_8len             |
       +---------------+---------------+---------------+---------------+

*/
	 	
//Writeout for low bit rate traffic
//Compute the time queue number from the message (LW1 masked with @mask_upper13)
//and the class-of-service code (LW1 bits 30..28), then dispatch on the code.
lbr_wo#:

	alu[tqnum, @mask_upper13, AND, ti]	;get number of TQ to be serviced
	alu[code, 0x7, and, ti, >>28]		;3-bit CODE field

//Dispatch on the class of service by indexing a table of branches
//with the CODE value.
check_code#:

	//Check for :
	//UBR code = 	000		GCRA calculation not needed
	//CBR code = 	001		needs GCRA calculation
	//rtVBR code = 	010		needs GCRA calculation twice
	//nrtVBR code = 011		needs GCRA calculation twice
	//UBR w/PCR =	100		needs GCRA calculation
	//UBR w/MDCR =	101		needs GCRA calculation
	//GFR =			110		needs GFR_GCRA calculation
	//
	//NOTE(review): code is a 3-bit value but only seven targets are listed.
	//For code == 111 the jump would presumably land on the instruction that
	//follows the last table entry (the first instruction of prio_write#) -
	//confirm that the shaper never produces this value.
	jump[code, jump_table#],  targets[_UBR#, _CBR#, _rtVBR#, _nrtVBR#, \
							    	  _UBRwPCR#, _UBRwMDCR#, _GFR#] 

//One branch per CODE value, in CODE order. The CLASS_* targets are not
//defined in this macro.
//NOTE(review): presumably CLASS_* are preprocessor names that resolve to
//rt_write#, nrt_write# or ubr_write# below - confirm in atm_tm.h.
jump_table#:

_UBR#:		br[prio_write#]
_CBR#: 		br[CLASS_CBR]
_rtVBR#: 	br[CLASS_RTVBR]
_nrtVBR#: 	br[CLASS_NRTVBR]
_UBRwPCR#: 	br[CLASS_UBR_PCR]
_UBRwMDCR#: br[CLASS_UBR_MDCR]
_GFR#:		br[CLASS_GFR]

// ******************* UBR writeout *******************
//UBR w/priority: no time queue is involved. The cell count from the message
//is accumulated in the UBRwPRI entry selected by the PRI field.
prio_write#:
	;extract cell_count: shift left then right by the same amount to clear the bits above the field
	alu[cell_count, --, b, ti, <<QM_REQ_CELL_COUNT_CLR_SHIFT]
	alu[cell_count, --, b, cell_count, >>QM_REQ_CELL_COUNT_CLR_SHIFT]	
	;extract priority: the 3 bits located directly above the cell_count field
	alu[temp, 0x7, and, ti, >>QM_REQ_CELL_COUNT_LEN]
	;dispatch on priority 0..7 through a table of eight branches
	jump[temp, prio_jump_table#],	targets[_prio_1#, _prio_2#, _prio_3#, \
											_prio_4#, _prio_5#, _prio_6#, \
											_prio_7#, _prio_8#]
prio_jump_table#:
_prio_1#: br[_ubr_prio_1#]
_prio_2#: br[_ubr_prio_2#]
_prio_3#: br[_ubr_prio_3#]
_prio_4#: br[_ubr_prio_4#]
_prio_5#: br[_ubr_prio_5#]
_prio_6#: br[_ubr_prio_6#]
_prio_7#: br[_ubr_prio_7#]
_prio_8#: br[_ubr_prio_8#]

//The eight handlers below are identical except for the LM entry they update
//(*l$index0[8] .. *l$index0[15]) and the cache bit they set (bit 0 .. bit 7
//of *l$index0[7]). Each one:
//  1. adds the stored entry to cell_count (the low part now holds old len + new count),
//  2. shifts left by VCQ_NUMBER_OF_BITS to drop the VCQ number that was stored
//     in the upper bits of the entry,
//  3. uses dbl_shf to shift vcq_reg:cell_count right by VCQ_NUMBER_OF_BITS, which
//     puts the VCQ number of this message in the upper bits and the new length
//     below it (assumes dbl_shf concatenates its two sources with the first as
//     the upper longword - confirm against the instruction reference),
//  4. writes the rebuilt entry back and sets the matching UBRwPRI cache bit.
_ubr_prio_1#:
	;increment cell count
	alu[cell_count, cell_count, +, *l$index0[8]]	
	alu[cell_count, --, b, cell_count, <<VCQ_NUMBER_OF_BITS]
	dbl_shf[temp, vcq_reg, cell_count, >>VCQ_NUMBER_OF_BITS]
	alu[*l$index0[8], --, b, temp]		;store rebuilt entry (VCQ# and cell count)
	alu[*l$index0[7], *l$index0[7], or, 1]				;set bit 0 in the UBRwPRI cache byte
	br[_ubr_prio_end#]
_ubr_prio_2#:
	;increment cell count
	alu[cell_count, cell_count, +, *l$index0[9]]	
	alu[cell_count, --, b, cell_count, <<VCQ_NUMBER_OF_BITS]
	dbl_shf[temp, vcq_reg, cell_count, >>VCQ_NUMBER_OF_BITS]
	alu[*l$index0[9], --, b, temp]		;store rebuilt entry (VCQ# and cell count)
	alu[*l$index0[7], *l$index0[7], or, 1, <<1]		;set bit 1 in the UBRwPRI cache byte
	br[_ubr_prio_end#]
_ubr_prio_3#:
	;increment cell count
	alu[cell_count, cell_count, +, *l$index0[10]]	
	alu[cell_count, --, b, cell_count, <<VCQ_NUMBER_OF_BITS]
	dbl_shf[temp, vcq_reg, cell_count, >>VCQ_NUMBER_OF_BITS]
	alu[*l$index0[10], --, b, temp]		;store rebuilt entry (VCQ# and cell count)
	alu[*l$index0[7], *l$index0[7], or, 1, <<2]		;set bit 2 in the UBRwPRI cache byte
	br[_ubr_prio_end#]
_ubr_prio_4#:
	;increment cell count
	alu[cell_count, cell_count, +, *l$index0[11]]	
	alu[cell_count, --, b, cell_count, <<VCQ_NUMBER_OF_BITS]
	dbl_shf[temp, vcq_reg, cell_count, >>VCQ_NUMBER_OF_BITS]
	alu[*l$index0[11], --, b, temp]		;store rebuilt entry (VCQ# and cell count)
	alu[*l$index0[7], *l$index0[7], or, 1, <<3]		;set bit 3 in the UBRwPRI cache byte
	br[_ubr_prio_end#]
_ubr_prio_5#:
	;increment cell count
	alu[cell_count, cell_count, +, *l$index0[12]]	
	alu[cell_count, --, b, cell_count, <<VCQ_NUMBER_OF_BITS]
	dbl_shf[temp, vcq_reg, cell_count, >>VCQ_NUMBER_OF_BITS]
	alu[*l$index0[12], --, b, temp]		;store rebuilt entry (VCQ# and cell count)
	alu[*l$index0[7], *l$index0[7], or, 1, <<4]		;set bit 4 in the UBRwPRI cache byte
	br[_ubr_prio_end#]
_ubr_prio_6#:
	;increment cell count
	alu[cell_count, cell_count, +, *l$index0[13]]	
	alu[cell_count, --, b, cell_count, <<VCQ_NUMBER_OF_BITS]
	dbl_shf[temp, vcq_reg, cell_count, >>VCQ_NUMBER_OF_BITS]
	alu[*l$index0[13], --, b, temp]		;store rebuilt entry (VCQ# and cell count)
	alu[*l$index0[7], *l$index0[7], or, 1, <<5]		;set bit 5 in the UBRwPRI cache byte
	br[_ubr_prio_end#]
_ubr_prio_7#:
	;increment cell count
	alu[cell_count, cell_count, +, *l$index0[14]]	
	alu[cell_count, --, b, cell_count, <<VCQ_NUMBER_OF_BITS]
	dbl_shf[temp, vcq_reg, cell_count, >>VCQ_NUMBER_OF_BITS]
	alu[*l$index0[14], --, b, temp]		;store rebuilt entry (VCQ# and cell count)
	alu[*l$index0[7], *l$index0[7], or, 1, <<6]		;set bit 6 in the UBRwPRI cache byte
	br[_ubr_prio_end#]
_ubr_prio_8#:
	;increment cell count
	alu[cell_count, cell_count, +, *l$index0[15]]	
	alu[cell_count, --, b, cell_count, <<VCQ_NUMBER_OF_BITS]
	dbl_shf[temp, vcq_reg, cell_count, >>VCQ_NUMBER_OF_BITS]
	alu[*l$index0[15], --, b, temp]		;store rebuilt entry (VCQ# and cell count)
	alu[*l$index0[7], *l$index0[7], or, 1, <<7]		;set bit 7 in the UBRwPRI cache byte
	br[_ubr_prio_end#]

//Common exit of the UBR w/priority path.
//NOTE(review): unlike no_write# and wo_cont#, this path does not perform the
//extra ctx_arb[next_thread_sig]/signal rounds before the final swap - confirm
//this is intended for thread ordering.
_ubr_prio_end#:
	; arm the wake-up events with the signals collected so far in sig_mask ...
	local_csr_wr[ACTIVE_CTX_WAKEUP_EVENTS, sig_mask]	
	; ... then reset sig_mask so that it only contains next_thread_sig
	alu[sig_mask, --, b, 1, <<&next_thread_sig]

	; swap out; the wake-up condition is the mask written above
	ctx_arb[--]

	// assembler directive naming the I/O signals involved in the swap above
	// (presumably marks them as completed for the assembler's signal checks - confirm)
	.io_completed sram_read_dn_sched \
	sram_read_dn_newtq0 sram_read_dn_newtq1 sram_read_dn_newtq2 cap_read_head_dn cap_read_tail_dn

	; signal next thread and return to the caller-supplied address
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
	rtn[in_rtn_reg]
// ******************* End of UBR writeout *******************

// ******************* RTQ writeout *******************
//Write out for Real time (rtVBR) low bit rate traffic.
//No branch inside this macro targets rt_write# directly.
//NOTE(review): presumably reached through one of the CLASS_* names in the
//jump table - confirm.
rt_write#:
.reg rtqnum
	alu[rtqnum, mask_upper16, AND, *l$index0[1], >>16] ; get RTQnum (upper half of PORTINFO LW1)

	; NOTE(review): _check_shift() is defined elsewhere; it receives the TQ number
	; from the message, the TQ number currently serviced for this class and CurTQ.
	; Presumably it adjusts tqnum when that slot is already being serviced - confirm.
	_check_shift(tqnum, rtqnum, curtq_wo)

	; Place request in TQ
	; NOTE(review): nothing follows this call before the next label, so _write_lbr()
	; is presumably expected to leave through in_rtn_reg and not fall through - confirm.
	_write_lbr(sig_mask, tqnum, _rtqlen_sram_base, @_rtq_sram_base, $vcq_wo, \
			in_rtn_reg, sram_read_dn0_wo, sram_write_dn0, TQ_SIZE_SHIFT, TQ_MAX_LEN, TQLEN_ENTRY_SIZE_SHIFT)


// ******************* End of RTQ writeout *******************


// ******************* NRTQ writeout *******************
//Write out for Non Real time (nrtVBR) low bit rate traffic.
//Same sequence as rt_write#, using the NRT queue number and NRT SRAM bases.
nrt_write#:
.reg nrtqnum
	alu[nrtqnum, mask_upper16, AND, *l$index0[1]] ; get NRTQnum (lower half of PORTINFO LW1)

	_check_shift(tqnum, nrtqnum, curtq_wo)

	; Place request in TQ
	_write_lbr(sig_mask, tqnum, _nrtqlen_sram_base, @_nrtq_sram_base, $vcq_wo, \
			in_rtn_reg, sram_read_dn0_wo, sram_write_dn0, TQ_SIZE_SHIFT, TQ_MAX_LEN, TQLEN_ENTRY_SIZE_SHIFT)


// ******************* End of NRTQ writeout *******************


// ******************* UBRTQ writeout *******************
//Write out for UBR w/PCR & w/MDCR low bit rate traffic.
//Same sequence as rt_write#, using the UBR queue number and UBR SRAM bases.
ubr_write#:
.reg ubrtqnum

	alu[ubrtqnum, mask_upper16, AND, *l$index0[5], >>16] ; get UBRTQnum (upper half of PORTINFO LW5)

	_check_shift(tqnum, ubrtqnum, curtq_wo)

	; Place request in TQ
	_write_lbr(sig_mask, tqnum, _ubrtqlen_sram_base, @_ubrtq_sram_base, $vcq_wo, \
			in_rtn_reg, sram_read_dn0_wo, sram_write_dn0, TQ_SIZE_SHIFT, TQ_MAX_LEN, TQLEN_ENTRY_SIZE_SHIFT)

// ******************* End of UBRTQ writeout *******************

//Nothing to write out. Just do the signaling and go back to scheduler
//in the end.
no_write#:
	// Keep the threads running in lock step: wait for the previous thread's
	// signal and pass it on. One such round is always executed; a second one
	// is added when PORTINFO_IN_SDRAM is not defined.
	// NOTE(review): presumably the number of rounds mirrors the context swaps
	// taken on the paths that do write - confirm against _write_lbr().
#ifndef PORTINFO_IN_SDRAM
	ctx_arb[next_thread_sig]
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	; signal next thread
#endif
	ctx_arb[next_thread_sig]
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	; signal next thread

	; arm the wake-up events with the signals collected in sig_mask,
	; then reset sig_mask so that it only contains next_thread_sig
	local_csr_wr[ACTIVE_CTX_WAKEUP_EVENTS, sig_mask]	
	alu[sig_mask, --, b, 1, <<&next_thread_sig]

	; swap out; the wake-up condition is the mask written above
	ctx_arb[--]

	.io_completed sram_read_dn_sched \
	sram_read_dn_newtq0 sram_read_dn_newtq1 sram_read_dn_newtq2 cap_read_head_dn cap_read_tail_dn
	; signal next thread and return to the caller-supplied address
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
	rtn[in_rtn_reg]

///////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED

//Writeout for High Bit Rate
//Bump up the cell count (in LM) for the VCQ by the number of cells
//that are enqueued for the VCQ.
//The counters are 16 bits wide, two per LM longword: an even VCQ number
//uses the lower half, an odd VCQ number the upper half.
hbr_ubr_wo#:	
	; temp = VCQ# * 2: shifting left by VCQ_NUMBER_OF_BITS_CLR_SHIFT clears the bits
	; above the VCQ field, shifting right by one bit less leaves the value doubled
	; (byte offset of the 16-bit counter)
	alu[temp, --, b, vcq_reg, <<VCQ_NUMBER_OF_BITS_CLR_SHIFT]
#define_eval HBR_SHIFT (VCQ_NUMBER_OF_BITS_CLR_SHIFT -1)
	alu[temp, --, b, temp, >>HBR_SHIFT]
	alu[lm_offset, _vcqlen_lm_base, +, temp]
	local_csr_wr[ACTIVE_LM_ADDR_1, lm_offset]  	; point LM index 1 at the counter
	;extract cell_count: shift left then right by the same amount to clear the bits above the field
	alu[cell_count, --, b, ti, <<QM_REQ_CELL_COUNT_CLR_SHIFT]
	alu[cell_count, --, b, cell_count, >>QM_REQ_CELL_COUNT_CLR_SHIFT]
	;increment and store cell_count
	alu[--, vcq_reg, and, 0x1]			;odd VCQ# -> counter is in the upper half
	bne[hi_word_wo#]
	; even VCQ#: add to the longword as a whole
	; NOTE(review): this is a full 32-bit add, so a carry out of the lower 16 bits
	; would spill into the odd VCQ's counter - confirm the count cannot overflow
	alu[*l$index1[0], cell_count, +, *l$index1[0]]
	br[wo_cont#]
hi_word_wo#:
	ld_field_w_clr[hbr_count, 0011, *l$index1[0], >>16]	;extract stored cell count
	alu[hbr_count, cell_count, +, hbr_count]
	ld_field[*l$index1[0], 1100, hbr_count, <<16]			;write back the upper two bytes only

//Exit of the HBR path: same signalling sequence as no_write#.
wo_cont#:

#ifndef PORTINFO_IN_SDRAM
	ctx_arb[next_thread_sig]
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	; signal next thread
#endif
	ctx_arb[next_thread_sig]
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	; signal next thread

	; arm the wake-up events with the signals collected in sig_mask,
	; then reset sig_mask so that it only contains next_thread_sig
	local_csr_wr[ACTIVE_CTX_WAKEUP_EVENTS, sig_mask]
	alu[sig_mask, --, b, 1, <<&next_thread_sig]

	; swap out; the wake-up condition is the mask written above
	ctx_arb[--]

	.io_completed sram_read_dn_sched \
	sram_read_dn_newtq0 sram_read_dn_newtq1 sram_read_dn_newtq2 cap_read_head_dn cap_read_tail_dn
	; signal next thread and return to the caller-supplied address
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
	rtn[in_rtn_reg]

#endif	//HBR_EXCLUDED
///////////////////////////////////////////////////////////////////////
.end
#endm

//------------------------------------------------------------------
// atm_tm_scheduler_util(in_port_no)
//
//    Description: Schedules from the time queues and writes into the
//					time queues.
//
//    Parameters: None.
//		Inputs: 
//			in_port_no - 0...2047
//		Outputs:
//			None.
//------------------------------------------------------------------

#macro atm_tm_scheduler_util(in_port_no)
.begin

	.reg tq_base tq_offset		// TQ table base address and offset in SRAM
	.reg tq_len					// Number of elements (deq requests) in given TQ
	.reg tql_base tql_offset 	// TQlen table base address and offset in SRAM
	.reg lm_offset  			// General purpose LM address pointer
	.reg port_tq_ofs			// Offset from the beginning of TQ table for given port
	.reg ti 					// Carries second LW of the message from shaper
	.reg code					// Class of service code
	.reg vcq_reg				// VCq number
	.reg tqnum 					// TQ number calculated by shaper
	.reg ipq_tail 				// Points to last IPQ element in chain
	.reg sig_mask				// Carries signals mask used during ctx swaps
	.reg rtn_reg 				// Return address
	.reg cell_count				// Number of cells to be scheduled
	.reg load_newtq				// Indicator whether we need to load new TQ or not
	.reg qlen					// TQ length used to calculate offset in TQ table 
	.reg curtq					// Number of TQ currently serviced

	// Temporary registers
	.reg temp temp2

///////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED
	.reg rt_lm_mask hbr_count
#endif
///////////////////////////////////////////////////////////////////////
	.reg $vcq_wo			// Contains DEQ request that is saved in TQ table
   	.reg $vcq 				// Contains DEQ request that has been read from TQ table

	.reg read $sr_head 				// registers used for num of cells sent by TX calculation (used when FLOW_CONTROL_NEW is defined)
	.reg read $sr_tail				// registers used for num of cells sent by TX calculation (used when FLOW_CONTROL_NEW is defined)
	.set $sr_head $sr_tail

	.reg $tqlen						// Register used to read-and-increment TQlen table entry for given Class of Service
	.reg $newtqlen0 $newtqlen1 $newtqlen2	// RTQlen/NRTQlen/UBRTQlen read from TQlen table


	.xfer_order $vcq_wo 
	.xfer_order $vcq
	.xfer_order $tqlen

#if defined(FIRST_SCHEDULER_ME) || defined(SECOND_SCHEDULER_ME)
	.reg $wo_msg[2]
	.xfer_order $wo_msg

	.sig scratch_read_sig
#endif

//Start the scheduling process by clearing the signal masks (sig_mask)
//and setting the load_newtq variable to zero. 
//Loading new TQs from SRAM happen every AGGREGATION slots and load_newtq is
//the flag variable to detect if SRAM read has been issued to load new TQ.
schedule#:
.set $newtqlen0 $newtqlen1 $newtqlen2 
	alu[sig_mask, --, B, 1, <<&next_thread_sig]
	alu[load_newtq, --, B, 0]

	; Reading message from shaper
#ifdef FIRST_SCHEDULER_ME
	scratch[get, $wo_msg[0], RING_TO_SCHEDULER_EVEN, load_newtq,  2], sig_done[scratch_read_sig]
#endif
#ifdef SECOND_SCHEDULER_ME
	scratch[get, $wo_msg[0], RING_TO_SCHEDULER_ODD, load_newtq, 2], sig_done[scratch_read_sig]
#endif

	; load port info to LM on position lm_offset in LM
	_get_port_entry(port)

#if defined(FIRST_SCHEDULER_ME) || defined(SECOND_SCHEDULER_ME)
	ctx_arb[scratch_read_sig]
#endif

	; saving lm_address base for serviced port
	local_csr_rd[ACTIVE_LM_ADDR_0]
	immed[serv_port_lm_base, 0]

; ------------------------- DQs servicing loop ------------------------
//check if we are servicing the last slot from current TQ
//If so we need to load new LBR time queue pointer to the departure rings in LM
//provided that the time queues have data in them.
	; get RT_for_the_LBR_TQ to compare with AGGREGATION-1
	br=byte[*l$index0[0], 3, AGGREGATION, issue_read_newtq_and_txcount#]

continue#:

/////////////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED

	//Set the LM base to the current HBR TQ pointer
	//Increment the pointer to point to the next entry in the TQ
	//Take care of wraparounds in the process of incrementing
	alu[lm_offset, _hbrtq_lm_base, +, @_rt_hbr]
	local_csr_wr[ACTIVE_LM_ADDR_1, lm_offset]

#endif	//HBR_EXCLUDED
///////////////////////////////////////////////////////////////////////

#if defined(FLOW_CONTROL) || defined(FLOW_CONTROL_NEW)
//Check the number of packets that have been transmitted for
//flow control purposes
	; check if port is blocked by flow controlled. If so, no cell needs to be scheduled
	alu[temp, 0x01, AND, *l$index0[0]]
	alu[--, --, B, temp]
	bne[FC_on#]
#endif

	//Priority queueing	 

///////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED

	//Priority1: HBR VC
	//Read the HBR VCQ from the current slot in the HBR TQ.
	//Check if the VCQ has data by looking up the LW that 
	//contains the HBR VCQ length in LM.
	//If length > 0, go to hbr_tx#
	alu[temp, 0x3, and, @_rt_hbr]		;offset in HBR TQ longword
	alu[temp, --, b, temp, <<3]
	alu[--, temp, and, 0]				;for indirect reference only
	ld_field_w_clr[vcq_reg, 0001, *l$index1[0], >>indirect]
	alu[temp, --, b, vcq_reg, <<1]
	alu[lm_offset, _vcqlen_lm_base, +, temp]
	local_csr_wr[ACTIVE_LM_ADDR_1, lm_offset]  

#endif	//HBR_EXCLUDED
///////////////////////////////////////////////////////////////////////

	; increment RT_for_the_LBR_TQ
	alu[*l$index0[0], @one24, +, *l$index0[0]]


///////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED

	alu[@_rt_hbr, @_rt_hbr, +, 1]
	alu[@_rt_hbr, @_rt_hbr, and, RTLM_MASK]
	alu[temp, 0x10, AND, vcq_reg, <<4]
	alu[--, temp, and, 0]				;for indirect reference only
	ld_field_w_clr[hbr_count, 0011, *l$index1[0], >>indirect], load_cc
	bne[hbr_tx#]

#endif	//HBR_EXCLUDED
////////////////////////////////////////////////////////////////////////

	; setting LM1 to point beginning of DQ in LM(4 LWs RT, 4LWs NRT, 4LWs UBR)
	local_csr_wr[ACTIVE_LM_ADDR_1, dq_lm_offset]

	; Priority1: RT TQ
	//Check if the real time departure queue length is greater than 0
	//If so go to rt_tx#
	alu[--, 0, or, *l$index0[2], >>16] 	; check if DQRTlen is greater than 0
	bne[rt_tx#] 

	; Priority2: NRT TQ
    //Check if the non-real time departure queue length is greater than 0
	//If so go to nrt_tx#
	ld_field[temp, 0011, *l$index0[2]], load_cc	; check if DQNRTlen is greater than 0
	bne[nrt_tx#]

	; Priority3: UBR TQ
    //Check if the UBR departure queue length is greater than 0
	//If so go to ubr_tx#
	ld_field[temp, 0011, *l$index0[5]], load_cc	; check if DQUBRTlen is greater than 0
	bne[ubr_tx#]

	; Priority4: UBR_PRI TQs
	//Dequeue from UBR priority queues
	_ubr_pri_deq(temp)
	alu[vcq_reg, temp, or, port, <<QM_REQ_PORT_OFFSET]	;add port number	
	alu[$vcq, vcq_reg, or, 1, <<31]
	load_addr[rtn_reg, scratch_put#]
	br_bclr[temp, 31, write_out#]

    load_addr[rtn_reg, check_for_load_new_tq#]
	br[write_out#]

// ******************* Schedule from real-time TQ *******************
//Check if the current time queue length > 0.
//If not we need to load a new time queue from the DQ ring
rt_tx#:
.begin 
.reg rtdq_lm_consumer rtdq_lm_producer
.reg rtqlen working_rtqlen rtqnum 
.reg tq_offset_for_port tq_base_for_port

	alu[rtqlen, 0xff, AND, *l$index0[3], >>24]	; RTQlen
	alu[working_rtqlen, 0xff, AND, *l$index0[3], >>8]	; Working_RTQlen
	; compare RTQlen with Working_RTQlen
	alu[--, rtqlen, -, working_rtqlen]
    bgt[cur_rt_tx#]

//Load the new time queue that needs to be serviced from the DQ ring
//Update the scheduler port state accordingly with the new 
//rt time queue length and number
new_rt_tx#:
	; get Rtdq_lm_consumer
	alu[rtdq_lm_consumer, 0x3c, AND, *l$index0[7], >>22]	// to get address in bytes

	; calculate LM offset from it to get RT DQ
	alu[lm_offset, dq_lm_offset, +, rtdq_lm_consumer]
 	local_csr_wr[ACTIVE_LM_ADDR_1, lm_offset]

	; increment Rtdq_lm_consumer by 1 LW
	alu[rtdq_lm_consumer, rtdq_lm_consumer, +, 4]
	; write Rtdq_lm_consumer to LM
	alu[rtdq_lm_consumer, RTDQ_SIZE_MASK, AND, rtdq_lm_consumer, >>2]
	alu[rtdq_lm_producer, 0xf0, AND, *l$index0[7], >>24]
	alu[temp, rtdq_lm_producer, OR, rtdq_lm_consumer]
	ld_field[*l$index0[7], 1000, temp, <<24]

	; load new RTQnum for servicing from DQ
	alu[rtqnum, mask_upper16, AND, *l$index1[0]]
	ld_field[*l$index0[1], 1100, rtqnum, <<16]

	; load new TQlen
	alu[rtqlen, --, b, *l$index1[0], >>16]
	; write TQlen to RTQlen[port] and zero Working_RTQlen
	ld_field[*l$index0[3], 1010, rtqlen, <<24]
   	alu[working_rtqlen, --, B, 0]	; Working_RTQlen

//Transmit from the rt TQ
//Compute the SRAM address to read the tq entry
//Issue SRAM read and jump to "write out"
cur_rt_tx#:
	; Calculate offset from the beginning of RTQ table for given port using TQ_offset
#define_eval INDIRECT_SHIFT 	(16 - TQ_SIZE_SHIFT)	; we need tq_offset_for_port to be in bytes
	alu[tq_offset_for_port, mask_tqofs, AND, *l$index0[6], >>INDIRECT_SHIFT] ; to get TQ offset in bytes
#undef INDIRECT_SHIFT

	; Add RTQ base to offset
	alu[tq_base_for_port, tq_offset_for_port, +, @_rtq_sram_base]

	; Calculate offset to get currentely serviced TQ
#define_eval INDIRECT_SHIFT 		(16 - TQ_SIZE_SHIFT)
	alu[tq_offset, mask_tqofs, AND, *l$index0[1], >>INDIRECT_SHIFT]	;get offset in bytes (RTQnum)
#undef INDIRECT_SHIFT

	alu[tq_base, tq_base_for_port, +, tq_offset]

	; calc. offset from beginning of TQ element to get time-slot 
	alu[tq_offset, --, B, working_rtqlen, <<2] ; each element is 4 bytes (LW) wide
	sram[read, $vcq, tq_base, tq_offset, 1], sig_done[sram_read_dn_sched]

rt_update_vars#:
	; decrement DQRTlen
	alu[*l$index0[2], *l$index0[2], -, @one16]
	; increment Working_RTQlen
	alu[temp, @one8, +, *l$index0[3]]
	ld_field[*l$index0[3], 0010, temp]

rt_finish_tx#:
	alu[sig_mask, sig_mask, or, 1, <<&sram_read_dn_sched]	
	load_addr[rtn_reg, deq#]
	br[write_out#]	
.end
// ******************* End of schedule from real-time TQ *******************

// ******************* Schedule from non real-time TQ *******************
//Check if the current time queue length > 0.
//If not we need to load a new time queue from the DQ ring
nrt_tx#:
.begin 
.reg nrtdq_lm_consumer nrtdq_lm_producer
.reg nrtqlen working_nrtqlen nrtqnum 
.reg tq_offset_for_port tq_base_for_port

	alu[nrtqlen, 0xff, AND, *l$index0[3], >>16]	; NRTQlen
	alu[working_nrtqlen, 0xff, AND, *l$index0[3]]	; Working_NRTQlen
	; compare NRTQlen with Working_NRTQlen
	alu[--, nrtqlen, -, working_nrtqlen] 
    bgt[cur_nrt_tx#]

//Load the new time queue that needs to be serviced from the DQ ring
//Update the scheduler port state accordingly with the new 
//nrt time queue length and number
new_nrt_tx#:
	; get Nrtdq_lm_consumer
	alu[nrtdq_lm_consumer, 0x3c, AND, *l$index0[7], >>14]	// to get address in bytes

	; calculate LM offset from it to get NRT DQ
	alu[lm_offset, dq_lm_offset, +, nrtdq_lm_consumer]
	alu[lm_offset, 16, +, lm_offset]	; NRT DQ is + 16 bytes from RTDQ
 	local_csr_wr[ACTIVE_LM_ADDR_1, lm_offset]

	; increment Nrtdq_lm_consumer by 1 LW
	alu[nrtdq_lm_consumer, nrtdq_lm_consumer, +, 4]
	; write Nrtdq_lm_consumer to LM
	alu[nrtdq_lm_consumer, NRTDQ_SIZE_MASK, AND, nrtdq_lm_consumer, >>2]
	alu[nrtdq_lm_producer, 0xf0, AND, *l$index0[7], >>16]
	alu[temp, nrtdq_lm_producer, OR, nrtdq_lm_consumer]
	ld_field[*l$index0[7], 0100, temp, <<16]

	; load new NRTQnum for servicing from DQ
	alu[nrtqnum, mask_upper16, AND, *l$index1[0]]
	ld_field[*l$index0[1], 0011, nrtqnum]

	; load new TQlen
	alu[nrtqlen, --, b, *l$index1[0], >>16]
	; write TQlen to NRTQlen[port] and zero Working_NRTQlen
	ld_field[*l$index0[3], 0101, nrtqlen, <<16]
   	alu[working_nrtqlen, --, B, 0]	; Working_NRTQlen

//Transmit from the nrt TQ
//Compute the SRAM address to read the tq entry
//Issue SRAM read and jump to "write out"
cur_nrt_tx#:
	; Calculate offset from the beginning of NRTQ table for given port using TQ_offset
#define_eval INDIRECT_SHIFT 	(16 - TQ_SIZE_SHIFT)	; we need tq_offset_for_port to be in bytes
	alu[tq_offset_for_port, mask_tqofs, AND, *l$index0[6], >>INDIRECT_SHIFT] ; to get TQ offset in bytes
#undef INDIRECT_SHIFT

	; Add NRTQ base to offset
	alu[tq_base_for_port, tq_offset_for_port, +, @_nrtq_sram_base]

	; Calculate offset to get currentely serviced TQ
#define_eval INDIRECT_SHIFT 		(TQ_SIZE_SHIFT)
	alu[tq_offset, mask_tqofs, AND, *l$index0[1], <<INDIRECT_SHIFT]	;get offset in bytes (NRTQnum)
#undef INDIRECT_SHIFT

	alu[tq_base, tq_base_for_port, +, tq_offset]

	; calc. offset from beginning of TQ element to get time-slot 
	alu[tq_offset, --, B, working_nrtqlen, <<2] ; each element is 4 bytes (LW) wide
	sram[read, $vcq, tq_base, tq_offset, 1], sig_done[sram_read_dn_sched]

nrt_update_vars#:
	; decrement DQNRTlen
	alu[*l$index0[2], *l$index0[2], -, 1]
	; increment Working_NRTQlen
	alu[temp, 1, +, *l$index0[3]]
	ld_field[*l$index0[3], 0001, temp]

nrt_finish_tx#:
	alu[sig_mask, sig_mask, or, 1, <<&sram_read_dn_sched]	
	load_addr[rtn_reg, deq#]
	br[write_out#]	
.end
// ******************* End of schedule from non real-time TQ *******************

// ******************* Schedule from UBR TQ *******************
//Check if the current time queue length > 0.
//If not we need to load a new time queue from the DQ ring
ubr_tx#:
.begin 
.reg ubrdq_lm_consumer ubrdq_lm_producer
.reg ubrtqlen working_ubrtqlen ubrtqnum 
.reg tq_offset_for_port tq_base_for_port

	alu[ubrtqlen, 0xff, AND, *l$index0[6], >>8]	; UBRTQlen
	alu[working_ubrtqlen, 0xff, AND, *l$index0[6]]	; Working_UBRTQlen
	; compare UBRTQlen with Working_UBRTQlen
	alu[--, ubrtqlen, -, working_ubrtqlen] 
    bgt[cur_ubrt_tx#]

//Load the new time queue that needs to be serviced from the DQ ring
//Update the scheduler port state accordingly with the new 
//ubrt time queue length and number
new_ubrt_tx#:
	; get Ubrdq_lm_consumer
	alu[ubrdq_lm_consumer, 0x3c, AND, *l$index0[7], >>6]	// to get address in bytes

	; calculate LM offset from it to get UBR DQ
	alu[lm_offset, dq_lm_offset, +, ubrdq_lm_consumer]
	alu[lm_offset, 32, +, lm_offset]	; UBRDQ is + 32 bytes from RTDQ
 	local_csr_wr[ACTIVE_LM_ADDR_1, lm_offset]

	; increment Ubtdq_lm_consumer by 1 LW
	alu[ubrdq_lm_consumer, ubrdq_lm_consumer, +, 4]
	; write Ubrdq_lm_consumer to LM
	alu[ubrdq_lm_consumer, UBRDQ_SIZE_MASK, AND, ubrdq_lm_consumer, >>2]
	alu[ubrdq_lm_producer, 0xf0, AND, *l$index0[7], >>8]
	alu[temp, ubrdq_lm_producer, OR, ubrdq_lm_consumer]
	ld_field[*l$index0[7], 0010, temp, <<8]

	; load new UBRTQnum for servicing from DQ
	alu[ubrtqnum, mask_upper16, AND, *l$index1[0]]
	ld_field[*l$index0[5], 1100, ubrtqnum, <<16]

	; load new TQlen
	alu[ubrtqlen, --, b, *l$index1[0], >>16]
	; write TQlen to UBRTQlen[port] and zero Working_UBRTQlen
	ld_field[*l$index0[6], 0011, ubrtqlen, <<8]
   	alu[working_ubrtqlen, --, B, 0]	; Working_UBRTQlen

//Transmit from the ubr TQ
//Compute the SRAM address to read the tq entry
//Issue SRAM read and jump to "write out"
cur_ubrt_tx#:
	; Calculate offset from the beginning of UBRTQ table for given port using TQ_offset
#define_eval INDIRECT_SHIFT 	(16 - TQ_SIZE_SHIFT)	; we need tq_offset_for_port to be in bytes
	alu[tq_offset_for_port, mask_tqofs, AND, *l$index0[6], >>INDIRECT_SHIFT] ; to get TQ offset in bytes
#undef INDIRECT_SHIFT

	; Add UBRTQ base to offset
	alu[tq_base_for_port, tq_offset_for_port, +, @_ubrtq_sram_base]

	; Calculate offset to get currentely serviced TQ
#define_eval INDIRECT_SHIFT 		(16 - TQ_SIZE_SHIFT)
	alu[tq_offset, mask_tqofs, AND, *l$index0[5], >>INDIRECT_SHIFT]	;get offset in bytes (UBRTQnum)
#undef INDIRECT_SHIFT

	alu[tq_base, tq_base_for_port, +, tq_offset]

	; calc. offset from beginning of TQ element to get time-slot 
	alu[tq_offset, --, B, working_ubrtqlen, <<2] ; each element is 4 bytes (LW) wide
	sram[read, $vcq, tq_base, tq_offset, 1], sig_done[sram_read_dn_sched]

ubrt_update_vars#:
	; decrement DQUBRTlen
	alu[*l$index0[5], *l$index0[5], -, 1]
	; increment Working_UBRTQlen
	alu[temp, *l$index0[6], +, 1]
	ld_field[*l$index0[6], 0001, temp]

ubrt_finish_tx#:
	alu[sig_mask, sig_mask, or, 1, <<&sram_read_dn_sched]	
	load_addr[rtn_reg, deq#]
	br[write_out#]	
.end
// ******************* End of schedule from UBR TQ *******************

///////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED

//Schedule from the High bit rate time queue
//VCQ entry is already known, since the length for this VCQ 
//was previously checked. 
//Just decrement the length of this VCQ by 1 and jump to "write out" 
hbr_tx#:
	alu[hbr_count, hbr_count, -, 1]
	alu[--, vcq_reg, and, 0x1]
	bne[hi_word#]
	ld_field[*l$index1[0], 0011, hbr_count]
	br[tx_cont#]
hi_word#:
	ld_field[*l$index1[0], 1100, hbr_count, <<16]
tx_cont#:
	alu[vcq_reg, vcq_reg, or, in_port_no, <<QM_REQ_PORT_OFFSET]
	alu[$vcq, vcq_reg, or, 1, <<31]
	load_addr[rtn_reg, scratch_put#]	
	br[write_out#]

#endif	//HBR_EXCLUDED
///////////////////////////////////////////////////////////////////////

//The actual cell dequeue message is sent out to the QM from here
//This is executed after the thread returns from finishing the functions
//of "write out"
/*
	DEQ request format:

	    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 
        1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
       +---------------+---------------+---------------+---------------+
    LW0|V|R|        Port #       |R|R|               VCQ#              |
       +---------------+---------------+---------------+---------------+	

	V - Valid bit
	R - Reserved

*/
deq#:
	alu[$vcq, $vcq, or, 1, <<31]

scratch_put#:
    br_inp_state[ring_full, retry#]
 	scratch[put, $vcq, 0, qmring_num, 1], sig_done[scratch_put_dn]
	alu[sig_mask, sig_mask, or, 1, <<&scratch_put_dn]

//Check whether a new TQ has to be loaded into the Departure Queue in LM.
//This is done by testing the load_newtq flag variable.
//If it is set, jump to "load new tq".
check_for_load_new_tq#:
	br=byte[load_newtq, 0, 1, load_new_tq#]

//end of the scheduler functions.
end#:
   	local_csr_wr[ACTIVE_CTX_WAKEUP_EVENTS, sig_mask]	

   	ctx_arb[--], br[end_sched#]
; -------------------------------------------------------------------------------

#if defined(FLOW_CONTROL) || defined(FLOW_CONTROL_NEW)
//If Flow Control, there is nothing to schedule for this 
//cell tx slot. Jump to write out.
FC_on#:
	; increment RT_for_the_LBR_TQ
	alu[*l$index0[0], @one24, +, *l$index0[0]]
	load_addr[rtn_reg, check_for_load_new_tq#]
	br[write_out#]

#endif
//Retry until the scratch ring has space to accommodate
//the current deq request.
retry#:
	br[scratch_put#]

;--------------------------------------------------------------------------------


issue_read_newtq_and_txcount#:

//This macro issues a read for new rt, nrt & ubr time queue
//to be loaded into the departure rings

	_issue_read_newtq_and_txcount($newtqlen0, $newtqlen1, $newtqlen2, \
		$sr_head, $sr_tail, load_newtq, sig_mask, serv_port_lm_base, \
		sram_read_dn_newtq0, sram_read_dn_newtq0, sram_read_dn_newtq2, \
		cap_read_head_dn, cap_read_tail_dn)

///////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED
//Macro functionality: Check if it is time yet to poll for a 
//new HBR TQ in SRAM. A new HBR TQ will be available in SRAM whenever 
//a HBR VC arrives or leaves the system. If it is time, then, check if 
//a new HBR TQ is present in SRAM by examining a particular location 
//in SRAM. If new HBR TQ is present, load the TQ into LM, in
//place of the old one.

	_check_and_load_new_hbr_tq()
#endif
///////////////////////////////////////////////////////////////////////

//branch to where you left off.
	br[continue#]


load_new_tq#:
//happens deterministically once every AGGREGATION slots
//Put the <TQnum, TQlen> into respective software rings 
//for real and non real time  and ubr traffic, if TQlen > 0.
//Increase the lengths of the DQ by the current TQ length.
//Also reset the flag variable load_newtq.
	_load_tq_into_dqrings(load_newtq, $newtqlen0, $newtqlen1, $newtqlen2)


#if defined(FLOW_CONTROL) || defined(FLOW_CONTROL_NEW)
//This macro Checks for flow control to be asserted or de-asserted
//on a port.
    
	_flow_control($txcount, $sr_head, $sr_tail)
	.io_completed $sr_head, $sr_tail
#endif


//Branch to the end of the scheduler.
	br[end#]


;--------------------------- WriteOut loop --------------------------------------
/* 
	Shaper -> Scheduler message format:

	1) UBR w/priority VCs

	    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 
        1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
       +---------------+---------------+---------------+---------------+
    LW0| R |        Port #       |R|R|               VCQ#              |
       +---------------+---------------+---------------+---------------+	
    LW1|S|CODE |         Reserved		   | PRI |    cell_count       |
       +---------------+---------------+---------------+---------------+

	2) Other LBR VCs

	    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 
        1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
       +---------------+---------------+---------------+---------------+
    LW0| R |        Port #       |R|R|               VCQ#              |
       +---------------+---------------+---------------+---------------+	
    LW1|S|CODE |   Reserved    	 |         t1						   |
       +---------------+---------------+---------------+---------------+

	3) HBR VCs

	    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 
        1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
       +---------------+---------------+---------------+---------------+
    LW0| R |        Port #       |R|R|               VCQ#              |
       +---------------+---------------+---------------+---------------+	
    LW1|S|CODE |         Reserved		         |    cell_count       |
       +---------------+---------------+---------------+---------------+

	R - Reserved

	S - Speed (0 - LBR, 1 - HBR)
*/

//The writeout functionality begins here.

#ifdef DOUBLE_WRITEOUT
write_out#:
	.begin 
	.reg local_rtn_reg 
		load_addr[local_rtn_reg, repeat_write_out#]
		atm_tm_write_out(local_rtn_reg)

repeat_write_out#:
		local_csr_wr[ACTIVE_CTX_WAKEUP_EVENTS, sig_mask]
		alu[sig_mask, --, b, 1, <<&next_thread_sig]

		ctx_arb[--]

		.io_completed scratch_put_dn sram_write_dn0 
		atm_tm_write_out(rtn_reg)
	.end
#else // !DOUBLE_WRITEOUT
write_out#:

	atm_tm_write_out(rtn_reg)

#endif //DOUBLE_WRITEOUT
;--------------------------- The end of WriteOut loop --------------------------------
end_sched#:
	.io_completed scratch_put_dn sram_write_dn0 
.end 
#endm
//------------------------------------------------------------------
// _issue_read_newtq_and_txcount()
//
//    Description: Starts the load of the next time queue (TQ) for the port
//					whose PortInfo table is addressed through *l$index0.
//					The real-time counter in PortInfo word 0 is zeroed first.
//					If none of the RT/NRT/UBR departure queue (DQ) rings is
//					full, CurTQ is advanced and an SRAM test_and_clr is
//					issued against each of the three TQ length tables.
//					With FLOW_CONTROL_NEW the head and tail pointers of the
//					QM -> TX scratch ring are read as well.
//
//    Parameters:
//		  Inputs:
//			in_lmbase - LM index of the serviced port's PortInfo table
//					(not referenced by the macro body)
//			in_rtqlen_sig, in_nrtqlen_sig, in_ubrtqlen_sig
//					- signals for the three test_and_clr operations
//			in_head_sig, in_tail_sig - signals for the scratch ring head and
//					tail reads (FLOW_CONTROL_NEW only)
//
//		Inputs/Outputs:
//			io_sig_mask - wakeup mask; the RT and UBR length signals (and,
//					with FLOW_CONTROL_NEW, head/tail signals) are OR-ed in
//
//		 Outputs:
//			io_flag_var - 1 when the TQ length reads were issued,
//					0 when a DQ ring was full and nothing was issued
//			out_rtqlen, out_nrtqlen, out_ubrqlen -
//					transfer registers receiving the RT/NRT/UBR TQ lengths
//			out_sr_head, out_sr_tail - transfer registers receiving the
//					scratch ring head and tail (FLOW_CONTROL_NEW only)
//------------------------------------------------------------------

#macro _issue_read_newtq_and_txcount(out_rtqlen, out_nrtqlen, out_ubrqlen, \
				out_sr_head, out_sr_tail, io_flag_var, io_sig_mask,in_lmbase, \
				in_rtqlen_sig, in_nrtqlen_sig, in_ubrtqlen_sig, in_head_sig, in_tail_sig)
.begin

.reg nq_prod nq_cons nq_curtq
.reg nq_len_ofs nq_port_ofs nq_rt_addr nq_nrt_addr nq_ubr_addr

	; restart the real-time counter (top byte of PortInfo word 0);
	; the next load is due when it reaches AGGREGATION again
	ld_field[*l$index0[0], 1000, 0]

	; optimistically flag that a new-TQ read is in flight
	alu[io_flag_var, --, b, 1]

// A DQ ring is full when (producer + 1) modulo ring size equals consumer.
// If any of the three rings is full no new TQ can be accepted.

	; RT ring: producer in bits 31:28, consumer in bits 27:24 of word 7
	alu[nq_cons, RTDQ_SIZE_MASK, AND, *l$index0[7], >>24]
	alu[nq_prod, RTDQ_SIZE_MASK, AND, *l$index0[7], >>28]
	alu[nq_prod, nq_prod, +, 1]
	alu[nq_prod, RTDQ_SIZE_MASK, AND, nq_prod]
	alu[--, nq_cons, -, nq_prod]
	beq[dq_ring_full#]

	; NRT ring: producer in bits 23:20, consumer in bits 19:16
	alu[nq_cons, NRTDQ_SIZE_MASK, AND, *l$index0[7], >>16]
	alu[nq_prod, NRTDQ_SIZE_MASK, AND, *l$index0[7], >>20]
	alu[nq_prod, nq_prod, +, 1]
	alu[nq_prod, NRTDQ_SIZE_MASK, AND, nq_prod]
	alu[--, nq_cons, -, nq_prod]
	beq[dq_ring_full#]

	; UBR ring: producer in bits 15:12, consumer in bits 11:8
	alu[nq_cons, UBRDQ_SIZE_MASK, AND, *l$index0[7], >>8]
	alu[nq_prod, UBRDQ_SIZE_MASK, AND, *l$index0[7], >>12]
	alu[nq_prod, nq_prod, +, 1]
	alu[nq_prod, UBRDQ_SIZE_MASK, AND, nq_prod]
	alu[--, nq_cons, -, nq_prod]
	beq[dq_ring_full#]

#ifdef FLOW_CONTROL_NEW
// The transmit backlog is derived from the head and tail pointers of the
// scratch ring between QM and AAL5 TX; fetch both pointers here.
	#define_eval TX_SCR_RING 	QM_TO_TX_SCR_RING

	cap[read, out_sr_head, SCRATCH_RING_HEAD_/**/TX_SCR_RING], sig_done[in_head_sig]
	alu[io_sig_mask, io_sig_mask, or, 1, <<&in_head_sig]	

	cap[read, out_sr_tail, SCRATCH_RING_TAIL_/**/TX_SCR_RING], sig_done[in_tail_sig]
	alu[io_sig_mask, io_sig_mask, or, 1, <<&in_tail_sig]

    #undef TX_SCR_RING

#endif // FLOW_CONTROL_NEW

	; CurTQ = (CurTQ + 1) & MaxTQmask, written back to bytes 1-2 of word 0
	alu[nq_curtq, mask_upper16, AND, *l$index0[0], >>8]
	alu[nq_curtq, nq_curtq, +, 1]
	alu[nq_curtq, nq_curtq, AND, *l$index0[4], >>12]
	ld_field[*l$index0[0], 0110, nq_curtq, <<8]

/* 
	TQlen table (one per class of service, indexed by TQ number)
				LW0
			   +--------+
Element 1	   | TQ len |
			   +--------+
Element 2	   | TQ len |
			   +--------+
				  ...
			   +--------+	
Element TQ_TOT | TQ len |
			   +--------+
*/
	; per-port byte offset into the TQlen tables
#define_eval INDIRECT_SHIFT 	(16 - TQLEN_ENTRY_SIZE_SHIFT)	; convert the stored offset to bytes
	alu[nq_port_ofs, mask_tqlofs, AND, *l$index0[6], >>INDIRECT_SHIFT]
#undef INDIRECT_SHIFT
	alu[nq_rt_addr, _rtqlen_sram_base, +, nq_port_ofs]
	alu[nq_nrt_addr, _nrtqlen_sram_base, +, nq_port_ofs]
	alu[nq_ubr_addr, _ubrtqlen_sram_base, +, nq_port_ofs]

	; byte offset of the CurTQ entry (each entry is TQLEN_ENTRY_SIZE bytes)
	alu[nq_len_ofs, --, b, nq_curtq, <<TQLEN_ENTRY_SIZE_SHIFT]

	; test_and_clr masks: all ones, so the whole length word is cleared
	alu[out_rtqlen, --, ~b, 0]
	alu[out_nrtqlen, --, ~b, 0]
	alu[out_ubrqlen, --, ~b, 0]

	sram[test_and_clr, out_rtqlen, nq_rt_addr, nq_len_ofs], sig_done[in_rtqlen_sig]
    // According to the PRM all atomic operations are serviced in order, so
    // there is no need to wait for the signals of the first two test_and_clr
    // operations; waiting for the last one is enough.
    .io_completed in_rtqlen_sig
	sram[test_and_clr, out_nrtqlen, nq_nrt_addr, nq_len_ofs], sig_done[in_nrtqlen_sig]
	sram[test_and_clr, out_ubrqlen, nq_ubr_addr, nq_len_ofs], sig_done[in_ubrtqlen_sig]

	; add the RT and UBR signals (each together with the next signal number)
	; to the wakeup mask; the NRT signal is deliberately left out
	alu[io_sig_mask, io_sig_mask, or, 1, <<&in_rtqlen_sig]
	alu[io_sig_mask, io_sig_mask, or, 1, <<(&in_rtqlen_sig+1)]
//	alu[io_sig_mask, io_sig_mask, or, 1, <<&in_nrtqlen_sig]
//	alu[io_sig_mask, io_sig_mask, or, 1, <<(&in_nrtqlen_sig+1)]
	alu[io_sig_mask, io_sig_mask, or, 1, <<&in_ubrtqlen_sig]
	alu[io_sig_mask, io_sig_mask, or, 1, <<(&in_ubrtqlen_sig+1)]

	br[issue_done#]

// At least one DQ ring is full: no new TQ can be loaded, so withdraw the
// "read in flight" flag and return to the caller.
dq_ring_full#:
	alu[io_flag_var, --, b, 0]

issue_done#:
.end
#endm



//------------------------------------------------------------------
// _load_tq_into_dqrings()
//
//    Description: Pushes the <TQ number, TQ length> pair of the current
//					time queue onto the real-time, non-real-time and UBR
//					departure queue (DQ) rings kept in local memory, for
//					every class whose TQ length is non-zero, and adds the
//					length to the matching DQ length counter in PortInfo.
//					LM index0 is pointed at serv_port_lm_base for the
//					duration of the macro and restored at the end.
//
//    Parameters:
//		  Inputs:
//				in_rtqlen, in_nrtqlen, in_ubrqlen: real-time, non-real-time
//				and UBR time queue lengths read from SRAM
//			
// 		 Outputs:
//				out_flag_var: cleared to 0 (load request consumed)
//
//    Uses from the enclosing scope (not declared here): tq_len,
//    dq_lm_offset, serv_port_lm_base, mask_upper16.
//------------------------------------------------------------------

#macro _load_tq_into_dqrings(out_flag_var, in_rtqlen, in_nrtqlen, in_ubrqlen)
.begin

.reg lmem_ofs producer temp old_lm curtq_load
	; remember the current LM index0 base in old_lm (CSR read + immed pair)
	; so that it can be written back at end_macro#
	local_csr_rd[ACTIVE_LM_ADDR_0]
	immed[old_lm, 0]

	; point LM index0 at the PortInfo table of the serviced port
	local_csr_wr[ACTIVE_LM_ADDR_0, serv_port_lm_base]
	; NOTE(review): the nops presumably cover the delay before the new LM
	; base may be used through *l$index0 - confirm against the PRM
	nop
	nop
	alu[out_flag_var, --, b, 0]
	; read CurTQ from PortInfo word 0 (no increment happens here)
	alu[curtq_load, mask_upper16, AND, *l$index0[0], >>8]

/*
	Departure Queue (DQ) Table in LM

    32               16                 0
    +-----------------+-----------------+---RTDQ cache
  0 |     RTQ len     |   RTQ number	|
    +-----------------+-----------------+
  1 |     RTQ len     |   RTQ number	|
    +-----------------+-----------------+
  2 |     RTQ len     |   RTQ number	|
    +-----------------+-----------------+
  3 |     RTQ len     |   RTQ number	|
    +-----------------+-----------------+---NRTDQ cache
  4 |     NRTQ len    |  NRTQ number	|
    +-----------------+-----------------+
  5 |     NRTQ len    |  NRTQ number	|
    +-----------------+-----------------+
  6 |     NRTQ len    |  NRTQ number	|
    +-----------------+-----------------+
  7 |     NRTQ len    |  NRTQ number	|
    +-----------------+-----------------+---UBRDQ cache
  8 |   UBRTQ len     |  UBRTQ number	|
    +-----------------+-----------------+
  9 |   UBRTQ len     |  UBRTQ number	|
    +-----------------+-----------------+
 10 |   UBRTQ len     |  UBRTQ number	|
    +-----------------+-----------------+
 11 |   UBRTQ len     |  UBRTQ number	|
    +-----------------+-----------------+---Reserved
 12 |              Reserved             |
    +-----------------+-----------------+
 13 |              Reserved             |
    +-----------------+-----------------+
 14 |              Reserved             |
    +-----------------+-----------------+
 15 |              Reserved             |
    +-----------------+-----------------+

*/

//Process the real-time time queue.
//If its length is zero there is nothing to move into the
//departure queue; go on to the non-real-time time queue.
	rt_ring#:  
		; copy the length into tq_len; the ALU result sets the zero flag
		alu[tq_len, --, b, in_rtqlen]
	  	beq[nrt_ring#]
		; clamp tq_len to TQ_MAX_LEN
		alu[--, tq_len, -, TQ_MAX_LEN]	
		bgt[rt_ring_full#]	; tq_len > TQ_MAX_LEN: clamp out of line
	rt_ring_continue#:
		; rtdq producer index (bits 31:28 of word 7) scaled to a byte offset
		alu[producer, 0x3c, AND, *l$index0[7], >>26]	// >>28 and <<2 to get address in bytes
		alu[lmem_ofs, dq_lm_offset, +, producer]	; dq_lm_offset - DQ table base address for given port
		; point LM index1 at the ring slot to be written
		local_csr_wr[ACTIVE_LM_ADDR_1, lmem_ofs]
		; advance the producer by one slot (4 bytes)
		alu[producer, producer, +, 4]
		; rebuild the rtdq producer/consumer byte: producer in the high
		; nibble (wrapped by the mask), consumer in the low nibble
		alu[temp, 0x0f, AND, *l$index0[7], >>24]	; get consumer
		alu[producer, RTDQ_SIZE_SHIFTED_MASK, AND, producer, <<2]
		alu[producer, temp, OR, producer]	; producer | consumer
		ld_field[*l$index0[7], 1000, producer, <<24]

//Write the time queue number (low half) and the time queue length
//(high half) into the departure queue slot.
	rt_write_ring#:
		alu[*l$index1[0], curtq_load, or, tq_len, <<16]
		; DQRTlen (upper 16 bits of word 2) += length of the current TQ
		alu[temp, --, B, *l$index0[2], >>16]
		alu[temp, tq_len, +, temp]
		ld_field[*l$index0[2], 1100, temp, <<16]


//Process the non-real-time time queue.
//If its length is zero there is nothing to move into the
//departure queue.
	nrt_ring#:
		; copy the length into tq_len; the ALU result sets the zero flag
		alu[tq_len, --, b, in_nrtqlen]
		beq[ubr_ring#]
		; clamp tq_len to TQ_MAX_LEN
		alu[--, tq_len, -, TQ_MAX_LEN]	
		bgt[nrt_ring_full#]	; tq_len > TQ_MAX_LEN: clamp out of line
	nrt_ring_continue#:
		; nrtdq producer index (bits 23:20 of word 7) scaled to a byte offset
		alu[producer, 0x3c, AND, *l$index0[7], >>18]	// >>20 and <<2 to get address in bytes
		alu[lmem_ofs, dq_lm_offset, +, producer]	; dq_lm_offset - DQ table base address for given port
		alu[lmem_ofs, NRTDQ_LM_OFFSET, +, lmem_ofs]				; offset from RTdq
		; point LM index1 at the ring slot to be written
		local_csr_wr[ACTIVE_LM_ADDR_1, lmem_ofs]
		; advance the producer by one slot (4 bytes)
		alu[producer, producer, +, 4]
		; rebuild the nrtdq producer/consumer byte
		alu[temp, 0x0f, AND, *l$index0[7], >>16]
		alu[producer, NRTDQ_SIZE_SHIFTED_MASK, AND, producer, <<2]
		alu[producer, temp, OR, producer]
		ld_field[*l$index0[7], 0100, producer, <<16]
//Write the time queue number (low half) and the time queue length
//(high half) into the departure queue slot.
	nrt_write_ring#:
		alu[*l$index1[0], curtq_load, or, tq_len, <<16]
		; DQNRTlen (lower 16 bits of word 2) += length of the current TQ;
		; only the low two bytes of the sum are written back
		alu[temp, tq_len, +, *l$index0[2]]
		ld_field[*l$index0[2], 0011, temp]

//Process the UBR time queue.
//If its length is zero there is nothing to move into the
//departure queue. 
	ubr_ring#:    
		; copy the length into tq_len; the ALU result sets the zero flag
		alu[tq_len, --, b, in_ubrqlen]  
		beq[end_macro#]
		; clamp tq_len to TQ_MAX_LEN
		alu[--, tq_len, -, TQ_MAX_LEN]	
		bgt[ubrt_ring_full#]	; tq_len > TQ_MAX_LEN: clamp out of line
	ubrt_ring_continue#:
		; ubrdq producer index (bits 15:12 of word 7) scaled to a byte offset
		alu[producer, 0x3c, AND, *l$index0[7], >>10]	// >>12 and <<2 to get address in bytes
		alu[lmem_ofs, dq_lm_offset, +, producer]	; dq_lm_offset - DQ table base address for given port
		alu[lmem_ofs, UBRDQ_LM_OFFSET, +, lmem_ofs]				; offset from RTdq
		; point LM index1 at the ring slot to be written
		local_csr_wr[ACTIVE_LM_ADDR_1, lmem_ofs]
		; advance the producer by one slot (4 bytes)
		alu[producer, producer, +, 4]
		; rebuild the ubrdq producer/consumer byte
		alu[temp, 0x0f, AND, *l$index0[7], >>8]
		alu[producer, UBRDQ_SIZE_SHIFTED_MASK, AND, producer, <<2]
		alu[producer, temp, OR, producer]
		ld_field[*l$index0[7], 0010, producer, <<8]

//Write the time queue number (low half) and the time queue length
//(high half) into the departure queue slot.
	ubr_write_ring#:	
		alu[*l$index1[0], curtq_load, or, tq_len, <<16]
		; DQUBRTlen (lower 16 bits of word 5) += length of the current TQ;
		; only the low two bytes of the sum are written back
		alu[temp, tq_len, +, *l$index0[5]]
		ld_field[*l$index0[5], 0011, temp]
		br[end_macro#]

; Out-of-line clamp handlers: force tq_len to TQ_MAX_LEN and resume.
rt_ring_full#:
		alu[tq_len, --, B, TQ_MAX_LEN]
		br[rt_ring_continue#]

nrt_ring_full#:
		alu[tq_len, --, B, TQ_MAX_LEN]
		br[nrt_ring_continue#]

ubrt_ring_full#:
		alu[tq_len, --, B, TQ_MAX_LEN]
		br[ubrt_ring_continue#]

end_macro#:
	; write the saved value back as the LM index0 base
	local_csr_wr[ACTIVE_LM_ADDR_0, old_lm]
.end
#endm


//------------------------------------------------------------------
// _flow_control()
//
//    Description: Derives the number of cells waiting in the QM -> TX
//					scratch ring from its head and tail pointers and sets
//					or clears the flow control (FC) bit, bit 0 of
//					*l$index0[0], accordingly.
//
//    Parameters:
//		  Inputs:
//			xmitcount: not referenced by the macro body
//			sr_head, sr_tail: scratch ring head and tail pointer values
// 		 Outputs:
//			None (the FC bit in local memory is updated).
//------------------------------------------------------------------

#macro _flow_control(xmitcount, sr_head, sr_tail)
.begin
.reg fc_pending fc_head_idx fc_tail_idx

	; Reduce both pointers to cell indices: >>3 because one cell occupies
	; 2 LW in the ring, then mask to the ring capacity expressed in cells.
	alu_shf[fc_head_idx, ((QM_TO_TX_SCR_RING_SIZE / 2) - 1), AND, sr_head, >>3]
	alu_shf[fc_tail_idx, ((QM_TO_TX_SCR_RING_SIZE / 2) - 1), AND, sr_tail, >>3]

	; cells awaiting transmission = tail - head
	alu[fc_pending, fc_tail_idx, -, fc_head_idx]
	bge[fc_threshold#]

	; negative result: the tail has wrapped around, add one ring length
	alu[fc_pending, fc_pending, +, (QM_TO_TX_SCR_RING_SIZE / 2)]

fc_threshold#:
	; assert flow control once at least FC_DELTA cells are pending
	alu[--, fc_pending, -, FC_DELTA]
	bge[fc_assert#]

	; below the threshold: clear the FC bit
	alu[*l$index0[0], *l$index0[0], AND~, 1] 
	br[fc_done#]

fc_assert#:
	; at or above the threshold: set the FC bit
	alu[*l$index0[0], *l$index0[0], OR, 1] 

fc_done#:
.end
#endm


///////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED

//------------------------------------------------------------------
// _check_and_load_new_hbr_tq()
//
//    Description: 	Advances the HBR TQ poll timer by AGGREGATION. Once the
//					timer reaches its maximum it is reset and the indicator
//					word at HBR_TQ_SRAM_INDICATOR is fetched and cleared
//					with a single SRAM test_and_clr. A non-zero indicator
//					means a new HBR TQ is waiting in SRAM (one is published
//					whenever an HBR VC arrives or leaves the system); it is
//					then loaded through _read_hbr_tq_sram().
//
//    Parameters: None.
//		  Inputs:
//			None.
// 		 Outputs:
//			None.
//------------------------------------------------------------------

#macro _check_and_load_new_hbr_tq()
.begin
.reg hbr_flag_addr
.sig hbr_flag_rd_dn

	; timer += AGGREGATION; leave while timer_max is still greater than it
	alu[@reload_hbrtq_timer, @reload_hbrtq_timer, +, AGGREGATION]
	alu[--, @reload_hbrtq_timer_max, -, @reload_hbrtq_timer]
	bgt[hbr_poll_done#]

	; poll interval elapsed: restart the timer
	alu[@reload_hbrtq_timer, --, b, 0]

	; SRAM address of the "new HBR TQ available" indicator
	immed32(hbr_flag_addr, HBR_TQ_SRAM_INDICATOR)

	xbuf_alloc($indicator, 1, read_write)

	; Fetch and clear the indicator in one atomic operation: the all-ones
	; mask clears every bit and the previous value is returned.
	alu[$indicator[0], --, ~b, 0]
	sram[test_and_clr, $indicator[0], hbr_flag_addr, 0], sig_done[hbr_flag_rd_dn]
	ctx_arb[hbr_flag_rd_dn]    

	; zero means the HBR TQ has not changed - nothing to reload
	alu[--, --, b, $indicator[0]]
	beq[hbr_poll_done#]

    xbuf_free($indicator)

	; replace the HBR TQ held in LM with the new one from SRAM
    _read_hbr_tq_sram()

hbr_poll_done#:
.end
#endm

#endif
///////////////////////////////////////////////////////////////////////


//------------------------------------------------------------------
// _get_port_entry(in_port_no)
//
//    Description: 	Looks the port up in the CAM, which caches per-port
//					tables in LM.
//					Hit:  the tables are already in LM. If the CAM state of
//					      the entry says the DQ is not valid in LM, the DQ
//					      is read from SRAM/DRAM and the state is cleared.
//					Miss: the LRU entry is evicted - its PortInfo/UBRwPRI
//					      (and its DQ, when valid in LM) are written back to
//					      SRAM/DRAM - and the tables of in_port_no are
//					      loaded in its place.
//					Both paths exchange next_thread_sig with the neighbour
//					thread so that the threads stay in step.
//
//    Parameters: 
//		  Inputs: in_port_no - 0...2047
// 		 Outputs: Sets LM index0 (PortInfo) and index1 (DQ) bases;
//				  dq_lm_offset (enclosing scope) holds the DQ LM address.
//------------------------------------------------------------------
#macro _get_port_entry(in_port_no)
.begin
.reg cam_result  new_address_offset old_address_offset
.reg tmp cam_entry _dq_base
.reg lm_offset _portinfo_base

; allocate transfer buffers for the SRAM/DRAM operations:
; 28 LW for DRAM (16 LW PortInfo/UBRwPRI + 12 LW DQ), 16 LW for SRAM
///////////////////////////////////////
#ifdef PORTINFO_IN_SDRAM
	#define TX_REG_TYPE		$$port_
#else  
	#define TX_REG_TYPE		$port_
#endif

#ifdef PORTINFO_IN_SDRAM

	.reg TX_REG_TYPE/**/data_rw[28]
	.xfer_order TX_REG_TYPE/**/data_rw

#else	// PORTINFO_IN_SRAM

	.reg TX_REG_TYPE/**/data_rw[16]
	.xfer_order TX_REG_TYPE/**/data_rw

#endif	// PORTINFO_IN_SDRAM
///////////////////////////////////////


	; calculate offset address in SRAM/DRAM for PortInfo table for port#;
	; the same value is used as the CAM tag
	alu[new_address_offset, --, b, in_port_no, <<PORTINFO_XXRAM_OFFSET_LOG2]
	; offset in DRAM for DQ table for port# is the same as offset for PortInfo table

    cam_lookup[cam_result, new_address_offset]

	; CAM entry number: bits 6:3 of the lookup result
	alu[cam_entry, 0xf, and, cam_result, >>3]

	; tag of that entry = SRAM/DRAM offset of the port currently held in it
	; (only consumed on the miss path, where the entry is the one evicted)
	cam_read_tag[old_address_offset, cam_entry]

	; Signal next thread
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	

	; set constants (the PortInfo base symbol is used for both memory types)
	immed32[_portinfo_base, PORTINFO_SDRAM_BASE]

	; The DQ base is needed on every non-SDRAM path. Keying this on
	; "not SDRAM" matches the #ifdef PORTINFO_IN_SDRAM / #else selection used
	; everywhere else in this macro, so _dq_base can never be left unset.
#ifndef PORTINFO_IN_SDRAM
	immed32[_dq_base, DQ_SRAM_BASE]
#endif

	; preparing LM base address for PortInfo table
	alu[lm_offset, 0x78, and, cam_result]	; entry number is <<3
	alu[lm_offset, --, B, lm_offset, <<3]	; <<3 more: 64 bytes of LM per entry
	alu[tmp, PORTINFO_LM_BASE, +, lm_offset]
	local_csr_wr[active_lm_addr_0, tmp]

	; preparing LM base address for DQ table
	alu[dq_lm_offset, @dq_lm_base, +, lm_offset]
	local_csr_wr[active_lm_addr_1, dq_lm_offset]
	; jump if the lookup hit (bit 7 of the lookup result set)
	br_bset[cam_result, 7, cam_hit#]
	nop

cam_miss#:
	; Read state of the entry being evicted (bit 8 set = its DQ is not valid in LM)
	cam_read_state[cam_result, cam_entry]
	; Re-tag the entry for the new port with state 0; this makes it MRU
	cam_write[cam_entry, new_address_offset, 0]

/* *********** PortInfo table reloading *********** */
#ifdef PORTINFO_IN_SDRAM
	; Read PortInfo, UBRwPRI and DQ (14 quadwords) from SDRAM
	dram_read(TX_REG_TYPE/**/data_rw[0], _portinfo_base, new_address_offset, 14, sram_read_dn0_wo, SIG_NONE, ___);
#else	// PORTINFO_IN_SRAM
	; Read PortInfo and UBRwPRI (16 LW) from SRAM
	sram_read(TX_REG_TYPE/**/data_rw[0], _portinfo_base, new_address_offset, 16, sram_read_dn0_wo, SIG_NONE, ___)
#endif	// PORTINFO_IN_SDRAM

	; Stage the evicted tables for write-back:
	; Copy PortInfo from LM to the transfer registers	(0..7)
	; Copy UBRwPRI from LM to the transfer registers	(8..15)
	#define_eval i 0
	#while (i < 16)
		alu[TX_REG_TYPE/**/data_rw/**/[i], --,b, *l$index0[/**/i]]
		#define_eval i (i+1)
	#endloop
	#undef i

///////////////////////////////////////
#ifdef PORTINFO_IN_SDRAM
	br_bset[cam_result, 8, no_valid_dq#] ; Check if we have valid DQ in LM
	; Copy DepartureQueue from LM to the transfer registers (16..27)
	#define_eval i 0
	#while (i < 12)
		#define_eval j	(i+16)
		alu[TX_REG_TYPE/**/data_rw/**/[j], --,b, *l$index1[/**/i]]
		#define_eval i (i+1)
	#endloop
	#undef i
	#undef j
	; Saving old PortInfo, UBRwPRI and DQ tables
	dram_write(TX_REG_TYPE/**/data_rw[0], _portinfo_base, old_address_offset, 14, sram_write_dn0, SIG_NONE, ___)
	br[valid_dq#]
no_valid_dq#:
	; Saving old PortInfo and UBRwPRI tables only (DQ in LM is not valid)
	dram_write(TX_REG_TYPE/**/data_rw[0], _portinfo_base, old_address_offset, 8, sram_write_dn0, SIG_NONE, ___)
valid_dq#:
#else 	// PORTINFO_IN_SRAM
	; Saving old PortInfo and UBRwPRI tables
	sram_write(TX_REG_TYPE/**/data_rw[0], _portinfo_base, old_address_offset, 16, sram_write_dn0, SIG_NONE, ___)
#endif	// PORTINFO_IN_SDRAM
///////////////////////////////////////

	ctx_arb[sram_read_dn0_wo, sram_write_dn0, next_thread_sig]

	; Install the new port's tables:
	; Copy PortInfo from the transfer registers to LM	(0..7)
	; Copy UBRwPRI from the transfer registers to LM	(8..15)
	#define_eval i 0
	#while (i < 16)
		alu[*l$index0[/**/i], --,b, TX_REG_TYPE/**/data_rw/**/[i]]
		#define_eval i (i+1)
	#endloop
	#undef i

///////////////////////////////////////
#ifdef PORTINFO_IN_SDRAM
	; Copy DQ rings from the transfer registers (16..27) to LM
	#define_eval i 0
	#while (i < 12)
		#define_eval j	(i+16)
		alu[*l$index1[/**/i], --,b, TX_REG_TYPE/**/data_rw/**/[j]]
		#define_eval i (i+1)
	#endloop
	#undef i
	#undef j
#else	//  PORTINFO_IN_SRAM

	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	; signal next thread
	; read the new port's DQ from SRAM
	sram_read(TX_REG_TYPE/**/data_rw[0], _dq_base, new_address_offset, 12, sram_read_dn0_wo, SIG_NONE, ___)

	; Write the evicted port's DQ back only when the DQ held in LM is valid
	; (state bit 8 clear), as the SDRAM path above and _get_port_entry_wo
	; do. Otherwise the LM slot holds stale data and writing it out would
	; overwrite the evicted port's DQ in SRAM.
	br_bset[cam_result, 8, sram_skip_dq_save#]

	; Copy DQ rings from LM to the transfer registers
	#define_eval i 0
	#while (i < 12)
		alu[TX_REG_TYPE/**/data_rw/**/[i], --, b, *l$index1[/**/i]]
		#define_eval i (i+1)
	#endloop
	#undef i

	; write DQ to SRAM
	sram_write(TX_REG_TYPE/**/data_rw[0], _dq_base, old_address_offset, 12, sram_write_dn0, SIG_NONE, ___)

	ctx_arb[sram_read_dn0_wo, sram_write_dn0, next_thread_sig]
	br[sram_dq_loaded#]

sram_skip_dq_save#:
	; no write was issued: wait for the DQ read and the neighbour thread only
	ctx_arb[sram_read_dn0_wo, next_thread_sig]

sram_dq_loaded#:
	; Copy DQ rings from the transfer registers to LM
	#define_eval i 0
	#while (i < 12)
		alu[*l$index1[/**/i], --,b, TX_REG_TYPE/**/data_rw/**/[i]]
		#define_eval i (i+1)
	#endloop
	#undef i

#endif	// PORTINFO_IN_SDRAM
///////////////////////////////////////

	br[cam_end#]

cam_hit#:	// ctx swap added to maintain thread sync.
	br_bclr[cam_result, 8, hit_valid_dq#] ; Check if we have valid DQ in LM
	; DQ is not valid in LM: fetch it now
#ifdef PORTINFO_IN_SDRAM
	; in DRAM the DQ follows the 16 LW (64 bytes) of PortInfo + UBRwPRI
	alu[new_address_offset, new_address_offset, +, 64]
	dram_read(TX_REG_TYPE/**/data_rw[0], _portinfo_base, new_address_offset, 6, sram_read_dn0_wo, SIG_NONE, ___);
#else
	sram_read(TX_REG_TYPE/**/data_rw[0], _dq_base, new_address_offset, 12, sram_read_dn0_wo, SIG_NONE, ___)
#endif
	cam_write_state[cam_entry, 0] ; Clear invalid DQ bit for that CAM entry
	ctx_arb[sram_read_dn0_wo, next_thread_sig]

	; Copy DQ rings from the transfer registers to LM
	#define_eval i 0
	#while (i < 12)
		alu[*l$index1[/**/i], --,b, TX_REG_TYPE/**/data_rw/**/[i]]
		#define_eval i (i+1)
	#endloop
	#undef i

	br[hit_continue#]
hit_valid_dq#:
	ctx_arb[next_thread_sig]
hit_continue#:
#ifndef PORTINFO_IN_SDRAM
	; the SRAM miss path has a second signal/wait phase; mirror it on a hit
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	; signal next thread
	ctx_arb[next_thread_sig]
#endif

cam_end#:
.end; cam_result 
#undef TX_REG_TYPE
#endm


//------------------------------------------------------------------
// _get_port_entry_wo(in_port_no)
//
//    Description: 	Write-out variant of the port lookup. Checks through the
//					CAM whether PortInfo/UBRwPRI of the port are in LM.
//					Hit:  only the next_thread_sig hand-shake is performed.
//					Miss: the LRU entry is evicted - its PortInfo/UBRwPRI
//					      (and its DQ, when valid in LM) are written back to
//					      SRAM/DRAM - and PortInfo/UBRwPRI of in_port_no are
//					      loaded. The DQ of the new port is NOT loaded; the
//					      entry is written with CAM state 1, which
//					      _get_port_entry tests (bit 8) as "DQ not valid in LM".
//
//    Parameters: 
//		  Inputs: in_port_no - 0...2047
// 		 Outputs: Sets LM index0 base. LM index1 base is changed only
//				  temporarily on the miss path and is written back before
//				  the macro ends.
//------------------------------------------------------------------
#macro _get_port_entry_wo(in_port_no)
.begin
.reg cam_entry_number cam_result  new_address_offset old_address_offset
.reg tmp old_dq_lm_base _dq_base
.reg lm_offset _portinfo_base

	; calculate offset address in SRAM/DRAM for PortInfo table for port#;
	; the same value is used as the CAM tag
	alu[new_address_offset, --, b, in_port_no, <<PORTINFO_XXRAM_OFFSET_LOG2]

    cam_lookup[cam_result, new_address_offset]

	; preparing LM base address for PortInfo table.
	; lm_offset keeps the bare per-entry offset (entry number * 64) so that
	; the DQ address below is @dq_lm_base + offset, exactly as computed in
	; _get_port_entry; PORTINFO_LM_BASE must not leak into the DQ address.
	alu[lm_offset, 0x78, and, cam_result]
	alu[lm_offset, --, B, lm_offset, <<3]
	alu[tmp, PORTINFO_LM_BASE, +, lm_offset]
	local_csr_wr[active_lm_addr_0, tmp]

	; jump if the lookup hit (bit 7 of the lookup result set)
	br_bset[cam_result, 7, gpi_cam_hit#]
	; CAM entry number: bits 6:3 of the lookup result
	alu[cam_entry_number, 0xf, and, cam_result, >>3]
	; tag of that entry = SRAM/DRAM offset of the port being evicted
	cam_read_tag[old_address_offset, cam_entry_number]

	local_csr_rd[ACTIVE_LM_ADDR_1]	; store old LM1 base to be restored at the end
	immed[old_dq_lm_base, 0]

	; preparing LM base address for DQ table
	alu[lm_offset, @dq_lm_base, +, lm_offset]
	local_csr_wr[active_lm_addr_1, lm_offset]
		
gpi_cam_miss#:

	; Read state of the entry being evicted (bit 8 set = its DQ is not valid in LM)
	cam_read_state[cam_result, cam_entry_number]
	; set constants (the PortInfo base symbol is used for both memory types)
	immed32[_portinfo_base, PORTINFO_SDRAM_BASE]
	; The DQ base is needed on every non-SDRAM path; keyed on "not SDRAM" to
	; match the #ifdef PORTINFO_IN_SDRAM / #else selection used below.
#ifndef PORTINFO_IN_SDRAM
	immed32[_dq_base, DQ_SRAM_BASE]
#endif
	; Re-tag the entry for the new port (this makes it MRU); state 1 records
	; that the DQ of the new port is not loaded into LM
	cam_write[cam_entry_number, new_address_offset, 1]

.begin
/* *********** PortInfo table reloading *********** */
	; allocate  buffers for SRAM/DRAM operation
///////////////////////////////////////
#ifdef PORTINFO_IN_SDRAM
	#define TX_REG_TYPE		$$port_
#else  
	#define TX_REG_TYPE		$port_
#endif
///////////////////////////////////////

///////////////////////////////////////
#ifdef PORTINFO_IN_SDRAM
	.reg TX_REG_TYPE/**/data_rw[28]
    .xfer_order TX_REG_TYPE/**/data_rw
	; Read PortInfo and UBRwPRI (8 quadwords) from SDRAM
	dram_read(TX_REG_TYPE/**/data_rw[0], _portinfo_base, new_address_offset, 8, sram_read_dn0_wo, SIG_NONE, ___);

#else	// PORTINFO_IN_SRAM

	; 16 registers: the read below transfers 16 LW and the copy loops
	; index elements 0..15
	.reg TX_REG_TYPE/**/data_rw[16]
	.xfer_order TX_REG_TYPE/**/data_rw
	; Read PortInfo and UBRwPRI (16 LW) from SRAM
	sram_read(TX_REG_TYPE/**/data_rw[0], _portinfo_base, new_address_offset, 16, sram_read_dn0_wo, SIG_NONE, ___)

#endif	// PORTINFO_IN_SDRAM

	; Stage the evicted tables for write-back:
	; Copy PortInfo from LM to the transfer registers	(0..7)
	; Copy UBRwPRI from LM to the transfer registers	(8..15)
	#define_eval i 0
	#while (i < 16)
		alu[TX_REG_TYPE/**/data_rw/**/[i], --,b, *l$index0[/**/i]]
		#define_eval i (i+1)
	#endloop
	#undef i

///////////////////////////////////////
#ifdef PORTINFO_IN_SDRAM
	br_bset[cam_result, 8, no_valid_dq#]	; skip the DQ when it is not valid in LM
	; Copy DepartureQueue from LM to the transfer registers (16..27)
	#define_eval i 0
	#while (i < 12)
		#define_eval j	(i+16)
		alu[TX_REG_TYPE/**/data_rw/**/[j], --,b, *l$index1[/**/i]]
		#define_eval i (i+1)
	#endloop
	#undef i
	#undef j
	; Saving old PortInfo, UBRwPRI and DQ tables
	dram_write(TX_REG_TYPE/**/data_rw[0], _portinfo_base, old_address_offset, 14, sram_write_dn0, SIG_NONE, ___)
	br[valid_dq#]
no_valid_dq#:
	; Saving old PortInfo and UBRwPRI tables only
	dram_write(TX_REG_TYPE/**/data_rw[0], _portinfo_base, old_address_offset, 8, sram_write_dn0, SIG_NONE, ___)
valid_dq#:
#else 	// PORTINFO_IN_SRAM
	; Saving old PortInfo and UBRwPRI tables
	sram_write(TX_REG_TYPE/**/data_rw[0], _portinfo_base, old_address_offset, 16, sram_write_dn0, SIG_NONE, ___)
#endif	// PORTINFO_IN_SDRAM
///////////////////////////////////////

	ctx_arb[sram_read_dn0_wo, sram_write_dn0, next_thread_sig]

	; Signal next thread
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	

	; Install the new port's tables:
	; Copy PortInfo from the transfer registers to LM	(0..7)
	; Copy UBRwPRI from the transfer registers to LM	(8..15)
	#define_eval i 0
	#while (i < 16)
		alu[*l$index0[/**/i], --,b, TX_REG_TYPE/**/data_rw/**/[i]]
		#define_eval i (i+1)
	#endloop
	#undef i

#ifndef PORTINFO_IN_SDRAM
	br_bset[cam_result, 8, no_valid_dq#] ; check if there is valid DQ in LM
	; Copy DQ rings from LM to the transfer registers
	#define_eval i 0
	#while (i < 12)
		alu[TX_REG_TYPE/**/data_rw/**/[i], --, b, *l$index1[/**/i]]
		#define_eval i (i+1)
	#endloop
	#undef i

	; write the evicted port's DQ to SRAM
	sram_write(TX_REG_TYPE/**/data_rw[0], _dq_base, old_address_offset, 12, sram_write_dn0, SIG_NONE, ___)

	ctx_arb[sram_write_dn0, next_thread_sig]
	br[valid_dq#]
no_valid_dq#:
	; nothing to write back: only keep in step with the neighbour thread
	ctx_arb[next_thread_sig]
valid_dq#:
	; Signal next thread
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	
#endif
	local_csr_wr[active_lm_addr_1, old_dq_lm_base] ; restoring old LM1 base
	br[gpi_cam_end#]
.end

gpi_cam_hit#:
	// Making threads to run synchronously: same number of wait/signal
	// phases as the miss path (one for SDRAM, two for SRAM)
	ctx_arb[next_thread_sig]
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	; signal next thread
#ifndef PORTINFO_IN_SDRAM
	ctx_arb[next_thread_sig]
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	; signal next thread
#endif
gpi_cam_end#:
.end; cam_result cam_entry_number
#undef TX_REG_TYPE
#endm


//------------------------------------------------------------------
// _check_shift(io_tqnum, in_xxtqnum, in_curtq)
//
//    Description: 	Adjusts the TQ number received from the shaper so that
//					we never try to schedule into a TQ that is cached in
//					the DQ ring, i.e. one lying in the (possibly wrapped)
//					range <in_xxtqnum, in_curtq>.
//					When io_tqnum falls inside that range it is replaced
//					by (in_curtq + 1) AND MaxTQmask.
//					When the two TQ numbers are more than RTDQ_SIZE apart
//					the range test is skipped and only io_tqnum == in_curtq
//					triggers the shift.
//
//    Parameters:
//		  Inputs:
//				in_curtq - number of TQ we should be servicing
//				in_xxtqnum - number of currently serviced TQ
//		  Inputs/Outputs:
//				io_tqnum - number of TQ to which we should schedule
//
//    Taken from the enclosing scope:
//				mask_upper21, RTDQ_SIZE and *l$index0[4]
//				(MaxTQmask is read as *l$index0[4] >> 12)
//------------------------------------------------------------------
#macro _check_shift(io_tqnum, in_xxtqnum, in_curtq)
.begin
.reg diff_1 	; CurTQ  - xxtqnum
.reg diff_2 	; (MaxTQmask + 1) + CurTQ - xxTQnum (distance across the wrap)
.reg diff_3		; CurTQ - tqnum
.reg diff_4		; tqnum - CurTQ
.reg diff_5		; tqnum - xxTQnum
.reg diff_6		; xxTQnum - tqnum

.reg tqnumA		; copy of io_tqnum
.reg xxtqnumA	; copy of in_xxtqnum
.reg maxtqmask  ; MaxTQmask

// if (xxTQnum <= tqnum <= CurTQ) then tqnum = CurTQ + 1
	alu[diff_1, in_curtq, -, in_xxtqnum]
	bmi[wrap#] ; jump if xxTQnum > CurTQ (range wraps around the TQ table)

	; If (CurTQ - xxTQnum) > RTDQ_SIZE then
	; there is no need to check if tqnum is between them
	alu[--, diff_1, -, RTDQ_SIZE]
	bgt[no_wrap#]

	move[tqnumA, io_tqnum]
	alu[diff_3, in_curtq, -, io_tqnum] ; CurTQ - tqnum
    beq[shift_needed#]
	alu[diff_5, tqnumA, -, in_xxtqnum] ; tqnum - xxTQnum
    beq[shift_needed#]
	; sign bit of the OR is set when either difference is negative
	alu[--, diff_3, OR, diff_5]
	bmi[end#] ;jump if tqnum isn't in <xxTQnum, CurTQ> range

	alu[io_tqnum, in_curtq, +, 1] ; tqnum = CurTQ + 1
	; get MaxTQmask to mask incremented value
	alu[io_tqnum, io_tqnum, AND, *l$index0[4], >>12]
	br[end#]

wrap#:
    alu[maxtqmask, mask_upper21, AND, *l$index0[4], >>12]
	alu[diff_2, 1, +, maxtqmask] ; number of TQs = MaxTQmask + 1
	alu[diff_2, diff_2, +, diff_1] ; + (CurTQ - xxTQnum), diff_1 is negative here
	alu[--, diff_2, -, RTDQ_SIZE]
	bgt[no_wrap#]

	move[xxtqnumA, in_xxtqnum]
	move[tqnumA, io_tqnum]
	alu[diff_4, tqnumA, -, in_curtq] ; tqnum - CurTQ
    beq[shift_needed#]
	alu[diff_6, xxtqnumA, -, io_tqnum] ; xxTQnum - tqnum
    beq[shift_needed#]
	; both differences non-negative means CurTQ < tqnum < xxTQnum,
	; i.e. tqnum lies outside the wrapped range
	alu[--, diff_4, OR, diff_6]
	bpl[end#] ;jump if tqnum isn't in the wrapped <xxTQnum, CurTQ> range

	alu[io_tqnum, in_curtq, +, 1] ; tqnum = CurTQ + 1
	; get MaxTQmask to mask incremented value
	alu[io_tqnum, io_tqnum, AND, *l$index0[4], >>12]
	br[end#]

no_wrap#:
	; range test skipped: only the TQ being serviced right now is avoided
	alu[--, io_tqnum, -, in_curtq]
	bne[end#]

shift_needed#:
	alu[io_tqnum, in_curtq, +, 1] ; tqnum = CurTQ + 1
	; get MaxTQmask to mask incremented value
	alu[io_tqnum, io_tqnum, AND, *l$index0[4], >>12]

end#:
.end
#endm


//------------------------------------------------------------------
// _write_lbr()
//
//    Description: 	Puts cell into TQ - for LBR VCs.
//					Atomically increments the length of the chosen TQ
//					(sram test_and_incr). If the TQ is not full, the VCQ
//					entry is written at the returned position and control
//					returns through in_rtn_req. If the TQ is full, another
//					TQ 1..4 slots ahead (chosen from TIMESTAMP_LOW) is
//					tried until a non-full one is found.
//
//    Parameters:
//		  Inputs:
//				in_tqlen_sram_base - SRAM base of the TQ length table
//				in_tq_sram_base - SRAM base of the TQ table
//				in_vcq - transfer register written into the TQ
//				in_rtn_req - register holding the return address (rtn[])
//				in_rd_sig - signal used for the test_and_incr read
//				in_wr_sig - signal used for the TQ write
//				TQSIZE_SHIFT - log2 of the TQ size in bytes
//				TQMAX_LEN - not referenced by the body
//				TQL_SHIFT - log2 of the TQ length entry size in bytes
//		  Inputs/Outputs:
//				io_sig_mask - wakeup mask; reset to the next_thread_sig
//							  bit after the first wait, in_wr_sig is
//							  OR-ed in before returning
//				in_tqnum - TQ number to schedule into; advanced and
//						   masked with MaxTQmask when the TQ is full
//
//    Taken from the enclosing scope:
//				tql_offset, qlen (both written here), mask_tqlofs,
//				mask_tqofs, next_thread_sig, next_thread_sig_csr_val,
//				TQLEN_ENTRY_SIZE_SHIFT, TQ_MAX_LEN, *l$index0[4] and
//				*l$index0[6]
//
//    NOTE(review): the full-TQ test uses the global TQ_MAX_LEN, not the
//    TQMAX_LEN parameter - confirm which one is intended.
//------------------------------------------------------------------
#macro _write_lbr(io_sig_mask, in_tqnum, in_tqlen_sram_base, \
 	in_tq_sram_base, in_vcq, in_rtn_req, in_rd_sig, in_wr_sig, \
  	TQSIZE_SHIFT, TQMAX_LEN, TQL_SHIFT)

.begin
.reg $tq_len	; length of TQ 
.reg tq_offset	; offset within TQ on this port
.reg tq_base	; base addres for TQ given by tqnum
.reg tql_offset_for_port tql_base_for_port
.reg tq_offset_for_port tq_base_for_port
.reg tmp

	; get TQlen_offset for given port
#define_eval INDIRECT_SHIFT 	(16 - TQLEN_ENTRY_SIZE_SHIFT)	; we need tq_offset_for_port to be in bytes
	alu[tql_offset_for_port, mask_tqlofs, AND, *l$index0[6], >>INDIRECT_SHIFT]	; TQlen offset in bytes
#undef INDIRECT_SHIFT
	; calculate offset from the beginning of TQ length table for that port
	alu[tql_base_for_port, in_tqlen_sram_base, +, tql_offset_for_port]
	; calculating offset from first TQlen element for that port
	alu[tql_offset, --, B, in_tqnum, <<TQL_SHIFT]

	; increment TQlen
	sram[test_and_incr, $tq_len, tql_base_for_port, tql_offset], sig_done[in_rd_sig]

	; wait for the read together with whatever is already in io_sig_mask
	alu[io_sig_mask, io_sig_mask, OR, 1, <<&in_rd_sig]
	local_csr_wr[ACTIVE_CTX_WAKEUP_EVENTS, io_sig_mask]
	alu[io_sig_mask, --, b, 1, <<&next_thread_sig]	; clear mask

	ctx_arb[--]

	.io_completed sram_read_dn_sched in_rd_sig \
	 sram_read_dn_newtq0 sram_read_dn_newtq1 sram_read_dn_newtq2 cap_read_head_dn cap_read_tail_dn

	; Calculate offset from the beginning of RTQ table for given port using TQ_offset

#define_eval INDIRECT_SHIFT 	(16 - TQSIZE_SHIFT)	; we need tq_offset_for_port to be in bytes

	alu[tq_offset_for_port, mask_tqofs, AND, *l$index0[6], >>INDIRECT_SHIFT] ; to get TQ offset in bytes

#undef INDIRECT_SHIFT

	alu[tq_base_for_port, tq_offset_for_port, +, in_tq_sram_base]

prepare#:
	alu[--, $tq_len, -, TQ_MAX_LEN]	; check if TQ is full
	bge[find_another#]		; If this TQ is full let's find another

	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
	; Preparing pointers for this TQ write-out
	alu[qlen, --, B, $tq_len]	; $tq_len should be <0,TQ_MAX_LEN)

	; Calculate TQ address from TQ number
	alu[tq_offset, --, B, in_tqnum, <<TQSIZE_SHIFT] ; to get TQ addres in bytes

	alu[tq_base, tq_offset, +, tq_base_for_port]

write_vcq#:
	; prepare address for writing VCQ
	alu[tq_offset, --, B , qlen, <<2]	; calc. offset in bytes

	sram[write, in_vcq, tq_base, tq_offset, 1], sig_done[in_wr_sig]
	alu[io_sig_mask, io_sig_mask, OR, 1, <<&in_wr_sig]
	rtn[in_rtn_req]

find_another#:
	; Current TQ is FULL so we need to try another one (loops via prepare#)
	local_csr_rd[TIMESTAMP_LOW]
	immed[tmp, 0]			; paired with the local_csr_rd above to fetch the CSR value
	alu[tmp, 0x3, AND, tmp]	; Randomizing the choice of TQ
	alu[tmp, tmp, +, 1]		; step is 1..4
	alu[in_tqnum, in_tqnum, +, tmp]

	; get MaxTQmask to mask incremented value
	alu[in_tqnum, in_tqnum, AND, *l$index0[4], >>12]

	alu[tql_offset, --, b, in_tqnum, <<TQL_SHIFT]

	; increment TQlen of the newly chosen TQ; the result must land in the
	; same transfer register that prepare# tests
	sram[test_and_incr, $tq_len, tql_base_for_port, tql_offset], ctx_swap[in_rd_sig]
	br[prepare#]
.end	
#endm

//------------------------------------------------------------------
// _init_16_port_shaping_entries()
//
//    Description: 	Loads the first 16 PortShaping entries (one longword
//					each) from SRAM into local memory. Two 8-longword
//					reads are issued: byte offset 0 fills LM slots 0..7,
//					byte offset 0x20 fills LM slots 8..15.
//
//    Parameters:
//		  Inputs:  None (PORTSHAPING_SRAM_BASE and PORTSHAPING_LM_BASE
//				   are taken from the build configuration)
// 		 Outputs: active_lm_addr_1 is set to PORTSHAPING_LM_BASE
//------------------------------------------------------------------
#macro _init_16_port_shaping_entries()
.begin
.sig init_sram_read_dn          // completion signal of the PortShaping reads
.reg ps_lm_base
.reg ps_sram_base ps_sram_ofs

	immed32[ps_sram_base, PORTSHAPING_SRAM_BASE]
	immed32[ps_sram_ofs, 0]

	; Point LM index1 at the PortShaping area
	immed32[ps_lm_base, PORTSHAPING_LM_BASE]
	local_csr_wr[active_lm_addr_1, ps_lm_base]

	xbuf_alloc($r_portshaping, 8,  read)

	; First half: entries 0..7
	sram[read, $r_portshaping[0], ps_sram_base, ps_sram_ofs, 8], sig_done[init_sram_read_dn]
	ctx_arb[init_sram_read_dn]
	nop
	#define_eval _PS_LW 0
	#while (_PS_LW < 8)
		alu[*l$index1[/**/_PS_LW], --,b, $r_portshaping[/**/_PS_LW]]
		#define_eval _PS_LW (_PS_LW + 1)
	#endloop
	#undef _PS_LW

	; Second half: entries 8..15, 0x20 bytes further in SRAM
	alu[ps_sram_ofs, 0x20, +, ps_sram_ofs]
	sram[read, $r_portshaping[0], ps_sram_base, ps_sram_ofs, 8], sig_done[init_sram_read_dn]
	ctx_arb[init_sram_read_dn]
	#define_eval _PS_LW 0
	#while (_PS_LW < 8)
		#define_eval _PS_LM	(_PS_LW + 8)
		alu[*l$index1[/**/_PS_LM], --,b, $r_portshaping[/**/_PS_LW]]
		#define_eval _PS_LW (_PS_LW + 1)
	#endloop
	#undef _PS_LW
	#undef _PS_LM

	xbuf_free($r_portshaping)
.end;
#endm


//------------------------------------------------------------------
// _init_8_port_shaping_entries()
//
//    Description: 	Loads the first 8 PortShaping entries (one longword
//					each) from SRAM byte offset 0 into local memory
//					slots 0..7 addressed through LM index1.
//
//    Parameters:
//		  Inputs:  None (PORTSHAPING_SRAM_BASE and PORTSHAPING_LM_BASE
//				   are taken from the build configuration)
// 		 Outputs: Sets LM index1 base (active_lm_addr_1) to
//				   PORTSHAPING_LM_BASE
//------------------------------------------------------------------
#macro _init_8_port_shaping_entries()
.begin
.sig init_sram_read_dn          // completion signal of the PortShaping read
.reg ps_lm_base
.reg ps_sram_base ps_sram_ofs

	immed32[ps_sram_base, PORTSHAPING_SRAM_BASE]
	immed32[ps_sram_ofs, 0]

	; Point LM index1 at the PortShaping area
	immed32[ps_lm_base, PORTSHAPING_LM_BASE]
	local_csr_wr[active_lm_addr_1, ps_lm_base]

	xbuf_alloc($r_portshaping, 8,  read)
	sram[read, $r_portshaping[0], ps_sram_base, ps_sram_ofs, 8], sig_done[init_sram_read_dn]
	ctx_arb[init_sram_read_dn]
	nop

	; Move the 8 longwords just read into LM slots 0..7
	#define_eval _PS_LW 0
	#while (_PS_LW < 8)
		alu[*l$index1[/**/_PS_LW], --,b, $r_portshaping[/**/_PS_LW]]
		#define_eval _PS_LW (_PS_LW + 1)
	#endloop
	#undef _PS_LW

	xbuf_free($r_portshaping)
.end;
#endm


//------------------------------------------------------------------
// _get_16_port_shaping_entries(in_start_entry, out_lm_offset)
//
//    Description: 	Loads 16 PortShaping entries (one longword each) from
//					SRAM into local memory, starting at entry number
//					in_start_entry (SRAM byte offset in_start_entry * 4).
//					Two 8-longword reads are issued; the second one
//					starts 0x20 bytes after the first and fills LM
//					slots 8..15.
//
//    Parameters:
//		  Inputs: in_start_entry - index of the first entry to load
// 		 Outputs: out_lm_offset - receives PORTSHAPING_LM_BASE
//				  LM index1 base (active_lm_addr_1) is set to
//				  PORTSHAPING_LM_BASE
//
//    The completion signal get_sram_read_dn is not declared here and
//    must exist in the enclosing scope.
//------------------------------------------------------------------
#macro _get_16_port_shaping_entries(in_start_entry, out_lm_offset)
.begin
.reg ps_lm_base
.reg ps_sram_base ps_sram_ofs first_entry

	immed32[ps_sram_base, PORTSHAPING_SRAM_BASE]
	alu[first_entry, --, B, in_start_entry]
	alu[ps_sram_ofs, --, B, first_entry, <<2]	; entry index -> byte offset

	; Point LM index1 at the PortShaping area and report it to the caller
	immed32[ps_lm_base, PORTSHAPING_LM_BASE]
	local_csr_wr[active_lm_addr_1, ps_lm_base]
	alu[out_lm_offset, --, B, ps_lm_base]

	xbuf_alloc($r_portshaping, 8,  read)

	; First half: LM slots 0..7
	sram[read, $r_portshaping[0], ps_sram_base, ps_sram_ofs, 8], sig_done[get_sram_read_dn]
	ctx_arb[get_sram_read_dn]
	#define_eval _PS_LW 0
	#while (_PS_LW < 8)
		alu[*l$index1[/**/_PS_LW], --,b, $r_portshaping[/**/_PS_LW]]
		#define_eval _PS_LW (_PS_LW + 1)
	#endloop
	#undef _PS_LW

	; Second half: LM slots 8..15, 0x20 bytes further in SRAM
	alu[ps_sram_ofs, 0x20, +, ps_sram_ofs]
	sram[read, $r_portshaping[0], ps_sram_base, ps_sram_ofs, 8], sig_done[get_sram_read_dn]
	ctx_arb[get_sram_read_dn]
	#define_eval _PS_LW 0
	#while (_PS_LW < 8)
		#define_eval _PS_LM	(_PS_LW + 8)
		alu[*l$index1[/**/_PS_LM], --,b, $r_portshaping[/**/_PS_LW]]
		#define_eval _PS_LW (_PS_LW + 1)
	#endloop
	#undef _PS_LW
	#undef _PS_LM

	xbuf_free($r_portshaping)

.end;
#endm

//------------------------------------------------------------------
// _ubr_pri_deq(out_vcq)
//
//    Description: 	Strict priority dequeue algorithm.
//					The low byte of *l$index0[7] is a bitmap with one bit
//					per priority; ffs picks the lowest set bit. The
//					matching table word (*l$index0[8 + priority]) is
//					decremented by one, and when the part of the word that
//					survives a left shift by VCQ_NUMBER_OF_BITS becomes
//					zero, the priority's bit is cleared in the bitmap.
//					The VCQ number is the decremented word shifted right
//					by CELL_COUNT_NUMBER_OF_BITS.
//
//    Parameters: 
//		  Inputs: 
//				UBRwPRI cache byte in the local memory *l$index0[7]
//				UBR w/priority table in the local memory *l$index0[8-15]
// 		 Outputs: out_vcq - VCQ number to dequeue from
//				  			31st bit set when all priority queue are empty
//------------------------------------------------------------------
#macro  _ubr_pri_deq(out_vcq)
.begin
.reg pri_word first_pri

	; default answer: nothing to dequeue
	alu[out_vcq, --, b, 1, <<31]
	; pri_word = UBRwPRI cache byte (upper bytes cleared)
	ld_field_w_clr[pri_word, 0001, *l$index0[7]]
	; lowest set bit = first priority with cells; Z set when none
	ffs[first_pri, pri_word]
	beq[_all_empty#]

	; one-instruction-per-entry dispatch table indexed by priority
	jump[first_pri, dispatch#],	targets[_slot_0#, _slot_1#, _slot_2#, _slot_3#, \
											_slot_4#, _slot_5#, _slot_6#, _slot_7#]
dispatch#:
_slot_0#: br[_serve_pri_0#]
_slot_1#: br[_serve_pri_1#]
_slot_2#: br[_serve_pri_2#]
_slot_3#: br[_serve_pri_3#]
_slot_4#: br[_serve_pri_4#]
_slot_5#: br[_serve_pri_5#]
_slot_6#: br[_serve_pri_6#]
_slot_7#: br[_serve_pri_7#]

	; Every handler below follows the same pattern:
	;   pri_word = table word - 1, stored back to LM
	;   if the counter part is now zero, clear the priority's cache bit

_serve_pri_0#:
	alu[pri_word, *l$index0[8], -, 1]				;take one cell
	alu[*l$index0[8], --, b, pri_word]				;store decremented word
	alu[--, --, b, pri_word, <<VCQ_NUMBER_OF_BITS]	;anything left?
	bne[_extract_vcq#]
	alu[*l$index0[7], *l$index0[7], and~, 0x1]		;priority 0 is now empty
	br[_extract_vcq#]

_serve_pri_1#:
	alu[pri_word, *l$index0[9], -, 1]				;take one cell
	alu[*l$index0[9], --, b, pri_word]				;store decremented word
	alu[--, --, b, pri_word, <<VCQ_NUMBER_OF_BITS]	;anything left?
	bne[_extract_vcq#]
	alu[*l$index0[7], *l$index0[7], and~, 0x2]		;priority 1 is now empty
	br[_extract_vcq#]

_serve_pri_2#:
	alu[pri_word, *l$index0[10], -, 1]				;take one cell
	alu[*l$index0[10], --, b, pri_word]				;store decremented word
	alu[--, --, b, pri_word, <<VCQ_NUMBER_OF_BITS]	;anything left?
	bne[_extract_vcq#]
	alu[*l$index0[7], *l$index0[7], and~, 0x4]		;priority 2 is now empty
	br[_extract_vcq#]

_serve_pri_3#:
	alu[pri_word, *l$index0[11], -, 1]				;take one cell
	alu[*l$index0[11], --, b, pri_word]				;store decremented word
	alu[--, --, b, pri_word, <<VCQ_NUMBER_OF_BITS]	;anything left?
	bne[_extract_vcq#]
	alu[*l$index0[7], *l$index0[7], and~, 0x8]		;priority 3 is now empty
	br[_extract_vcq#]

_serve_pri_4#:
	alu[pri_word, *l$index0[12], -, 1]				;take one cell
	alu[*l$index0[12], --, b, pri_word]				;store decremented word
	alu[--, --, b, pri_word, <<VCQ_NUMBER_OF_BITS]	;anything left?
	bne[_extract_vcq#]
	alu[*l$index0[7], *l$index0[7], and~, 0x10]		;priority 4 is now empty
	br[_extract_vcq#]

_serve_pri_5#:
	alu[pri_word, *l$index0[13], -, 1]				;take one cell
	alu[*l$index0[13], --, b, pri_word]				;store decremented word
	alu[--, --, b, pri_word, <<VCQ_NUMBER_OF_BITS]	;anything left?
	bne[_extract_vcq#]
	alu[*l$index0[7], *l$index0[7], and~, 0x20]		;priority 5 is now empty
	br[_extract_vcq#]

_serve_pri_6#:
	alu[pri_word, *l$index0[14], -, 1]				;take one cell
	alu[*l$index0[14], --, b, pri_word]				;store decremented word
	alu[--, --, b, pri_word, <<VCQ_NUMBER_OF_BITS]	;anything left?
	bne[_extract_vcq#]
	alu[*l$index0[7], *l$index0[7], and~, 0x40]		;priority 6 is now empty
	br[_extract_vcq#]

_serve_pri_7#:
	alu[pri_word, *l$index0[15], -, 1]				;take one cell
	alu[*l$index0[15], --, b, pri_word]				;store decremented word
	alu[--, --, b, pri_word, <<VCQ_NUMBER_OF_BITS]	;anything left?
	bne[_extract_vcq#]
	alu[*l$index0[7], *l$index0[7], and~, 0x80]		;priority 7 is now empty
	; last handler falls through

_extract_vcq#:
	alu[out_vcq, --, b, pri_word, >>CELL_COUNT_NUMBER_OF_BITS]	;vcq# 
_all_empty#:
.end
#endm

#endif 		//_ATM_TM_SCHEDULER_UTIL_UC_
