#ifndef _ATM_TM_SHAPER_UTIL_UC_
#define _ATM_TM_SHAPER_UTIL_UC_

/*******************************************************************************
                             Intel Proprietary

 Copyright (c) 1998-2002 By Intel Corporation.  All rights reserved.
 No part of this program or publication may be reproduced, transmited,
 transcribed, stored in a retrieval system, or translated into any language
 or computer language in any form or by any means, electronic, mechanical,
 magnetic, optical, chemical, manual, or otherwise, without the prior
 written permission of:
                         Intel Corporation
                         2200 Mission College Blvd.
                         Santa Clara, CA  95052-8119
*******************************************************************************/

/*
 *      File Name: atm_tm_shaper_util.uc                                         
 *                                                                   
 *      Description: This file contains the actual shaper
 *					 microcode macros. 
 *                                                                   
 *      History: ver 1.0                                             
 *
 */                            

#include <atm_tm.h>
#include <atm_tm_shaper_gcra.uc>
#include <qm_atm_defs.h>


//////////////////////////////////////////////////////////
// 	atm_tm_update_sched_time()
//
//	description:
//		Synchronize Scheduler and Shaper in order to ensure
//		correct time-queue calculation by the shaper
//
//	input:		all directly from the scheduler ME
//		$tq_shap -		time-queue of scheduler
//		$tslo_shap -	time-slot of scheduler
//		$sync_shap -	synchronization word; the code compares it with
//						the value read from $tslo_shap
//						(NOTE(review): this header used to say "equal to
//						$tq_shap" - confirm which one is intended)
//
//	output:
//		in_mul_result_lo - receives the least significant word of the
//						scheduler (time-queue * 2^15) operation
//		in_mul_result_hi - receives the most significant word of the
//						scheduler (time-queue * 2^15) operation; written
//						only when HIGH_PRECISION_TQ_CALCULATION is defined
//		
//
//////////////////////////////////////////////////////////
#macro atm_tm_update_sched_time(in_mul_result_lo, in_mul_result_hi)
.begin
.reg _tq_snapshot _scaled_hi _scaled_lo

	// Only context 0 refreshes the scheduler time copy; every other
	// context jumps straight to the end of the macro.
	br!=ctx[0, skip_time#]

get_sched_time#:
	// Copy the values supplied by the scheduler into the absolute GPRs
	// @sched_tslot / @sched_tstamp.
	alu[@sched_tslot, --, B, $tq_shap]
	alu[@sched_tstamp, --, B, $tslo_shap]
	// The copy is accepted only when it matches the sync word; otherwise
	// the scaled outputs are left untouched.
	// NOTE(review): @sched_tslot/@sched_tstamp have already been overwritten
	// at this point even when the check fails - confirm that is intended.
	alu[--, @sched_tstamp, -, $sync_shap]
	bne[skip_time#]

	alu[_tq_snapshot, --, B, @sched_tslot]

	// Low word of (time-queue << POW_OF_PRECISION), staged in a temporary
	// and then copied to the caller-supplied destination.
	alu[_scaled_lo, --, B, _tq_snapshot, <<POW_OF_PRECISION]
	alu[in_mul_result_lo, --, B, _scaled_lo]

#ifdef HIGH_PRECISION_TQ_CALCULATION
	// High word: masked (time-queue >> POW_OF_PRECISION).
	// NOTE(review): the upper word of a 64-bit (tq << POW) would normally be
	// tq >> (32 - POW); this matches only if POW_OF_PRECISION is 16 or
	// mask_for_mul_result compensates - confirm.
	alu[_scaled_hi, mask_for_mul_result, AND, _tq_snapshot, >>POW_OF_PRECISION]
	alu[in_mul_result_hi, --, B, _scaled_hi]
#endif

skip_time#:
.end
#endm


//////////////////////////////////////////////////////////
//    prepare_and_send_message_to_scheduler()
//
//  Sends message to scheduler block
//
// ****************************************************************************
// * Communicate the following state to the writeout block through NN.
// ****************************************************************************
// Message format for LBR VCs
//
//    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
//    1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
//   +---------------+---------------+---------------+---------------+
//   | R |       Port(11)  	   |R|R|        VCQ#(17)      	         |
//   +---------------+---------------+---------------+---------------+
//   |S|CODE | Resv(9)         |	          ti(19)                 |
//   +---------------+---------------+---------------+---------------+
//  VCQ#: the VCQ number. 
//  ti - time queue number.
//  S: Speed, set to 0, indicating that the VC is LBR
//  CODE: 0 for UBR, 1 for CBR, 2 for rt-VBR and 3 for nrt-VBR
//          4 for UBR w/PCR, 5 for UBR w/MDCR, 6 for GFR
// ****************************************************************************
// Message format for UBR w PRI VCs
//
//    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
//    1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
//   +---------------+---------------+---------------+---------------+
//   | R |       PORT(11)  	   |R|R|        VCQ#(17)      	         |
//   +---------------+---------------+---------------+---------------+
//   |S|CODE |             Resv(14)      | PRI |      #cells(11)     |
//   +---------------+---------------+---------------+---------------+
//
//  VCQ#: the VCQ number. 
//	R: Reserved
//  PORT: Port number
//  ti - time queue number.
//  #cells: the number of cells that are being enqueued for this VC
//  CODE: 0 for UBR, 1 for CBR, 2 for rt-VBR and 3 for nrt-VBR
//          4 for UBR w/PCR, 5 for UBR w/MDCR, 6 for GFR
// ****************************************************************************
// Message format for HBR VCs
//
//    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
//    1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
//   +---------------+---------------+---------------+---------------+
//   | R |       Port(11)  	   |R|R|        VCQ#(17)      	         |
//   +---------------+---------------+---------------+---------------+
//   |S|            Resv(20)	               |     #cells(11)      |
//   +---------------+---------------+---------------+---------------+
//  VCQ#: the VCQ number. Composed of Port number(11bits) and Queue number(6bits)
//        (11 + 6 = 17 bits, matching the VCQ#(17) field drawn above)
//  #cells: the number of cells that are being enqueued for this HBR VC
//  S: Speed, set to 1, indicating that the VC is HBR
//
// ****************************************************************************
#macro prepare_and_send_message_to_scheduler(in_mult_fac, in_code, in_half_of_ts, in_thmask, io_ti)
// Converts the departure time held in io_ti into the second message word
// for the scheduler: a time-queue number masked with in_thmask, with
// in_code OR'ed in at bit 28.
// Despite its name this macro does not write to any ring; the caller sends
// the message afterwards.
//
//	in_mult_fac   - multiplier used for the timestamp -> cell-slot conversion
//	in_code       - service-class code, OR'ed into the result shifted left by 28
//	in_half_of_ts - value added to the product before the final shift
//	in_thmask     - mask applied to the shifted result
//	io_ti         - in: departure time; out: finished message word
//
// Reads @sched_tstamp, @mul_result_lo (and @mul_result_hi when
// HIGH_PRECISION_TQ_CALCULATION is defined) and local-memory word
// *l$index0[7]; all of these must be set up by the enclosing code.
.begin
.reg band_div   // shift count used to convert ti from slots to TQ
.reg tslot      // time slot/time queue
.reg res_lo res_hi
.reg diff

// express ti in cell slots. ti(slots) = ti(timestamp)*16/106 = ti(timestamp)*8/53
//									  = {ti(timestamp)*4946/2^18}*8
//									  = ti(timestamp)*4946/2^15

	alu[band_div, 0xf, AND, *l$index0[7], >>24]	;extract Line speed
	// total right-shift = POW_OF_PRECISION + AGGREGATION_LOG2 + line speed
	alu[band_div, (POW_OF_PRECISION + AGGREGATION_LOG2), +, band_div]

#ifdef DEBUG
	// debug_temp is not declared in this macro; it has to exist in the
	// enclosing scope when DEBUG is defined.
	alu[debug_temp, --, b, io_ti]
#endif

	alu[diff, io_ti, -, @sched_tstamp]   ; calculate delta

// Difference multiplication by MULT_FAC_FOR_DIVIDE
// (full mul_step sequence; only the word delivered by 32x32_last is kept)
	mul_step[in_mult_fac, diff], 32x32_start
	mul_step[in_mult_fac, diff], 32x32_step1
	mul_step[in_mult_fac, diff], 32x32_step2
	mul_step[in_mult_fac, diff], 32x32_step3
	mul_step[in_mult_fac, diff], 32x32_step4
	mul_step[tslot, --], 32x32_last

    alu[tslot, tslot, +, in_half_of_ts]

#ifdef HIGH_PRECISION_TQ_CALCULATION

	// 64-bit add of the scaled scheduler time and the scaled delta
	alu[res_lo, @mul_result_lo, +, tslot]
	alu[res_hi, @mul_result_hi, +carry, 0]

    // Dividing by 2^(15 + LR + LOG_AGGREGATION)
	// This alu only supplies band_div as the indirect shift count; it must
	// stay immediately in front of the >>indirect instruction.
	alu[--, band_div, OR, 0]
	dbl_shf[io_ti, res_hi, res_lo, >>indirect]	
	alu_shf[io_ti, in_thmask, AND, io_ti]

#else /* HIGH_PRECISION_TQ_CALCULATION */
	alu[res_lo, @mul_result_lo, +, tslot]

    // Dividing by 2^(15 + LR + LOG_AGGREGATION)
	// This alu only supplies band_div as the indirect shift count; it must
	// stay immediately in front of the >>indirect instruction.
	alu[--, band_div, OR, 0]
    alu[io_ti, in_thmask, AND, res_lo, >>indirect]

#endif /* HIGH_PRECISION_TQ_CALCULATION */

	// place the class code at bit 28 and above of the message word
	alu_shf[io_ti, io_ti, OR, in_code, <<28]

#ifdef DEBUG
	// Debug trace: four words written to SRAM at @debug_base, which is then
	// advanced by 16. tmp_d, tlo, debug_temp, $debug0..3, debug_done and
	// @debug_base are not declared here and must come from the enclosing scope.
	alu[tmp_d, --, B, @sched_tstamp]
	ld_field[tmp_d, 1000, 0xe0, <<24]
	alu[$debug0, --, B, tmp_d]
	alu[$debug1, --, B, @sched_tslot]
	alu[$debug2, --, B, debug_temp]
	alu[$debug3, --, B, tlo]

	alu[debug_temp, --, B, @debug_base]
	sram[write, $debug0, debug_temp, 0, 4], sig_done[debug_done]
//	sram[write, $debug0, debug_temp, 0, 1], sig_done[debug_done]
	.io_completed debug_done
	alu[@debug_base, @debug_base, +, 16]
//	alu[@debug_base, @debug_base, +, 4]
#endif


.end
#endm


//////////////////////////////////////////////////////////
// _get_gcra_table()
//
//	description:
//		Prepares GCRA table cache in LM for given vcq#
//		Can be used with or without CAM
//
//	input:	
//		in_vcq - number of VC we need to get GCRA for.
//		in_gcra_param_sram_base - BASE address for GCRA table
//		in_gcra_lm_offs - start LM address for GCRA table
//	output:
//
//////////////////////////////////////////////////////////
#macro _get_gcra_table(in_vcq, in_gcra_param_sram_base, in_gcra_lm_offs)
// Makes the GCRA entry of in_vcq available through *l$index0[...].
//
//	in_vcq                  - VC queue number whose GCRA entry is needed
//	in_gcra_param_sram_base - SRAM base address of the GCRA table
//	in_gcra_lm_offs         - LM address of the cache slot (non-CAM build only)
//
// Three build variants:
//	SHAPER_WITH_CAM + QM_WITH_SHAPER : CAM lookup only; LM pointer is set from
//	                                   the returned entry number, nothing is loaded.
//	SHAPER_WITH_CAM                  : CAM lookup; on a miss the entry reported by
//	                                   the lookup is written back (words 0-4) and
//	                                   the new entry (9 words) is loaded.
//	no CAM                           : single LM slot; the old entry is always
//	                                   written back and the new one loaded.
// Every path waits on next_thread_sig before leaving the macro.
//
// NOTE(review): xbuf_free($gcra) is only reached lexically in the CAM miss
// path; the other variants never free the buffer - confirm against xbuf.uc.
.begin 
.reg vcq_ofs	; offset from the beginning of GCRA table
.reg cam_result	; result of CAM operation
.reg cam_entry	; CAM element we need to use
.reg lm_addr	; LM pointer
.reg lru_ofs	; tag (SRAM offset) of the CAM entry being replaced
.reg old_vcq_ofs	; Previous VCQ offset

.sig sram_read_dn		//sram read done signal
.sig sram_write_dn		//sram write done signal

; Xfer registers for GCRA table reloading
    xbuf_alloc($gcra, 9, read_write)

#ifdef SHAPER_WITH_CAM

   	alu_shf[vcq_ofs, --, b, in_vcq, <<GCRA_ENTRY_SIZE_LOG]

    #ifdef QM_WITH_SHAPER
    	// Lookup by VCQ number; the entry number (result bits 6:3) selects the LM slot.
    	cam_lookup[cam_result, in_vcq]
    	alu_shf[cam_entry, 0xF, AND, cam_result, >>3]
        alu[lm_addr, --, B, cam_entry, <<GCRA_ENTRY_SIZE_LOG]
        alu[lm_addr, lm_addr, +, @_qm_shaper_lm_base]
        local_csr_wr[active_lm_addr_0, lm_addr]
    #else /* QM_WITH_SHAPER */
	    // Lookup by SRAM offset; the lookup also loads LM address 0.
	    cam_lookup[cam_result, vcq_ofs], lm_addr0[0]
    	alu_shf[cam_entry, 0xF, AND, cam_result, >>3]
	    nop
    	br_bset[cam_result, 7, hit#]	; bit 7 set = entry already cached

	miss#:

	    // Start loading the new entry (9 words) from SRAM.
	    alu[--, --, b, @_indirect_read9]
		sram[read, $gcra[0], in_gcra_param_sram_base, vcq_ofs, max_9], indirect_ref, sig_done[sram_read_dn]

	    // Fetch the tag of the entry being replaced, then retag it for the new VC.
	    cam_read_tag[lru_ofs, cam_entry]
	   	cam_write[cam_entry, vcq_ofs, 0]

		//writeback LRU value to SRAM
		//Need to write back only the TAT values
		alu[$gcra[0], --, b, *l$index0[0]]
		alu[$gcra[1], --, b, *l$index0[1]]
		alu[$gcra[2], --, b, *l$index0[2]]
		alu[$gcra[3], --, b, *l$index0[3]]
		alu[$gcra[4], --, b, *l$index0[4]]	;X (32bits) to restore GFR credit info

		// $gcra[0] spelling kept consistent with the rest of the macro
		sram[write, $gcra[0], in_gcra_param_sram_base, lru_ofs, 5], sig_done[sram_write_dn]

		ctx_arb[sram_read_dn, sram_write_dn, next_thread_sig]

		//Move ALL GCRA parameters read from SRAM, from Xfer to LM
		alu[*l$index0[0], --, b, $gcra[0]]
		alu[*l$index0[1], --, b, $gcra[1]]
		alu[*l$index0[2], --, b, $gcra[2]]
		alu[*l$index0[3], --, b, $gcra[3]]
		alu[*l$index0[4], --, B, $gcra[4]]
		alu[*l$index0[5], --, b, $gcra[5]]
		alu[*l$index0[6], --, b, $gcra[6]]
		alu[*l$index0[7], --, b, $gcra[7]]
	   	alu[*l$index0[8], --, b, $gcra[8]]

	    xbuf_free($gcra)

		br[end#]

	hit#:
    #endif /* QM_WITH_SHAPER */

	ctx_arb[next_thread_sig],	br[end#]

#else // SHAPER_WITH_CAM

	// First we have to set LM address
	local_csr_wr[active_lm_addr_0, in_gcra_lm_offs] ; setup local memory for the Shaper

	// Wait until the LM address has taken effect (we need 3 instructions
	// before the first LM access - don't remove following lines)
	alu[vcq_ofs, --, B, in_vcq, <<GCRA_ENTRY_SIZE_LOG]
    
	// Get VC parameters from the SRAM memory
    alu[--, --, b, @_indirect_read9]
	sram[read, $gcra[0], in_gcra_param_sram_base, vcq_ofs, max_9], indirect_ref, sig_done[sram_read_dn]
    
	// Get old VCQ offset to calculate SRAM address for write back the GCRA params
	alu[old_vcq_ofs, --, B, *l$index0[9]]

	// NOTE(review): the read above is issued before the write-back below.
	// If vcq_ofs equals old_vcq_ofs the read presumably returns the values
	// from before the write-back and the LM copy is then overwritten with
	// them - confirm whether callers can request the cached VC again.

	//Need to write back only the TAT values - these were changed, the rest is constant
	alu[$gcra[0], --, b, *l$index0[0]]
	alu[$gcra[1], --, b, *l$index0[1]]
	alu[$gcra[2], --, b, *l$index0[2]]
	alu[$gcra[3], --, b, *l$index0[3]]
	alu[$gcra[4], --, b, *l$index0[4]]

	// Write the first 5 words of the old entry back to SRAM.
	// Uses the base address passed by the caller (this used to reference the
	// global _gcra_param_sram_base directly and ignored the parameter).
	sram[write, $gcra[0], in_gcra_param_sram_base, old_vcq_ofs, 5], sig_done[sram_write_dn]

	ctx_arb[sram_write_dn, sram_read_dn, next_thread_sig]

	//Move ALL GCRA parameters read from SRAM, from Xfer to LM
	alu[*l$index0[0], --, b, $gcra[0]]
	alu[*l$index0[1], --, b, $gcra[1]]
	alu[*l$index0[2], --, b, $gcra[2]]
	alu[*l$index0[3], --, b, $gcra[3]]
	alu[*l$index0[4], --, B, $gcra[4]]
	alu[*l$index0[5], --, b, $gcra[5]]
	alu[*l$index0[6], --, b, $gcra[6]]
	alu[*l$index0[7], --, b, $gcra[7]]
   	alu[*l$index0[8], --, b, $gcra[8]]

	// Store also VCQ offset in LM - it will be used to write it back to SRAM
	alu[*l$index0[9], --,  B, vcq_ofs]

#endif

end#:
.end
#endm
 

//////////////////////////////////////////////////////////
// _flush_gcra_table()
//
//	description:
//		Flushes GCRA table cache in LM to SRAM - called if there is nothing to enq/deq
//		Can be used with or without CAM
//
//	input:	
//		in_gcra_param_sram_base - BASE address for GCRA table
//		in_gcra_lm_offs - start LM address for GCRA table
//	output:
//
//////////////////////////////////////////////////////////
#macro _flush_gcra_table(in_gcra_param_sram_base, in_gcra_lm_offs)
// Writes the GCRA entry cached in LM back to SRAM and marks the LM slot
// as empty.
//
//	in_gcra_param_sram_base - SRAM base address of the GCRA table
//	in_gcra_lm_offs         - LM address of the cache slot
//
// With SHAPER_WITH_CAM defined nothing is flushed; the macro only waits
// for next_thread_sig. Both variants wait on next_thread_sig before leaving.
//
// NOTE(review): $gcra is allocated with xbuf_alloc but never released with
// xbuf_free in this macro - confirm against xbuf.uc.
.begin 
.reg old_vcq_ofs	; SRAM offset of the entry currently held in LM (LM word 9)

.sig sram_write_dn		//sram write done signal

; Xfer registers for the GCRA write-back
    xbuf_alloc($gcra, 5, read_write)


#ifdef SHAPER_WITH_CAM

	ctx_arb[next_thread_sig],	br[end#]

#else // SHAPER_WITH_CAM

	// First we have to set LM address
	local_csr_wr[active_lm_addr_0, in_gcra_lm_offs] ; setup local memory for the Shaper
	// three instructions between the CSR write and the first LM access
	nop
	nop
	nop
	// Get old VCQ offset to calculate SRAM address for write back the GCRA params
	alu[old_vcq_ofs, --, B, *l$index0[9]]

	//Need to write back only the TAT values - these were changed, the rest is constant
	alu[$gcra[0], --, b, *l$index0[0]]
	alu[$gcra[1], --, b, *l$index0[1]]
	alu[$gcra[2], --, b, *l$index0[2]]
	alu[$gcra[3], --, b, *l$index0[3]]
	alu[$gcra[4], --, b, *l$index0[4]]

	// Write the first 5 words of the cached entry back to SRAM.
	// Uses the base address passed by the caller (this used to reference the
	// global _gcra_param_sram_base directly and ignored the parameter).
	sram[write, $gcra[0], in_gcra_param_sram_base, old_vcq_ofs, 5], sig_done[sram_write_dn]
	// Clear the stored VCQ offset: LM word 9 = 0 marks the slot as empty
	alu[*l$index0[9], --,  B, 0]
	ctx_arb[sram_write_dn, next_thread_sig]


#endif

end#:
.end
#endm


//------------------------------------------------------------------
// atm_tm_send_message()
//
//    Description: Writes Shaper's message to Scheduler
//				   Uses NNring or scratchring
//
//    Parameters: None.
//		Inputs: 
//			in_msg0:	First LW of message
//			in_msg1:	Second LW of message
//		Outputs:
//			None.
//		Latency:
//			2 instr. for NN_INTERFACE
//------------------------------------------------------------------
#macro atm_tm_send_message(in_msg0, in_msg1)
.begin
.reg _ring_ofs
#ifdef SCRATCH_INTERFACE

.sig _ring_put_dn	// raised when the scratch ring put has completed

	xbuf_alloc($msg, 2, write)

	// Pass the next-thread signal on before touching the ring.
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]

	stage_msg#:

	// Stage both message words in the write transfer registers; the ring
	// offset operand is always 0.
	move($msg[0], in_msg0)
	move($msg[1], in_msg1)
	move(_ring_ofs, 0)
	// Bit 19 of the first word selects the odd or the even ring.
	br_bset[in_msg0, 19, to_odd_ring#]

	to_even_ring#:
    br_inp_state[SCHEDULER_FULL_EVEN, ring_busy#]
	scratch[put, $msg[0], RING_TO_SCHEDULER_EVEN, _ring_ofs, 2], sig_done[_ring_put_dn]
	br[wait_put#]

	to_odd_ring#:
    br_inp_state[SCHEDULER_FULL_ODD, ring_busy#]
	scratch[put, $msg[0] ,RING_TO_SCHEDULER_ODD, _ring_ofs, 2], sig_done[_ring_put_dn]
	br[wait_put#]

	// Selected ring is full: start over from the staging step and poll again.
	ring_busy#:
	br[stage_msg#]
	
	// Sleep until the put has completed and this thread has been signalled.
	wait_put#:
	ctx_arb[next_thread_sig, _ring_put_dn]
	xbuf_free($msg)

#else

	#ifdef NN_INTERFACE

		// Spin while the next-neighbour ring reports full
		nn_wait#:
		br_inp_state[NN_FULL, nn_wait#]

		// Push the two message words onto the ring
		alu[*n$index++, --, B, in_msg0]
		alu[*n$index++, --, B, in_msg1]

	#else
		#error "Undefined Shaper to Scheduler interface type !"
	#endif
#endif	
.end
#endm






//------------------------------------------------------------------
// atm_tm_shaper_util()
//
//    Description: Reads the messages from queue manager, shapes the
//					VC if needed and writes the output to Writeout block.
//
//    Parameters: None.
//		Inputs: 
//			in_mask:
//				mask value for masking upper 15 bits of a LW
//			in_mult_fac:
//				multiply factor for conversion from timestamps
//				to cell tx slots
//          in_thmask_extr_mask:
//              mask used to extract thmask
//		Outputs:
//			None.
//------------------------------------------------------------------


#macro atm_tm_shaper_util(in_mask, in_mult_fac, in_gcra_lm_offset, in_ring_delay, in_thmask_extr_mask)
// One shaper pass: optionally refreshes the scheduler time (context 0),
// takes an ENQ/DEQ message pair from the queue manager, and for each
// non-NULL VCQ loads its GCRA entry and shapes it or forwards a cell-count
// message to the scheduler.
//
//	in_mask             - mask that isolates the VCQ# field of a message word
//	in_mult_fac         - multiply factor, timestamps -> cell tx slots
//	in_gcra_lm_offset   - LM address of the GCRA cache slot (passed to _get_gcra_table)
//	in_ring_delay       - passed through unchanged to the GCRA macros
//	in_thmask_extr_mask - mask splitting *l$index0[8] into thmask (masked bits)
//	                      and the quantization error (remaining bits)
//
// The ctx_arb / SAME_ME_SIGNAL pairs on every path keep the threads in step;
// their number per path differs between SCRATCH_INTERFACE and NN_INTERFACE.
.begin
.reg enq_mesg deq_mesg vcq vcq_lm num_cells  
.reg code tslot port PRI 
.reg @mul_result_lo, @mul_result_hi 
.reg quantization_error thmask

	// Load scheduler's time
	// For thread 0 synchronize with schedulers realtime data
	br=ctx[0, get_sched_time#]

rtdata_sync_continue#:

    #ifndef QM_WITH_SHAPER

    	//If the NN ring is empty, do the NULL signalling
	    local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]

    #endif /* QM_WITH_SHAPER */
 
//Otherwise, Read the ENQ and DEQ messages from the QM
//The messages are ALWAYS read in pairs.
//Understanding is that the first message is ALWAYS the ENQ and the second is 
//DEQ
//The messages are of the following format:
//
//    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 
//    1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 <- bits
//   +---------------+---------------+---------------+---------------+
//   |V|T|R|S|    #cells (11bits)  |       VCQ# (17 bits)            |
//   +---------------+---------------+---------------+---------------+
//  V: Valid Bit -- indicates if the DEQ is valid or not. NOT USED
//  T: Transition Bit -- Indicates if the ENQ/DEQ is accompanied by transition
//  S:SOP bit, needed to GFR algorithm
//  #cells: number of cells enqueued
//  VCQ#: The VCQ# into which the cells were enqueued.Composed of 
//	Port number(11bits) and Queue-number(6bits)
//
//  NOTE(review): the code below extracts #cells with ">>18" and an 11-bit
//  mask, and _gcra_shaping_enqueue tests bit 17 as SOP, which does not match
//  the S/#cells positions drawn above - confirm the real layout.

#ifndef QM_USE_REG_INTF_TO_SHAPER
	br_inp_state[nn_empty,null_loop#]

	alu[enq_mesg,  --, b, *n$index++]
	alu[deq_mesg,  --, b, *n$index++]
#else	//QM_USE_REG_INTF_TO_SHAPER

	#define enq_mesg sched_message_enq
	#define deq_mesg sched_message_deq

#endif


//-----------------------------------------------------------------------------
//Processing of the ENQ message
//-----------------------------------------------------------------------------
//extract the VCQ#. 
//If the VCQ# is NULL, jump to DEQ processing
enq_process#:

	alu[vcq, in_mask, and, enq_mesg]		;extract VCQ
	beq[no_enq_process#]

;-------------------------------------------------------------
// Reloading GCRA table cache in LM
   	_get_gcra_table(vcq, _gcra_param_sram_base, in_gcra_lm_offset)

;------------------------------------------------------------

//Process Enqueue Message for Low Bit Rate VC
//Shape only if there is a vcq enqueue transition
lbr_enq#:

	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]

	alu_shf[port, mask_for_port_extraction, AND, *l$index0[4], >>16]	;extract port nr

/////////////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED
	// VCQ numbers up to and including HBR_THRESHOLD take the HBR path
	alu[--, HBR_THRESHOLD, -, vcq]
	bge[hbr_enq#]
#endif
/////////////////////////////////////////////////////////////////////////////

    // Check if it's UBRwPRI class (CODE byte == 0)
	alu[code, *l$index0[6], and, 0xff]		;extract CODE
	br=byte[*l$index0[6], 0, 0, ubr_pri#]

//check transition bit
	br_bclr[enq_mesg, 30, no_nnwrite#]

    // split LM word 8 into quantization error and time-horizon mask
    alu[quantization_error, in_thmask_extr_mask, ~AND, *l$index0[8]]
    alu[thmask, in_thmask_extr_mask, AND, *l$index0[8]]


	_gcra_shaping_enqueue(vcq, enq_mesg, in_mult_fac, code, port, in_ring_delay, quantization_error, thmask)
	br[deq_process#]	

//Process Enqueue Message for a UBR w/PRI VC
//No GCRA shaping required.
ubr_pri#:
// Add PRI(3)
	alu_shf[num_cells, mask_for_port_extraction, AND, enq_mesg, >>18] 	;extract number of cells
	alu_shf[enq_mesg, 0x7, AND, *l$index0[4], >>27]	;extract PRI as enq_mesg
													;in order to save GPR
	alu_shf[num_cells, num_cells, OR, enq_mesg, <<11]
// code(3) is equal to 0, so we do not need to change it
	br[ubr_pri2#]

/////////////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED
//Process Enqueue Message for High Bit Rate VC
hbr_enq#:
	alu_shf[num_cells, mask_for_port_extraction, AND, enq_mesg, >>18] 	;extract number of cells
	alu[enq_mesg, --, b, 1]			;set speed bit as enq_mesg
									;in order to save GPR
	alu_shf[num_cells, num_cells, or, enq_mesg, <<31]
#endif
/////////////////////////////////////////////////////////////////////////////
ubr_pri2#:
	// first message word = VCQ# with the port number at bit 19
	alu[vcq, vcq, OR, port, <<19]

	atm_tm_send_message(vcq, num_cells)



//-----------------------------------------------------------------------------
//Processing of the DEQ message
//-----------------------------------------------------------------------------
//extract the VCQ#. If the VCQ# is NULL, jump to the end
//check if the VCQ# is high bit rate or low bit rate and
//branch accordingly.
deq_process#:

#ifdef SCRATCH_INTERFACE
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]	
#endif

	alu[vcq, in_mask, and, deq_mesg]
	beq[no_deq_process#]

//If there is transition, no need to write the timestamp to the 
//writeout.
	br_bset[deq_mesg, 30, deq_process_end#]

///////////////////////////////////////////////////////////////////////////////
#ifndef HBR_EXCLUDED
	// NOTE(review): the enqueue side treats vcq <= HBR_THRESHOLD as HBR (bge),
	// while this test only skips vcq == HBR_THRESHOLD - confirm it is intended.
	alu[--, HBR_THRESHOLD, -, vcq]
//	beq[no_deq_process#]
	bne[deq_process_continue2#]
#ifdef SCRATCH_INTERFACE
	ctx_arb[next_thread_sig]
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
	ctx_arb[next_thread_sig], br[end_shaper#]
#else 	// NN_INTERFACE
	ctx_arb[next_thread_sig], br[end_shaper#]
#endif	// SCRATCH_INTERFACE
deq_process_continue2#:
#endif
///////////////////////////////////////////////////////////////////////////////
;-------------------------------------------------------------

// Reloading GCRA table cache in LM

   	_get_gcra_table(vcq, _gcra_param_sram_base, in_gcra_lm_offset)

;------------------------------------------------------------

//Process Dequeue Message for Low Bit Rate VC
//Extract the transition bit and invoke shaper.
//If there has been a transition, that means we dont need to 
//shape. In such a case just the GCRA data structures are updated
//based on the departure time of the last cell.
//Else the GCRA data structures are updated based on the departure 
//time of the previous cell and the next HOL cell in the 
//VCQ is shaped.
lbr_deq#:

#ifdef SCRATCH_INTERFACE
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
#endif

	alu_shf[port, mask_for_port_extraction, AND, *l$index0[4], >>16]		;extract port nr

//be transparent if it is UBR (plain) class
	alu[code, *l$index0[6], and, 0xff]		;extract CODE
	br=byte[*l$index0[6], 0, 0, ubr_deq_process#]

    alu[quantization_error, in_thmask_extr_mask, ~AND, *l$index0[8]]
    alu[thmask, in_thmask_extr_mask, AND, *l$index0[8]]

	// Pass the macro parameter in_mult_fac, as the enqueue call does. This
	// used to pass the bare name "mult_fac", which is not declared here and
	// only resolved when the caller's argument had exactly that name.
	_gcra_shaping_dequeue(vcq, deq_mesg, in_mult_fac, \
						code, port, in_ring_delay, quantization_error, thmask)
	br[end_shaper#]		

//-----------------------------------------------------------------------------
// Updating real-time data from scheduler
//-----------------------------------------------------------------------------
// Reached only by context 0 (see the br=ctx at the top of the macro).
// NOTE(review): only @mul_result_lo is refreshed here; @mul_result_hi is left
// untouched even though prepare_and_send_message_to_scheduler reads it when
// HIGH_PRECISION_TQ_CALCULATION is defined - confirm.
get_sched_time#:
.begin
.reg sched_tslot_ctx tmp_mul_result_hi tmp_mul_result_lo

	alu[@sched_tslot, --, b, $tq_shap]
	alu[@sched_tstamp, --, b, $tslo_shap] ; these registers are loaded by scheduler
	alu[--, @sched_tstamp, -, $sync_shap] ; check if read values are consistent
	bne[rtdata_sync_continue#]   ; no - try again next time

	alu[sched_tslot_ctx, --, b, @sched_tslot]
//	alu[tmp_mul_result_hi, mask_for_mul_result, AND, sched_tslot_ctx, >>POW_OF_PRECISION]	; most significant byte
  //  alu[@mul_result_hi, --, B, tmp_mul_result_hi]
	alu[tmp_mul_result_lo, --, B, sched_tslot_ctx, <<POW_OF_PRECISION]		; least significant word
	alu[@mul_result_lo, --, B, tmp_mul_result_lo]	
	br[rtdata_sync_continue#]   ; time updated - continue with message processing
.end
//-----------------------------------------------------------------------------
//Processing of the null message
//-----------------------------------------------------------------------------
//The thread executes this path, if there are no messages 
//in the NN ring from the QM.
//There are two sleeps involved to sync up with the 2-phased
//nature of the shaper macros.
#ifndef QM_USE_REG_INTF_TO_SHAPER
null_loop#:
	ctx_arb[next_thread_sig]

#ifdef SCRATCH_INTERFACE
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
	ctx_arb[next_thread_sig]

	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
	ctx_arb[next_thread_sig]
#endif	// SCRATCH_INTERFACE

	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
	ctx_arb[next_thread_sig], br[end_shaper#]
#endif // QM_USE_REG_INTF_TO_SHAPER

// ENQ message carried a NULL VCQ#
no_enq_process#:
	ctx_arb[next_thread_sig]
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]

#ifdef SCRATCH_INTERFACE

	ctx_arb[next_thread_sig], br[deq_process#]

#else 	// NN_INTERFACE

    br[deq_process#]

#endif	// SCRATCH_INTERFACE

// ENQ without transition: nothing is sent to the scheduler
no_nnwrite#:
#ifndef SHAPER_WITH_CAM
    // invalidate LM contents because there is no need to flush it to SRAM
	alu[*l$index0[9], --,  B, 0]

#endif

#ifdef SCRATCH_INTERFACE
	ctx_arb[next_thread_sig], br[deq_process#]
#else 	// NN_INTERFACE
	br[deq_process#]
#endif	// SCRATCH_INTERFACE

// DEQ message carried a NULL VCQ#
no_deq_process#:
#ifdef SCRATCH_INTERFACE
	ctx_arb[next_thread_sig]
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
	ctx_arb[next_thread_sig], br[end_shaper#]
#else 	// NN_INTERFACE
	ctx_arb[next_thread_sig], br[end_shaper#]
#endif	// SCRATCH_INTERFACE

// DEQ with transition bit set
deq_process_end#:
#ifdef SCRATCH_INTERFACE
	ctx_arb[next_thread_sig]
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
	ctx_arb[next_thread_sig], br[end_shaper#]
#else 	// NN_INTERFACE
	ctx_arb[next_thread_sig], br[end_shaper#]
#endif	// SCRATCH_INTERFACE

// DEQ for a VC whose CODE byte is 0
ubr_deq_process#:
#ifdef SCRATCH_INTERFACE
	ctx_arb[next_thread_sig]
#endif	// SCRATCH_INTERFACE

end_shaper#:

.end
#endm	// atm_tm_shaper_util()




//------------------------------------------------------------------
// _gcra_shaping_enqueue()
//
//    Description: Provides the earliest departure time for the 
//					HOL cell in the VCQ based on GCRA when a
//					fresh enqueue happens into the VCQ.
//
//    Parameters:
//		Inputs:
//			in_vcq: 
//				the vcq for which the shaping needs to be done
//			in_mult_fac:
//				multiply factor for conversion from timestamps
//				to cell tx slots
//			in_thmask:
//				mask for the time horizon to detect wraparounds
//          in_half_of_ts - the value that represents mean error value for diff. substracting
//------------------------------------------------------------------

#macro _gcra_shaping_enqueue(in_vcq, in_enq_mesg, in_mult_fac, \
			in_code, in_port, in_ring_delay,in_half_of_ts, in_thmask)
.begin

.reg tlo thi
.reg ti t11 t12 tau X_low X_high X ial1 iah1 T1 T2
.reg code4jump
#ifdef DEBUG
	.reg tmp_d
#endif


gcra#:
//		local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]

	//Check for :
	//UBR code = 	000		don't needed GCRA calculate
	//CBR code = 	001		needed GCRA calculate
	//rtVBR code = 	010		needed GCRA calculate twice time
	//nrtVBR code = 011		needed GCRA calculate twice time
	//UBR w/PCR =	100		needed GCRA calculate
	//UBR w/MDCR =	101		needed GCRA calculate
	//GFR =			110		needed GFR_GCRA calculate

    alu[code4jump, 0xfc, AND, in_code, <<2]


	jump[code4jump, jump_table#], targets[_UBR#, _CBR#, _rtVBR#, _nrtVBR#, \
							    	  _UBRwPCR#, _UBRwMDCR#, _GFR#] 


jump_table#:

_UBR#:		
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
        nop
        nop
        br[UBR#]
_CBR#: 		
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
	    alu_shf[tau, --, b, *l$index0[5], >>24]		;extract tau1
    	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1
        br[CBR#]			
_rtVBR#:
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
       	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1
        ld_field_w_clr[T2, 0111, *l$index0[7]]		;extract T2
     	br[VBR#]
_nrtVBR#: 	
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
       	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1
        ld_field_w_clr[T2, 0111, *l$index0[7]]		;extract T2
        br[VBR#]
_UBRwPCR#: 	
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
	    alu_shf[tau, --, b, *l$index0[5], >>24]		;extract tau1
    	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1
        br[UBRwPCR#]	
_UBRwMDCR#: 
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
	    alu_shf[tau, --, b, *l$index0[5], >>24]		;extract tau1
    	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1
        br[UBRwMDCR#]
_GFR#:	
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
	    immed[ti, 0]		;I am not shure this part of code ...
	    ld_field_w_clr[X, 0011, *l$index0[4]]	;extract X
        br[GFR#]


//The VCQ happens to be VBR. Dual GCRA shaping needed
//GCRA(1/PCR, CDVT) and GCRA(1/SCR, CDVT+BT)
//The earliest departure time is computed as the maximum
//of the departure times from the two GCRAs

;---------------------------------------------------------------
VBR#:
// GCRA starts here.
.reg  pcr_scr_delta debit_limit

    ld_field_w_clr[pcr_scr_delta, 0111, *l$index0[6], >>8]		;extract pcr_scr_delta
    ld_field_w_clr[debit_limit, 1111, *l$index0[3]]			;extract debit_limit

    dual_gcra(t11, *l$index0[0], *l$index0[1], *l$index0[2], *l$index0[4], debit_limit, T1, T2, pcr_scr_delta, in_ring_delay)
	
	alu[ti, --, b, t11]		; shape to PCR

;--------------------------------------------------------------
nnwrite#:

    prepare_and_send_message_to_scheduler(in_mult_fac, in_code, in_half_of_ts, thmask, ti)

	atm_tm_send_message(in_vcq, ti)

	br[end_gcra#]


;--------------------------------------------------------------

//The VCQ happens to be UBR w/PCR. Single GCRA shaping needed
//GCRA(1/PCR, CDVT)
UBRwPCR#:

	gcra_computed_enq(ti, *l$index0[0], *l$index0[1], tau,\
											 T1, in_ring_delay)
	br[nnwrite#]
;--------------------------------------------------------------

//The VCQ happens to be CBR. Single GCRA shaping needed
//GCRA(1/PCR, CDVT)
CBR#:

	gcra_computed_enq(ti, *l$index0[0], *l$index0[1], tau,\
											 T1, in_ring_delay)

	br[nnwrite#]
//------------------------------------------------------------

//The VCQ happens to be GFR. Dual GCRA shaping needed
//GCRA(1/PCR, CDVT) and FGCRA(1/MCR, CDVT+BT)
//The earliest departure time is computed as the maximum
//of the departure times from the two GCRAs
GFR#:
	//Get Time
	local_csr_rd[TIMESTAMP_LOW]
	immed[tlo, 0]
	local_csr_rd[TIMESTAMP_HIGH]
	immed[thi, 0]

	immed[ti, 0]		;I am not shure this part of code ...
	ld_field_w_clr[X, 0011, *l$index0[4]]	;extract X

    br_bclr[in_enq_mesg, 30, gfr13#]

//Start MCR control

	alu[X_low, X, +, *l$index0[2]]	
	alu[X_high, *l$index0[3], +carry, 0]		;add TAT2
	ld_field_w_clr[T1, 0111, *l$index0[7]]		;extract T2 (as T1)
	alu[X_low, X_low, -, T1]	
	alu[X_high, X_high, -carry, 0]				;sub T2

	alu[tau, --, B, *l$index0[5], >>24]		;extract tau1
	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1

// check if SOP is valid
    br_bclr[in_enq_mesg, 17, gfr01#]

//t - X >= 0 ?
	alu[--, tlo, -, X_low]
	alu[--, thi, -carry, X_high]
	alu[t11, --, B, X_low]		;set t11=tif as X_low if t-X>=0
	blt[gfr11#]

gfr01#:
	alu[t11, --, B, 0]			;clear t11=tif if t-X<0
gfr11#:

//Start PCR control
//t - TAT > 0  ?
	alu[ial1, tlo, -, *l$index0[0]]
	alu[iah1, thi, -carry, *l$index0[1]]
	alu[*l$index0[0], *l$index0[0], +, T1]		;increment TAT1  LSB
	alu[*l$index0[1], *l$index0[1], +carry, 0]	;increment TAT1  MSB	
	alu[t12, --, B, tlo]			;t-TAT>0
	bgt[gfr12#]

//t-TAT<0 so lets check: t - TAT +tau > 0  ?
	alu[--, ial1, +, tau]
	alu[--, iah1, +carry, 0]
	bgt[gfr12#],defer[1]
	alu[t12, --, B, tlo]			;t-TAT+tau >0

	alu[t12, *l$index0[0], -, tau]	;t-TAT+tau <=0
//End PCR control

gfr12#:
	alu[tau, --, B, *l$index0[6], >>8]	;extract tau2
//find MAX(tif=t11,tic=t12)
	alu[ti, --, B, t11]				;t12 < t11
	alu[--, t11, -, t12]
	bgt[maxt11#]

	alu[ti, --, B, t12]				;t12 > t11
maxt11#:

	alu[ial1, X, +, *l$index0[2]]
	alu[iah1, *l$index0[3], +carry, 0]	;ia1 = X + TAT2

	alu[*l$index0[2], --, B, ti] 		;upgrade TAT2_low
	alu[*l$index0[3], --, B, 0] 		;upgrade TAT2_high

// find MAX(0,ia1)
	alu[ial1, ial1, -, tau]
	alu[--, iah1, -carry, 0]
	alu[X, ial1, +, tau]	;ia1 > 0
	bgt[gfr13#]

	alu[X, --, B, tau] 		;ia1 < 0
gfr13#:

//End MCR control
	
	br[nnwrite#]
//------------------------------------------------------------

//The VCQ happens to be UBR w/MDCR. Single GCRA shaping needed
//GCRA(1/MDCR, 1/MDCR+1)
UBRwMDCR#:
	gcra_computed_enq(ti, *l$index0[0], *l$index0[1], tau,\
											 T1, in_ring_delay)

	br[nnwrite#]
//------------------------------------------------------------

//The VCQ happens to be UBR (Plain). No shaping needed
UBR#:
#ifdef SCRATCH_INTERFACE
	ctx_arb[next_thread_sig], br[end_gcra#]
#else	// NN_INTERFACE
	br[end_gcra#]
#endif	// SCRATCH_INTERFACE
//-----------------------------------------------------------

//NN ring for writing to next ME is full. Keep polling until 
//next ME drains an element from the ring.
nnfull#:
	br[nnwrite#]


//-----------------------------------------------------------

end_gcra#:
.end
#endm





//------------------------------------------------------------------
// _gcra_shaping_dequeue()
//
//    Description: Provides the earliest departure time for the 
//					HOL cell in the VCQ based on GCRA when a dequeue
//					happens in a VCQ.
//
//    Parameters:
//		Inputs:
//			in_vcq: 
//				the vcq for which the shaping needs to be done
//			in_mult_fac:
//				multiply factor for conversion from timestamps
//				to cell tx slots
//			in_thmask:
//				mask for the time horizon to detect wraparounds
//			in_half_of_ts:
//				the value that represents the mean error value for
//				the difference subtraction
//  NOTE:
//			transition:
//				indicates whether the queue underwent transition upon
//				dequeue. If this is set to 1, then there is no HOL cell
//				in the VCQ and no departure time is provided to writeout

//------------------------------------------------------------------

#macro _gcra_shaping_dequeue(in_vcq, in_deq_mesg, in_mult_fac, \
			in_code, in_port, in_ring_delay, in_half_of_ts, in_thmask)
.begin

//------------------------------------------------------------------
// Dequeue-side shaper: dispatches on the service-category code and
// computes the departure time 'ti' for the VCQ, then hands it to
// prepare_and_send_message_to_scheduler() and atm_tm_send_message().
// Plain UBR queues skip shaping and go straight to end_gcra#.
//
// Parameters as used by this body:
//	in_vcq        - VCQ identifier. MODIFIED IN PLACE: in_port is
//	                ORed in at bit 19 before it is sent on.
//	in_deq_mesg   - dequeue message; only bit 17 is tested here
//	                (GFR path, "SOP is valid" check).
//	in_mult_fac   - forwarded to prepare_and_send_message_to_scheduler.
//	in_code       - service-category code (0..6), selects the
//	                jump-table entry; also forwarded to the scheduler
//	                message macro.
//	in_port       - port number, merged into in_vcq (<<19).
//	in_ring_delay - forwarded to gcra_computed_deq / dual_gcra.
//	in_half_of_ts - forwarded to prepare_and_send_message_to_scheduler.
//	in_thmask     - NOT referenced by this body; see NOTE(review) at
//	                nnwrite#.
//
// Per-VCQ state is read/written through *l$index0[0..7]. Layout as
// used below (field names taken from the inline comments):
//	[0],[1] - TAT1 LSB / MSB
//	[2],[3] - GFR: TAT2 low / high;  VBR: [2] passed to dual_gcra,
//	          [3] is debit_limit
//	[4]     - GFR: X in the two low bytes; VBR: passed to dual_gcra
//	[5]     - T1 in the three low bytes, tau1 in the top byte
//	[6]     - value >>8 is tau2 (GFR) / pcr_scr_delta (VBR)
//	[7]     - T2 in the three low bytes
// The caller is expected to have set up the local-memory index
// before invoking the macro - TODO confirm against call site.
//------------------------------------------------------------------

.reg tlo thi 
.reg ti t11 t12 T1 T2 tau  X_low X_high X ial1 iah1 
.reg code4jump

#ifdef DEBUG
	.reg tmp_d
#endif


//old reg
//.reg tlo thi ial1 iah1 ial2 iah2 ti t11 t12 T1 tau1 T2 tau2

gcra#:

#ifndef QM_WITH_SHAPER
	local_csr_wr[SAME_ME_SIGNAL, next_thread_sig_csr_val]
#endif

	//Check for :
	//UBR code = 	000		no GCRA calculation needed
	//CBR code = 	001		one GCRA calculation
	//rtVBR code = 	010		two GCRA calculations
	//nrtVBR code = 011		two GCRA calculations
	//UBR w/PCR =	100		one GCRA calculation
	//UBR w/MDCR =	101		one GCRA calculation
	//GFR =			110		GFR_GCRA calculation

	// Every jump-table entry below is exactly 4 instructions long, so
	// the table offset is in_code * 4 (in_code << 2).
	// NOTE(review): code 7 has no entry in targets[]; offset 28 would
	// land just past the table. Presumably callers never pass 7 - confirm.
    alu[code4jump, 0xfc, AND, in_code, <<2]


	jump[code4jump, jump_table#], targets[_UBR#, _CBR#, _rtVBR#, _nrtVBR#, \
							    	  _UBRwPCR#, _UBRwMDCR#, _GFR#] 


jump_table#:

// Each entry: merge port into in_vcq, preload the fields the handler
// needs, branch to the handler. Keep every entry at 4 instructions.
_UBR#:		
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
        nop						; padding to keep the 4-instruction stride
        nop
        br[UBR#]
_CBR#: 		
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
	    alu_shf[tau, --, b, *l$index0[5], >>24]		;extract tau1
    	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1
        br[CBR#]			
_rtVBR#:
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
       	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1
        ld_field_w_clr[T2, 0111, *l$index0[7]]		;extract T2
     	br[VBR#]
_nrtVBR#: 	
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
       	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1
        ld_field_w_clr[T2, 0111, *l$index0[7]]		;extract T2
        br[VBR#]
_UBRwPCR#: 	
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
	    alu_shf[tau, --, b, *l$index0[5], >>24]		;extract tau1
    	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1
        br[UBRwPCR#]	
_UBRwMDCR#: 
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
	    alu_shf[tau, --, b, *l$index0[5], >>24]		;extract tau1
    	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1
        br[UBRwMDCR#]
_GFR#:	
    	alu[in_vcq, in_vcq, OR, in_port, <<19]
	    immed[ti, 0]		;original author was unsure about this part
	    ld_field_w_clr[X, 0011, *l$index0[4]]	;extract X
        br[GFR#]


//The VCQ happens to be VBR. Dual GCRA shaping needed
//GCRA(1/PCR, CDVT) and GCRA(1/SCR, CDVT+BT)
//The earliest departure time is computed as the maximum
//of the departure times from the two GCRAs
;---------------------------------------------------------------
VBR#:
.reg debit_limit pcr_scr_delta

    ld_field_w_clr[pcr_scr_delta, 0111, *l$index0[6], >>8]		;extract pcr_scr_delta
    ld_field_w_clr[debit_limit, 1111, *l$index0[3]]			;extract debit_limit

    dual_gcra(t11, *l$index0[0], *l$index0[1], *l$index0[2], *l$index0[4], debit_limit, T1, T2, pcr_scr_delta, in_ring_delay)

//set ti as t11
	alu[ti, --, b, t11]	; shape to PCR
	// falls through into nnwrite#

;--------------------------------------------------------------
// Common exit for all shaped categories: 'ti' holds the computed
// departure time.
nnwrite#:
    
	// NOTE(review): 'thmask' is not a parameter or local of this macro;
	// it resolves in the enclosing scope, while the in_thmask parameter
	// is never used. Presumably the caller passes a register of that
	// same name - confirm before switching this to in_thmask.
    prepare_and_send_message_to_scheduler(in_mult_fac, in_code, in_half_of_ts, thmask, ti)

	atm_tm_send_message(in_vcq, ti)
	br[end_gcra#]

;--------------------------------------------------------------

//The VCQ happens to be UBR w/PCR. Single GCRA shaping needed
//GCRA(1/PCR, CDVT)
UBRwPCR#:
	gcra_computed_deq(ti, *l$index0[0], *l$index0[1], tau,\
											 T1, in_ring_delay)
	br[nnwrite#]
;--------------------------------------------------------------------

//The VCQ happens to be CBR. Single GCRA shaping needed
//GCRA(1/PCR, CDVT)
CBR#:
	gcra_computed_deq(ti, *l$index0[0], *l$index0[1], tau,\
											 T1, in_ring_delay)
	br[nnwrite#]
//------------------------------------------------------------

//The VCQ happens to be GFR. Dual GCRA shaping needed
//GCRA(1/PCR, CDVT) and FGCRA(1/MCR, CDVT+BT)
//The earliest departure time is computed as the maximum
//of the departure times from the two GCRAs
GFR#:
	//Get Time: local_csr_rd followed by immed[reg, 0] is how the
	//64-bit timestamp is read into thi:tlo here
	local_csr_rd[TIMESTAMP_LOW]
	immed[tlo, 0]
	local_csr_rd[TIMESTAMP_HIGH]
	immed[thi, 0]

//Start MCR control
	ld_field_w_clr[X, 0011, *l$index0[4]]	;extract X (already loaded in _GFR#; reloaded here)

	// X_high:X_low = X + TAT2 - T2
	alu[X_low, X, +, *l$index0[2]]	
	alu[X_high, *l$index0[3], +carry, 0]		;add TAT2
	ld_field_w_clr[T1, 0111, *l$index0[7]]		;extract T2 (T1 used as scratch)
	alu[X_low, X_low, -, T1]	
	alu[X_high, X_high, -carry, 0]				;sub T2

	alu[tau, --, B, *l$index0[5], >>24]		;extract tau1
	ld_field_w_clr[T1, 0111, *l$index0[5]]		;extract T1 (real T1 from here on)

// check if SOP is valid (bit 17 of the dequeue message)
    br_bclr[in_deq_mesg, 17, gfr01#]

//t - X >= 0 ?
	// NOTE(review): the copy into t11 sits between the 64-bit compare
	// and blt and is itself an ALU op; confirm which result blt is
	// meant to test. Left unchanged: the enqueue-side GFR code has the
	// same sequence.
	alu[--, tlo, -, X_low]
	alu[--, thi, -carry, X_high]
	alu[t11, --, B, X_low]		;set t11=tif as X_low if t-X>=0
	blt[gfr11#]

gfr01#:
	alu[t11, --, B, 0]			;clear t11=tif if t-X<0
gfr11#:

//Start PCR control
//t - TAT > 0  ?
	// ia = t - TAT1 is kept in iah1:ial1; TAT1 is then advanced by T1.
	// NOTE(review): three ALU ops separate the subtract from bgt;
	// confirm the branch sees the intended flags. Left unchanged: the
	// enqueue-side GFR code has the same sequence.
	alu[ial1, tlo, -, *l$index0[0]]
	alu[iah1, thi, -carry, *l$index0[1]]
	alu[*l$index0[0], *l$index0[0], +, T1]		;increment TAT1  LSB
	alu[*l$index0[1], *l$index0[1], +carry, 0]	;increment TAT1  MSB	
	alu[t12, --, B, tlo]			;t-TAT>0
	bgt[gfr12#]

//t-TAT<0 so lets check: t - TAT +tau > 0  ?
	// The branch must be decided by the 64-bit add, so the t12 copy
	// goes in the branch defer slot (it executes on both paths) instead
	// of between the add and the branch. This matches the enqueue-side
	// GFR code for the same step.
	alu[--, ial1, +, tau]
	alu[--, iah1, +carry, 0]
	bgt[gfr12#],defer[1]
	alu[t12, --, B, tlo]			;t-TAT+tau >0

	alu[t12, *l$index0[0], -, tau]	;t-TAT+tau <=0: t12 = (already incremented) TAT1 - tau
//End PCR control

gfr12#:
	alu[tau, --, B, *l$index0[6], >>8]	;extract tau2 (tau1 no longer needed)
//find MAX(tif=t11,tic=t12)
	alu[ti, --, B, t11]				;t12 < t11
	alu[--, t11, -, t12]
	bgt[maxt11#]

	alu[ti, --, B, t12]				;t12 > t11
maxt11#:

	alu[ial1, X, +, *l$index0[2]]
	alu[iah1, *l$index0[3], +carry, 0]	;ia1 = X + TAT2

	alu[*l$index0[2], --, B, ti] 		;upgrade TAT2_low
	alu[*l$index0[3], --, B, 0] 		;upgrade TAT2_high

// find MAX(0,ia1)
	// NOTE(review): X is computed here but not written back to
	// *l$index0[4] within this macro - confirm whether that is intended.
	alu[ial1, ial1, -, tau]
	alu[--, iah1, -carry, 0]
	alu[X, ial1, +, tau]	;ia1 > 0
	bgt[gfr13#]

	alu[X, --, B, tau] 		;ia1 < 0
gfr13#:

//End MCR control
	br[nnwrite#]
//------------------------------------------------------------

//The VCQ happens to be UBR w/MDCR. Single GCRA shaping needed
//GCRA(1/MDCR, 1/MDCR+1)
UBRwMDCR#:
	gcra_computed_deq(ti, *l$index0[0], *l$index0[1], tau,\
											 T1, in_ring_delay)
	
	br[nnwrite#]
//------------------------------------------------------------

//The VCQ happens to be UBR. No shaping needed, nothing is sent.
UBR#:
#ifdef SCRATCH_INTERFACE
	ctx_arb[next_thread_sig], br[end_gcra#]
#else	// NN_INTERFACE
	br[end_gcra#]
#endif	// SCRATCH_INTERFACE

//-----------------------------------------------------------

//NN ring for writing to next ME is full. Keep polling until 
//next ME drains an element from the ring.
//(No branch to nnfull# exists inside this macro body.)
nnfull#:
	br[nnwrite#]


//-----------------------------------------------------------

end_gcra#:
.end
#endm


#endif    //_ATM_TM_SHAPER_UTIL_UC
