/////////////////////////////////////////////////////////////////////////////////////
//                                                                      
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  2001 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                      
/////////////////////////////////////////////////////////////////////////////////////
//
// 		File Name: qm_scheduler_packet_init.uc
// 
// 		Purpose:	  DRR scheduler and queue manager
//
/////////////////////////////////////////////////////////////////////////////////////
//
// 		History:
//
// 		Date			Comment										By
//		---------------------------------------------------------------------------
//
//		03/17/02		Created										Uday Naik
//
/////////////////////////////////////////////////////////////////////////////////////

#ifndef __QM_SCHEDULER_PACKET_INIT_UC__
#define __QM_SCHEDULER_PACKET_INIT_UC__


/////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////
//                                  QM PACKET
/////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////


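///////////////////////////////////////////////////////////////////////////////
// _qm_packet_set_signal_next_ctx_reg
//	 	Description: Precompute, in gl_next_context_sig, the value this context
//		uses to signal the next QM context in the same ME. Contexts 0 to 4
//		signal the next context, context 5 wraps around to context 0, and
//		contexts 6 and 7 (not used by QM) leave the register unwritten.
//      No error checking done on signal number.
//
//	 	Outputs:
//			gl_next_context_sig		0x80 | (&sig_prev_thread << 3) for contexts
//									0 to 4; (&sig_prev_thread << 3) for context 5
//
//		Inputs:
//			None (the address of signal sig_prev_thread is used directly)
//		
//		Size:   			up to 6 instructions executed, assuming move[]
//							expands to a single instruction
///////////////////////////////////////////////////////////////////////////////	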

#macro _qm_packet_set_signal_next_ctx_reg[]

	// Builds the per-context value kept in gl_next_context_sig:
	//   contexts 0 to 4 : 0x80 | (&sig_prev_thread << 3)
	//   context 5       : 0    | (&sig_prev_thread << 3)
	//   contexts 6, 7   : the register is left unwritten
	// NOTE(review): presumably this is the SAME_ME_SIGNAL layout (bit 7 =
	// "next context", signal number starting at bit 3, context number in the
	// low bits) -- confirm against the CSR description.

	.set gl_next_context_sig

	.local ctx_select

	// Contexts 6 and 7 are not used by QM: nothing to compute for them.

	br=ctx[6, next_ctx_sig_ready#]
	br=ctx[7, next_ctx_sig_ready#]

	// Context 5 is the last QM context, so it wraps around to context 0.

	br=ctx[5, wrap_to_ctx_0#]

	// Every remaining context (0 to 4) signals the context that follows it.

	move[ctx_select, 0x80]
	br[merge_signal_number#]

wrap_to_ctx_0#:

	move[ctx_select, 0]

merge_signal_number#:

	// Common tail: place the signal number above the context selection bits.

	alu[gl_next_context_sig, ctx_select, OR, &sig_prev_thread, <<3]

next_ctx_sig_ready#:

	.endlocal 

#endm

///////////////////////////////////////////////////////////////////////////////
// Macro Name  :  _qm_packet_nn_init
// Description : Macro to initialize next neighbour index register
// Output      : Nil
// Input       : Nil
// Constant    : Nil
// Size		   : 5
///////////////////////////////////////////////////////////////////////////////
#macro _qm_packet_nn_init[]

.local nn_ctx_enables

	// Resets the next-neighbor ring state of this ME.
	//
	// CTX_ENABLES is overwritten with 0x0010FF00:
	//   bits [15:8]  = 0xFF : enable bits for contexts 0 to 7
	//   bits [19:18] = 0    : NN_RING_EMPTY, NN_EMPTY asserts when
	//                         NN_PUT == NN_GET
	//   bit  20      = 1    : NN_MODE
	// NOTE(review): earlier commentary for this macro described NN_MODE as 0
	// and the CSR value as 0xFF00, but the high word 0x0010 sets bit 20.
	// Confirm which NN_MODE setting is intended against the CTX_ENABLES
	// documentation.

	immed[nn_ctx_enables, 0xFF00]				; low word : context enable bits
	immed_w1[nn_ctx_enables, 0x0010]			; high word: bit 20 set
	local_csr_wr[CTX_ENABLES, nn_ctx_enables]

	// Zero both ring indices so that NN_PUT == NN_GET

	local_csr_wr[NN_GET, 0]
	local_csr_wr[NN_PUT, 0]

.endlocal

#endm 
 
///////////////////////////////////////////////////////////////////////////////
// Macro Name  : _qm_packet_thread0_init
// Description : Macro to initialize registers, rings and memory used by
//				 by Queue Manager
// Output      : Nil
// Input       : Nil
// Constant	   : Nil 
// Size        : 20
///////////////////////////////////////////////////////////////////////////////
#macro _qm_packet_thread0_init[]

	// One-time Queue Manager setup. qm_packet_init expands this macro only
	// under .if( ctx() == 0 ), so it is meant to run on context 0.
	//
	// Steps, in order:
	//   1. clear the CAM
	//   2. load the shared absolute (@) registers with mask constants
	//   3. reset the NN ring (_qm_packet_nn_init)
	//   4. read the drop queue descriptor into its Q_Array entry
	//   5. OR bit 15 into the SRAM_CONTROL CSR
	//   6. (EGRESS builds only) OR bit 13 into the SRAM_CONTROL CSR
	//
	// Names used here but declared elsewhere: gl_channel_num, the signals
	// sig_q_array_enq_r_done and sram_done, and the macro _qm_read_q_array.
	// The context swaps out while waiting for each SRAM operation.

	// clear the CAM		 

	cam_clear;

	// Initialize absolute registers that are common to all threads
	// used to store few immediates in global registers for ALU operation

	move[@enq_transition_bit_mask, QM_ENQ_TRANSITION_BIT_MASK]
	move[@invalid_dequeue_bit_mask, QM_INVALID_DEQUEUE_BIT_MASK]	
	move[@enq_ind_ref_mask_reg,ENQ_IND_REF_MASK]
	move[@qm_deq_transition_bit_mask_reg, QM_DEQ_TRANSITION_BIT_MASK ]

	
	// Initialize the NN ring

	_qm_packet_nn_init[]


	// Drop Queue Initialization 

	.local reg, qa_cam_num, qarray_entry, $qdarray

	immed[qa_cam_num,QM_DROP_QUEUE_ENTRY]
	move[reg, QM_DROP_QUEUE]

	// Combine the channel number (already positioned in gl_channel_num)
	// with the Q_Array entry number reserved for the drop queue

	alu[qarray_entry, gl_channel_num, OR, qa_cam_num, <<SRAM_Q_ARRAY_NUMBER_FIELD]

	// Read the drop queue into the Q_Array entry reserved for it.
	// NOTE(review): _qm_read_q_array is defined elsewhere; its exact
	// behavior and the meaning of the trailing "3, 0" note are not visible
	// here.

	_qm_read_q_array[ reg, qarray_entry, $qdarray, sig_q_array_enq_r_done ] ; 3, 0

	// wait for read to complete

	ctx_arb[sig_q_array_enq_r_done]

	// Write 0 to the local memory word addressed through LM index 1.
	// NOTE(review): the index is not set in this macro; presumably it still
	// points at QM_LM_DROP_Q_BASE from the _qm_lm_init[1, reg] call made by
	// qm_packet_init before this macro is expanded -- confirm.

	alu[*l$index1, --, b,0]


	.endlocal 

	
	/////////////////////////////////////////////////////////////////
	// Setting bit 15 (of Q_CNTL_MODE) in SRAM Control CSR to workaround 
	// the QArray problem.
	//
	// This is undocumented feature which will be released in the next
	// release.
	// 
	/////////////////////////////////////////////////////////////////

	.local $set_csr_reg, base, offset, temp

	// This is the sram base address for a given channel

	move[base, SRAM_CHANNEL0_BASE_ADDRESS] 

	// This is the offset that indicates what CSR we are looking for.
	// In our case we want to access the SRAM_CONTROL CSR

	move[offset, OFFSET_SRAMCONTROL_CSR]

	// Read-modify-write: fetch the current CSR value first so that the
	// other bits are written back unchanged

	sram[csr_rd, $set_csr_reg, base, offset], ctx_swap[sram_done]
	move[temp, $set_csr_reg]

	// The value in this transfer register will be written to 
	// the SRAM_CONTROL CSR. We are setting bit 15 to workaround 
	// QArray problem.

 	alu[$set_csr_reg, temp, OR, 1, <<15] 
	
	sram[csr_wr, $set_csr_reg, base, offset], ctx_swap[sram_done]

	.endlocal


#ifdef EGRESS

	// Set the SRAM Channel CSR to ignore cellcount and eop bit in the 
	// buffer handle. This implies that even though we could have chained
	// buffers h/w will enq/deq only one buffer. Therefore we enqueue and
	// dequeue a packet everytime.

	.local $set_csr_reg, base, offset, temp

	// This is the sram base address for a given channel

	move[base, SRAM_CHANNEL0_BASE_ADDRESS] 

	// This is the offset that indicates what CSR we are looking for.
	// In our case we want to access the SRAM_CONTROL CSR

	move[offset, OFFSET_SRAMCONTROL_CSR]

	// Second read-modify-write of the same CSR; it re-reads the value so
	// the bit 15 update made above is preserved

	sram[csr_rd, $set_csr_reg, base, offset], ctx_swap[sram_done]
	move[temp, $set_csr_reg]

	// The value in this transfer register will be written to 
	// the SRAM_CONTROL CSR. Only bit 13 is OR-ed in here. The stated intent
	// is to ignore cellcount but not the eop bit, so all handles that are
	// being enqueued should have the eop bit set.
	// NOTE(review): confirm the meaning of bit 13 against the SRAM_CONTROL
	// CSR documentation.

 	alu[$set_csr_reg,temp,OR,1,<<13] ;
	
	sram[csr_wr, $set_csr_reg, base, offset], ctx_swap[sram_done]

	.endlocal

#endif	

#endm 

///////////////////////////////////////////////////////////////////////////////
// Macro Name  : qm_packet_init
// Description : Macro to initialize registers, rings and memory used by
//				 by Queue Manager
// Output      : Nil
// Input       : Nil 
// Size        : 
///////////////////////////////////////////////////////////////////////////////
#macro qm_packet_init[]

	// Per-context Queue Manager initialization; expanded by every QM context.
	//
	// All contexts: compute gl_next_context_sig, gl_qd_sram_base_reg, the
	// override-bit registers, gl_channel_num, gl_qm_drop_queue_reg and the
	// mask registers, then run _qm_lm_init for the drop queue.
	// Context 0 only: run the one-time setup (_qm_packet_thread0_init and
	// _qm_q_array_init), read zeros into transfer registers, then block on
	// sys_init_signal.
	// Other contexts: block on sig_prev_thread.
	//
	// The gl_* registers, zero, the signals and the _qm_* helper macros are
	// declared outside this macro.

	.local dummy

	// set the next context signal register

	_qm_packet_set_signal_next_ctx_reg[]

	// Setting a global base register. Need to shift by 2, 
	// because address used in reading and writing QD 
	// instruction uses LW aligned addresses.

	immed32[dummy, QD_SRAM_BASE]
	alu[dummy, dummy, AND~, 3, <<30] ;; clear bits [31:30] (the channel number)
	alu[gl_qd_sram_base_reg, --,b, dummy,>>2]

	.endlocal

	// set the override bit used to override defaults in enqueue operation

	move[gl_override_bit_egress, 0x180000] ; for egress the eop bit is always set
									; for SOP handle	
	move[gl_override_bit_ingress, 0x100000] 

	// set the channel number for Q_Array: take bits [31:30] of QD_SRAM_BASE
	// and move them to the SRAM_CHANNEL_NUMBER_FIELD position

	immed32[gl_channel_num, QD_SRAM_BASE] 
	alu[gl_channel_num, --,b, gl_channel_num, >>30]
	alu_shf[gl_channel_num, --, B, gl_channel_num, <<SRAM_CHANNEL_NUMBER_FIELD]

	// set the drop queue register to QD_TOTAL OR-ed with MSB_SET

	.local msb_set_reg

	move[msb_set_reg, MSB_SET]
	immed32[gl_qm_drop_queue_reg, QD_TOTAL]
	alu[gl_qm_drop_queue_reg, gl_qm_drop_queue_reg, OR, msb_set_reg]

	.endlocal


	// Initialize few more context specific global registers

	move[zero, 0]
	move[gl_addr_ptr_mask_reg, ADDR_PTR_MASK]
	move[gl_enq_queue_num_mask, ENQ_QUEUE_NUM_MASK]

	// Initialize local memory for drop Q
	// NOTE(review): _qm_lm_init is defined elsewhere; presumably it points
	// local memory index 1 at QM_LM_DROP_Q_BASE -- confirm.

	.local reg

	move[reg, QM_LM_DROP_Q_BASE]
	_qm_lm_init[1, reg]

	.endlocal

	.if( ctx() == 0 )

		// Thread 0 does the initialization

		_qm_packet_thread0_init[]

		//  Initialize Q_Array 

		_qm_q_array_init[]

       // Since xfer registers 0..15 belong 
        // to thread 0 in relative mode, this thread will initialize them to 0 by reading in 
        // 0's into these registers initially.
        // NOTE(review): no explicit signal to the scheduler thread is issued in this
        // macro; the context simply blocks on sys_init_signal below.
        .sig 		 read_signal1, read_signal2
        .reg 		 offset

	    // set up offset to point to a location in SRAM where 8 0's are stored
	    immed32[offset, SRAM_ZERO_BLOCK]

	    // read in the 8 zero's, twice, to cover both groups of 8 registers
	    // NOTE(review): the destinations use the $$ prefix while the command is
	    // sram[read ...]; presumably $$ names DRAM transfer registers -- confirm
	    // that this assembles and fills the intended registers.
	    sram[read, $$txd_p0, offset, 0, 8], sig_done[read_signal1]
	    sram[read, $$txd_p8, offset, 0, 8], sig_done[read_signal2]

	    // wait for the IO to complete. It is safe to let other threads run now
    	ctx_arb[read_signal1, read_signal2]

		// this is the signal that all blocks are to 
		// wait on to indicate that system initialization 
		// is done
		
		ctx_arb[sys_init_signal]
			
    .else

		// wait for previous thread/context signal to wake you up

		ctx_arb[sig_prev_thread]
	
	.endif

	
	
#endm 



/////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////
//                                 PACKET SCHEDULER
/////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////////////////////////////////
// 
// _qm_scheduler_get_credit_increment()
//
// Description:
// 	
//		Get the DRR credit quantum for a given queue. This should be read from SRAM  
//		control block. In simulation we will simply use a fixed number equal to the minimum 
//		credit increment
//
// Outputs: 
//
//		out_credit_increment:	DRR credit quantum for a specific queue
//
// Inputs:  
//
//		in_queue_id:			queue_id
//
// Size: 
//
//		1 instruction
// 
//
/////////////////////////////////////////////////////////////////////////////////////
	
#macro _qm_scheduler_get_credit_increment(out_credit_increment, in_queue_id)

// Places the DRR credit quantum for queue in_queue_id in out_credit_increment.
//
//  - USE_IMPORT_VAR defined: the quantum is read from SRAM at
//    SCHED_WEIGHT_CREDIT_BASE + (NUMBER_OF_PORTS << 2) + (in_queue_id << 2).
//    The context swaps out until the read completes.
//  - otherwise: every queue gets the fixed value MIN_CREDIT_INCREMENT.
//
// The result is always written to the out_credit_increment parameter, so the
// caller is free to pass a register of any name.

#ifdef USE_IMPORT_VAR
.begin
  .reg sram_block_base, offset, $credit0, queue_offset
  .sig sram_sig


  // get the sram address to read from 

  immed32(sram_block_base, SCHED_WEIGHT_CREDIT_BASE)

  // get the queue offset in SRAM. The first NUMBER_OF_PORTS LW addresses
  // are for weights on each port

  alu[queue_offset,--,b, NUMBER_OF_PORTS, <<2]

  // get the offset based on queue number (one long word per queue)

  alu[offset, --,b, in_queue_id, <<2]

  // the final offset of a given queue in SRAM
  alu[offset, offset, +, queue_offset]

  sram[read, $credit0, sram_block_base, offset, 1], ctx_swap[sram_sig]

  // $credit0 contains the credit quantum for the queue

  alu[out_credit_increment, --,b, $credit0]


.end

#else

// In simulation mode define the credit increment like this

#define_eval	MIN_CREDIT_INCREMENT  ((MTU * 8) / (1 << BITS_FOR_PACKET_LENGTH))

  immed32(out_credit_increment, MIN_CREDIT_INCREMENT)

#endif

#endm

/////////////////////////////////////////////////////////////////////////////////////
// 
// _qm_scheduler_get_port_weight()
//
// Description:
// 	
//		 Get the WRR weight for a given port. This should be read from a SRAM control 
//		 block when run in hardware and is a fixed value in simulation.
//
// Outputs: 
//
//		weight:					WRR weight in number of packets
//
// Inputs:  
//
//		port:					port_number			
//
// Size: 
//
//		1 instruction
// 
//
/////////////////////////////////////////////////////////////////////////////////////
	
#macro _qm_scheduler_get_port_weight(out_weight, in_port)

// Places the WRR weight for port in_port in out_weight.
//
//  - USE_IMPORT_VAR defined: the weight is read from SRAM at
//    SCHED_WEIGHT_CREDIT_BASE + (in_port << 2). The context swaps out until
//    the read completes.
//  - otherwise: every port gets a weight of 1.

#ifdef USE_IMPORT_VAR
.begin
  .reg sram_block_base, offset, $credit0
  .sig sram_sig


  // get the sram address to read from (one long word per port)

  immed32(sram_block_base, SCHED_WEIGHT_CREDIT_BASE)
  alu[offset, --,b, in_port, <<2]
  sram[read, $credit0, sram_block_base, offset, 1], ctx_swap[sram_sig]

  // $credit0 contains weight for the port

  alu[out_weight, --,b, $credit0]


.end

#else

   // In simulation we will set this up as 1 for every port

  immed[out_weight, 1]

#endif

#endm

/////////////////////////////////////////////////////////////////////////////////////
// 
// _qm_scheduler_init_scratch_ring[]
//
// Description:
// 	
//		 Initialize the scratch ring between the scheduler and the QM
//
// Outputs: 
//								None
//
// Inputs:  
//								None				
//
// Constants
//
//		RBASE: 					Base address of scratch ring. Should be 4 byte 
//								aligned
//
// 		RSIZE: 					Size of scratch ring in words. Valid values are 
//								128, 256, 512, 1024
//
// 		RING:  					Ring number (0..15)
//
// Size:     
//								10 instruction
// 
//
/////////////////////////////////////////////////////////////////////////////////////

#macro	_qm_scheduler_init_scratch_ring(RBASE, RSIZE, RING)

.begin 	

	// Programs the base/size, head and tail CSRs of scratch ring RING and
	// waits until all three CAP writes have completed.

	.reg	_rbase_addr
	.reg	$_rbase_val, $_rhead_val, $_rtail_val
	.sig 	base_wr_done, head_wr_done, tail_wr_done
 
	// These define_eval are required. Otherwise the caller cannot have spaces
	// in between parameters like init[a, b, c].

	#define_eval RB		RBASE
	#define_eval RS		RSIZE
	#define_eval RN		RING

	// Base word: ring base address with the RING_SIZE_<RSIZE> code placed in
	// bits [31:30].

	immed[_rbase_addr, RB]
	alu_shf[$_rbase_val, _rbase_addr, or, RING_SIZE_/**/RS, <<30]

	// Head and tail both start at 0.

	immed[$_rhead_val, 0x0]
	immed[$_rtail_val, 0x0]

	// Note: We can Queue a max. of 4 commands to any external unit 
	// (like sram, dram, cap, etc). Beyond this limit the ME will stall.
	// The limit of 4 includes all the commands issued by all other MEs 
	// as well. It is the programmers responsibility to ensure this.
	//
	// Since this is the only thread and ME that is queuing cmds at this time,
	// we can queue 3 commands safely.

	cap[write, $_rbase_val, SCRATCH_RING_BASE_/**/RN], sig_done[base_wr_done]	; base + size
	cap[write, $_rhead_val, SCRATCH_RING_HEAD_/**/RN], sig_done[head_wr_done]	; head = 0
	cap[write, $_rtail_val, SCRATCH_RING_TAIL_/**/RN], sig_done[tail_wr_done]	; tail = 0

	// Block until every write has signalled completion.

	ctx_arb[base_wr_done, head_wr_done, tail_wr_done]		

#undef RN
#undef RS
#undef RB

.end

#endm

/////////////////////////////////////////////////////////////////////////////////////
// 
// _qm_scheduler_init_rings()
//
// Description:
// 	
//		 Initialize the scratch ring and NN rings
//
// Outputs: 
//								None
//
// Inputs:  
//								None				
//
// Constants
//								None
//
// Size: 
//								12 instruction
// 
//
/////////////////////////////////////////////////////////////////////////////////////

#macro _qm_scheduler_init_rings()

.begin

	// Sets up the rings used by the scheduler: the scratch ring (UNIT_TEST
	// builds only) and the next-neighbor ring of this ME.

	.reg 	nn_ctx_enables

#ifdef UNIT_TEST

	_qm_scheduler_init_scratch_ring(RING_BASE, RING_SIZE, RING_ID)

#endif
	
	//
	// CTX_ENABLES is overwritten with 0x0010FF00:
	//
	//   bits [15:8]  = 0xFF : enable bits for contexts 0 to 7
	//   bits [19:18] = 0    : NN_RING_EMPTY, NN_EMPTY asserts when
	//                         NN_PUT == NN_GET (default)
	//   bit  20      = 1    : NN_MODE
	//
	// NOTE(review): earlier commentary for this macro described NN_MODE as
	// 0, but the high word 0x0010 sets bit 20. Confirm which NN_MODE setting
	// is intended against the CTX_ENABLES documentation.
	//

	immed[nn_ctx_enables, 0xFF00]				; low word : context enable bits
	immed_w1[nn_ctx_enables, 0x0010]			; high word: bit 20 set
	local_csr_wr[CTX_ENABLES, nn_ctx_enables]

	/* zero both NN ring indices so that NN_PUT == NN_GET */
	local_csr_wr[nn_get, 0]
	local_csr_wr[nn_put, 0]

.end

#endm

/////////////////////////////////////////////////////////////////////////////////////
// 
// qm_scheduler_init()
//
// Description:
// 	
//		 Initialize the DRR scheduler
//
//
// Outputs:				
//
//		port_credit_vector		Current WRR credit vector for port
//		port_init_credit_vector Initial WRR credit vector for port
//		ring					Scratch ring for deq requests
//		port_mask				Mask used to round robin among ports			
//		minus_two				A constant 0xfffffffe for computing next port mask	
// 
//
/////////////////////////////////////////////////////////////////////////////////////

#macro qm_scheduler_init(port_credit_vector, port_init_credit_vector, ring, port_mask,\
		   			     minus_two)

.begin

	// Initializes the scheduler's registers and its local memory tables.
	//
	// Registers written (all are macro parameters):
	//   port_init_credit_vector, port_credit_vector = PORT_INITIAL_CREDIT_VECTOR
	//   ring      = RING_ID << 2
	//   port_mask = all 1's
	//   minus_two = 0xfffffffe
	// plus the absolute register @port_empty_vector = 0.
	//
	// Local memory written through LM handle 0, starting at
	// PORT_LM_BASE_OFFSET:
	//   per port  (8 words): schedule vector, queue mask, queue empty vector,
	//                        packets scheduled, current weight, weight
	//                        quantum, 2 words skipped
	//   per queue (2 words): credit increment, current credit
	// The queue table directly follows the port table.
	
	.reg	i
	.reg 	credit_increment
	.reg 	weight
	.reg	q_mask

	// ((1 << number_of_queues_per_port) - 1)

	immed32(q_mask, QUEUE_MASK)

	// This is a constant 0xfffffffe which is used to compute the next port mask. 
	// This is stored in a register since it takes two instructions to compute

	immed32(minus_two, 0xfffffffe)

	// Store the initial bit vector for ports with credit in a register.
	// The value is forced to 0xf here. Any earlier definition is dropped
	// first, and the symbol is defined even when nothing defined it before,
	// so the immed32 lines below never see an undefined constant.

#ifdef PORT_INITIAL_CREDIT_VECTOR
#undef PORT_INITIAL_CREDIT_VECTOR
#endif
#define PORT_INITIAL_CREDIT_VECTOR 0xf

	immed32(port_init_credit_vector, PORT_INITIAL_CREDIT_VECTOR)
	immed32(port_credit_vector, PORT_INITIAL_CREDIT_VECTOR)

	// Id of the scratch ring to talk to the Queue Manager

	alu_shf[ring, --, b, RING_ID, <<2]

	// This portMask is used to control the round robin scheduling among ports
	// Initialize it to all 1's

	alu[port_mask, --, ~B, 0]

	// Initialize the port empty vector to 0. All ports are initially empty

	immed[@port_empty_vector,0x0] 

	// Set the base address for ports in local memory. This has
	// a 3 cycle latency; the two nops and the immed below come before the
	// first *l$index0 access.

	localmem_set_address(PORT_LM_BASE_OFFSET, 0, LM_HANDLE_0)

	nop
	nop

	// For each port set up the configuration

	immed[i, 0]

	.while (i < NUMBER_OF_PORTS)		

		// Set schedule vector and queue mask to all 1's. Every queue has enough 
		// credits

		alu[*l$index0++, --, B, q_mask] ; write schedule vector
		alu[*l$index0++, --, B, q_mask] ; write queue mask

		// Set all ports to be empty

		alu[*l$index0++, --, B, 0] 			; QueueEmpty Vector 

		// Set the packets scheduled to be  0

		alu[*l$index0++, --, B, 0] 			; packets scheduled 

		_qm_scheduler_get_port_weight(weight, i)		

		alu[*l$index0++, --, B, weight] 	; current port weight for WRR among ports
		alu[*l$index0++, --, B, weight]		; port weight quantum for WRR
		
		// Two dummy reads: they only advance the local memory pointer past
		// the last two words of this port's 8-word entry

		alu[--, --, B, *l$index0++]
		alu[--, --, B, *l$index0++]

		// increment i

		alu[i, i, +, 1]

	.endw
	
	// initialize the loop variable
	
	immed[i, 0] 

	// For each queue set up the current credit and quantum. Get the quantum
	// from the SRAM control block

	.while (i < (NUMBER_OF_PORTS * NUMBER_OF_QUEUES_PER_PORT))
		
		_qm_scheduler_get_credit_increment(credit_increment, i)
		
		// Set the credit increment and the current credit

		alu[*l$index0++, --, B, credit_increment] ; credit increment
		alu[*l$index0++, --, B, credit_increment] ; current credit 

		// increment i

		alu[i, i, +, 1]

	.endw

.end

#endm

/////////////////////////////////////////////////////////////////////////////////////

#endif   // __QM_SCHEDULER_PACKET_INIT_UC__

/////////////////////////////////////////////////////////////////////////////////////