/////////////////////////////////////////////////////////////////////////////////////
//                                                                      
//                  I N T E L   P R O P R I E T A R Y                   
//                                                                      
//     COPYRIGHT (c)  2001 BY  INTEL  CORPORATION.  ALL RIGHTS          
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//     THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                      
//                        INTEL  CORPORATION                            
//                                                                     
//                     2200 MISSION COLLEGE BLVD                        
//                                                                      
//               SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                      
/////////////////////////////////////////////////////////////////////////////////////
//
// Macros for initializing the scheduler and setting up scratch and NN rings
//
/////////////////////////////////////////////////////////////////////////////////////
//
// The ring macros are only called during standalone testing of the scheduler. 
// Typically in a fully integrated application, the XScale or a single microengine 
// sets up all the scratch rings
//
/////////////////////////////////////////////////////////////////////////////////////

#ifndef __CSIX_SCHEDULER_INIT_UC__
#define __CSIX_SCHEDULER_INIT_UC__

/////////////////////////////////////////////////////////////////////////////////////

#include "scheduler.h"

/////////////////////////////////////////////////////////////////////////////////////
//
// Initialize scratch ring 
//
// RBASE: Base address of scratch ring. Should be 4 byte aligned
// RSIZE: Size of scratch ring in words. Valid values are 128, 256, 512, 1024
// RING:  Ring number (0..15)
// 
///////////////////////////////////////////////////////////////////////////////////////

#macro	_scheduler_init_scratch_ring(RBASE, RSIZE, RING)
.begin 	

	// Programs the base (with size code), head and tail CSRs of one scratch
	// ring with three cap[write] commands and waits until all three are done.
	//
	// The arguments are pasted into symbol names below, so after evaluation
	// RSIZE must give a suffix for which RING_SIZE_<RSIZE> is defined and
	// RING one for which SCRATCH_RING_BASE_/HEAD_/TAIL_<RING> are defined.
	//
	// Side effect: the preprocessor names RN, RS and RB are (re)defined here
	// and undefined again at the end of the macro.

	.sig 	CW1, CW2, CW3						; completion signals for the three cap[write]s
	.reg	$_rhead, $_rtail, $_rbase, _base
 
	// These define_eval are required. Otherwise the caller cannot have spaces
	// in between parameters like init[a, b, c].

	#define_eval RN		RING
	#define_eval RS		RSIZE
	#define_eval RB		RBASE

	immed[$_rhead, 0x0]								; ring head starts at 0
	immed[$_rtail, 0x0]								; ring tail starts at 0
	immed[_base, RB]								; ring base address

	// Merge the size code into bits [31:30] of the base word. The code is
	// whatever RING_SIZE_<RS> expands to (the original note here gives
	// 0 => ring size 128).

	alu_shf[$_rbase, _base, or, RING_SIZE_/**/RS, <<30]

	// Initialise the Scratch Ring base (and size), head and tail.

	// Note: We can Queue a max. of 4 commands to any external unit 
	// (like sram, dram, cap, etc). Beyond this limit the ME will stall.
	// The limit of 4 includes all the commands issued by all other MEs 
	// as well. It is the programmers responsibility to ensure this.

	// Since this is the only thread and ME that is queuing cmds at this time,
	// we can queue 3 commands safely.

	cap[write, $_rbase, SCRATCH_RING_BASE_/**/RN], sig_done[CW1]	; base | size code
	cap[write, $_rhead, SCRATCH_RING_HEAD_/**/RN], sig_done[CW2]	; head = 0
	cap[write, $_rtail, SCRATCH_RING_TAIL_/**/RN], sig_done[CW3]	; tail = 0

	// Wait for all three writes. The signal names are spelled exactly as
	// declared in .sig above so this does not rely on the assembler being
	// case-insensitive for signal names.

	ctx_arb[CW1, CW2, CW3]		

#undef RN
#undef RS
#undef RB

.end
#endm

///////////////////////////////////////////////////////////////////////////////////////
//
// Initialize all rings
//
// Initialize the scratch ring used to send dequeue requests to the Queue Manager
// and the next neighbor ring used to receive QM messages from the Queue Manager
//
///////////////////////////////////////////////////////////////////////////////////////

#macro _scheduler_init_rings()
.begin

	// Sets up the rings used by the scheduler:
	//   - in UNIT_TEST builds only, the scratch ring described by
	//     RING_BASE / RING_SIZE / RING_ID;
	//   - always, the CTX_ENABLES local CSR and the next neighbor ring
	//     put/get indices.

	.reg 	ctx_enable_data

#ifdef UNIT_TEST

	// Standalone test: nobody else sets up the scratch ring, so do it here
	// with the ring init macro defined earlier in this file.
	// NOTE(review): this previously called init_scratch_ring, which is not
	// defined in this file - confirm no test harness relies on that name.

	_scheduler_init_scratch_ring[RING_BASE, RING_SIZE, RING_ID]

#endif
	
	//
	// Set up the CTX_ENABLES local csr for NN ring 
	// 
	// bit 20 NN_MODE = 0 : next neighbor register are written
	//                                      by previous ME 
	//
	// bits [19:18] NN_RING_EMPTY = 0 : NN_EMPTY asserts when
	//                                                  NN_PUT == NN_GET
	//													(default)
	// bits [15:8] CTX enables for contexts 0 to 7
	//
	// 0xFF00 therefore sets bits [15:8] and leaves every other field,
	// including NN_MODE and NN_RING_EMPTY, at 0.
	//

    immed32[ctx_enable_data, 0xFF00]			
    local_csr_wr[CTX_ENABLES, ctx_enable_data] 	

	// initialize the NN indices.
	local_csr_wr[nn_put, 0]
	local_csr_wr[nn_get, 0]	

.end
#endm

///////////////////////////////////////////////////////////////////////////////////// 
//
// Get the credit quantum for a given queue. This should be read from a SRAM control 
// block. We read credit quantum for two queues at a time and store it in a 
// credit_increment register to be finally stored in local memory.

// For now (when USE_IMPORT_VAR is not defined) we simply set both the current credit
// and the credit increment to 1 for the even and the odd queue alike (0x01010101)
//
// Output:   CreditIncrement
// Input:    QueueId
//
/////////////////////////////////////////////////////////////////////////////////////
	
#macro _scheduler_get_credit_increment(credit_increment, queue_id)

// Builds the packed credit word for a pair of queues in credit_increment.
//
//   credit_increment (out): bits [7:0]  and [15:8]  <- quantum of the first  queue
//                           bits [23:16] and [31:24] <- quantum of the second queue
//   queue_id (in):          id of the first (even) queue of the pair; only
//                           used when USE_IMPORT_VAR is defined.
//
// Which byte of each pair is the current credit and which is the increment
// is not visible in this macro.

#ifdef USE_IMPORT_VAR
  .begin
  .reg sram_block_base, offset, $credit0, $credit1, temp_quantum
  .sig sram_sig
  .xfer_order $credit0, $credit1

  // get the sram address to read from: one 4-byte word per queue, starting
  // at SCHED_VOQ_CREDIT_BASE

  immed32(sram_block_base, SCHED_VOQ_CREDIT_BASE)
  alu[offset, --,b, queue_id, <<2]

  // read two consecutive words (this queue and the next one); the context
  // is swapped out until the read completes

  sram[read, $credit0, sram_block_base, offset, 2], ctx_swap[sram_sig]

  // $credit0 contains quantum for even queue. Keep only its low 8 bits so a
  // value larger than 0xff cannot spill into the odd queue's fields.

  alu[temp_quantum, $credit0, AND, 0xff]
  alu[credit_increment, --, b, temp_quantum]
  alu[credit_increment, credit_increment, OR, temp_quantum, <<8]

  // $credit1 contains quantum for odd queue. Mask it the same way before it
  // is placed in bits [23:16] and [31:24].

  alu[temp_quantum, $credit1, AND, 0xff]
  alu[credit_increment, credit_increment, OR, temp_quantum, <<16]
  alu[credit_increment, credit_increment, OR, temp_quantum, <<24]

  .end

#else

  // No SRAM control block: every byte field is 1 for both queues

  immed32(credit_increment, 0x01010101)

#endif

#endm


///////////////////////////////////////////////////////////////////////////////////////////////////////// 
//
// Initialization routine for Scheduler
//
// outputs:
//
//		ring:				register with ring id * 4 
// 		minus_two			register with 0xfffffffe constant
//		queue_lm_base		offset in local memory where queue structures begin
//		group_mask			mask used for round robin scheduling among queue groups
//
/////////////////////////////////////////////////////////////////////////////////////

#ifndef UNIT_TEST

#macro scheduler_init(ring, minus_two, queue_lm_base, group_mask)

.begin

	.reg 	credit_increment
	.reg 	idx

	// ---- Values handed back to the caller --------------------------------

	// Round robin mask over the queue groups: start with every bit set

	alu[group_mask, --, ~B, 0]

	// Scratch ring used to talk to the Queue Manager, as ring id * 4

	alu_shf[ring, --, b, RING_ID, <<2]

	// Local memory offset of the first queue structure, kept in a register
	// so it does not have to be recomputed later

	immed32[queue_lm_base, QUEUE_LOCAL_MEM_BASE]

	// Constant 0xfffffffe used when computing the next group mask; it is
	// kept in a register because loading it takes two instructions

	immed32[minus_two, 0xfffffffe]

	// ---- Absolute (@) scheduler state ------------------------------------

	// Root flow control vector: all 1's, i.e. flow control is off on every
	// queue group

	alu[@root_flow_control_vector, --, ~B, 0]

	// Root empty vector: 0, i.e. all queues are empty

	immed[@root_empty_vector,0] 

	// No packets in flight and none scheduled yet

	alu[@packets_in_flight, --, B, 0]
	alu[@packets_scheduled, --, B, 0]

	// ---- Local memory: per queue group words ------------------------------

	// Point local memory handle 0 at address 0. The new address needs
	// 3 cycles before it can be used; the nop, the counter load and the
	// branch of the .while below are meant to cover that, so keep this
	// sequence in order.

	localmem_set_address(0, 0, LM_HANDLE_0)
	nop

	immed[idx, 0]

	// Four consecutive words are written for every queue group

	.while (idx < NUMBER_OF_QUEUE_GROUPS)	

		alu[*l$index0++, --, B, 0] 	; queue empty vector: all queues empty
		alu[*l$index0++, --, ~B, 0] ; flow control vector: all 1's
		alu[*l$index0++, --, ~B, 0] ; queue mask: all 1's
		alu[*l$index0++, --, B, 0] 	; reserved word: 0

		alu[idx, idx, +, 1]

	.endw

	// ---- Local memory: per queue credits ----------------------------------

	immed[idx, 0] 

	// One word is written per pair of queues, directly after the group
	// words above; it holds the credit and credit increment of both queues

	.while (idx < (NUMBER_OF_QUEUE_GROUPS * NUMBER_OF_QUEUES_PER_GROUP))

		_scheduler_get_credit_increment(credit_increment, idx)

		alu[*l$index0++, --, B, credit_increment] 	

		// two queues handled per iteration

		alu[idx, idx, +, 2]

	.endw

	// ---- Rings -----------------------------------------------------------

	// Next neighbor ring setup (and scratch ring in test builds)

	_scheduler_init_rings()

.end

#endm

#endif

/////////////////////////////////////////////////////////////////////////////////////

#endif // __CSIX_SCHEDULER_INIT_UC__

/////////////////////////////////////////////////////////////////////////////////////