/////////////////////////////////////////////////////////////////////////////////////
//
//
//                  I N T E L   P R O P R I E T A R Y
//
//     COPYRIGHT [c]  2002 BY  INTEL  CORPORATION.  ALL RIGHTS
//     RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY
//     BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A
//     RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER
//     LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,
//     MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT
//     THE PRIOR WRITTEN PERMISSION OF :
//
//                        INTEL  CORPORATION
//
//                     2200 MISSION COLLEGE BLVD
//
//               SANTA  CLARA,  CALIFORNIA  95052-8119
//
//
/////////////////////////////////////////////////////////////////////////////////////
//
//
//      File Name: pkt_copier.uc
//
//      Purpose: Packet replication  
//
/////////////////////////////////////////////////////////////////////////////////////

#ifndef	_PKT_COPIER_UC_
#define	_PKT_COPIER_UC_

//
// Include files
// 
#include	<stdmac.uc>
#include	<xbuf.uc>
#include	<sig_macros.uc>
#include	<dl_buf.uc>
#include	<dispatch_loop.uc>
#include	<dl_meta_ext.uc>
#include	"dl_system.h"
#include	"pkt_copier.h"
#include	"pkt_copier_util.uc"


//
// Declarations for the request/parent-meta side of the copier. They are
// compiled for the 1 ME build (PKT_COPIER_2ME undefined) and for the first ME
// of the 2 ME build (PKT_COPIER_1ST_ME defined).
//
#if		!defined(PKT_COPIER_2ME) || defined(PKT_COPIER_1ST_ME)

.sig sig_scr_req			; For reading the copy request
.sig sig_parent_meta_wr		; write ref. cnt
.sig sig_meta_rd			; for reading parent meta
.reg scr_ring_request		; store ring number to get requests
.reg parent_meta_base_lw	; LW base address for parent meta data

// Read transfer registers (7 longwords) that receive the parent meta data.

xbuf_alloc[$parent_meta, 7, read]

// Single write transfer register used for the reference count.

.reg write $pkt_copier_ref_cnt[1]

//
// Read transfer registers (PKT_COPIER_REQUEST_SIZE longwords) that receive
// a copy request.
//
 xbuf_alloc[$request, PKT_COPIER_REQUEST_SIZE, read]

// Debug-only count of requests; incremented by the main-loop macros.
#ifdef	_DEBUG_COUNTERS_
.reg	@pkt_copier_num_req
#endif

#endif


//
// Declarations for the child-buffer/response side of the copier. They are
// compiled for the 1 ME build (PKT_COPIER_2ME undefined) and for the second ME
// of the 2 ME build (PKT_COPIER_2ND_ME defined).
//
#if		!defined(PKT_COPIER_2ME) || defined(PKT_COPIER_2ND_ME)

.sig sig_scr_rsp			; for writing the copy response
.sig sig_buf_alloc			; for obtaining a buffer
.sig sig_child_meta_wr		; for writing child meta
.reg scr_ring_response		; store ring number to send responses
.reg child_meta_constant	; constant part of child meta LW0 (built by the init macros)
.reg rx_stat_constant		; set to (1 << 11) by the init macros
.reg cur_child_handle		; working copy of $prefetch_handle for the current copy
.reg copy_for_outport		; store outport for current iteration

//
// Buffer handle returned by the dl_buf_alloc() issued one iteration ahead.
// The main loops treat a value of zero as "no buffer available".
//
.reg read $prefetch_handle

//
// Write transfer registers (8 longwords) holding the child meta data.
//
 xbuf_alloc[$child_meta, 8, write]

//
// Write transfer registers (PKT_COPIER_RESPONSE_SIZE longwords) holding
// the response.
//
 xbuf_alloc[$response, PKT_COPIER_RESPONSE_SIZE, write]

// Debug-only counters: responses sent and iterations that found no
// prefetched buffer.
#ifdef	_DEBUG_COUNTERS_
.reg	@pkt_copier_num_rsp
.reg	@pkt_copier_buf_alloc_fail
#endif

#endif

//
// Next thread signal. It is pinned to the fixed signal number
// PKT_COPIER_NEXT_THD_SIGNAL so that it can be raised through the
// SAME_ME_SIGNAL CSR by another thread of this ME.
//
.sig 	volatile			sig_next_thread
.addr	sig_next_thread		PKT_COPIER_NEXT_THD_SIGNAL


// pkt_copier_next_thread_gpr holds the value that is written to the
// SAME_ME_SIGNAL CSR to signal the next thread. It is expected to be
// prepared by _pkt_copier_setup_next_thread_chain() (defined outside this
// file).

.reg	pkt_copier_next_thread_gpr
.reg	mask_ffff			// loaded with 0xffff by the init macros

//
// State that only the 1 ME build needs.
//
#ifndef	PKT_COPIER_2ME

.reg	exe_stat_flag		// per-thread status bits (REQUEST_BIT, PARENT_META_READ_BIT)
.reg	context_ptr_base	// loaded with PKT_COPIER_CTX_BASE by _pkt_copier_init()
//
// Registers for signal masks. The *_default registers are built once during
// initialization; the working copies are reloaded from them at the start of
// every iteration and then trimmed/extended to match the I/O actually issued.
//
.reg	sig_mask_ph1_default	; will save some cycles
.reg	sig_mask_ph2_default	; will save some cycles
.reg	sig_mask_ph1
.reg	sig_mask_ph2

#endif


/////////////////////////////////////////////////////////////////////////////////////
// 
// _pkt_copier_init()
//
// 	Description:
// 	
//		Initialize the packet copier microblock by setting up global variables etc.
//		This macro is used in 1 ME configuration.
//
//		Every thread:
//		  - sets up the next-thread signalling chain and constant registers,
//		  - builds the default wake-up masks for phase 1 and phase 2,
//		  - pre-loads LW0 of the child meta data,
//		  - after its turn in the start-up chain, issues the first buffer
//		    allocation and the first request read, then swaps out waiting
//		    for them.
//
//		Thread 0 additionally initializes the local queue descriptor, the
//		copy context and the debug counters, signals itself and waits for
//		ME_INIT_SIGNAL. The other threads wait for sig_next_thread.
//
//	Output
//
//		None.
//
//	Input
//
//		None.
// 
/////////////////////////////////////////////////////////////////////////////////////

#macro	_pkt_copier_init()

.begin
	
	// wake-up mask used only for the swap-out at the end of this macro

	.reg	sig_mask

	// common initialization for all threads 

	// initialize next thread signal 

	_pkt_copier_setup_next_thread_chain()

	move[mask_ffff, 0xffff]

	// initialize signal mask 

	// at end of ph1, we wait on the following signals:
	// parent meta read and next thread

	immed[sig_mask_ph1_default, 0x0]

	alu_shf[sig_mask_ph1_default, sig_mask_ph1_default, OR, 0x1, <<&sig_meta_rd]

	alu_shf[sig_mask_ph1_default, sig_mask_ph1_default, OR, 0x1, <<&sig_next_thread]

	// at end of ph2, we wait on the following signals:
	// next thread, buffer allocation, request read, response write and
	// child meta write

	immed[sig_mask_ph2_default, 0x0]

	alu_shf[sig_mask_ph2_default, sig_mask_ph2_default, OR, 0x1, <<&sig_next_thread]

	alu_shf[sig_mask_ph2_default, sig_mask_ph2_default, OR, 0x1, <<&sig_buf_alloc]

	alu_shf[sig_mask_ph2_default, sig_mask_ph2_default, OR, 0x1, <<&sig_scr_req]

	alu_shf[sig_mask_ph2_default, sig_mask_ph2_default, OR, 0x1, <<&sig_scr_rsp]

	alu_shf[sig_mask_ph2_default, sig_mask_ph2_default, OR, 0x1, <<&sig_child_meta_wr]


	// note that sig_parent_meta_wr is not part of the default; _pkt_copier()
	// adds it to the phase 2 mask only when a reference count is written.

	// reset the status flag

	immed[exe_stat_flag, 0x0]

	// init sigmask for initialization phase

	immed[sig_mask, 0x0]
	
	// init RX status
	
	alu_shf[rx_stat_constant, --, B, 0x1, <<11]

	// default copy

	immed[copy_for_outport, 0x0]

	// initialize the scratch ring gprs (ring number shifted left by 2)
	 
	alu_shf[scr_ring_request, --, B, PKT_COPIER_RING_IN, <<2]

	alu_shf[scr_ring_response, --, B, PKT_COPIER_RING_OUT, <<2]

	// child meta constant: low 4 bits of the child freelist ID ...

	alu[child_meta_constant, --, B, ((CHILD_FREELIST_ID) & 0xf)]

	// ... ORed with (0x80 << 22); for now, set the data offset to 128

	alu_shf[child_meta_constant, child_meta_constant, OR, 0x80, <<22]

	// set the parent meta base: BUF_SRAM_BASE converted to a longword
	// address (byte address >> 2)
	 

 	immed32(parent_meta_base_lw, BUF_SRAM_BASE)

	alu_shf[parent_meta_base_lw, --, B, parent_meta_base_lw, >>2]
	
	// setup context pointer

	move[context_ptr_base, PKT_COPIER_CTX_BASE]

	// to get rid of some warnings, do the following: issue dummy SRAM reads
	// from address 0 into the read transfer buffers. The data is not used.
	// NOTE(review): the $request read uses a literal count of 4 rather than
	// PKT_COPIER_REQUEST_SIZE - confirm the two agree.
	.begin
		
		.reg	sram_addr
		.sig	tmp_sig

		immed[sram_addr, 0x0]

		sram[read, $parent_meta[0], 0, sram_addr, 7], ctx_swap[tmp_sig]

		sram[read, $request[0], 0, sram_addr, 4], ctx_swap[tmp_sig]

	.end

	// init LW0 of child meta, save 1 cycle

	alu[$child_meta[0], --, B, child_meta_constant]

	// thread specific initialization here 
	

	.if (ctx() == 0)

		// thread 0 specific initialization 

		; initialize the local queue descriptor
		_pkt_copier_queue_desc_init()

		; initialize the Copy context
		_pkt_copier_ctx_init()


		#ifdef	_DEBUG_COUNTERS_
		
			immed[@pkt_copier_num_req, 0]
			immed[@pkt_copier_num_rsp, 0]
			immed[@pkt_copier_buf_alloc_fail, 0x0]
		#endif

		// initially send signal to self 

		signal_first_ctx[PKT_COPIER_NEXT_THD_SIGNAL]


		// now wait till system initialization is complete 

		.begin

			// we need to manually allocate a signal on which we wait.

			.sig 	volatile		sys_init_signal
			.addr	sys_init_signal	ME_INIT_SIGNAL

			// we wait for the signal here 

			ctx_arb[sys_init_signal]
		.end


	.else
		

		// other threads: wait for the previous thread in the chain

		ctx_arb[sig_next_thread]

		
	.endif

	
	// continue common initialization 

	//  signal next thread 


	local_csr_wr[SAME_ME_SIGNAL, pkt_copier_next_thread_gpr]

	// issue a buffer allocation request 

	dl_buf_alloc($prefetch_handle, CHILD_FREELIST_ID, sig_buf_alloc, SIG_NONE, FREELIST_ID_TYPE)

	// issue a read request 

	_pkt_copier_get_request()

	// remember that a request read is outstanding; _pkt_copier() tests
	// this bit at the start of phase 1

	alu_shf[exe_stat_flag, exe_stat_flag, OR, 0x1, <<REQUEST_BIT]
	
	// start from the phase 2 default and drop the two writes that have not
	// been issued yet (response and child meta). What is left is
	// next thread + buffer allocation + request read.

	alu_shf[sig_mask, --, B, sig_mask_ph2_default]

	alu_shf[sig_mask, sig_mask, AND~, 0x1, <<&sig_scr_rsp]

	alu_shf[sig_mask, sig_mask, AND~, 0x1, <<&sig_child_meta_wr]


	// simply wait: the CSR write below sits in the defer slot of the
	// ctx_arb and installs sig_mask as the wake-up events

	ctx_arb[--], defer[1]

	local_csr_wr[ACTIVE_CTX_WAKEUP_EVENTS, sig_mask]


// NOTE(review): no branch to end# is visible in this macro.
end#:
.end
#endm


/////////////////////////////////////////////////////////////////////////////////////
// 
// _pkt_copier_1st_me_init()
//
// 	Description:
// 	
//		Initialize the packet copier microblock by setting up global variables etc.
//		This macro is used by the 1st ME in 2 ME configuration.
//
//		Every thread sets up the next-thread chain, the request ring GPR and
//		the parent meta base. Thread 0 waits for ME_INIT_SIGNAL, the other
//		threads wait for sig_next_thread. Each thread then signals the next
//		thread, issues its first request read and waits for it.
//
//	Output
//
//		None.
//
//	Input
//
//		None.
// 
/////////////////////////////////////////////////////////////////////////////////////
#macro	_pkt_copier_1st_me_init()


	_pkt_copier_setup_next_thread_chain()

	move[mask_ffff, 0xffff]

	// initialize the scratch ring gprs (ring number shifted left by 2)
	 
	alu_shf[scr_ring_request, --, B, PKT_COPIER_RING_IN, <<2]

	// set the parent meta base: BUF_SRAM_BASE converted to a longword
	// address (byte address >> 2)
	 
 	immed32(parent_meta_base_lw, BUF_SRAM_BASE)

	alu_shf[parent_meta_base_lw, --, B, parent_meta_base_lw, >>2]
	
	// to get rid of some warnings, do the following: issue dummy SRAM reads
	// from address 0 into the read transfer buffers. The data is not used.
	// NOTE(review): the $request read uses a literal count of 4 rather than
	// PKT_COPIER_REQUEST_SIZE - confirm the two agree.
	.begin
		
		.reg	sram_addr
		.sig	tmp_sig

		immed[sram_addr, 0x0]

		sram[read, $parent_meta[0], 0, sram_addr, 7], ctx_swap[tmp_sig]

		sram[read, $request[0], 0, sram_addr, 4], ctx_swap[tmp_sig]

	.end

	#ifdef	_DEBUG_COUNTERS_
	
		immed[@pkt_copier_num_req, 0]

	#endif

	// initially send the next-thread signal to the first context.
	// NOTE(review): unlike _pkt_copier_init(), this is executed by every
	// thread, not only by thread 0 - confirm this is intended.

	signal_first_ctx[PKT_COPIER_NEXT_THD_SIGNAL]

	.if (ctx() == 0)

		// now wait till system initialization is complete 

		.begin

			// we need to manually allocate a signal on which we wait.

			.sig 	volatile		sys_init_signal
			.addr	sys_init_signal	ME_INIT_SIGNAL

			// we wait for the signal here 

			ctx_arb[sys_init_signal]
		.end


	.else
		
		// other threads: wait for the previous thread in the chain

		ctx_arb[sig_next_thread]
		
	.endif

	// continue common initialization 

	//  signal next thread 


	local_csr_wr[SAME_ME_SIGNAL, pkt_copier_next_thread_gpr]

	// issue the first read of a copy request

	_pkt_copier_get_request()

	// wait for the request read and for our turn in the thread chain

	ctx_arb[sig_scr_req, sig_next_thread]

#endm


/////////////////////////////////////////////////////////////////////////////////////
// 
// _pkt_copier_2nd_me_init()
//
// 	Description:
// 	
//		Initialize the packet copier microblock by setting up global variables etc.
//		This macro is used by the 2nd ME in 2 ME configuration.
//
//		Every thread sets up the next-thread chain, the child meta constants,
//		the response ring GPR and the copy context. Thread 0 additionally
//		rewrites CTX_ENABLES for the NN ring, clears _PORT_MASK and waits
//		for ME_INIT_SIGNAL; the other threads wait for sig_next_thread. Each
//		thread then signals the next thread, issues its first buffer
//		allocation and waits for it.
//
//	Output
//
//		None.
//
//	Input
//
//		None.
// 
/////////////////////////////////////////////////////////////////////////////////////
#macro	_pkt_copier_2nd_me_init()


	_pkt_copier_setup_next_thread_chain()

	move[mask_ffff, 0xffff]

	// init RX status
	
	alu_shf[rx_stat_constant, --, B, 0x1, <<11]

	// default copy

	immed[copy_for_outport, 0x0]

	// initialize the scratch ring gprs (ring number shifted left by 2)

	alu_shf[scr_ring_response, --, B, PKT_COPIER_RING_OUT, <<2]

	// child meta constant: low 4 bits of the child freelist ID ...

	alu[child_meta_constant, --, B, ((CHILD_FREELIST_ID) & 0xf)]

	// ... ORed with (0x80 << 22); for now, set the data offset to 128

	alu_shf[child_meta_constant, child_meta_constant, OR, 0x80, <<22]

	// init LW0 of child meta, save 1 cycle

	alu[$child_meta[0], --, B, child_meta_constant]

	// initialize the Copy context

	_pkt_copier_2me_ctx_init()

	#ifdef	_DEBUG_COUNTERS_
	
		immed[@pkt_copier_num_rsp, 0]

		immed[@pkt_copier_buf_alloc_fail, 0x0]

	#endif

	// initially send the next-thread signal to the first context.
	// NOTE(review): unlike _pkt_copier_init(), this is executed by every
	// thread, not only by thread 0 - confirm this is intended.

	signal_first_ctx[PKT_COPIER_NEXT_THD_SIGNAL]

	.if (ctx() == 0)

		// perform NN ring initialization

		.reg ctx_enable

		//	Set the NN ring so that the ME can write to its own NN ring
		//	NOTE(review): the code below clears bit 20 and sets bits 19:18
		//	to 2; check these against the CTX_ENABLES register description
		//	and the sentence above.

		// presumably local_csr_rd followed by immed is the CSR read idiom,
		// i.e. ctx_enable receives the CTX_ENABLES contents rather than 0
		// - confirm against the programmer's reference.

		local_csr_rd[ctx_enables]

		immed[ctx_enable, 0]

		// clear bit 20

		alu_shf[ctx_enable, ctx_enable, AND~, 1, <<20]

		// replace the 2-bit field at bits 19:18 with the value 2

		alu_shf[ctx_enable, ctx_enable, AND~, 3, <<18]
		alu_shf[ctx_enable, ctx_enable, OR, 0x2, <<18]

		// write the value back; the two ALU no-ops that follow are
		// presumably padding for the CSR write latency - TODO confirm

		local_csr_wr[ctx_enables, ctx_enable]
		alu[--, --, b, 0]
		alu[--, --, b, 0]

		// set the port mask: zero means no request is in progress, see
		// the _PORT_MASK test in _pkt_copier_2nd_me()

		alu[_PORT_MASK, --, B, 0x0]

		// now wait till system initialization is complete 

		.begin

			// we need to manually allocate a signal on which we wait.

			.sig 	volatile		sys_init_signal
			.addr	sys_init_signal	ME_INIT_SIGNAL

			// we wait for the signal here 

			ctx_arb[sys_init_signal]
		.end


	.else
		
		// other threads: wait for the previous thread in the chain

		ctx_arb[sig_next_thread]
		
	.endif

	// continue common initialization 

	_pkt_copier_2me_ctx_ptr_set()

	//  signal next thread 

	local_csr_wr[SAME_ME_SIGNAL, pkt_copier_next_thread_gpr]

	// issue a buffer allocation request 

	dl_buf_alloc($prefetch_handle, CHILD_FREELIST_ID, sig_buf_alloc, SIG_NONE, FREELIST_ID_TYPE)

	// wait for the buffer handle and for our turn in the thread chain

	ctx_arb[sig_buf_alloc, sig_next_thread]

#endm


/////////////////////////////////////////////////////////////////////////////////////
// 
// _pkt_copier()
//
// 	Description:
// 	
//		Implement packet copier functionality for 1 ME configuration.
//
//		One pass through this macro is one iteration of a thread and has two
//		phases, each ending with a swap-out on a wake-up mask:
//
//		Phase 1: consume the request read issued in the previous phase 2 (if
//		any) and start the parent meta read for it; meanwhile produce one
//		copy for the request at the head of the local queue: child meta,
//		optional parent reference count write, response, and prefetch of the
//		next child buffer.
//
//		Phase 2: enqueue the request whose parent meta was read in phase 1
//		to the local queue and, if possible, issue the next request read.
//
//		exe_stat_flag carries REQUEST_BIT / PARENT_META_READ_BIT between the
//		phases; sig_mask_ph1 / sig_mask_ph2 are rebuilt from their defaults
//		every iteration and adjusted to match the I/O actually issued.
//
//		The macro never falls out of its end: every path ends in a ctx_arb
//		that branches back to phase_1# or phase_2#.
//
//	Output
//
//		None.
//
//	Input
//
//		None.
// 
/////////////////////////////////////////////////////////////////////////////////////
#macro	_pkt_copier()


	// all threads enter here for each iteration
	 

start#:


	// 		-------------------------------
	//		S T A R T   O F   P H A S E   1
	//		-------------------------------
phase_1#:

	// let assembler know that I/O is completed: every signal that may have
	// been part of the phase 2 wake-up mask.

	.io_completed sig_buf_alloc sig_next_thread sig_scr_req sig_scr_rsp

	.io_completed sig_child_meta_wr sig_parent_meta_wr
	

.begin

	// signal next thread

	local_csr_wr[SAME_ME_SIGNAL, pkt_copier_next_thread_gpr]

	// check if we issued a request. The three instructions that follow are
	// the defer slots of this branch and execute on both paths.

	br_bclr[exe_stat_flag, REQUEST_BIT, ph1_no_request_issued#], defer[3]

	// defer slot 1: clear all status bits for this iteration

	immed[exe_stat_flag, 0x0]

	// defer slot 2: reload the phase 2 mask from its default

	alu[sig_mask_ph2, --, B, sig_mask_ph2_default]

	// defer slot 3: reload the phase 1 mask from its default
	// (parent meta read + next thread signal)

	alu[sig_mask_ph1, --, B, sig_mask_ph1_default]


	// check if the request is valid: a zero first longword means there
	// was no request

	alu[--, --, B, $request[0]]

	beq[ph1_invalid_request#]


	// issue meta read for this request

	dl_meta_load_cache($request[1], FREE_LIST_ID, $parent_meta, sig_meta_rd, 1, 6)


	// set a bit indicating we are fetching meta data

	alu_shf[exe_stat_flag, exe_stat_flag, OR, 0x1, <<PARENT_META_READ_BIT]


ph1_continue_checks#:


	// while meta read is going on, process a current request. Consider
	// LM latency

	_pkt_copier_ctx_ptr_set()


	// check if we have a prefetch buffer (zero handle = none)

	alu[--, --, B, $prefetch_handle]

	beq[ph1_no_prefetch_handle#]


	// check if we have entry to process (zero _QD_CNT = nothing to do)

	alu[--, --, B, _QD_CNT]

	beq[ph1_nothing_to_process#]


ph1_process_cur_request#:

	// store the prefetch handle

	alu[cur_child_handle, --, B, $prefetch_handle]

	// we have something to process. Get next copy number

	_pkt_copier_get_next_copy_num()
	
	// create child meta data

	_pkt_copier_create_child_meta()

	// now set the SOP/EOP and Cell count bits: the most significant byte
	// of the child handle is taken from the parent SOP handle

	ld_field[cur_child_handle, 1000, _PARENT_SOP_HANDLE, <<0]

	// check if we have to write parent ref. cnt

	br_bclr[_FLAGS, REF_CNT_BIT, ph1_no_write_ref_cnt#]

	// clear this flag

	alu_shf[_FLAGS, _FLAGS, AND~, 0x1, <<REF_CNT_BIT]

	// if so, set the reference count
	
	.begin

		.reg	count

		// count = bits 23:16 of _CNT, zero extended

		ld_field_w_clr[count, 0001, _CNT, >>16]

		_pkt_copier_set_ref_cnt(count, _PARENT_SOP_HANDLE, FREE_LIST_ID)

	.end

	// a parent meta write is now outstanding: wait for it at the end of
	// phase 2

	alu_shf[sig_mask_ph2, sig_mask_ph2, OR, 0x1, <<&sig_parent_meta_wr]

ph1_no_write_ref_cnt#:

	// create response

	_pkt_copier_create_response()

	// now child meta data is complete 

	_pkt_copier_write_child_meta()

	// send response; response_ring_full# branches back here, so this spins
	// until the response has been written

check_ring_full#:

	_pkt_copier_check_n_write_response(response_ring_full#)

	// prefetch another buffer

	dl_buf_alloc($prefetch_handle, CHILD_FREELIST_ID, sig_buf_alloc, SIG_NONE, FREELIST_ID_TYPE)

	// finally check if we are done with copies, if so move the Queue Head

	_pkt_copier_is_request_complete(copies_still_pending#)

	// fetch next request from local queue by moving queue head

	_pkt_copier_local_dequeue_next_request()

copies_still_pending#:


#ifdef	_DEBUG_COUNTERS_

	alu[@pkt_copier_num_rsp, @pkt_copier_num_rsp, +, 1]

#endif

	// end of phase 1: swap out. The CSR write sits in the defer slot and
	// installs the phase 1 wake-up mask. Execution resumes at phase_2#.

	ctx_arb[--], defer[1]

	local_csr_wr[active_ctx_wakeup_events, sig_mask_ph1]

.end

	// 		-------------------------------
	//		S T A R T   O F   P H A S E   2
	//		-------------------------------

phase_2#:
	
.begin
	
	// let assembler know that the phase 1 I/O is completed

	.io_completed	sig_meta_rd sig_next_thread

	// signal next thread

	local_csr_wr[SAME_ME_SIGNAL, pkt_copier_next_thread_gpr]

	// check if we issued a meta read

	br_bclr[exe_stat_flag, PARENT_META_READ_BIT, ph2_meta_read_not_issued#]

	// clear this bit

	alu_shf[exe_stat_flag, exe_stat_flag, AND~, 0x1, <<PARENT_META_READ_BIT]


#ifdef	_DEBUG_COUNTERS_

	alu[@pkt_copier_num_req, @pkt_copier_num_req, +, 1]

#endif

	// this means we have a request also. Enqueue to local queue

	_pkt_copier_local_enqueue()


ph2_meta_read_not_issued#:

	// check if we can fetch another request; the macro branches to
	// ph2_request_not_issued# when it does not issue one

	_pkt_copier_check_and_issue_get_request(ph2_request_not_issued#)

	// a request read was issued: record it (in the defer slot of the
	// branch) so that the next phase 1 consumes it

	br[phase_2_complete#], defer[1]

	alu_shf[exe_stat_flag, exe_stat_flag, OR, 0x1, <<REQUEST_BIT]


ph2_request_not_issued#:

	// no request read is outstanding: clear this signal from sig mask

	alu_shf[sig_mask_ph2, sig_mask_ph2, AND~, 0x1, <<&sig_scr_req]

phase_2_complete#:
	
	// end of phase 2: swap out on the phase 2 mask (installed in the defer
	// slot) and resume at phase_1#

	ctx_arb[--], defer[1], br[phase_1#]

	local_csr_wr[active_ctx_wakeup_events, sig_mask_ph2]



.end


	// __________________________________________________
	
	// Error cases
	// __________________________________________________

response_ring_full#:

	// retry the response write (busy wait)

	nop
	br[check_ring_full#]


ph1_no_request_issued#:

ph1_invalid_request#:

	// in both cases, there is no parent meta read

	// clear the signal from the phase 1 mask (in the defer slot of the
	// branch) and rejoin the main path

	br[ph1_continue_checks#], defer[1]

	alu_shf[sig_mask_ph1, sig_mask_ph1, AND~, 0x1, <<&sig_meta_rd]
	

ph1_no_prefetch_handle#:

#ifdef	_DEBUG_COUNTERS_

	alu[@pkt_copier_buf_alloc_fail, @pkt_copier_buf_alloc_fail, +, 1]

#endif

	// at this point a parent meta read may be pending; sig_mask_ph1
	// already reflects whether it is

	// prefetch another buffer

	dl_buf_alloc($prefetch_handle, CHILD_FREELIST_ID, sig_buf_alloc, SIG_NONE, FREELIST_ID_TYPE)

	// since there was no prefetch handle, we skip lot of stuff. Adjust the signal mask
	// accordingly: no response write and no child meta write were issued.

	alu_shf[sig_mask_ph2, sig_mask_ph2, AND~, 0x1, <<&sig_scr_rsp]

	// end of phase 1 for this path; the two instructions below are the
	// defer slots of the ctx_arb

	ctx_arb[--], defer[2], br[phase_2#]

	alu_shf[sig_mask_ph2, sig_mask_ph2, AND~, 0x1, <<&sig_child_meta_wr]

	local_csr_wr[active_ctx_wakeup_events, sig_mask_ph1]



ph1_nothing_to_process#:

	// at this point we have a buffer and a parent meta read may be
	// pending; sig_mask_ph1 already reflects whether it is

	// since there was nothing to process, we skip lot of stuff. Adjust the signal mask
	// accordingly: no buffer allocation, response write or child meta write
	// were issued. The prefetched buffer is kept for the next iteration.

	alu_shf[sig_mask_ph2, sig_mask_ph2, AND~, 0x1, <<&sig_buf_alloc]

	alu_shf[sig_mask_ph2, sig_mask_ph2, AND~, 0x1, <<&sig_scr_rsp]

	// end of phase 1 for this path; the two instructions below are the
	// defer slots of the ctx_arb

	ctx_arb[--], defer[2], br[phase_2#]

	alu_shf[sig_mask_ph2, sig_mask_ph2, AND~, 0x1, <<&sig_child_meta_wr]

	local_csr_wr[active_ctx_wakeup_events, sig_mask_ph1]


#endm

/////////////////////////////////////////////////////////////////////////////////////
// 
// _pkt_copier_1st_me()
//
// 	Description:
// 	
//		Implement Packet Copier functionality of 1st ME in 2 ME configuration.
//
//		Phase 1: if the request read issued earlier returned a request
//		(non-zero first longword), read the parent meta data for it.
//		Phase 2: write the parent reference count, pass the request and the
//		parent meta data to the next ME and issue the next request read.
//
//		When there was no request, the thread still takes two turns in the
//		thread chain before it reads the next request.
//
//		The macro never falls out of its end: every path branches back to
//		phase1#.
//
//	Output
//
//		None.
//
//	Input
//
//		None.
// 
/////////////////////////////////////////////////////////////////////////////////////
#macro	_pkt_copier_1st_me()

phase1#:

	// signal next thread

	local_csr_wr[SAME_ME_SIGNAL, pkt_copier_next_thread_gpr]

	// check if valid request: a zero first longword means there was none

	alu[--, --, B, $request[0]]

	beq[ph1_no_request#]

#ifdef	_DEBUG_COUNTERS_

	alu[@pkt_copier_num_req, @pkt_copier_num_req, + , 1]

#endif

	// if so issue a read for meta data

	dl_meta_load_cache($request[1], FREE_LIST_ID, $parent_meta, sig_meta_rd, 1, 6)

	// end of phase 1: wait for the meta read and for our turn

	ctx_arb[sig_meta_rd, sig_next_thread]

phase2#:

	// signal next thread

	local_csr_wr[SAME_ME_SIGNAL, pkt_copier_next_thread_gpr]


	// set the reference count

	.begin

		.reg	count
	
		// count = bits 23:16 of request longword 3, zero extended

		ld_field_w_clr[count, 0001, $request[3], >>16]

		_pkt_copier_set_ref_cnt(count, $request[1], FREE_LIST_ID)

	.end


	// meta read is finished. send request to next ME

	_pkt_copier_send_req_to_next_me($request, $parent_meta)

	// issue read for next request

	_pkt_copier_get_request()

	// end of phase 2: wait for the reference count write, the request read
	// and our turn, then start over

	ctx_arb[sig_parent_meta_wr, sig_scr_req, sig_next_thread], br[phase1#]


	// error cases
	/////////////////////

ph1_no_request#:

	// nothing to do in phase 1: just wait for our turn

	ctx_arb[sig_next_thread]

	// signal next thread

	local_csr_wr[SAME_ME_SIGNAL, pkt_copier_next_thread_gpr]

	// issue read for next request

	_pkt_copier_get_request()

	// wait for the request read and our turn, then start over

	ctx_arb[sig_scr_req, sig_next_thread], br[phase1#]

#endm

/////////////////////////////////////////////////////////////////////////////////////
// 
// _pkt_copier_2nd_me()
//
// 	Description:
// 	
//		Implement packet copier functionality in 2nd ME in 2 ME configuration.
//
//		Each iteration produces at most one copy: it takes the prefetched
//		child buffer, builds and writes the child meta data, sends the
//		response, prefetches the next buffer and, once all copies of the
//		current request are done, receives the next request from the
//		previous ME.
//
//		The macro never falls out of its end: every path branches back to
//		phase1#.
//
//	Output
//
//		None.
//
//	Input
//
//		None.
// 
/////////////////////////////////////////////////////////////////////////////////////
#macro	_pkt_copier_2nd_me()

phase1#:

	// signal next thread

	local_csr_wr[SAME_ME_SIGNAL, pkt_copier_next_thread_gpr]

	// check if we have a prefetch buffer (zero handle = none)

	alu[--, --, B, $prefetch_handle]

	beq[ph1_no_prefetch_handle#]

	// check if we have entry to process (zero _PORT_MASK = none)

	alu[--, --, B, _PORT_MASK]

	beq[ph1_nothing_to_process#]


phase1_start_process#:


	// store the prefetch handle

	alu[cur_child_handle, --, B, $prefetch_handle]

	// we have something to process. Get next copy number

	_pkt_copier_get_next_copy_num()
	
	// create child meta data

	_pkt_copier_create_child_meta()

	// now child meta data is complete 

	_pkt_copier_write_child_meta()

	// now set the SOP/EOP and Cell count bits: the most significant byte
	// of the child handle is taken from _SOP_EOP_SEG_CNT

	ld_field[cur_child_handle, 1000, _SOP_EOP_SEG_CNT, <<0]

	// create response

	_pkt_copier_create_response()

	// send response; response_ring_full# branches back here, so this spins
	// until the response has been written

check_ring_full#:

	_pkt_copier_check_n_write_response(response_ring_full#)

#ifdef	_DEBUG_COUNTERS_

	alu[@pkt_copier_num_rsp, @pkt_copier_num_rsp, + , 1]

#endif

	// prefetch another buffer

	dl_buf_alloc($prefetch_handle, CHILD_FREELIST_ID, sig_buf_alloc, SIG_NONE, FREELIST_ID_TYPE)

	// finally check if we are done with copies

	_pkt_copier_is_request_complete(copies_still_pending#)

	// fetch next request

	_pkt_copier_rcv_req_from_prev_me()

copies_still_pending#:

	// wait for everything issued in this iteration and for our turn, then
	// start over

	ctx_arb[sig_buf_alloc, sig_next_thread, sig_scr_rsp, sig_child_meta_wr], br[phase1#]

	// error cases
	///////////////////

response_ring_full#:

	// retry the response write (busy wait)

	nop
	br[check_ring_full#]


ph1_no_prefetch_handle#:

#ifdef	_DEBUG_COUNTERS_

	alu[@pkt_copier_buf_alloc_fail, @pkt_copier_buf_alloc_fail, +, 1]

#endif

	// prefetch another buffer

	dl_buf_alloc($prefetch_handle, CHILD_FREELIST_ID, sig_buf_alloc, SIG_NONE, FREELIST_ID_TYPE)

	// check if the copies of the current request are still pending; if
	// they are, keep the current request

	_pkt_copier_is_request_complete(err_copies_still_pending#)

	// otherwise fetch the next request from the previous ME

	_pkt_copier_rcv_req_from_prev_me()

err_copies_still_pending#:

	// no response or child meta write was issued on this path: wait only
	// for the buffer allocation and our turn

	ctx_arb[sig_buf_alloc, sig_next_thread], br[phase1#]

	// actually, we will never come here

ph1_nothing_to_process#:

	// fetch next request
	// NOTE(review): processing continues unconditionally afterwards, so
	// this relies on _pkt_copier_rcv_req_from_prev_me() always delivering
	// a request - confirm against its definition.

	_pkt_copier_rcv_req_from_prev_me()

	br[phase1_start_process#]

#endm


///////////////////////////////////////////////////////////////
//	CODE EXECUTION STARTS HERE
///////////////////////////////////////////////////////////////

//
// Entry point. Exactly one init/main pair is assembled, selected at build
// time:
//   PKT_COPIER_2ME undefined              -> 1 ME configuration
//   PKT_COPIER_2ME + PKT_COPIER_1ST_ME    -> first ME of the 2 ME pipeline
//   PKT_COPIER_2ME + PKT_COPIER_2ND_ME    -> second ME of the 2 ME pipeline
// Defining PKT_COPIER_2ME without either stage macro is a build error.
//
// Each main-loop macro branches back to its own first phase, so the
// surrounding .while(1) is never left.
//
#ifndef	PKT_COPIER_2ME

init#:
	; perform initialization
	_pkt_copier_init()

main#:

	; enter an infinite loop
	.while(1)

	_pkt_copier()

	.endw

#else
	// 2 ME pipeline

#if		defined(PKT_COPIER_1ST_ME)

init#:

	// first ME: reads requests and parent meta data

	_pkt_copier_1st_me_init()

main#:

	.while (1)

	_pkt_copier_1st_me()

	.endw


#elif	defined(PKT_COPIER_2ND_ME)

init#:

	// second ME: creates the child buffers and sends the responses

	_pkt_copier_2nd_me_init()

main#:

	.while (1)

	_pkt_copier_2nd_me()

	.endw


#else
#error	"Define either PKT_COPIER_1ST_ME or PKT_COPIER_2ND_ME"
#endif
#endif


#endif	// _PKT_COPIER_UC_ 
