//------------------------------------------------------------------------------------
//                                                                      
//                   I N T E L   P R O P R I E T A R Y                   
//                                                                       
//      COPYRIGHT (c)  1998-99 BY  INTEL  CORPORATION.  ALL RIGHTS          
//      RESERVED.   NO  PART  OF THIS PROGRAM  OR  PUBLICATION  MAY      
//      BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A    
//      RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER    
//      LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,    
//      MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT    
//      THE PRIOR WRITTEN PERMISSION OF :                                
//                                                                       
//                         INTEL  CORPORATION                            
//                                                                      
//                      2200 MISSION COLLEGE BLVD                        
//                                                                       
//                SANTA  CLARA,  CALIFORNIA  95052-8119                  
//                                                                       
//------------------------------------------------------------------------------------
// rec_lmatch.uc
// IPV4 longest specific address match
//
//
// system: SA1200
// subsystem: IP router microcode
// usage: example
// author: dfh 9/28/97
//
// revisions:
//  dfh     Feb 28, 2000    use ip.uc macros, ip_da_extract, ip_trie5_lookup


// ---------------------------SA1200 microcode--------------------------	
//
// Prerequisite Register Usage:
//	symbol
//	 hi64k_base		  - base address of 3 adjacent tables:
//   					1. 64k entry lookup table, each entry 32 bits, total 256KB
//					 	2. 256 entry lookup table, each entry 32 bits, total 1KB 
//						3. many 16 entry trie tables, each entry  32 bits
//   route_table_base -	sdram route entries 128K x 8 longwords (1MB)
//						32 byte stride = offset must be shifted left 5
//	 ip_da			  - 32 bit IP destination address
//
//
// description:
//	Get the route entry whose ip address has the most specific match with ip_da.
//	Also known as longest-prefix match. 
//  lookup result:
//   $$route_entry0-3	- 4 longwords of route entry info,
//					      including the forwarding interface and MAC DA
// Algorithm:
//		please refer to IXP1200 Software Reference Manual (SRM)


; ADDED BY TUNGFAI
; Reservation Table Lookup
; Linearly scans the SRAM reservation table (entry count read from
; TABLE_COUNT_LOCATION, first entry at RESERVATION_TABLE_BASE, stride
; TABLE_RECORD_LEN) for an entry whose longword 0 equals ip_sa and whose
; longword 1 equals ip_da. Locks the entry iff it matches.
;
; Entry layout as read by this macro (longword index):
;	0 = source address		1 = destination address
;	2 = rate				3 = burst (bits 30..0) + reserve type (bit 31)
;	4 = last_cycle0			5 = last_cycle1
;	6 = residue tokens
;
; Input:	ip_sa (source address)       - read only, not modified
;			ip_da (destination address)  - read only, not modified
; Output:	(use rate to determine if lookup success, assume non-zero for success)
;			All outputs except table_addr are written ONLY when an entry matches,
;			so the caller must pre-clear rate before invoking the macro.
;			type (bit 31 of longword 3, left in place:
;			      0x00000000 = Premium, 0x80000000 = Assured)
;			rate
;			burst (longword 3 with bit 31 cleared)
; 			last_cycle0, last_cycle1
;			residue_tokens
;			table_addr (address of the matching entry; on a miss it is the
;			            address of the last entry examined)
; Clobbers:	$xfer2 (used for every SRAM read)
; NOTE: It won't lock the whole entry, instead, it locks the first longword
;       (sram read_lock on longword 0). It shouldn't be a problem if everything
;       follows the same rule. The lock is released with ReservationTable_Unlock.
; NOTE(review): the compare of longwords 0/1 happens before the lock is taken and
;       the entry is not re-validated afterwards - confirm that no writer can
;       change the addresses of a live entry.
#macro ReservationTable_Lookup_Lock[ip_sa, ip_da, type, rate, burst, last_cycle0, last_cycle1, residue_tokens, table_addr]
.local current_count tmp2
	; Save the value currently readable from $xfer2
	alu[tmp2, --, B, $xfer2]

	; Fetch the number of table entries
	immed[table_addr, RESERVATION_TABLE_BASE]
	immed[current_count, TABLE_COUNT_LOCATION]
	sram[read, $xfer2, current_count, 0, 1], ctx_swap, optimize_mem
	alu[current_count, --, B, $xfer2]

next_entry#:

	; Nothing (left) to examine
	.if current_count == 0x0
		br[lookup_finish#]
	.endif


	; Longword 0: source address
	sram[read, $xfer2, table_addr, 0, 1], ctx_swap, optimize_mem

	.if $xfer2 == ip_sa
		; Longword 1: destination address
		sram[read, $xfer2, table_addr, 1, 1], ctx_swap, optimize_mem

		.if $xfer2 == ip_da
			; Lock the first longword instead of the whole entry
			sram[read_lock, $xfer2, table_addr, 0, 1], ctx_swap, optimize_mem

			; Get the rate
			sram[read, $xfer2, table_addr, 2, 1], ctx_swap, optimize_mem
			alu[rate, --, B, $xfer2]

			; Get the burst/reserved type
			sram[read, $xfer2, table_addr, 3, 1], ctx_swap, optimize_mem

			; Build the indirect shift amount (32 - 31 = 1) for a mask on bit 31.
			; current_count is no longer needed once an entry has matched, so it
			; is reused as scratch here; the caller's ip_sa is left untouched.
			immed[current_count, 0x1F]
			alu[current_count, current_count, B-A, 1, <<5]

			; <<indirect takes its amount from the instruction just before it,
			; so the amount is re-presented ahead of each shifted operation.
			alu[--, current_count, B, 0]
			alu_shf[burst, $xfer2, AND~, 0x1, <<indirect]	; burst = longword 3 without bit 31

			alu[--, current_count, B, 0]
			alu_shf[type, $xfer2, AND, 0x1, <<indirect] ;0x00000000 or 0x80000000

			; Get cycle0 and cycle1
			sram[read, $xfer2, table_addr, 4, 1], ctx_swap, optimize_mem
			alu[last_cycle0, --, B, $xfer2]
			sram[read, $xfer2, table_addr, 5, 1], ctx_swap, optimize_mem
			alu[last_cycle1, --, B, $xfer2]

			; Get residue tokens
			sram[read, $xfer2, table_addr, 6, 1], ctx_swap, optimize_mem
			alu[residue_tokens, --, B, $xfer2]

			br[lookup_finish#]
		.endif
	.endif

	; Decrement the count
	alu[current_count, current_count, -, 1]

	.if current_count > 0x0
		; Increment the address
		alu[table_addr, table_addr, +, TABLE_RECORD_LEN]
		br[next_entry#]
	.endif



lookup_finish#:
	; Restore the original values
	; NOTE(review): on the IXP1200 a transfer register used as a source and the
	; same name used as a destination are believed to be separate read/write
	; registers; if so this does not bring back the value saved at the top of
	; the macro - confirm against the hardware reference.
	alu[$xfer2, --, B, tmp2]

	//sram[unlock, --, current_count, 0, 1], priority
.endlocal

#endm

; ADDED BY TUNGFAI
; Reservation Table Update specified by the address
; Writes three longwords of the entry at table_addr back to SRAM, one
; single-longword write (with ctx_swap) per field:
;	longword 4 <- last_cycle0
;	longword 5 <- last_cycle1
;	longword 6 <- residue_tokens
; Input:	table_addr     (address of the entry to update)
;			last_cycle0    (new value for longword 4)
;		    last_cycle1    (new value for longword 5)
;			residue_tokens (new value for longword 6)
; Clobbers:	$xfer2 (used as the source of every write)
; The macro does not lock or unlock the entry itself.
#macro ReservationTable_Update[table_addr, last_cycle0, last_cycle1, residue_tokens]
	; Should be no need to backup original values
	; longword 4: last_cycle0
	alu[$xfer2, --, B, last_cycle0]
	sram[write, $xfer2, table_addr, 4, 1], ctx_swap

	; longword 5: last_cycle1
	alu[$xfer2, --, B, last_cycle1]
	sram[write, $xfer2, table_addr, 5, 1], ctx_swap

	; longword 6: residue tokens
	alu[$xfer2, --, B, residue_tokens]
	sram[write, $xfer2, table_addr, 6, 1], ctx_swap

#endm

; ADDED BY TUNGFAI
; Unlock the entry specified by the address
; Issues an sram unlock on longword 0 of the entry, which is the longword
; that ReservationTable_Lookup_Lock locks with read_lock.
; The unlock is issued with the "priority" token and without ctx_swap.
; Input:	table_addr (address of the locked entry)
#macro ReservationTable_Unlock[table_addr]
	sram[unlock, --, table_addr, 0, 1], priority
#endm




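; ADDED BY TUNGFAI
; calc_tokens (defined further below, after the mul helper macro):
; Calculate the new tokens according to the time elapsed since the last update
; Input: 	rate
;		 	last_cycle0
;		 	last_cycle1
;			burst
;			residue_token
;			new_cycle0 (read by the macro itself into $xfer2, no need to pass)
;			new_cycle1 (read by the macro itself into $xfer3, no need to pass)
; Return:	new_tokens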

// mul[accum, op1, op2] - shift-and-add multiply built from 32 "+ifsign" steps.
// op1: rate (16 bits, higher 16 bits are zero)
//      NOTE: op1 is shifted left by 15 IN PLACE by the first instruction, so the
//      register passed by the caller is modified.
// op2: time cycle difference (32 bits); only read
// accum: result register; cleared first, then one add step per bit of op2,
//      starting with bit 0 and ending with bit 31. Every step after the first of
//      each half adds onto the running sum shifted right by one.
// NOTE(review): the steps are written as two flag-setting shifts followed by two
//      adds, which suggests each +ifsign consumes the sign flag produced two
//      instructions earlier, and that the net result is (rate * op2) >> 16.
//      Confirm against the IXP1200 programmer's reference before reordering
//      or inserting instructions here.
#macro mul[accum, op1, op2]
.local mask
	// pre-scale the multiplicand (modifies the caller's op1 register)
	alu_shf[op1, --, b, op1, <<15]

	immed[accum, 0]
	// op2 bits 0 and 1 moved into the sign position
	alu_shf[--, --, b, op2, <<31]
	alu_shf[--, --, b, op2, <<30]

	// first step of this half adds onto accum without the >>1
	alu[accum, op1, +ifsign, accum]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 2 and 3
	alu_shf[--, --, b, op2, <<29]
	alu_shf[--, --, b, op2, <<28]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 4 and 5
	alu_shf[--, --, b, op2, <<27]
	alu_shf[--, --, b, op2, <<26]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 6 and 7
	alu_shf[--, --, b, op2, <<25]
	alu_shf[--, --, b, op2, <<24]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 8 and 9
	alu_shf[--, --, b, op2, <<23]
	alu_shf[--, --, b, op2, <<22]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 10 and 11
	alu_shf[--, --, b, op2, <<21]
	alu_shf[--, --, b, op2, <<20]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 12 and 13
	alu_shf[--, --, b, op2, <<19]
	alu_shf[--, --, b, op2, <<18]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 14 and 15
	alu_shf[--, --, b, op2, <<17]
	alu_shf[--, --, b, op2, <<16]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

//  end of the lower part

//  start of the higher half

//  discard the lower half
//  (mask = 0xffff0000; the extra >>1 stands in for the shift that the first,
//   unshifted add step of the higher half does not perform)
	immed_w1[mask, 0xffff]
	immed_w0[mask, 0x0000]
	alu_shf[accum, accum, AND, mask]
	alu_shf[accum, --, b, accum, >>1]

// 	alu_shf[op1, --, b, op1, <<15]

	// op2 bits 16 and 17
	alu_shf[--, --, b, op2, <<15]
	alu_shf[--, --, b, op2, <<14]

	alu[accum, op1, +ifsign, accum]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 18 and 19
	alu_shf[--, --, b, op2, <<13]
	alu_shf[--, --, b, op2, <<12]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 20 and 21
	alu_shf[--, --, b, op2, <<11]
	alu_shf[--, --, b, op2, <<10]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 22 and 23
	alu_shf[--, --, b, op2, <<9]
	alu_shf[--, --, b, op2, <<8]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 24 and 25
	alu_shf[--, --, b, op2, <<7]
	alu_shf[--, --, b, op2, <<6]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 26 and 27
	alu_shf[--, --, b, op2, <<5]
	alu_shf[--, --, b, op2, <<4]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 28 and 29
	alu_shf[--, --, b, op2, <<3]
	alu_shf[--, --, b, op2, <<2]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

	// op2 bits 30 and 31
	alu_shf[--, --, b, op2, <<1]
	alu_shf[--, --, b, op2]

	alu[accum, op1, +ifsign, accum, >>1]
	alu[accum, op1, +ifsign, accum, >>1]

.endlocal
#endm

// calc_tokens - token bucket refill.
// Reads the current cycle counter and computes the tokens available now:
//     new_tokens = min(burst, mul(rate, elapsed_cycles) + residue_token)
// where elapsed_cycles is measured against last_cycle1:last_cycle0
// (high word : low word). If the elapsed time is too large to be handled by
// the 32-bit multiply, the bucket is simply filled to burst.
//
// Input:   rate           - passed to mul[] as op1. mul shifts op1 left by 15 in
//                           place, so rate is NOT preserved across this macro.
//          last_cycle0    - low word of the previous time stamp (not modified)
//          last_cycle1    - high word of the previous time stamp (not modified)
//          burst          - bucket depth / upper bound for new_tokens
//          residue_token  - tokens left over from the previous update
// Output:  new_tokens
//          $xfer2, $xfer3 - current cycle count as delivered by the csr read
//                           (low word in $xfer2, high word in $xfer3 is what this
//                           code assumes - TODO confirm the word order)
// Clobbers: $xfer2, $xfer3, rate (see above)
#macro calc_tokens[rate, last_cycle0, last_cycle1, burst, residue_token, new_tokens]
.xfer_order $xfer2 $xfer3
.local tempa tempb

	csr[read, $xfer2, cycle_cnt], ctx_swap

	alu[tempa, --, B,$xfer2]		; tempa = current low word
	alu[tempb, --, B,$xfer3]		; tempb = current high word


	alu[tempb, tempb, -, last_cycle1]	; tempb = difference of the high words

// High words differ by two or more: at least 2^32 cycles have elapsed, which
// cannot be represented below, so assign maximum(burst) directly.
// (A difference of exactly 1 is handled at the bottom of the macro.)
	.if tempb > 1
		alu[new_tokens, --, B, burst]
		br[done_calc#]
	.endif

// calculate the token, from the lower 32bits, since higher 32bits are the same
	.if tempb == 0
		alu[tempa, tempa, -, last_cycle0]
		mul[new_tokens, rate, tempa]
		alu[new_tokens, new_tokens, +, residue_token]
		alu[tempb, --, B, burst]
		.if new_tokens > tempb
			alu[new_tokens, --, B, burst]	; clamp to the bucket depth
		.endif
		br[done_calc#]
	.endif

// deal with (current high word - last_cycle1) = 1
// right shift 1 bit put an 1 at most significant bit of tempa
// discard the difference of the least significant bit
// i.e. work with the 33-bit time stamps divided by two so they fit in 32 bits

	dbl_shf[tempa, 0x1, tempa, >>1]
	; tempb (== 1 here) is no longer needed, so it holds the halved previous
	; low word; the caller's last_cycle0 is left untouched.
	alu_shf[tempb, --, B, last_cycle0, >>1]

	alu[tempa, tempa, -, tempb]


	// tempa < 0 ---> bit31 is '1' --> difference is more than 32 bits long
	.if tempa < 0 
		// assign maximum token
		alu[new_tokens, --, B, burst]
	.else
		// calculate the tokens
		alu_shf[tempa, --, B, tempa, <<1]	; undo the halving
		mul[new_tokens, rate, tempa]
		alu[new_tokens, new_tokens, +, residue_token]
		alu[tempb, --, B, burst]
		.if new_tokens > tempb
			alu[new_tokens, --, B, burst]	; clamp to the bucket depth
		.endif

	.endif

done_calc#:

.endlocal
#endm



#include "ip.uc"

.xfer_order $$xfer0 $$xfer1 $$xfer2 $$xfer3 $$xfer4 $$xfer5 $$xfer6 $$xfer7
.operand_synonym $$route_entry0 $$xfer0
.operand_synonym $$route_entry1 $$xfer1
.operand_synonym $$route_entry2 $$xfer2
.operand_synonym $$route_entry3 $$xfer3

.import_var hi64k_base route_table_base


; route_lookup#: extract the IP destination address from the received header and
; run the longest-prefix-match lookup. Branches to qos# when the br!=0 condition
; holds after the lookup; otherwise records IP_NO_ROUTE in exception and discards
; the packet.
; NOTE(review): presumably ip_trie5_lookup leaves the zero condition set when
; route_ent_offset is 0 (no route) - confirm against ip.uc.
route_lookup#:
	ip_da_extract[ip_da, 14]									; get da. ip header starts at read xfer byte 14
	ip_trie5_lookup[route_ent_offset, ip_da, hi64k_base, 0]		; perform longest prefix match
	br!=0[qos#], guess_branch									; route found: continue with QoS classification
    immed[exception, IP_NO_ROUTE]								; no route: flag the reason ...
    br[packet_discard#]											; ... and drop the packet

; Added by tungfai
; qos#: DiffServ classification and token-bucket policing.
; With DIFFSERV defined:
;   - queue_id defaults to 1 (non-premium traffic)
;   - the (ip_sa, ip_da) pair is looked up in the reservation table
;   - for a reserved flow (rate > 0 after the lookup) the token bucket is
;     refilled and compared against the IP total length:
;       Premium (type == 0): conforming packets go to queue 0,
;                            non-conforming packets are discarded
;       Assured (type != 0): always forwarded, TOS bit 7 marks conforming packets
;   - the table entry is updated and unlocked
; Without DIFFSERV everything goes to queue 0.
; After calc_tokens the register "rate" is reused as the conformance flag:
;   rate = 0  enough tokens for this packet
;   rate = 1  not enough tokens
; NOTE(review): the lookup and calc_tokens overwrite $xfer2/$xfer3, and
; write_layers23# later reads $xfer2 as "sa bytes 2-5" - confirm that value is
; still valid when DIFFSERV is enabled.
; NOTE(review): the lookup locks any matching entry, but the unlock below is only
; reached when rate > 0; an entry stored with rate 0 would stay locked.
qos#:

#ifdef DIFFSERV

	.local ip_tos type rate burst last_cycle0 last_cycle1 residue_tokens table_addr shift_parameter

	; EDITED BY TUNGFAI

	; TODO: Make sure the order of $xfer2 and $xfer3 - tungfai
	; csr[read, $xfer2, cycle_cnt], ctx_swap

	; Default queue_id to be 1 (Non Premium Traffic)
	immed[queue_id, 0x1]

	; Expected Table Format
	; 	SRC	| DEST | RATE | P/A + BURST | LAST CYCLE (2 longwords) | AVAILABLE TOKEN

	.local ip_sa
	; Extract Src/Dest Addresses and TOS Fields
	ip_sa_extract[ip_sa, BYTEOFFSET14]
	ip_tos_extract[ip_tos, BYTEOFFSET14]

	; Clear Corresponding TOS field bits
	; Bit 6, and 7 of TOS field are not used
	;   according to RFC 795
	; Assume Bit 6 - Premium Service
	;        Bit 7 - Assured Service
	; shift_parameter = 32 - 6; it is presented again right before every
	; <<indirect below and is used as the shift amount for the bit 6/7 masks.
	immed[shift_parameter, 0x6]
	alu[shift_parameter, shift_parameter, B-A, 1, <<5]
	alu[--, shift_parameter, B, 0]
	alu_shf[ip_tos, ip_tos, AND~, 0x3, <<indirect]

	; DEBUG - Comment out for debug and default best-effort traffic
	; (rate must be cleared here: the lookup only writes it on a match)
	immed[rate, 0x0]

	; Lookup the Table/Memory
	ReservationTable_Lookup_Lock[ip_sa, ip_da, type, rate, burst, last_cycle0, last_cycle1, residue_tokens, table_addr]

	.endlocal

	; Decide if this flow is reserved
	.if rate > 0x0
		.local new_tokens ip_totallen

		; Find Additional Tokens
		calc_tokens[rate, last_cycle0, last_cycle1, burst, residue_tokens, new_tokens]

		; Remember the time stamp calc_tokens just read (still in $xfer2/$xfer3)
		alu[last_cycle0, --, B, $xfer2]
		alu[last_cycle1, --, B, $xfer3]

		//immed[new_tokens, 0x0FFF]

		; Find out Packet Size
		ip_totallen_extract[ip_totallen, BYTEOFFSET14]

		; Calculate updated values
		; NOTE: Calculate here shouldn't cause problem if the premium packet will be enqueued
		;       since the context will branch to [next_packet#] after enqueue 
		;	    This saves several registers.
		; rate = 0  tokens > packet length
		; rate = 1  tokens <= packet length
		.if new_tokens > ip_totallen
			immed[rate, 0x0]
			alu[residue_tokens, new_tokens, -, ip_totallen]
		.else
			immed[rate, 0x1]
			immed[residue_tokens, 0x0]
		.endif

		.endlocal

		; Decide what type of service, Premium/Assured
		;   Currently, only Premium
		.if type == 0x0
			; Premium Service

			; Set Bit 6 of TOS to 1
			alu[--, shift_parameter, B, 0]
			alu_shf[ip_tos, ip_tos, OR, 0x01, <<indirect]

			.if rate == 0x0

				; Conforming premium packet: use the premium queue
				immed[queue_id, 0x0]

			.else
				; Discard the packet
				; (the table entry is left as it was: no update on this path)

				; Unlock the table
				ReservationTable_Unlock[table_addr]
				br[packet_discard#]
				nop

			.endif



		.else
			; Assured Service
			; Let it pass anyway
			; Set the Bit iff has enough tokens
			; (the conformance flag lives in rate, exactly as in the Premium branch)
			.if rate == 0x0
				; Set Bit 7 of TOS to 1
				; 0x02 shifted by the same amount as the 0x3 mask above is the
				; upper of the two cleared bits
				alu[--, shift_parameter, B, 0]
				alu_shf[ip_tos, ip_tos, OR, 0x02, <<indirect]
			.endif

		.endif

		; Update information in the table
		ReservationTable_Update[table_addr, last_cycle0, last_cycle1, residue_tokens]
		ReservationTable_Unlock[table_addr]


	; FOR DEBUG 11/18/00 - tungfai
	.else

		//br[packet_discard#]
		nop
		nop
	.endif


	; Write the TOS field back to IP header
	; (currently disabled: ip_tos is computed above but not written to the packet)
	; JIA-CHENG
	//immed_w1[queue_id, 0xffff]
	//immed_w0[queue_id, 0xff00]
	//sdram[read, $$xfer0, packet_buf_addr, 3, 1], ctx_swap
	//alu[$$xfer0, $$xfer0, AND, queue_id]
	//alu[$$xfer0, $$xfer0, AND, ip_tos]
	//sdram[write, $$xfer0, packet_buf_addr, 3, 1]


	; DEBUG - assume everything go to queue 0 first - tungfai
	//immed[queue_id, 0x0]

	; only determine up to queue id
	; everything left for rec_enqueue

	.endlocal
#else

	; No DiffServ support compiled in: single queue
	immed[queue_id, 0x0]

#endif


; ip_get_route#: fetch the route entry selected by route_ent_offset from the
; SDRAM route table and save the output port taken from its first longword.
; Before the fetch it waits for the sdram signal and, when RECEIVE16 is not
; defined, issues the THREAD_DONE fast write.
ip_get_route#:
.local temp_route_base
    immed_w0[temp_route_base, route_table_base]				; load shared address value (low 16 bits)
    immed_w1[temp_route_base, route_table_base>>16]			; high 16 bits
	

// the transfer from rfifo was done at the top of ipverify, in order to free the rfifo element earlier
// we should have the signal back well before now
//
	ctx_arb[sdram]									    ; is transfer from rfifo done?

//	Write thread done as soon as the rfifo element is transferred to sdram
//  By the time it turns around and gets to the Receive Scheduler, this thread will be done
//
#ifndef RECEIVE16						; if no receive scheduler, there are 16 receive threads
			fast_wr[3, THREAD_DONE]		; notify receive scheduler with EOP encode
#endif


//---------------------------------------------------------------------------------------
#ifdef LAYER4
// do some busywork to emulate layer 4+
// (the results are not used; note that this sequence overwrites $xfer0-$xfer3,
//  and temp_base3 is not defined in this part of the file)
	delay[8]
	sram[read, $xfer2, temp_base3, 2, 1], ctx_swap
	delay[8]
	move[$xfer0, 0]
	move[$xfer1, 1]
	move[$xfer2, 0]
	move[$xfer3, 1]
	hash2_48[$xfer0], ctx_swap						; 2 hash lookups
	delay[8]
	sram[read, $xfer2, temp_base3, 2, 1], ctx_swap
	delay[8]
	sram[read, $xfer2, temp_base3, 2, 1], ctx_swap
#endif
//---------------------------------------------------------------------------------------

// get route entry
// (read with a count of 2, starting at $$route_entry0; swaps out until the read completes)
	sdram[read, $$route_entry0, temp_route_base, route_ent_offset, 2], optimize_mem, ctx_swap
	alu[output_port, --, b, $$route_entry0]				; save output port for enqueue

.endlocal		// temp_route_base


; got_output_port# / write_layers23#: rebuild the layer-2 header in the SDRAM write
; transfer registers and write the start of the modified packet to packet_buf_addr.
;   $$xfer0 <- $$route_entry1                 (next hop da bytes 0-3)
;   $$xfer1 <- $$route_entry2 combined with sa01 (operand shifted by 16 depends on
;              LITTLE_ENDIAN)
;   $$xfer2 <- $xfer2                         (sa bytes 2-5)
; NOTE(review): $xfer2 is taken as-is from the SRAM read transfer register; the
; DIFFSERV code at qos# and the LAYER4 busywork at ip_get_route# both read into
; $xfer2 before this point - confirm it still holds the source address bytes.
; NOTE(review): the write has a count of 4 but only $$xfer0-$$xfer2 are loaded
; here; the remaining transfer registers are presumably set up elsewhere.
got_output_port#:
write_layers23#:	
// modify layer 2. insert MAC DA from route entry, write first 32 bytes to sdram  
	move[$$xfer0, $$route_entry1]							; next hop da bytes 0-3
#ifdef LITTLE_ENDIAN
	alu[$$xfer1, $$route_entry2, +, sa01, <<16]				; next hop da bytes 4-5: 0 merge with sa bytes 0-1
#else
	alu[$$xfer1, sa01, +, $$route_entry2, <<16]				; next hop da bytes 4-5: 0 merge with sa bytes 0-1
#endif
	move[$$xfer2, $xfer2]									; sa bytes 2-5
	sdram[write, $$xfer0, packet_buf_addr, 0, 4]			; write first 32 bytes of modified mpacket

.endlocal		// sa01 (closes a .local scope opened before this part of the file)

; rec_enqueue.uc follows

