
#include "chare.h"
#include "machine.h"
#include "extern.h"

/*
 * MASK[r][*] holds, in octal, the 6-bit patterns that have exactly r+1
 * bits set.  Rows shorter than 20 entries are padded with 000 (supplied
 * here by the implicit zero-fill of a partial initializer).
 * McNodeInit XORs a PE number with these patterns to build its
 * neighbor list.
 */
static int MASK[6][20] = {
    /* row 0: one bit set, 6 patterns */
    { 001, 002, 004, 010, 020, 040 },

    /* row 1: two bits set, 15 patterns */
    { 003, 005, 011, 021, 041,
      006, 012, 022, 042, 014,
      024, 044, 030, 050, 060 },

    /* row 2: three bits set, 20 patterns */
    { 007, 013, 023, 043, 015,
      025, 045, 031, 051, 061,
      016, 026, 046, 032, 052,
      062, 034, 054, 064, 070 },

    /* row 3: four bits set, 15 patterns */
    { 017, 027, 047, 033, 053,
      063, 035, 055, 065, 071,
      036, 056, 066, 072, 074 },

    /* row 4: five bits set, 6 patterns */
    { 037, 057, 067, 073, 075, 076 },

    /* row 5: all six bits set, 1 pattern */
    { 077 }
};
/*	{0x01, 0x02, 0x04, 0x08,0x00,0x00},
	{0x03, 0x05, 0x06, 0x09,0x0A,0x0C},
	{0x07, 0x0B, 0x0D, 0x0E,0x00,0x00},
	{0x0F, 0x00, 0x00, 0x00,0x00,0x00}};
*/
long McGetSysClk();

/*
 * McKillSys -- start a system-wide shutdown from the calling PE.
 *
 * Broadcasts a SYSMSGTYPE_KILL message to ALLNODES, prints this PE's
 * run statistics, and then terminates:
 *   - a non-host PE also sends a KILL to the host, kills all processes
 *     on its own node and exits;
 *   - the host spins for a while (presumably to let the kill messages
 *     drain -- confirm), kills the user process on every node and exits.
 * The exit status is always 5.  This function does not return.
 */
McKillSys()
{
    SYSMSG	*mPtr, *CreateSysMsg();
    int		k,peNum = McMyPeNum();
    unsigned	u,spin;
    float	f;

    /* broadcast kill msg to all nodes */
    mPtr = CreateSysMsg(SYSMSGTYPE_KILL,0,NULLPTR,
		peNum,NULLPTR,ALLNODES,-1,
		-1,FALSE,sizeof(SYSMSG),0,0,NULLPTR,NULLPTR,0);
    McSendMsg(mPtr,peNum);
    /**OsMemUsage(peNum);**/

    /* f = hops per consumed chare, guarded against division by zero */
    if (PE(peNum).chareNumConsumed > 0) 
	f = (float) ((float)op.hopsNum / (float)PE(peNum).chareNumConsumed);
    else f = 0.0;

    /* McGetSysClk takes the PE number; calling it with no argument
       is undefined behavior */
    printf("\n#%d clk%ld ch#(o%d,i%d,q%d) da#(o%d,i%d,q%d) l%d s%d r%d %.3f h%.3f %d",
	peNum,McGetSysClk(peNum),
	PE(peNum).chareNumGenerated,PE(peNum).chareNumConsumed,
	PE(peNum).inChareQ.maxLen,
	PE(peNum).dataNumGenerated,PE(peNum).dataNumConsumed,
	PE(peNum).inDataQ.maxLen,
	PE(peNum).loadNum,PE(peNum).routNum,PE(peNum).contNum,
	PE(peNum).suspendTime,f,PE(peNum).chareNumRemote);

    if (PE(peNum).isHost != TRUE) {
	/* tell the host as well, then take this node down */
        mPtr = CreateSysMsg(SYSMSGTYPE_KILL,0,NULLPTR,
		peNum,NULLPTR,CubeGetHostNum(),-1,
		-1,FALSE,sizeof(SYSMSG),0,0,NULLPTR,NULLPTR,0);
        McSendMsg(mPtr,peNum);
        CubeKillNode(peNum,ALLPIDS);
        exit(5);
	}
    else {
	/* busy-wait delay; unsigned arithmetic so that the deliberate
	   wrap-around of k^5 is well defined (signed overflow is not) */
	for (k = 0; k < 99999; k++) {
	    u = (unsigned) k;
	    spin = u*u*u*u*u/2/2/2/2;
	    }
	CubeKillNode(ALLNODES,USRPID);
        exit(5);
	}
}

/*
 * McSysFinal -- final shutdown of PE `peNum` (the original comment says
 * it runs after a kill message has been received).
 *
 * Prints this PE's run statistics, then kills processes and exits with
 * status 5.  The host spins for a while and kills every process on all
 * nodes; any other PE kills only the processes on its own node.
 * This function does not return.
 */
McSysFinal(peNum)
int peNum;
{
    int		i;
    unsigned	u,spin;
    float	f;

    /**OsMemUsage(peNum);**/
    /* f = hops per consumed chare, guarded against division by zero */
    if (PE(peNum).chareNumConsumed > 0) 
	f = (float) ((float)op.hopsNum / (float)PE(peNum).chareNumConsumed);
    else f = 0.0;

    /* McGetSysClk takes the PE number; calling it with no argument
       is undefined behavior */
    printf("\n#%d clk%ld ch#(o%d,i%d,q%d) da#(o%d,i%d,q%d) l%d s%d r%d %.3f h%.3f %d",
	peNum,McGetSysClk(peNum),
	PE(peNum).chareNumGenerated,PE(peNum).chareNumConsumed,
	PE(peNum).inChareQ.maxLen,
	PE(peNum).dataNumGenerated,PE(peNum).dataNumConsumed,
	PE(peNum).inDataQ.maxLen,
	PE(peNum).loadNum,PE(peNum).routNum,PE(peNum).contNum,
	PE(peNum).suspendTime,f,PE(peNum).chareNumRemote);

    if (PE(peNum).isHost == TRUE) {
	/* busy-wait delay; unsigned arithmetic so that the deliberate
	   wrap-around of i^5 is well defined (signed overflow is not) */
	for (i = 0; i < 99999; i++) {
	    u = (unsigned) i;
	    spin = u*u*u*u*u/2/2/2/2;
	    }
	CubeKillNode(ALLNODES,ALLPIDS);
	}
    else
    	CubeKillNode(peNum,ALLPIDS);
    exit(5);
}

/*
 * McHostInit -- host-side start-up.
 *
 *   argc, argv : the host program's command line; argv[1] names the user
 *                program to load, the optional argv[2] is handed to
 *                UsrSetReadVar.
 *
 * Allocates the host memory pool, loads the user program on all nodes,
 * sends the INPARAM block (SYSMSGTYPE_INIT) and the read-only variables
 * (SYSMSGTYPE_RVAR, in chunks of at most MAXHOSTMSG bytes) to all nodes,
 * then resets the host's PE record and starts its clock.
 * Exits with status 1 on a bad command line or when the pool cannot be
 * allocated.
 */
McHostInit(argc,argv)
int argc;
char *argv[];
{
    char *readVar,*str=NULLPTR;
    int  k,varSize,out,more,peNum = OsMyPeNum();
    SYSMSG *mPtr, *ExtendSysMsg(),*CreateSysMsg();

    if (argc < 2) {
        printf("cubeman: Incorrect command line, missing usr name\n");
        exit(1);	/* exit() without a status is undefined */
        }
    printf("loading usr program %s onto all nodes...\n",argv[1]);

    MEM_SPACE(0) = (int *)malloc(sizeof(int)*maxMem);
    MEM_PTR(0)  = 0;
    if (MEM_SPACE(0) == NULLPTR) {
	perror("Not enough mem");
	exit(1);	/* report the failure to the caller's shell */
	}

    CubeLoadNode(argv[1],ALLNODES,USRPID); 
    /* the nodes receive ip as read from the parameter file; only the
       host's own copy is forced to synchronous sends afterwards */
    CubeSynSend(SYSMSGTYPE_INIT,&ip,sizeof(INPARAM),ALLNODES,USRPID);
    ip.ringLen = 0; /* always use sync send in host */
    PE(peNum).isHost = TRUE;
    PE(peNum).nextPE = NODE0;
    PE(peNum).neighborNum = 0;

/* largest single RVAR chunk, in bytes; also used by McNodeInit */
#define MAXHOSTMSG	250000
    if (argc >2 ) str = argv[2];
    UsrSetReadVar(&readVar,&varSize,str);
    if (varSize == 0 || readVar == NULLPTR) {
	/* no read-only variables: send a bare header */
  	mPtr = CreateSysMsg(SYSMSGTYPE_RVAR,0,NULLPTR,0,0,0,0,
			0,FALSE,sizeof(SYSMSG),0,0,NULLPTR,NULLPTR,0);
        CubeSynSend(SYSMSGTYPE_RVAR,mPtr,sizeof(SYSMSG),ALLNODES,USRPID);
	}
    else {
        mPtr = ExtendSysMsg(SYSMSGTYPE_RVAR,0,readVar,0,0,0,0,0,FALSE,
			varSize+sizeof(SYSMSG),varSize,0,NULLPTR,NULLPTR,0);
	for (out=0,more=varSize+sizeof(SYSMSG); more > 0; out+=k,more-=k) {
	    k = (more < MAXHOSTMSG) ? more : MAXHOSTMSG;
/* printf("send buff %d, %d left\n",k,more-k); */
	    /* `out` counts bytes, so the offset must be applied to a
	       char pointer; mPtr+out would advance by out*sizeof(SYSMSG) */
            CubeSynSend(SYSMSGTYPE_RVAR,(char *)mPtr+out,k,ALLNODES,USRPID);
	    }
 	}

    /* reset queues and counters of the host's PE record */
    PE(peNum).myChareBlk	= (CHAREBLK*) DUMMY;
    PE(peNum).inChareQ.head	= PE(peNum).inDataQ.head	= NULLPTR;
    PE(peNum).inChareQ.tail	= PE(peNum).inDataQ.tail	= NULLPTR;
    PE(peNum).inChareQ.qLen	= PE(peNum).inDataQ.qLen	= 0;
    PE(peNum).inChareQ.maxLen	= PE(peNum).inDataQ.maxLen	= 0;
    PE(peNum).allChareQ.head	= PE(peNum).waitQ.head	= NULLPTR;
    PE(peNum).allChareQ.tail	= PE(peNum).waitQ.tail	= NULLPTR;
    PE(peNum).allChareQ.qLen	= PE(peNum).waitQ.qLen	= 0;
    PE(peNum).allChareQ.maxLen	= PE(peNum).waitQ.maxLen	= 0;
    PE(peNum).spreadQ.head	= NULLPTR;
    PE(peNum).spreadQ.tail	= NULLPTR;
    PE(peNum).spreadQ.qLen	= 0;
    PE(peNum).spreadQ.maxLen	= 0;

    PE(peNum).chareNumGenerated = PE(peNum).dataNumGenerated = 0;
    PE(peNum).chareNumConsumed  = PE(peNum).dataNumConsumed  = 0;
    PE(peNum).contNum 		= PE(peNum).currentChare     = 0;
    PE(peNum).chareNumRemote    = 0;
    PE(peNum).suspendTime = 0.0;

    McInitClk(peNum);
}


/*
 * McProcStat -- periodic housekeeping for PE `peNum`.
 *
 * Refreshes the local load figure and then, driven by the system clock:
 *   1. once per ip.CSinterval tick, copies the current values of the
 *      bound variables into a SYSMSGTYPE_BVAR message addressed to
 *      upNode;
 *   2. (TERMINATION builds, PE 0 only) once per 100 clock units, either
 *      starts a termination wave, declares the system terminated, or
 *      re-arms the busy flag;
 *   3. once per tick (every clock unit for CSGRANDDIFF, every 100
 *      otherwise) runs the load-balancing step of the selected strategy.
 */
McProcStat(peNum)
int peNum;
{
    int 	t,i,k,destPE,*ary;
    SYSMSG	*mPtr, *CreateSysMsg(), *ExtendSysMsg();

    /* McGetSysClk takes the PE number; calling it with no argument
       is undefined behavior */
    t = McGetSysClk(peNum);
    k = t/ip.CSinterval;
    McCurrentLoad(peNum,FALSE);
    if (k > PE(peNum).diffCount2) {
	PE(peNum).diffCount2 = k;
	if (bTbl.bNum > 0) {
	    /* snapshot every bound variable into the message payload */
	    ary = (int *) OsAllocMsg(sizeof(SYSMSG)+sizeof(int)*bTbl.bNum);
	    for (i = 0; i < bTbl.bNum; i++) 
		ary[i] = *bTbl.bPtr[i];
   	    mPtr = ExtendSysMsg(SYSMSGTYPE_BVAR,0,ary,
			peNum,NULLPTR,PE(peNum).upNode,-1,PE(peNum).upNode,
			FALSE,sizeof(SYSMSG)+sizeof(int)*bTbl.bNum,
			sizeof(int)*bTbl.bNum,0,NULLPTR,NULLPTR,0);
	    /*if (peNum == 0) 
		printf("Broradcast msg %d at %d\n",ary[0],OsGetSysClk());*/
	    McSendMsg(mPtr,peNum);
	    }
	}
#ifdef TERMINATION
    if (peNum == 0) {
        k = t/100;
        if (PE(peNum).reductNum <= 0 && k > PE(peNum).diffCount3) {
	    if (PE(0).downNum > 0) {
		/* PE 0 has down-nodes: start a new termination wave */
	    	PE(peNum).diffCount3 = k;
	    	PE(0).everBusy = FALSE;
	    	PE(0).reductNum = PE(0).downNum;
/**printf("PE 0 send a TERM at time %d\n",OsGetSysClk(0));**/
	    	McSysSend(&PE(0).msgTerm,0);
		}
	    else if (PE(0).everBusy != TRUE
			&& PE(0).inChareQ.head == NULLPTR
			&& PE(0).inDataQ.head == NULLPTR
			&& PE(0).waitQ.head == NULLPTR) {
		/* no down-nodes, not busy, all queues empty: finished */
	   	printf("\nSystem terminated at time %d\n",OsGetSysClk());
		OsKillSys();
		}
	    else {
	    	PE(peNum).diffCount3 = k;
	    	PE(0).everBusy = FALSE;
		}
	    }
     	}
#endif
    k = (ip.CS == CSGRANDDIFF) ? t : t/100;
    if (k > PE(peNum).diffCount) {
	PE(peNum).diffCount = k;
  	if (ip.CS == CSCONTRACTX && abs(PE(peNum).newLoad-PE(peNum).oldLoad) 
		>= ip.CSloadMark2) {
	    /* load changed enough: send a STAT message to every neighbor
	       not already flagged in statOutAry, then clear each flag */
	    for (i = 1; i <= PE(peNum).neighborNum; i++) {
		destPE = PE(peNum).neighbors[i];
		if (PE(peNum).statOutAry[destPE] == FALSE) {
   		    mPtr = CreateSysMsg(SYSMSGTYPE_STAT,0,NULLPTR,
			peNum,NULLPTR,destPE,-1,
			destPE,FALSE,sizeof(SYSMSG),0,0,NULLPTR,NULLPTR,0);
		    McSendMsg(mPtr,peNum);
		    }
		PE(peNum).statOutAry[destPE] = FALSE;
		}
	    PE(peNum).oldLoad = PE(peNum).newLoad;

	    if (ip.CSredistrKey && !PE(peNum).satu) LdbReDistr(peNum);
	    }
      	else if (ip.CS == CSGRANDDIFF) {
	    /*CoGranDiff(peNum);     */
	    LdbDiffProc(peNum);
	    }
    }
}

/*
 * McSuspendPe -- called when PE `peNum` is being suspended (busy
 * waiting, per the original comment).
 *
 * Accumulates idle time in suspendTime.  On the host, TERMINATION and
 * QUIET builds call McSysFinal once everBusy equals DUMMY.  In QUIET
 * builds a non-host PE whose everUp flag is set takes part in
 * termination detection: PE 0 either declares the system terminated
 * (no down-nodes and all queues empty) or sends its msgTerm with the
 * type stored in everUp; every other PE sends a TERM via msgTerm.
 *
 * NOTE(review): the host path also reaches the final accumulation at the
 * bottom, so the host's suspendTime is bumped twice per call -- confirm
 * that this is intended.
 */
McSuspendPe(peNum)
int peNum;
{
    if (PE(peNum).isHost == TRUE) {
        PE(peNum).suspendTime += 0.000125;
#ifdef TERMINATION
	if (PE(peNum).everBusy == DUMMY)
	    McSysFinal(peNum);
#endif
#ifdef QUIET
	if (PE(peNum).everBusy == DUMMY)
	    McSysFinal(peNum);
#endif
	}
#ifdef QUIET
    else if (PE(peNum).everUp != FALSE) {
	if (peNum == 0) {
	    if (PE(peNum).downNum <= 0) {
		if (PE(0).inChareQ.head == NULLPTR
			&& PE(0).inDataQ.head == NULLPTR
			&& PE(0).waitQ.head == NULLPTR) {
	   	    printf("\nSystem terminated at time %d (1 PE)\n",OsGetSysClk());
	   	    OsKillSys();
	   	    }
		}
	    else {
/**printf("PE 0 send a TYPE%d at time %d\n",PE(0).everUp,OsGetSysClk(0));**/
		/**SYSMSGTYPE((&PE(0).msgTerm)) = SYSMSGTYPE_GOUP;**/
		SYSMSGTYPE((&PE(0).msgTerm)) = PE(0).everUp;
		McSysSend(&PE(0).msgTerm,0);
		PE(0).everUp = PE(0).everBusy = FALSE;
		PE(0).reductNum = PE(0).downNum;
		}
	    }
	else {
	    SYSMSGTYPE((&PE(peNum).msgTerm)) = SYSMSGTYPE_TERM;
    	    McSysSend(&PE(peNum).msgTerm,peNum);
/* printf("PE %d send a TERM%d at time %d\n",peNum,PE(peNum).msgTerm.sysMsgType,OsGetSysClk(0)); */
	    /* was PE(pNum): `pNum` is not declared anywhere in this
	       function, so it only compiled if PE() ignores its argument */
    	    PE(peNum).everUp = PE(peNum).everBusy = FALSE;
	    PE(peNum).reductNum = PE(peNum).downNum;
	    }
     	}
#endif
    if (ip.CS != CSCONTRACT1) 
        PE(peNum).suspendTime += 0.000125;
    else
        PE(peNum).suspendTime += 0.00006;
    /* busy waiting */
}

/*
 * McResumePe -- counterpart of McSuspendPe for PE `peNum`.
 * Placeholder: the body is empty, so a call has no effect.
 */
McResumePe(peNum)
int peNum;
{
}

/*
 * McGetPendingMsg -- fetch one waiting message of any type, if there is
 * one.
 *
 * Returns a buffer obtained from OsAlloc2 that holds the received
 * message, with its SYSMSGSIZE field set to the received length, or
 * NULLPTR when the probe reports nothing pending.  `peNum` is not used.
 */
SYSMSG *McGetPendingMsg(peNum)
int	peNum;
{
    SYSMSG	*incoming = NULLPTR;
    int		nbytes;

    /* nothing waiting: hand back the null pointer */
    if (CubeAsynProbe(ANYTYPE) != 1)
	return(incoming);

    nbytes   = CubeMsgLen();
    incoming = (SYSMSG *) OsAlloc2(nbytes);
    CubeSynRecv(ANYTYPE,incoming,nbytes);
    SYSMSGSIZE(incoming) = nbytes;
    return(incoming);
}

/*
 * McSendMsg -- send system message `mPtr` on behalf of PE `peNum`.
 *
 * Steps, in order:
 *   1. If the message originates on this PE, its next hop is another PE
 *      and it carries user data, prepare it for the wire: either just
 *      fix up the size and clear the user pointer (no packing needed),
 *      or pack the user data into a freshly allocated buffer with
 *      UsrPackMsg and continue with that buffer.
 *   2. Under CSCONTRACTX, piggy-back the local load on the message and
 *      flag the destination(s) in statOutAry.
 *   3. Send: asynchronously through a ring of ip.ringLen outstanding
 *      sends when ip.ringLen > 0, otherwise synchronously.
 */
McSendMsg(mPtr,peNum)
SYSMSG	*mPtr;
int	peNum;
{
    int		i,s,k;
    char	*p1,*p2;
    SYSMSG	*newPtr;
    extern int	maxPackSize;

    if (SYSMSGSRCPE(mPtr) == peNum && SYSMSGNEXTPE(mPtr) != peNum
		&& SYSMSGUSIZE(mPtr) != 0 ) {
	if (!SYSMSGNEEDPACK(mPtr))
		/* || SYSMSGISPACK(mPtr) || SYSMSGUPTR(mPtr) == NULLPTR)*/ {
	    /* no packing: presumably the user data already sits right
	       behind the header -- TODO confirm with the allocator */
	    SYSMSGSIZE(mPtr) 	 = SYSMSGUSIZE(mPtr)+sizeof(SYSMSG);
	    SYSMSGUPTR(mPtr)	 = NULLPTR;
/*
	    newPtr = (SYSMSG *) OsAlloc2(SYSMSGUSIZE(mPtr)+sizeof(SYSMSG));
	    p1 = (char*) newPtr; p2 = (char*) mPtr;
	    for (i = 0; i < sizeof(SYSMSG); i++)
		p1[i] = p2[i];
            p1 = (char*) newPtr + sizeof(SYSMSG);
            p2 = (char*) SYSMSGUPTR(mPtr);
            for (i = 0; i < SYSMSGUSIZE(mPtr); i++)
    	        p1[i] = p2[i];
	    SYSMSGSIZE(newPtr) 	 = SYSMSGUSIZE(mPtr)+sizeof(SYSMSG);
	    SYSMSGUPTR(newPtr)	 = NULLPTR;
            OsFree(SYSMSGUPTR(mPtr),SYSMSGUSIZE(mPtr));
	    OsFree(mPtr,SYSMSGSIZE(mPtr));
	    mPtr = newPtr;
*/
	    }
	else {
	    /* charge packing time to the clock unless ip.COproc is 2 */
            if (ip.COproc != 2) 
	        add_clock(SYSMSGSIZE(mPtr)>>1);

	    /* try with maxPackSize first; when UsrPackMsg reports (in i)
	       more than the s bytes offered, free the buffer and retry
	       with the reported size */
	    s = maxPackSize;
	    for (;;) {
		k = s + sizeof(SYSMSG);
	        newPtr = (SYSMSG *) OsAlloc2(k); 
	        p1 = (char*) newPtr; 
	        UsrPackMsg(SYSMSGUCODE(mPtr),SYSMSGUPTR(mPtr),
			p1+sizeof(SYSMSG),&i,s);
		if (i > s) {
		    s = i; /*SYSMSGUSIZE(newPtr);*/
		    OsFree(newPtr,k);
		    }
		else break;
	 	}
	    p2 = (char*) mPtr;
	    s = i;	/* packed payload size; i is reused as loop index below */
	    /* copy the original header in front of the packed payload */
	    for (i = 0; i < sizeof(SYSMSG); i++)
		p1[i] = p2[i];
	    OsFreeMsg(SYSMSGUPTR(mPtr),SYSMSGUSIZE(mPtr));
	    /*OsFree(mPtr,SYSMSGSIZE(mPtr));*/
	    /* SYSMSGSIZE records the allocated size k, SYSMSGUSIZE the
	       packed payload size s */
	    SYSMSGISPACK(newPtr) = TRUE;
	    SYSMSGSIZE(newPtr) 	 = k;
	    SYSMSGUSIZE(newPtr)	 = s;
	    SYSMSGUPTR(newPtr)	 = NULLPTR;
	    mPtr = newPtr;
	    }
	} 

/*****    SYSMSGUPTR(mPtr)	 = NULLPTR;  *****/
    if (ip.CS == CSCONTRACTX) {
	/* piggy-back the local load; a destination of -1 flags every
	   neighbor, otherwise only the destination PE is flagged */
        SYSMSGLOAD(mPtr) = pe.newLoad;
	if (SYSMSGDESTPE(mPtr) == -1)
	    for (i = 1; i <= PE(peNum).neighborNum; i++) 
    	    	pe.statOutAry[PE(peNum).neighbors[i]] = TRUE;
	else
            pe.statOutAry[SYSMSGDESTPE(mPtr)] = TRUE;
	}

    if (ip.ringLen > 0) {
	/* advance to the next ring slot; if it still holds an earlier
	   send, wait for it and release its buffer before reusing it */
        pe.asynSendIndx = (pe.asynSendIndx + 1) % ip.ringLen;
        if (pe.asynSendID[pe.asynSendIndx] >= 0) {
	    CubeSynMsgWait(pe.asynSendID[pe.asynSendIndx]);
	    /***/OsFree(pe.asynSendBptr[pe.asynSendIndx],
		SYSMSGSIZE(pe.asynSendBptr[pe.asynSendIndx]));/***/
 	    } 
        pe.asynSendID[pe.asynSendIndx] = CubeAsynSend(SYSMSGTYPE(mPtr),mPtr,
	/**SYSMSGSIZE(mPtr)+SYSMSGUSIZE(mPtr),SYSMSGDESTPE(mPtr),USRPID);**/
	sizeof(SYSMSG)+SYSMSGUSIZE(mPtr),SYSMSGDESTPE(mPtr),USRPID);
        pe.asynSendBptr[pe.asynSendIndx] = mPtr;
	/* a DATA message with destination -1 sent by a non-host PE is
	   also queued locally, as a copy */
	/***/if (SYSMSGDESTPE(mPtr) == -1 
		&& SYSMSGTYPE(mPtr) == SYSMSGTYPE_DATA 
		&& PE(peNum).isHost != TRUE) {
	    DuplicateMsg(&newPtr,mPtr);
	    QmPutLocalMsg(newPtr,peNum);
	    }/***/
	}
    else {
	/* NOTE(review): on this synchronous path the buffer is neither
	   freed nor queued locally (that code is commented out below) --
	   confirm who owns mPtr afterwards */
        CubeSynSend(SYSMSGTYPE(mPtr),mPtr,
		sizeof(SYSMSG)+SYSMSGUSIZE(mPtr),SYSMSGDESTPE(mPtr),USRPID);
	/***if (SYSMSGDESTPE(mPtr) == -1 
		&& SYSMSGTYPE(mPtr) == SYSMSGTYPE_DATA 
		&& PE(peNum).isHost != TRUE) {
	    QmPutLocalMsg(mPtr,peNum);
	    }
        else OsFree(mPtr,SYSMSGSIZE(mPtr));***/
	}
    /* SYSMSGSIZE() may be larger than SYSMSGUSIZE+sizeof(SYSMSG) after
       packing */
}

McSysSend(mPtr,peNum)
SYSMSG	*mPtr;
int	peNum;
{
    int		i,s,k;

    /* piggypack a load stat on it */
    if (ip.CS == CSCONTRACTX) {
        SYSMSGLOAD(mPtr) = pe.newLoad;
/**	if (SYSMSGDESTPE(mPtr) == -1)
	    for (i = 1; i <= PE(peNum).neighborNum; i++) 
    	    	pe.statOutAry[PE(peNum).neighbors[i]] = TRUE;
	else 
            pe.statOutAry[SYSMSGDESTPE(mPtr)] = TRUE;
**/
	}
    if (ip.ringLen > 0) 
        CubeAsynSend(SYSMSGTYPE(mPtr),mPtr,
		sizeof(SYSMSG)+SYSMSGUSIZE(mPtr),SYSMSGDESTPE(mPtr),USRPID);
    else 
        CubeSynSend(SYSMSGTYPE(mPtr),mPtr,
		sizeof(SYSMSG)+SYSMSGUSIZE(mPtr),SYSMSGDESTPE(mPtr),USRPID);
}
McSeparateMsg(mPtr)
SYSMSG *mPtr;
{
    char    *p1,*p2;
    int	    p;

p = (int) SYSMSGUPTR(mPtr);
    if (SYSMSGUPTR(mPtr) == NULL && SYSMSGUSIZE(mPtr) > 0) {
	    SYSMSGUPTR(mPtr) = (USRMSG*) ((char*)mPtr+sizeof(SYSMSG));
	}
    if (SYSMSGISPACK(mPtr)) {
	if (ip.COproc != 2) /* charge unpacking time */
	    add_clock(SYSMSGSIZE(mPtr)>>2);
	UsrUnpackMsg(SYSMSGUCODE(mPtr),SYSMSGUPTR(mPtr));
	}
}

/*
 * McUpdateStat -- record the load reported by another PE.
 *
 *   peNum   : the local PE
 *   srcPe   : the PE the report is about
 *   newLoad : its reported load
 *
 * Stores the load in loadAry[srcPe] and clears the local `satu` flag
 * when it is set and the reported load is below ip.CSsaturateLowMark.
 */
McUpdateStat(peNum,srcPe,newLoad)
int peNum,srcPe, newLoad;
{
    PE(peNum).loadAry[srcPe] = newLoad;

    if (newLoad < ip.CSsaturateLowMark) {
	if (PE(peNum).satu)
	    PE(peNum).satu = FALSE;
	}
}

/*
 * McMyPeNum -- return the number of the calling PE, as reported by
 * CubeGetPeNum().
 */
McMyPeNum()
{
    int me;

    me = CubeGetPeNum();
    return me;
}

/*
 * McGetSysClk -- system clock of PE `peNum`: the current CubeGetClk()
 * reading minus the value stored in the PE's initClk field.
 */
long McGetSysClk(peNum)
int peNum;
{
    long elapsed;

    elapsed = CubeGetClk()-PE(peNum).initClk;
    return elapsed;
}

/*
 * McInitClk -- start the system clock of PE `peNum`.
 *
 * Stores the current CubeGetClk() reading as the PE's initClk (the
 * reference point used by McGetSysClk) and seeds rand() with the PE
 * number.
 */
McInitClk(peNum)
int peNum;
{
    /* clock reference first, then the random-number seed */
    PE(peNum).initClk = CubeGetClk();

    srand(peNum);
}

/*
 * McRandomPe -- pick a PE number at random.
 *
 * Draws one value from rand().  When ip.CSmaxDist is 9 and the draw has
 * a bit in common with CubeNodeNum(), the caller's own `peNum` is
 * returned; otherwise the draw masked with CubeNodeNum()-1 is returned.
 */
McRandomPe(peNum)
int peNum;
{
    int draw;

    draw = rand();

    if (ip.CSmaxDist == 9 && (draw & CubeNodeNum()) != 0)
	return peNum;

    return draw & (CubeNodeNum()-1);
}


/*
 * McBroadcast -- placeholder: the body is empty, so a call has no
 * effect.  `peNum` is not used.
 */
McBroadcast(peNum)
int	peNum;
{
}

/*
 * McCurrentLoad -- recompute the load figure of PE `peNum`.
 *
 * The load is the weighted sum
 *     ip.LDcMsg * (chare queue length)
 *   + ip.LDdMsg * (data queue length)
 *   + ip.LDchare * currentChare
 * and is stored both in newLoad and in the PE's own loadAry slot.
 * `wKey` is not used.
 */
McCurrentLoad(peNum,wKey)
int	peNum;
BOOLEAN	wKey;
{
    PE(peNum).newLoad =
	ip.LDcMsg*PE(peNum).inChareQ.qLen + 
	ip.LDdMsg*PE(peNum).inDataQ.qLen + 
	ip.LDchare*PE(peNum).currentChare; 

    /* mirror it into this PE's own entry of the load table */
    PE(peNum).loadAry[peNum] = PE(peNum).newLoad;
}

McChoosePe(peNum,avoidPE,wKey)
int   	peNum;
int	avoidPE;
BOOLEAN	wKey;
{
    int i,k,j,n,leastLoad,leastPe;

McCurrentLoad(peNum,FALSE);
    if (peNum != avoidPE) {
	leastLoad = PE(peNum).newLoad; leastPe = peNum;
	}
    else {
	leastLoad = MAXINT; leastPe = DUMMY;
	}
    /*n = PE(peNum).neighborNum; j = (rand()%n) + 1;	*/
    for (i = 1; i <= PE(peNum).neighborNum; /*j = j%n+1,*/ i++) {
        if (PE(peNum).loadAry[k=PE(peNum).neighbors[i]] < leastLoad
		&& k != avoidPE) {
	    leastPe = k;
	    leastLoad = PE(peNum).loadAry[k];
	    }
	}
/*
printf("\nPE%d at %d c%d m%d z%d lpe %d load %d new %d [0]%d [1]%d [2]%d [3]%d",peNum,
	OsGetSysClk(),PE(peNum).inChareQ.qLen,PE(peNum).inDataQ.qLen, 
	PE(peNum).currentChare,
	leastPe,PE(peNum).loadAry[leastPe],PE(peNum).newLoad,
	PE(peNum).loadAry[0],
	PE(peNum).loadAry[1],PE(peNum).loadAry[2],PE(peNum).loadAry[3]);
*/
    if (ip.CSsaturateKey && leastLoad > ip.CSsaturateHighMark){
	PE(peNum).routNum++;
    	PE(peNum).satu	= TRUE;
	}
    if (leastLoad < 1)
	PE(peNum).dist = ip.CSminDist;
    else if (leastLoad > ip.CSloadMark3)
	PE(peNum).dist = 0;
    if (leastPe == DUMMY) 
	return(peNum); 
    else {
	PE(peNum).loadAry[leastPe]++;
	return(leastPe);
	}
}

/*
 * McParamInit -- read the run parameters from the file "sim.in" into
 * the global `ip`.
 *
 * The file name is also stored in `simName`.  Exits with status 1 when
 * the file cannot be opened.  Returns 1 when the trace flag read from
 * the file is 4, otherwise 0.
 *
 * NOTE(review): the fscanf results are not checked, so a short or
 * malformed file leaves the remaining fields at whatever they held
 * before.
 */
McParamInit()
{
    FILE 	*siminfp;

    strcpy(simName,"sim.in");
    siminfp = fopen(simName,"r");
    if (siminfp == NULL) {
	/* previously fscanf was called on the null stream */
	perror(simName);
	exit(1);
	}

 /**********************************************************************/
 /* contracting control strategy				       */
 /* 	1: contract at a distance of one			       */
 /* 	2: contract at a specified distance			       */
 /*	3: diffusion gradient contracting			       */
 /**********************************************************************/
    fscanf(siminfp,"%d",&ip.CS); ip.CSredistrKey = ip.CSsaturateKey = 0;
    /* defaults, overridden below by the strategies that read them */
    ip.CSinterval = 200; ip.CSloadMark2 = 2;
    if (ip.CS == CSCONTRACTX || ip.CS == CSCONTRACT1 || ip.CS == CSCONTRACTN) 
 	fscanf(siminfp,"%d %d %d %d %d %d %d %d %d %d %d",
	&ip.CSmaxDist,&ip.CSminDist,
	&ip.CSredistrKey, &ip.CSredistrMark,
	&ip.CSloadMark,&ip.CSloadMark2,&ip.CSloadMark3,
	&ip.CSsaturateKey, &ip.CSsaturateHighMark, 
	&ip.CSsaturateLowMark,&ip.CSinterval);
    else if (ip.CS == CSGRANDDIFF)
 	fscanf(siminfp,"%d %d %d %d",&ip.CShighMark,&ip.CSlowMark,
	&ip.CSinterval,&ip.CSloadMark2);

    fscanf(siminfp,"%d %d %d %d %d",
	&ip.neighborDist,&ip.ringLen,
	&ip.LDchare,&ip.LDcMsg,&ip.LDdMsg);
    /* the send ring cannot be longer than the arrays that back it */
    if (ip.ringLen > MAXSENDRINGLEN)
	ip.ringLen = MAXSENDRINGLEN;
  /**********************************************************************/
  /* output form specification 						*/ 
  /* 	(1) trace flag: (0 off/ 1 on)					*/
  /**********************************************************************/
    fscanf(siminfp,"%d",&ip.TraceOn);

  /**********************************************************************/
  /* queue control strategy						*/
  /*	(1) 1: chare/data; 0: data/chare; 2: mix			*/
  /*    (2) 1: FIFO	 0: LIFO					*/
  /*	(3) 1: one chareQ; / PE 0: one chareQ / system			*/
  /*	(4) 1: one dataQ; / PE  0: one dataQ / system			*/
  /**********************************************************************/
    fscanf(siminfp,"%d %d %d %d",&ip.QMchareData,&ip.QMfifo,&ip.QMchareQ,
				 &ip.QMdataQ);

    fclose(siminfp);	/* the stream was previously left open */

    if (ip.TraceOn == 4) return(1); else return(0);
}

/*
 * McNodeInit -- node-side start-up for PE `peNum`.
 *
 * Resets the send ring, the queues and the counters of the PE record,
 * builds the neighbor list (distance-one neighbors from MASK[0], more
 * distant ones from the further MASK rows when ip.neighborDist > 1),
 * picks the up-node, prepares the termination messages (TERMINATION /
 * QUIET builds) and the hash table (HASH builds), allocates the memory
 * pool, then receives from the host: the INPARAM block, the read-only
 * variables (possibly in several chunks) and the BEGN message.
 * Exits with status 0 when the memory pool cannot be allocated.
 * Returns 1 when ip.TraceOn is 4, otherwise 0.
 */
McNodeInit(peNum)
int peNum;
{
    int  i,j,k,n,m,l,in,more;
    char *readVar1,*readVar2;
    SYSMSG *p;

    /* empty send ring: no outstanding send in any slot */
    for (i = 0; i < MAXSENDRINGLEN; i++) {
	PE(peNum).asynSendID[i] 	= DUMMY;
    	PE(peNum).asynSendBptr[i]	= NULLPTR;
        }
    PE(peNum).asynSendIndx	= 0;

    PE(peNum).myChareBlk	= (CHAREBLK*) DUMMY;
    PE(peNum).inChareQ.head	= PE(peNum).inDataQ.head	= NULLPTR;
    PE(peNum).inChareQ.tail	= PE(peNum).inDataQ.tail	= NULLPTR;
    PE(peNum).inChareQ.qLen	= PE(peNum).inDataQ.qLen	= 0;
    PE(peNum).inChareQ.maxLen	= PE(peNum).inDataQ.maxLen	= 0;
    PE(peNum).allChareQ.head	= PE(peNum).waitQ.head	= NULLPTR;
    PE(peNum).allChareQ.tail	= PE(peNum).waitQ.tail	= NULLPTR;
    PE(peNum).allChareQ.qLen	= PE(peNum).waitQ.qLen	= 0;
    PE(peNum).allChareQ.maxLen	= PE(peNum).waitQ.maxLen	= 0;
    PE(peNum).spreadQ.head	= NULLPTR;
    PE(peNum).spreadQ.tail	= NULLPTR;
    PE(peNum).spreadQ.qLen	= 0;
    PE(peNum).spreadQ.maxLen	= 0;

    /* distance-one neighbors: flip one address bit each (slots 1..dim) */
    for (i = 0; i < CubeNodeDim(); i++) {
        k = PE(peNum).neighbors[i+1] = peNum ^ MASK[0][i]; 
        PE(peNum).loadAry[k]	    = 0;	
        PE(peNum).statOutAry[k]    = FALSE;	
	}
    PE(peNum).isHost = FALSE;
    PE(peNum).nextPE = NODE0;
    /* up-node: clear the lowest set bit of the PE number; PE 0 has none
       and uses ALLNODES */
    if (peNum == 0) {
	PE(peNum).upNode = ALLNODES;
	}
    else 
	for (i = 0; i < CubeNodeDim(); i++) 
	    if (peNum & (1 << i)) {
	        PE(peNum).upNode = peNum ^ (1 << i);
		break;
		}
#ifdef TERMINATION
    /* downNum = number of zero bits below the lowest set bit */
    PE(peNum).downNum = 0;
    for (i = 0; i < CubeNodeDim(); i++) {
	if (peNum & (1 << i)) { break; }
	PE(peNum).downNum++;
	}
    PE(peNum).reductNum = (peNum == 0) ? 0 : PE(peNum).downNum;
    if (peNum == 0) { /* root node */
   	ExtendSysMsg(SYSMSGTYPE_TERM,0,&PE(peNum).everBusy,
			peNum,NULLPTR,PE(peNum).upNode,-1,PE(peNum).upNode,
			FALSE,sizeof(SYSMSG),
			0,0,NULLPTR,NULLPTR,0);
	}
    else { /* non-leaf node */
   	ExtendSysMsg(SYSMSGTYPE_REDU,0,&PE(peNum).everBusy,
			peNum,NULLPTR,PE(peNum).upNode,-1,PE(peNum).upNode,
			FALSE,sizeof(SYSMSG)+sizeof(int),
			sizeof(int),0,NULLPTR,NULLPTR,0);
	}
#endif
#ifdef QUIET
    PE(peNum).downNum = 0;
    PE(peNum).everBusy = FALSE;
    PE(peNum).everUp = (peNum == 0) ? SYSMSGTYPE_GOUP : FALSE;
    for (i = 0; i < CubeNodeDim(); i++) {
	if (peNum & (1 << i)) { break; }
	PE(peNum).downNum++;
	}
    PE(peNum).reductNum = PE(peNum).downNum;
    if (peNum == 0) { /* root node */
   	ExtendSysMsg(SYSMSGTYPE_TERM,0,&PE(peNum).everBusy,
			peNum,NULLPTR,PE(peNum).upNode,-1,PE(peNum).upNode,
			FALSE,sizeof(SYSMSG),
			0,0,NULLPTR,NULLPTR,0);
	}
    else { /* non-leaf node */
   	ExtendSysMsg(SYSMSGTYPE_REDU,0,&PE(peNum).everBusy,
			peNum,NULLPTR,PE(peNum).upNode,-1,PE(peNum).upNode,
			FALSE,sizeof(SYSMSG)+sizeof(int),
			sizeof(int),0,NULLPTR,NULLPTR,0);
	}
#endif
#ifdef HASH
    PE(peNum).hNext = 0;
    for (i = 0; i < MAXENTRY; i++) {
        PE(peNum).hBlk[i].hashNum = DUMMY;
        PE(peNum).hBlk[i].sharedNext = 0;
        PE(peNum).hBlk[i].resultQ.head	= NULLPTR;
        PE(peNum).hBlk[i].resultQ.tail	= NULLPTR;
        PE(peNum).hBlk[i].resultQ.qLen	= 0;
        PE(peNum).hBlk[i].resultQ.maxLen	= 0;
	}
#endif
    PE(peNum).neighborNum = CubeNodeDim();
    PE(peNum).satu	= FALSE;
    PE(peNum).diffCount	= 0;
    PE(peNum).oldLoad  = MAXINT;  PE(peNum).newLoad	= 0;

    PE(peNum).chareNumGenerated = PE(peNum).dataNumGenerated = 0;
    PE(peNum).chareNumConsumed  = PE(peNum).dataNumConsumed  = 0;
    PE(peNum).contNum 		= PE(peNum).currentChare     = 0;
    PE(peNum).chareNumRemote    = 0;
    PE(peNum).suspendTime = 0.0;

    /* was MEM_SPACE(PeNum)/MEM_PTR(PeNum): `PeNum` is not declared, so
       it only compiled if these macros ignore their argument */
    MEM_SPACE(peNum) = (int *)malloc(sizeof(int)*maxMem);
    MEM_PTR(peNum)  = 0;
    if (MEM_SPACE(peNum) == NULLPTR) {
	perror("Not enough mem");
	exit(0);
	}

    CubeSynRecv(SYSMSGTYPE_INIT, &ip, sizeof(INPARAM)); 
    PE(peNum).dist	= ip.CSminDist;
    if (ip.neighborDist > (k=CubeNodeDim()))
	ip.neighborDist = k;
    /* append the neighbors at distance 2..ip.neighborDist.
       NOTE(review): n below is not the binomial count that the MASK row
       lengths (6,15,20,15,6,1) suggest; for k=6, j=1 it evaluates to 48,
       which runs past a 20-entry row -- confirm the intended formula. */
    m = PE(peNum).neighborNum;
    for (j = 1; j < ip.neighborDist; j++) {
	i = j+1; 
	if (i > k/2) i = k-i;
	for (n=k,l = 0; l < i; l++) n *= (k-j-1);
	if (i != 0) n = n / i; else n = 1;
	for (i = 0; i < n; i++)
            PE(peNum).neighbors[++m] = peNum ^ MASK[j][i]; 
	}
    PE(peNum).neighborNum = m;

    /* read-only variables: the first chunk carries the header, whose
       SYSMSGSIZE gives the total; further chunks are appended to a
       buffer of the full size */
    CubeSynProbe(SYSMSGTYPE_RVAR);
    in = CubeMsgLen(); 
    readVar1 = (char *) OsAlloc2(in); p = (SYSMSG*) readVar1;
    CubeSynRecv(SYSMSGTYPE_RVAR, readVar1, in); 
    if ( (k = SYSMSGSIZE(p)) > in) {
	readVar2 = (char *) OsAlloc2(k);
	for (j = 0; j < in; j++) readVar2[j] = readVar1[j];
	OsFree(readVar1,in); readVar1 = readVar2;
	/* the header now lives in the new buffer; p must not keep
	   pointing into the block that was just freed */
	p = (SYSMSG*) readVar1;
        for (more=k-in; more > 0; in+=k,more-=k) {
	    k = (more < MAXHOSTMSG) ? more : MAXHOSTMSG;
    	    CubeSynRecv(SYSMSGTYPE_RVAR,readVar2+in,k); 
	    }
	}
    if (SYSMSGUSIZE(p) > 0) 
	UsrGetReadVar(readVar1+sizeof(SYSMSG),in-sizeof(SYSMSG));
    /*OsFree(readVar,i);*/
    bTbl.bNum = 0;
    UsrSetBoundVar(&bTbl.bNum,bTbl.bPtr,bTbl.bFun);

    /* wait for the start signal from the host */
    CubeSynRecv(SYSMSGTYPE_BEGN, &i, 4); 
    /* McGetSysClk takes the PE number; calling it with no argument
       is undefined behavior */
    PE(peNum).initClk = McGetSysClk(peNum);

    if (ip.TraceOn == 4) return(1); else return(0);
}
/*
 * McProcBvar -- apply a received set of bound-variable values.
 *
 * For every registered bound variable, in table order, the variable's
 * update function bFun[i] is called with the variable's address
 * bPtr[i] and the received value ary[i].  `peNum` is not used.
 */
McProcBvar(ary,peNum)
int ary[],peNum;
{
    int slot = 0;

    while (slot < bTbl.bNum) {
	(*bTbl.bFun[slot]) (bTbl.bPtr[slot],ary[slot]);
	slot++;
	}
}
