



/********************************************************************/
/*  WARNING  WARNING  WARNING  WARNING  WARNING  WARNING  WARNING   */
/*      DO NOT EDIT!  DO NOT EDIT!  DO NOT EDIT!  DO NOT EDIT!      */
/*      DO NOT EDIT!  DO NOT EDIT!  DO NOT EDIT!  DO NOT EDIT!      */
/*                                                                  */
/*                      THIS IS NOT A SOURCE FILE                   */
/*  All ECO code is written using a set of m4 macros.  If you want  */
/*  to edit, or read, the ECO code, please use the m4 macro code.   */
/********************************************************************/


/*
 * 	ECO: Efficient Collective Operations
 * 	Beta release 0.1b
 * 	Bruce Lowekamp and Adam Beguelin	
 * 	School of Computer Science
 * 	Carnegie Mellon University
 * 	Pittsburgh, PA 15213
 * 
 * 	(C) 1996 All Rights Reserved
 * 
 * NOTICE:
 * 
 *  Permission to use, copy, modify, and distribute this software and
 *  its documentation for any purpose and without fee is hereby granted
 *  provided that the above copyright notice appear in all copies and
 *  that both the copyright notice and this permission notice appear in
 *  supporting documentation.
 * 
 *  Neither Carnegie Mellon University nor the Authors make any
 *  representations about the suitability of this software for any
 *  purpose.  This software is provided `as is' without express or
 *  implied warranty.
 * 
 *  This research is sponsored in part by the Department of Defense
 *  Advanced Research Projects Agency and the National Science
 *  Foundation.
 */

/*  Prints data on the communication times between all pairs of nodes. */
/*  Reports the mean, standard deviation, and minimum for */
/*   - congested communication, with at least n-1 nodes involved */
/*   - uncongested communication, with only 2 nodes active at a time */





#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>




#include <pvm3.h>


#include "eco_netanal.h"
#include "eco_netutil.h"
#include "spawn_mpp.h"

/* constants local to this particular program */

/* Partner index meaning "this node has no partner"; stored in the */
/* assignment table of cong_partner and returned by it. */
#define NO_ASSIGN -1
/* Action meaning "this node does nothing in this step".  Note that it */
/* has the same value as NO_ASSIGN and, being -1, has every bit set, */
/* so (NO_ACTION & FAKE) is non-zero. */
#define NO_ACTION -1
/* Node acts as the timing side of an exchange (do_cong_times calls */
/* master_exch for it). */
#define SEND 0
/* Node acts as the echoing side of an exchange (do_cong_times calls */
/* slave_exch for it). */
#define RECEIVE 1
/* Bit flag OR-ed into SEND/RECEIVE by cong_partner; do_cong_times */
/* performs the exchange but does not record its time. */
#define FAKE 2


/* Congested measurement: runs the exchange schedule produced by */
/* cong_partner NUMTRYS times and accumulates per-partner statistics. */
void do_cong_times(double *total, double *total_2, double* min_time,
		   double** raw_time,
		   int iam_procr, int *tids, int p,
		   int NUMTRYS, int MSGLENGTH, int INNER,
		   char *mesgbuf);

/* Gathers the per-node statistics on node 0 and prints the tables. */
void data_collect_output(double *total, double *total_2, double* min_time,
			 double** raw_time,
			 int iam_procr, int *tids, int p,
			 int NUMTRYS, int show_raw);

/* Uncongested measurement: each node times an exchange with every */
/* higher-numbered node, NUMTRYS times. */
void do_free_times(double *total, double *total_2, double* min_time,
		   double** raw_time,
		   int iam_procr, int *tids, int p,
		   int NUMTRYS, int MSGLENGTH, int INNER,
		   char *mesgbuf);

/* Computes the partner and action of node iam for one step of the */
/* congested schedule. */
int cong_partner(int offset, int iam, int p, int phase, int* extra,
		 int* my_action);

/* Spawns (or joins) the task set and fills in its size, this task's */
/* index and the tid table. */
void pre_spawn(int* p, int* iam, int** tids, int argc, char** argv);


/* Set to TRUE by main when the -f option is given.  When set, main */
/* also runs the congested measurement and data_collect_output prints */
/* the mean and stddev tables in addition to the minimum table. */
int do_full_run;

/*
 * Entry point of the netstone benchmark.
 *
 * Options (value glued to the flag, e.g. -l4096):
 *   -l<n>  message length in bytes          (default 1000)
 *   -n<n>  number of measurements           (default 10)
 *   -i<n>  exchanges per measurement        (default 5)
 *   -p<n>  number of tasks to use           (default: let pre_spawn decide)
 *   -a     also print every raw measurement
 *   -f     full run: additionally perform the congested measurement
 *
 * Every task runs this same function; task 0 prints the header and
 * the result tables.  Returns 0 on success and 1 when the options are
 * invalid or memory cannot be allocated.
 */
int main(int argc, char **argv)
{
  int num_procrs = 0, iam_procr;
  int *procrs_tid;

  /* timing stuff */
  double *total_time;   /* sum of times with each processor */
  double *total_time_2; /* sum of squared times (for statistics) */
  double *min_time;     /* smallest time seen with each processor */
  double **raw_time;    /* every individual measurement */

  char *mesgbuf;        /* payload exchanged in the timing loops */

  int MSGLENGTH = 1000;      /* length of each message */
  int NUMTRYS = 10;          /* number of measurements */
  int INNER = 5;             /* inner loop repetitions */
  int show_all_data = FALSE; /* show all values */

  int i, j;

  do_full_run = FALSE;

  /* Options are parsed BEFORE pre_spawn so that -p determines how */
  /* many tasks are spawned.  Parsing it afterwards would change */
  /* num_procrs behind the back of the already-built tid table. */
  /* argv[0] is the program name and is not an option. */
  for (i = 1; i < argc; i++) {
    if (argv[i][0] != '-')
      continue;
    switch (argv[i][1]) {
    case 'l':
      MSGLENGTH = atoi(&(argv[i][2]));
      break;
    case 'n':
      NUMTRYS = atoi(&(argv[i][2]));
      break;
    case 'i':
      INNER = atoi(&(argv[i][2]));
      break;
    case 'a':
      show_all_data = TRUE;
      break;
    case 'p':
      num_procrs = atoi(&(argv[i][2]));
      break;
    case 'f':
      do_full_run = TRUE;
      break;
    default:
      break;
    }
  }

  /* Reject values that would make the buffers or the statistics */
  /* meaningless.  Children receive the same arguments, so every */
  /* task takes the same decision. */
  if (MSGLENGTH < 0 || NUMTRYS < 1 || INNER < 1) {
    fprintf(stderr,
            "netstone: -l must be >= 0, -n and -i must be >= 1\n");
    return 1;
  }

  /* A -p value below 1 is left for pre_spawn to replace. */
  pre_spawn(&num_procrs, &iam_procr, &procrs_tid, argc, argv);

  setvbuf(stdout, NULL, _IOLBF, 1024);

  /* initialization stuff */

  /* allocate at least one byte so a zero-length message still has a */
  /* valid buffer */
  mesgbuf = malloc(MSGLENGTH > 0 ? (size_t) MSGLENGTH : 1);
  total_time = malloc((size_t) num_procrs * sizeof *total_time);
  total_time_2 = malloc((size_t) num_procrs * sizeof *total_time_2);
  min_time = malloc((size_t) num_procrs * sizeof *min_time);
  raw_time = malloc((size_t) num_procrs * sizeof *raw_time);
  if (mesgbuf == NULL || total_time == NULL || total_time_2 == NULL ||
      min_time == NULL || raw_time == NULL) {
    fprintf(stderr, "netstone: out of memory\n");
    return 1;
  }

  for (i = 0; i < MSGLENGTH; i++)
    mesgbuf[i] = (char) (i % 256);

  pvm_setopt(PvmRoute, PvmRouteDirect);

  for (i = 0; i < num_procrs; i++) {
    total_time[i] = total_time_2[i] = 0;
    min_time[i] = 1e20;
    raw_time[i] = malloc((size_t) NUMTRYS * sizeof *raw_time[i]);
    if (raw_time[i] == NULL) {
      fprintf(stderr, "netstone: out of memory\n");
      return 1;
    }
    for (j = 0; j < NUMTRYS; j++)
      raw_time[i][j] = 0;
  }

  if (iam_procr == 0) {
    time_t cur_time;
    time(&cur_time);
    printf ("#netstone\n");
    printf ("#beginning computation with %i processors at %s",
            num_procrs, ctime(&cur_time));
    printf ("#message length: %i\n#exchanges per measurement: %i\n",
            MSGLENGTH, INNER);
    printf ("#number of measurements: %i\n", NUMTRYS);
    printf ("#all data will be divided by two to approximate for one\n");
    printf ("#message, although pairs have to be exchanged.\n");
  }

  /* First exchange one message in each direction with every other */
  /* task, to make times fairer for PvmRouteDirect.  The */
  /* lower-numbered task of each pair sends first. */
  for (i = 0; i < num_procrs; i++) {
    if (iam_procr < i) {
      pvm_initsend(PvmDataDefault);
      pvm_send(procrs_tid[i], SETUP);
      pvm_recv(procrs_tid[i], SETUP);
    }
    else if (iam_procr > i) {
      pvm_recv(procrs_tid[i], SETUP);
      pvm_initsend(PvmDataDefault);
      pvm_send(procrs_tid[i], SETUP);
    }
  }

  if (do_full_run) {
    /* first, congestion */
    do_cong_times(total_time, total_time_2, min_time, raw_time,
                  iam_procr, procrs_tid, num_procrs,
                  NUMTRYS, MSGLENGTH, INNER, mesgbuf);
    if (iam_procr == 0)
      printf ("\n\ncongested times:\n");
    data_collect_output(total_time, total_time_2, min_time, raw_time,
                        iam_procr, procrs_tid, num_procrs, NUMTRYS,
                        show_all_data);
  }

  /* clear the accumulators before the second measurement */
  for (i = 0; i < num_procrs; i++) {
    total_time[i] = total_time_2[i] = 0;
    min_time[i] = 1e20;
    for (j = 0; j < NUMTRYS; j++)
      raw_time[i][j] = 0;
  }

  /* second, no congestion */
  do_free_times(total_time, total_time_2, min_time, raw_time,
                iam_procr, procrs_tid, num_procrs,
                NUMTRYS, MSGLENGTH, INNER, mesgbuf);
  if (iam_procr == 0)
    printf ("\n\nuncongested times:\n");
  data_collect_output(total_time, total_time_2, min_time, raw_time,
                      iam_procr, procrs_tid, num_procrs, NUMTRYS,
                      show_all_data);

  fflush(stdout);
  fflush(stderr);
  clean_exit(iam_procr, procrs_tid, num_procrs);

  return 0;
}


  
  
  


/* Allocates an n-by-n matrix of doubles, one malloc per row, with */
/* every entry set to fill.  Terminates the process when memory runs */
/* out. */
static double **dco_matrix_new(int n, double fill)
{
  double **rows;
  int r, c;

  /* never ask malloc for zero bytes: it may legitimately return NULL */
  rows = malloc((n > 0 ? (size_t) n : 1) * sizeof *rows);
  if (rows == NULL) {
    fprintf(stderr, "data_collect_output: out of memory\n");
    exit(1);
  }
  for (r = 0; r < n; r++) {
    rows[r] = malloc((size_t) n * sizeof **rows);
    if (rows[r] == NULL) {
      fprintf(stderr, "data_collect_output: out of memory\n");
      exit(1);
    }
    for (c = 0; c < n; c++)
      rows[r][c] = fill;
  }
  return rows;
}

/* Releases a matrix obtained from dco_matrix_new. */
static void dco_matrix_free(double **rows, int n)
{
  int r;

  for (r = 0; r < n; r++)
    free(rows[r]);
  free(rows);
}

/*
 * Merges the statistics of all tasks on task 0 and prints them.
 *
 *   total, total_2, min_time  per-partner sum, sum of squares and
 *                             minimum measured by this task (p entries)
 *   raw_time                  per-partner individual measurements
 *   iam_procr                 index of this task (0 collects and prints)
 *   tids, p                   tid table and its length
 *   NUMTRYS                   number of measurements per pair
 *   print_raw                 non-zero: also transfer and print raw data
 *
 * Task 0 multicasts STAT_READY, prints one "=<host>" line per task,
 * receives one STAT_RESULTS message from every other task, folds the
 * values into upper-triangular tables and prints them with
 * u_t_table_out.  The mean and stddev tables are printed only when the
 * global do_full_run is set; the minimum table is always printed.
 * Every other task waits for STAT_READY and answers with its arrays.
 */
void data_collect_output(double *total, double *total_2, double *min_time,
			 double** raw_time,
			 int iam_procr, int *tids, int p,
			 int NUMTRYS, int print_raw)
{
  int i, j;

  if (iam_procr == 0) {
    struct pvmtaskinfo *taskp;
    int num_tasks;
    int raw_num_procrs, numarchs;
    struct pvmhostinfo *hostp; /* information on the virtual machine */

    /* Only the entries [i][j] with i < j are used; the rest of each */
    /* matrix is wasted space. */
    double **means = dco_matrix_new(p, 0);
    double **stddev = dco_matrix_new(p, 0);
    double **mins = dco_matrix_new(p, 1e20);

    /* scratch vector, large enough for a statistics row (p entries) */
    /* and for a raw-data row (NUMTRYS entries) */
    int scratch_len = (p > NUMTRYS ? p : NUMTRYS);
    double *tempvec =
      malloc((scratch_len > 0 ? (size_t) scratch_len : 1) * sizeof *tempvec);

    if (tempvec == NULL) {
      fprintf(stderr, "data_collect_output: out of memory\n");
      exit(1);
    }

    /* tell everyone that we are ready to receive */
    pvm_initsend(PvmDataDefault);
    pvm_pkint(&p, 1, 1);
    pvm_mcast(&(tids[1]), p-1, STAT_READY);

    pvm_config(&raw_num_procrs, &numarchs, &hostp);

    for (i = 0; i < p; i++) {
      pvm_tasks(tids[i], &num_tasks, &taskp);
      printf ("=%s\n", pvmhostname(hostp, taskp[0].ti_host));
    }

    /* our own measurements form row 0 */
    for (i = 1; i < p; i++) {
      means[0][i] = total[i];
      stddev[0][i] = total_2[i];
      mins[0][i] = min_time[i];
    }

    if (print_raw)
      for (i = 0; i < p; i++)
        if (total[i] > 0) {
          printf ("raw data from %i to %i\n", 0, i);
          for (j = 0; j < NUMTRYS; j++)
            printf ("%lf\n", raw_time[i][j]);
          printf ("\n");
        }

    /* build the matrix: the values task i reports about task j go */
    /* into the cell whose row is the smaller of the two indices */
    for (i = 1; i < p; i++) {
      pvm_recv(tids[i], STAT_RESULTS);

      pvm_upkdouble(tempvec, p, 1);
      for (j = 0; j < i; j++)
        means[j][i] += tempvec[j];
      for (j = i+1; j < p; j++)
        means[i][j] += tempvec[j];

      pvm_upkdouble(tempvec, p, 1);
      for (j = 0; j < i; j++)
        stddev[j][i] += tempvec[j];
      for (j = i+1; j < p; j++)
        stddev[i][j] += tempvec[j];

      pvm_upkdouble(tempvec, p, 1);
      for (j = 0; j < i; j++)
        if (tempvec[j] < mins[j][i])
          mins[j][i] = tempvec[j];
      for (j = i+1; j < p; j++)
        if (tempvec[j] < mins[i][j])
          mins[i][j] = tempvec[j];

      if (print_raw) {
        /* a list of (partner index, NUMTRYS values) records, */
        /* terminated by the partner index -1 */
        int partner;
        int k;

        pvm_upkint(&partner, 1, 1);
        while (partner != -1) {
          pvm_upkdouble(tempvec, NUMTRYS, 1);
          printf ("raw data from %i to %i\n", i, partner);
          for (k = 0; k < NUMTRYS; k++)
            printf ("%lf\n", tempvec[k]);
          pvm_upkint(&partner, 1, 1);
          printf ("\n");
        }
      }
    }

    /* turn the accumulated sum and sum of squares into the mean and */
    /* the sample standard deviation */
    for (i = 0; i < p; i++)
      for (j = i+1; j < p; j++) {
        double tmean = means[i][j]/(NUMTRYS);
        double variance = 0;

        /* one sample has no spread; dividing by NUMTRYS-1 would */
        /* give NaN */
        if (NUMTRYS > 1) {
          variance = ((NUMTRYS)*tmean*tmean -
                      2* tmean*means[i][j] + stddev[i][j])/(NUMTRYS-1);
          /* rounding can push a zero variance slightly below zero */
          if (variance < 0)
            variance = 0;
        }

        means[i][j] = tmean;
        stddev[i][j] = sqrt(variance);
      }

    if (do_full_run) {
      /* now print the mean */
      u_t_table_out(stdout, tids, p, means);

      /* now print the stddev */
      u_t_table_out(stdout, tids, p, stddev);
    }

    /* now print the min */
    u_t_table_out(stdout, tids, p, mins);

    /* the tables are private to this call; release them */
    free(tempvec);
    dco_matrix_free(means, p);
    dco_matrix_free(stddev, p);
    dco_matrix_free(mins, p);

  } else {

    /* wait until task 0 asks for the results; the packed value is */
    /* unpacked but not used */
    pvm_recv(tids[0], STAT_READY);
    pvm_upkint(&i, 1, 1);

    pvm_initsend(PvmDataDefault);
    pvm_pkdouble(total, p, 1);
    pvm_pkdouble(total_2, p, 1);
    pvm_pkdouble(min_time, p, 1);
    if (print_raw) {
      /* only partners we actually timed have raw data */
      for (i = 0; i < p; i++)
        if (total[i] > 0) {
          pvm_pkint(&i, 1, 1);
          pvm_pkdouble(raw_time[i], NUMTRYS, 1);
        }
      i = -1; /* end-of-list marker */
      pvm_pkint(&i, 1, 1);
    }
    pvm_send(tids[0], STAT_RESULTS);
  }
}


/*
 * Congested measurement.  For each of the NUMTRYS rounds and each
 * distance 1..p/2, asks cong_partner what this task has to do in
 * phases 1 and 2 and performs that exchange.  A phase is repeated for
 * as long as cong_partner reports that an extra step is needed.
 *
 * Times are recorded (in total, total_2, min_time and raw_time, all
 * indexed by partner) only for exchanges where this task is the
 * sender and the exchange is not flagged FAKE.
 */
void do_cong_times(double *total, double *total_2, double *min_time,
		   double** raw_time,
		   int iam_procr, int *tids, int p,
		   int NUMTRYS, int MSGLENGTH, int INNER,
		   char *mesgbuf)
{
  int attempt;
  int offset; /* distance away we are timing this iteration */

  for (attempt = 0; attempt < NUMTRYS; attempt++) {
    for (offset = 1; offset <= p/2; offset++) {
      int extra = FALSE;
      int phase = 1;

      while (phase < 3) {
        int action;
        int partner = cong_partner(offset, iam_procr, p, phase,
                                   &extra, &action);
        int role = action & (~FAKE); /* action with the FAKE bit cleared */

        if (role == SEND) {
          double elapsed = master_exch(tids[partner], MSGLENGTH,
                                       INNER, mesgbuf);

          /* filler exchanges only create traffic; they are not scored */
          if ((action & FAKE) == 0) {
            total[partner] += elapsed;
            total_2[partner] += elapsed*elapsed;
            raw_time[partner][attempt] = elapsed;
            if (min_time[partner] > elapsed)
              min_time[partner] = elapsed;
          }
        }
        else if (role == RECEIVE) {
          slave_exch(tids[partner], MSGLENGTH, INNER, mesgbuf);
        }

        /* stay in the same phase while an extra step is pending */
        if (!extra)
          phase++;
      }
    }
  }
}




/*
 * Computes what node iam does in one step of the congested schedule.
 *
 *   offset     distance between the nodes of a measured pair
 *   iam        index of the calling node
 *   p          number of nodes
 *   phase      1 or 2; selects which half of the nodes send
 *   extra      in:  non-zero when this is the repeated ("extra") step
 *                   of the phase
 *              out: non-zero when the phase needs such an extra step
 *   my_action  out: SEND or RECEIVE, possibly OR-ed with FAKE, or
 *              NO_ACTION when the node is idle
 *
 * Returns the index of the partner node, or NO_ASSIGN when the node is
 * idle.  The whole schedule is rebuilt locally from the arguments on
 * each call.
 *
 * Invalid arguments (p < 1, offset < 1, iam outside 0..p-1) yield an
 * idle result with *extra cleared instead of undefined behaviour.
 */
int cong_partner(int offset, int iam, int p, int phase, int* extra,
		 int* my_action)
{
  int i;
  int need_extra = FALSE;
  int last_found;

  /* Reject arguments that would give a non-positive array size, a */
  /* modulo by zero or an out-of-range index.  Also skip phase 2 of */
  /* the half-way offset when p is even. */
  if (p < 1 || offset < 1 || iam < 0 || iam >= p ||
      ((offset == p/2) && (phase == 2) && (p%2 == 0))) {
    *extra = FALSE;
    *my_action = NO_ACTION;
    return NO_ASSIGN;
  }

  {
    int assigned[p]; /* partner of each node, or NO_ASSIGN */
    int action[p];   /* what each node does, or NO_ACTION */

    for (i = 0; i < p; i++) {
      assigned[i] = NO_ASSIGN;
      action[i] = NO_ACTION;
    }

    /* first set up senders: blocks of `offset` nodes alternate */
    /* between sending in phase 1 and sending in phase 2 */
    for (i = 0; i < p; i++)
      if ((((i%(offset*2)) < offset) && (phase == 1)) ||
          (((i%(offset*2)) >= offset) && (phase == 2))) {
        assigned[i] = (i+offset)%p;
        action[i] = SEND;
      }

    /* now set up receivers and eliminate overlaps */
    for (i = 0; i < p; i++)
      if (action[i] == SEND) {
        int target = assigned[i];

        if (assigned[target] == NO_ASSIGN) {
          assigned[target] = i;
          action[target] = RECEIVE;
        }
        else if (!(*extra)) {
          /* first attempt at this phase: the target is already */
          /* busy, so drop this sender and ask for an extra step */
          assigned[i] = NO_ASSIGN;
          action[i] = NO_ACTION;
          need_extra = TRUE;
        }
        else {
          /* extra step: this sender takes over the target, and the */
          /* target's previous partner becomes idle */
          int displaced = assigned[target];

          assigned[displaced] = NO_ASSIGN;
          action[displaced] = NO_ACTION;
          assigned[target] = i;
          /* mark as fake so we know these are the ones needed in */
          /* the extra step */
          action[target] = RECEIVE|FAKE;
          action[i] = action[i]|FAKE;
        }
      }

    /* On the extra step only the pairs marked above are measured: */
    /* strip their FAKE mark and reset every other node so that */
    /* filler pairs are used in their place.  NO_ACTION is -1 and */
    /* therefore has the FAKE bit set, so it must be excluded */
    /* explicitly. */
    if (*extra)
      for (i = 0; i < p; i++)
        if (action[i] != NO_ACTION && (action[i] & FAKE))
          action[i] = action[i] & (~FAKE);
        else {
          action[i] = NO_ACTION;
          assigned[i] = NO_ASSIGN;
        }

    /* keep every node busy: pair up unassigned nodes, two at a */
    /* time in index order, as FAKE senders and receivers */
    last_found = NO_ASSIGN;
    for (i = 0; i < p; i++)
      if (assigned[i] == NO_ASSIGN) {
        if (last_found == NO_ASSIGN)
          last_found = i;
        else {
          assigned[i] = last_found;
          action[i] = RECEIVE|FAKE;
          assigned[last_found] = i;
          action[last_found] = SEND|FAKE;
          last_found = NO_ASSIGN;
        }
      }

    *extra = need_extra;
    *my_action = action[iam];
    return assigned[iam];
  }
}



/*
 * Uncongested measurement.  In each of the NUMTRYS rounds this task
 * first serves, in index order, every lower-numbered task as the
 * echoing side, then times an exchange with every higher-numbered
 * task.  Sum, sum of squares, minimum and the raw value of each timed
 * exchange are stored per partner.
 */
void do_free_times(double *total, double *total_2, double* min_time,
		   double** raw_time,
		   int iam_procr, int *tids, int p,
		   int NUMTRYS, int MSGLENGTH, int INNER,
		   char *mesgbuf)
{
  int round;
  int peer;

  for (round = 0; round < NUMTRYS; round++) {
    /* lower-numbered tasks do the timing; we just echo */
    peer = 0;
    while (peer < iam_procr) {
      slave_exch(tids[peer], MSGLENGTH, INNER, mesgbuf);
      peer++;
    }

    /* we do the timing for all higher-numbered tasks */
    for (peer = iam_procr + 1; peer < p; peer++) {
      double elapsed = master_exch(tids[peer], MSGLENGTH, INNER, mesgbuf);

      total[peer] += elapsed;
      total_2[peer] += elapsed*elapsed;
      raw_time[peer][round] = elapsed;
      if (elapsed < min_time[peer])
        min_time[peer] = elapsed;
    }
  }
}


/*
 * Echoing side of a timed exchange with task `partner`.
 *
 * Sends an empty EXCHANGE_OK message, then packs MSGLENGTH bytes of
 * mesgbuf once and, INNER times, receives a TIME_MESSAGE and sends
 * one back.  Finally frees the current receive buffer.
 */
void slave_exch(int partner, int MSGLENGTH, int INNER, char *mesgbuf)
{
  int remaining;

  /* announce that we are ready for the exchange */
  pvm_initsend(PvmDataDefault);
  pvm_send(partner, EXCHANGE_OK);

  /* the payload is packed once, before the loop, and sent on every */
  /* iteration without repacking */
  pvm_initsend(PvmDataRaw);
  pvm_pkbyte(mesgbuf, MSGLENGTH, 1);

  for (remaining = INNER; remaining > 0; remaining--) {
    pvm_recv(partner, TIME_MESSAGE);
    pvm_send(partner, TIME_MESSAGE);
  }

  pvm_freebuf(pvm_getrbuf());
}


/*
 * Timing side of an exchange with task `partner`.
 *
 * Waits for the partner's EXCHANGE_OK, packs MSGLENGTH bytes of
 * mesgbuf once and then performs INNER send/receive round trips of
 * TIME_MESSAGE between two gettimeofday calls.
 *
 * Returns the elapsed wall-clock time in seconds divided by 2*INNER,
 * i.e. divided by the number of messages that travelled.
 */
double master_exch(int partner, int MSGLENGTH, int INNER, char *mesgbuf)
{
  struct timeval t_begin; /* time communication test began */
  struct timeval t_end;   /* time communication test was over */
  double elapsed;
  int round = 0;

  /* do not start the clock before the partner is ready */
  pvm_recv(partner, EXCHANGE_OK);

  pvm_initsend(PvmDataRaw);
  pvm_pkbyte(mesgbuf, MSGLENGTH, 1);

  gettimeofday(&t_begin, NULL);
  while (round < INNER) {
    pvm_send(partner, TIME_MESSAGE);
    pvm_recv(partner, TIME_MESSAGE);
    round++;
  }
  gettimeofday(&t_end, NULL);

  pvm_freebuf(pvm_getrbuf());

  elapsed = (t_end.tv_usec - t_begin.tv_usec)*1e-6 +
            (t_end.tv_sec - t_begin.tv_sec);
  return elapsed / (INNER*2);
}


	
/*
 * Determines the latest point in time at which any task other than
 * task 0 called this routine, measured from alg_started.
 *
 * Every task with iam != 0 reads its clock and sends the value to
 * task 0 in a RUNTIME message.  Task 0 receives one such message per
 * task, converts it to seconds since alg_started, adds that task's
 * entry of offsets[] and keeps the maximum.
 *
 * Returns that maximum on task 0 (0 when no value is positive) and
 * 0.0 on every other task.
 */
double get_runtime(int *tids, int p, int iam,
		   double* offsets, struct timeval alg_started)
{
  long a[2]; /* seconds and microseconds, as packed into the message */
  int i;

  if (iam != 0) {
    struct timeval end_time;

    gettimeofday(&end_time, NULL);
    a[0] = end_time.tv_sec;
    a[1] = end_time.tv_usec;

    pvm_initsend(PvmDataDefault);
    pvm_pklong(a, 2, 1);
    pvm_send(tids[0], RUNTIME);

    /* only task 0 computes a result; return a defined value instead */
    /* of falling off the end of a non-void function */
    return 0.0;
  }
  else {
    struct timeval p_time;
    double p_used;
    double max_offset = 0;

    for (i = 1; i < p; i++) {
      pvm_recv(tids[i], RUNTIME);
      pvm_upklong(a, 2, 1);

      p_time.tv_sec = a[0];
      p_time.tv_usec = a[1];

      p_used = (p_time.tv_sec - alg_started.tv_sec) +
               (p_time.tv_usec - alg_started.tv_usec)*1e-6 +
               offsets[i];

      if (p_used > max_offset)
        max_offset = p_used;
    }

    return max_offset;
  }
}

/*
 * Estimates, on task 0, a clock offset for every other task.
 *
 * `count` times, task 0 performs two back-to-back SETUP round trips
 * with each task j; only the timestamps of the second one are used
 * (the first one's are overwritten).  Of all probes, the one with the
 * shortest round trip determines offsets[j]: the midpoint of task 0's
 * send and receive times minus the time task j reported.  offsets[0]
 * is set to 0.
 *
 * Every other task answers 2*count SETUP messages with its current
 * clock reading and does not touch offsets[].
 */
void get_offsets(int iam, int p, int* tids, double* offsets, int count)
{
  long stamp[2]; /* seconds and microseconds, as packed into a message */
  int probe, peer, pass;
  double min_round[p]; /* shortest round trip seen per task */

  if (iam != 0) {
    struct timeval recv_time;

    for (probe = 0; probe < count; probe++)
      for (pass = 0; pass < 2; pass++) {
        pvm_initsend(PvmDataDefault);
        pvm_recv(tids[0], SETUP);
        gettimeofday(&recv_time, NULL);
        stamp[0] = recv_time.tv_sec;
        stamp[1] = recv_time.tv_usec;
        pvm_pklong(stamp, 2, 1);
        pvm_send(tids[0], SETUP);
      }
    return;
  }

  {
    struct timeval before, after, remote;

    for (peer = 0; peer < p; peer++) {
      offsets[peer] = 0;
      min_round[peer] = 10000;
    }

    printf ("calibrating timers\n");
    for (probe = 0; probe < count; probe++) {
      for (peer = 1; peer < p; peer++) {
        double round_trip;

        /* two identical round trips; the values left in before, */
        /* after and remote are those of the second one */
        for (pass = 0; pass < 2; pass++) {
          pvm_initsend(PvmDataDefault);
          gettimeofday(&before, NULL);
          gettimeofday(&after, NULL);
          stamp[0] = after.tv_sec;
          stamp[1] = after.tv_usec;
          pvm_pklong(stamp, 2, 1);
          pvm_send(tids[peer], SETUP);

          pvm_recv(tids[peer], SETUP);
          gettimeofday(&after, NULL);

          pvm_upklong(stamp, 2, 1);
          remote.tv_sec = stamp[0];
          remote.tv_usec = stamp[1];
        }

        round_trip = ((after.tv_sec - before.tv_sec) +
                      (after.tv_usec - before.tv_usec)*1e-6);

        /* keep the estimate that came from the fastest round trip */
        if (round_trip < min_round[peer]) {
          min_round[peer] = round_trip;

          offsets[peer] =
            ((after.tv_sec + before.tv_sec) +
             (after.tv_usec + before.tv_usec)*1e-6)/2 -
            (remote.tv_sec + remote.tv_usec*1e-6);
        }
      }
    }

    printf ("finished calibration\n");
  }
}
    
    

/*
 * Builds the task set and tells the caller who it is.
 *
 *   p     in:  requested number of tasks on the root (values below 1
 *              are replaced by the first value pvm_config reports)
 *         out: number of tasks
 *   iam   out: index of the calling task in *tids (0 for the root)
 *   tids  out: malloc'ed table of *p task ids, owned by the caller
 *   argc, argv  the program's arguments
 *
 * A task acts as the root when it has no PVM parent, or when its first
 * argument starts with '0' (that character is then overwritten with
 * 'n' before the arguments are handed on).  The root spawns *p - 1
 * copies of the program through alb_spawn, passing on argv[1..], and
 * multicasts the task count and the tid table as a TID_LIST message.
 * Every other task receives that message from its parent and looks up
 * its own tid in the table.
 *
 * Terminates the process when memory runs out, when the task count is
 * below 1, or when a task cannot find its own tid in the table.
 * Assumes argc >= 1.
 */
void pre_spawn(int* p, int* iam, int** tids, int argc, char** argv)
{
  int i;
  struct pvmhostinfo *hostp; /* information on the virtual machine */

  if ((pvm_parent() == PvmNoParent) || ((argc > 1) && (argv[1][0] == '0'))) {
    char **new_argv;
    char *slash;
    int narch;

    if ((argc > 1) && (argv[1][0] == '0'))
      argv[1][0] = 'n';

    if (*p < 1)
      pvm_config(p, &narch, &hostp);

    if (*p < 1) {
      fprintf(stderr, "pre_spawn: no processors available\n");
      exit(1);
    }

    /* argument vector for the children: argv without the program */
    /* name, NULL-terminated */
    /* NOTE(review): new_argv is never freed; whether alb_spawn keeps */
    /* the pointer is not visible here -- confirm before freeing it. */
    new_argv = malloc((size_t) argc * sizeof *new_argv);
    *tids = malloc((size_t) *p * sizeof **tids);
    if (new_argv == NULL || *tids == NULL) {
      fprintf(stderr, "pre_spawn: out of memory\n");
      exit(1);
    }
    for (i = 1; i < argc; i++)
      new_argv[i-1] = argv[i];
    new_argv[argc-1] = NULL;

    (*tids)[0] = pvm_mytid();
    *iam = 0;

    /* spawn by base name: strip any directory part of argv[0] */
    slash = strrchr(argv[0], '/');
    alb_spawn(slash != NULL ? slash + 1 : argv[0], new_argv,
              *p-1, (*tids)+1, NULL, 0, 0);

    pvm_initsend(PvmDataDefault);
    pvm_pkint(p, 1, 1);
    pvm_pkint(*tids, *p, 1);
    pvm_mcast(&((*tids)[1]), *p-1, TID_LIST);
  }
  else {
    int me;

    pvm_recv(pvm_parent(), TID_LIST);
    pvm_upkint(p, 1, 1);

    if (*p < 1) {
      fprintf(stderr, "pre_spawn: received an empty task list\n");
      exit(1);
    }

    *tids = malloc((size_t) *p * sizeof **tids);
    if (*tids == NULL) {
      fprintf(stderr, "pre_spawn: out of memory\n");
      exit(1);
    }
    pvm_upkint(*tids, *p, 1);

    /* find our own position; stop at the end of the table instead of */
    /* reading past it */
    me = pvm_mytid();
    for (*iam = 0; *iam < *p && (*tids)[*iam] != me; (*iam)++)
      ;
    if (*iam >= *p) {
      fprintf(stderr, "pre_spawn: own task id not found in task list\n");
      exit(1);
    }
  }

  pvm_setopt(PvmRoute, PvmRouteDirect);
}

/*
 * Shutdown handshake.
 *
 * Every task other than task 0 blocks until it receives an EXITING
 * message from task 0.  Task 0 first requests a PvmTaskExit
 * notification (tagged EXITING) for each other task, then sends
 * EXITING to each of them, and finally receives p-1 messages tagged
 * EXITING from any sender -- presumably the requested exit
 * notifications.
 *
 * NOTE(review): no pvm_initsend precedes the sends below.
 */
void clean_exit(int iam, int* tids, int p)
{
  int w;

  if (iam != 0) {
    pvm_recv(tids[0], EXITING);
    return;
  }

  /* register for all notifications before releasing anyone */
  for (w = 1; w < p; w++)
    pvm_notify(PvmTaskExit, EXITING, 1, tids + w);

  for (w = 1; w < p; w++)
    pvm_send(tids[w], EXITING);

  for (w = 1; w < p; w++)
    pvm_recv(-1, EXITING);
}

