# define HACK /*
# -rw-r--r--  1 ajr          1166 Feb 26 19:40 onchip.trl
# 1166 is 0x48d, top is 0x80001000, so set base to be 0x80000b70
name=`echo $0 | sed "s/[.]c$//"`
addr="0x80000b70"
set -x
tcc -s $addr -T $addr -fnodouble -I$HOME/tools -O3 -o $name onchip.c $name.c $HOME/tools/tc_tools.tll /usr/local/tcx/lib/libt8track32.tll /usr/local/tcx/lib/libt8local32.tll
set +x
exit 0
*/
# undef HACK
/* 
 *      16 Apr 93       mmh     Added comments
 *
 */
/*****************************************************************************/
# include "Tools.h"
# include "Timit.h"

/* Bounds on the per-weight step size, applied in main() as multiples of the
 * mean step size (max_eta = UP_LIMIT * meta, min_eta = LO_LIMIT * meta). */
# define UP_LIMIT	    16.0
# define LO_LIMIT	    (1.0 / UP_LIMIT)
/* Weight-initialisation constants.  INIT_COPY_WEIGHT is used by -copyinit,
 * INIT_DIAG_WEIGHT scales the random diagonal term (skipped by -nodiaginit)
 * and INIT_GEN_WEIGHT is handed to Create_machine().
 * NOTE(review): INIT_OUTPUT_WEIGHT is not referenced in this file. */
# define INIT_OUTPUT_WEIGHT (-4.0)
# define INIT_COPY_WEIGHT   (2.0)
# define INIT_DIAG_WEIGHT   (2.0)
# define INIT_GEN_WEIGHT    (1.0 / 8.0)
/* Defaults for -adecay and -ainit (used to compute alpha in the main loop) */
# define DEFAULT_ADECAY     1.0
# define DEFAULT_AINIT      0.5

/* Defaults for the remaining command line options; each DEFAULT_X is the
 * fallback for option "-x" and is printed by the usage message.
 * DEFAULT_INC refers to DEFAULT_DEC before its definition, which is fine
 * because macros are only expanded at the point of use.
 * NOTE(review): DEFAULT_ALPHA is not referenced in this file. */
# define DEFAULT_INC	    (1.0 / DEFAULT_DEC) 
# define DEFAULT_DEC	    0.9
# define DEFAULT_STEP	    0.001
# define DEFAULT_MAXNPASS   32.0
# define DEFAULT_MAXWEIGHT  32.0
# define DEFAULT_BIAS       1.0
# define DEFAULT_ALPHA      0.5
# define DEFAULT_DELAY      4
# define DEFAULT_SAVET      0
# define DEFAULT_NOFFSET    1
# define DEFAULT_NSUM       32
# define DEFAULT_SEED	    1
# define DEFAULT_MMARGIN    1.04
/* Time thresholds compared against Time() in the exit-processing check:
 * CHECK_TIME is the minimum gap between checks, FIFO_TIME the run time after
 * which the fifo spool is consulted.  Expressed in TickRate_Low ticks
 * (presumably ticks per second, giving 2 hours and 5 minutes -- confirm). */
# define FIFO_TIME          (TickRate_Low * 60 * 60 * 2)
# define CHECK_TIME         (TickRate_Low * 60 * 5)
/* Number of chunks the training data is split into and the divisor used when
 * normalising the reaped energy / ncorrect totals */
# define NTRAIN             LAST_NODEID

/* Default layer sizes (-ninp, -nsta, -nout) */
# define DEFAULT_M0_INP     TIMIT_DEFAULT_NINP
# define DEFAULT_M0_STA     96
# define NHID		    0	    /* no support for hidden units */
# define DEFAULT_M0_OUT     TIMIT_NALLOPHONE
/* Number of distinct byte values; size of the input mapping table ierftab */
# define BYTE_SIZE	    256

/* index of the largest element of a float array (first one wins on ties) */
int	Pick_biggest(float*, int);
/* fread() performed on the server node, result broadcast to every node */
int	remote_fread(char*, int, int, FILE*);
/* progress marker: hex digit every 16 steps, '_' every 8, '.' otherwise */
void	ruler(int, FILE*);
/* normalise the external outputs so that they sum to one */
void	norm_output(Machine_type*);
/* non-zero if the named file can be opened for reading */
int	fexist(char*);

/*
 * Program entry point: trains (TC build) or dumps the outputs of (non-TC
 * build, -dump) a recurrent network on 'lna' preprocessed TIMIT frames.
 *
 * The last three command line arguments are, in order, the preprocessor
 * file, the weights file and the log file; all other options are listed by
 * the usage message below.  Node B011_NODEID acts as the server: it owns the
 * files, the per-weight step sizes (eta) and the weight update.  Every other
 * node runs forward / backward passes over its own share of the frames and
 * the resulting weight changes are summed with Reap_array().
 *
 * In the TC build the training loop never terminates by itself; the program
 * leaves through exit(0) once a checkpoint has been written and either
 * npass exceeds -maxnpass or the exit-processing check fired.
 * Returns 0 (only reached in the non-TC build without -dump).
 */
int main(int argc, char **argv) {
  static Machine_type m;
  static Tools_type *target, **state_op, **eta;
  static FILE  *fp_pre, *fp_wei, *fp_log;
  static float  ierftab[BYTE_SIZE], minout, maxout, adecay, ainit;
  static float  inc, dec, step, senergy0, senergy1, sncorrect;
  static float  npass, maxnpass, maxweight, mmargin, meta, scosth = 0.0;
  static int    debug, nsum, online, ninp, nsta, nout, base_frame;
  static int    nframe, pre_size, pre_max, framesize, msize, nsize;
  static int    delay, noffset, state, nodeid, i, j, phnsizenorm;
  static int	last_phn_index = -1, last_phn_nframe, *phn_nframe_buffer;
  static int    last_check_time = 0, first_after_boot = TRUE, savet;
  static char   base_name[MAX_NAME_SIZE], *data;

  /* processor initialization */
  nodeid = getnodeid();
  Set_heapend();
  Init_write_down_link1out();

  /* command line processing -- make sure required arguments are specified */
  if(nodeid == B011_NODEID) {

    /* check for help request, too few arguments or unopenable files */
    if(Scan_flag(argc, argv, "-h") || argc <= 3 ||
       (fp_pre = Std_fopen(argv[argc - 3], "r")) == NULL ||
       (fp_wei = fopen(argv[argc - 2], "a+")) == NULL ||
       (fp_log = Std_fopen(argv[argc - 1], "a")) == NULL) {
      fprintf(stderr, "Syntax:\t%s\n", *argv);
      fprintf(stderr, "\t-noerf\n");
      fprintf(stderr, "\t-centre\n");
      fprintf(stderr, "\t-state\n");
      fprintf(stderr, "\t-debug\n");
      fprintf(stderr, "\t-online\n");
      fprintf(stderr, "\t-linear\n");
      fprintf(stderr, "\t-copyinit\n");
      fprintf(stderr, "\t-nodiaginit\n");
      fprintf(stderr, "\t-nophnsizenorm\n");
      fprintf(stderr, "\t-inc %f\n", DEFAULT_INC);
      fprintf(stderr, "\t-dec %f\n", DEFAULT_DEC);
      fprintf(stderr, "\t-step %f\n", DEFAULT_STEP);
      fprintf(stderr, "\t-mmargin %f\n", DEFAULT_MMARGIN);
      fprintf(stderr, "\t-maxnpass %f\n", DEFAULT_MAXNPASS);
      fprintf(stderr, "\t-maxweight %f\n", DEFAULT_MAXWEIGHT);
      fprintf(stderr, "\t-nsum %d\n", DEFAULT_NSUM);
      fprintf(stderr, "\t-ninp %d\n", DEFAULT_M0_INP);
      fprintf(stderr, "\t-nsta %d\n", DEFAULT_M0_STA);
      fprintf(stderr, "\t-nout %d\n", DEFAULT_M0_OUT);
      fprintf(stderr, "\t-seed %d\n", DEFAULT_SEED);
      fprintf(stderr, "\t-delay %d\n", DEFAULT_DELAY);
      fprintf(stderr, "\t-savet %d\n", DEFAULT_SAVET);
      fprintf(stderr, "\t-noffset %d\n", DEFAULT_NOFFSET);
      fprintf(stderr, "\t-bias %f\n", DEFAULT_BIAS);
      fprintf(stderr, "\t-ainit %f\n", DEFAULT_AINIT);
      fprintf(stderr, "\t-adecay %f\n", DEFAULT_ADECAY);
      fprintf(stderr, "\t-dump <no default>\n");
      fprintf(stderr, "\t<-|preprocessor file>\n");
      fprintf(stderr, "\t<weights file>\n");
      fprintf(stderr, "\t<-|log file>\n");
      exit(1);
    }

    /* the weights file was opened in append mode: go back to its start */
    rewind(fp_wei);
  }

  /* broadcast command line arguments to the processing nodes */
  Broadcast_argc_argv(&argc, &argv);

  /* get the optional command line arguments */
  debug      = Scan_flag(argc, argv, "-debug");
  state      = Scan_flag(argc, argv, "-state");
  online     = Scan_flag(argc, argv, "-online");
  phnsizenorm=!Scan_flag(argc, argv, "-nophnsizenorm");
  nsum       = Scan_int(argc, argv, "-nsum", DEFAULT_NSUM);
  delay      = Scan_int(argc, argv, "-delay", DEFAULT_DELAY);
  savet      = Scan_int(argc, argv, "-savet", DEFAULT_SAVET);
  noffset    = Scan_int(argc, argv, "-noffset", DEFAULT_NOFFSET);
  ninp       = Scan_int(argc, argv, "-ninp", DEFAULT_M0_INP);
  nsta       = Scan_int(argc, argv, "-nsta", DEFAULT_M0_STA);
  nout       = Scan_int(argc, argv, "-nout", DEFAULT_M0_OUT);
  inc        = Scan_double(argc, argv, "-inc", DEFAULT_INC);
  dec        = Scan_double(argc, argv, "-dec", DEFAULT_DEC);
  ainit      = Scan_double(argc, argv, "-ainit", DEFAULT_AINIT);
  adecay     = Scan_double(argc, argv, "-adecay", DEFAULT_ADECAY);
  mmargin    = Scan_double(argc, argv, "-mmargin", DEFAULT_MMARGIN);
  maxnpass   = Scan_double(argc, argv, "-maxnpass", DEFAULT_MAXNPASS);
  maxweight  = Scan_double(argc, argv, "-maxweight", DEFAULT_MAXWEIGHT);

  /* save the command line in the log file */
  if(nodeid == B011_NODEID) Panic_fprintf_args(argc, argv, fp_log);

  /* NOTE(review): this value is overwritten two statements below, so the
     smallest representable 'lna' probability is currently not used */
  minout = exp(- BYTE_SIZE / LNPROB_FLOAT2INT);

  /* initialize the network output range */
  minout = 0.0;
  maxout = 1.0 - (nout - 1) * minout;

  /* the mean step size starts at the nominal step size */
  meta = step= Scan_double(argc, argv, "-step", DEFAULT_STEP);

  /* initialize the random number generator */
  Ran1(Scan_int(argc, argv, "-seed", DEFAULT_SEED));

  /* determine the input framesize: one label byte plus ninp data bytes */
  framesize = ninp + 1;

  if(nodeid != B011_NODEID) debug = FALSE;

  /* construct table for mapping of the 'lna' representation into [0,1];
     the usage message advertises "-centre", so accept both spellings */
  if(Scan_flag(argc, argv, "-center") || Scan_flag(argc, argv, "-centre"))
    for(i = 0; i < BYTE_SIZE; i++)
      ierftab[i] = 2.0 * (i + 0.5) / BYTE_SIZE - 1.0;
  else if(Scan_flag(argc, argv, "-noerf"))
    for(i = 0; i < BYTE_SIZE; i++)
      ierftab[i] = (i + 0.5) / BYTE_SIZE;
  else
    for(i = 0; i < BYTE_SIZE; i++)
      ierftab[i] = ierf(2.0 * (i + 0.5) / BYTE_SIZE - 1.0);

  /* compute the number of frames in the preprocessed file
     (whence 2 is SEEK_END: measure the file, then go back to the start) */
  if(nodeid == B011_NODEID) {
    (void) fseek(fp_pre, (long) 0, 2);
    pre_size = ftell(fp_pre) / framesize;
    rewind(fp_pre);
  }

#ifdef TC
  if(!Scan_flag(argc, argv, "-dump")) {

    /* broadcast the number of frames for each word */
    Broadcast_word(&pre_size);

    /* allocate memory for each node with some safety margin */
    pre_max = mmargin * pre_size / NTRAIN;
    data    = Panic_uchar_array(pre_max * framesize);

    /* server node reads the data and distributes it to the client nodes */
    if(nodeid == B011_NODEID) {
      int nread = 0, last_nread = 0;
      for(i = 0; i < NTRAIN; i++) {

	/* give up straight away if the ONLINE marker file exists */
	if(!online && fopen(ONLINE, "r") != NULL) {
	  system("$HOME/bin/fifo_repeat");
	  fprintf(fp_log, "Bailing out!\n");
	  exit(0);
	}

	/* read main chunk of data from the file */
	nread += fread(data,framesize,pre_size*(i+1)/NTRAIN-last_nread,fp_pre);

	/* read data 1 frame at a time to locate end of the utterance */
	while((*(data + (nread - last_nread - 1) * framesize) &TIMIT_SENT_MASK)
	      == 0 && nread - last_nread < pre_max)
	  nread += fread(data+(nread-last_nread)*framesize,framesize,1,fp_pre);

	/* check for errors -- may want to adjust the margin */
	if((*(data + (nread - last_nread - 1) * framesize)&TIMIT_SENT_MASK)==0)
	  Panic("%s: Overflow of input buffer on data read\n", *argv);

	/* write the data to the client nodes */
	ChanOutBlock(LINK3OUT, (char*) data, (nread - last_nread) * framesize);


	/* save the number of points read */
	last_nread = nread;

	/* output data to the log file to keep track of where we are */
	ruler(i, fp_log);
      }

      /* make sure all the data was read from the file; ftell() returns a
	 long, so convert explicitly to match the %d conversions */
      if(pre_size != ftell(fp_pre) / framesize) 
	Panic("%s: Only read %d training frames out of %d\n", *argv,
	      (int) (ftell(fp_pre) / framesize), pre_size);

      /* initialize the processed frame number */
      nframe = 0;

      /* update the log file */
      fprintf(fp_log, "\n");
      fflush(fp_log);

      /* free the data area -- no longer used by the server node */
      Panic_free(data);
    }
    else {
      /* pass on the chunks destined for the nodes further down the chain,
	 then keep the last chunk received as this node's own data */
      for(i = 0; i < NTRAIN - nodeid; i++)
	ChanOutBlock(LINK1OUT, (char*) data, ChanInBlock(LINK0IN,(char*)data));
      nframe = ChanInBlock(LINK0IN, (char*) data) / framesize;
    }
  }
#endif

  /* read in the header; a short read means there is no saved state, so
     training starts from pass zero */
  if(remote_fread((char*) &nsta,      sizeof(nsta),	1, fp_wei) != 1 ||
     remote_fread((char*) &npass,     sizeof(npass),	1, fp_wei) != 1 ||
     remote_fread((char*) &meta,      sizeof(meta),	1, fp_wei) != 1 ||
     remote_fread((char*) &senergy0,  sizeof(senergy0),	1, fp_wei) != 1 ||
     remote_fread((char*) &senergy1,  sizeof(senergy1),	1, fp_wei) != 1 ||
     remote_fread((char*) &sncorrect, sizeof(sncorrect),1, fp_wei) != 1) {
    npass = 0.0;
  }
#ifndef TC
  else {
    /* swap bytes because data structure is different on T-rack */
    Swal((char*) &nsta,      (char*) &nsta, sizeof(nsta));
    Swal((char*) &npass,     (char*) &npass, sizeof(npass));
    Swal((char*) &meta,      (char*) &meta, sizeof(meta));
    Swal((char*) &senergy0,  (char*) &senergy0, sizeof(senergy0));
    Swal((char*) &senergy1,  (char*) &senergy1, sizeof(senergy1));
    Swal((char*) &sncorrect, (char*) &sncorrect, sizeof(sncorrect));
  }
#endif

  /* allocate memory for output targets */
  target = Panic_float_array(nout);

  /* if the server node create the model */
  if(nodeid == B011_NODEID) {

    /* create the model structure */
    Create_machine(&m, BIAS, ninp, nsta, NHID, nsta, nout, INIT_GEN_WEIGHT);

    /* weight modifications */
    if(Scan_flag(argc, argv, "-copyinit"))
      for(i = 0; i < nsta; i++) {
	m.weight[m.sta_op + i][0] -= 0.5 * INIT_COPY_WEIGHT;
	m.weight[m.sta_op + i][m.ext_ip + i] += INIT_COPY_WEIGHT;
      }
    if(!Scan_flag(argc, argv, "-nodiaginit"))
      for(i = 0; i < nsta; i++) 
	m.weight[m.sta_op + i][m.sta_ip + i] += Ran1(0) * INIT_DIAG_WEIGHT;

    /* make a copy of the weights with all values set to 'step' */
    if((eta = Make_weight_matrix(&m)) == NULL) Panic("%s: bye bye\n", *argv);
    else Set_weight_matrix(&m, eta, step);
  }
  else /* create the model structure without the smoothing array */
    Create_machine_no_smooth(&m, BIAS, ninp, nsta, NHID, nsta, nout, 1.0);

  /* reassign node bias */
  m.node_op[0] = Scan_double(argc, argv, "-bias", DEFAULT_BIAS);
  
  /* set the node non-linearities */
  m.hid_node = m.sta_node = &Safe_usigmoid_node;
  m.ext_node = &Exp_node;
  if(Scan_flag(argc, argv, "-linear")) m.sta_node = &Linear_node;

  /* get the number of elements and bytes in the weight matrix */
  nsize = (msize = Sizeof_weight_matrix(&m)) / sizeof(**m.weight);

  /* read the weights and step size data from the weights file */
  if(nodeid == B011_NODEID) {
    if(!feof(fp_wei)) {
#ifdef TC
      if(!Read_machine(&m, fp_wei) ||
#else
      if(!Read_swal_machine(&m, fp_wei) ||
#endif	
	 fread((char*) *eta, 1, msize, fp_wei) != msize)
	Panic("%s: can't read in machine and step size\n", *argv);
#ifndef TC
      Swal((char*) *eta, (char*) *eta, msize);
#endif
    }

#ifndef TC
    /* run the network over the preprocessor file and write, per frame, the
       (delayed) label byte followed by the quantised outputs or states */
    if(Scan_flag(argc, argv, "-dump")) {
      FILE *fp_dmp;
      uchar **ddata, *dump_out;
      int ndump = state ? nsta : nout;
      float *todump = m.node_op + (state ? m.sta_op : m.ext_op);

      fp_dmp = Panic_fopen(Scan_string(argc, argv, "-dump", "/dev/null"), "w");
      ddata   = (uchar**) Panic_char_2d_array(abs(delay) + 1, framesize);
      dump_out = Panic_uchar_array(ndump + 1);
      for(i = 0; i <= delay; i++) ddata[i][0] = TIMIT_SILENCE_INDEX;
      for(i = 0; fread((char*) ddata[0], framesize, 1, fp_pre) == 1; i++) {

	for(j = 0; j < ninp; j++)
	  m.node_op[m.ext_ip + j] = ierftab[ddata[0][j + 1]];
	Forward(&m);
	norm_output(&m);

	COPY(m.node_op + m.sta_op, m.node_op + m.sta_ip, nsta);
	dump_out[0] = ddata[delay][0];

	/* nothing is written until 'delay' frames have been seen */
	if(i >= delay) {
	  for(j = 0; j < ndump; j++) {
	    int lnp = floor(- LNPROB_FLOAT2INT * log(todump[j] + VERY_SMALL));
	    dump_out[j + 1] = (lnp < MAX_UCHAR) ? lnp : MAX_UCHAR;
	  }	
	  Panic_fwrite(dump_out, sizeof(*dump_out), ndump + 1, fp_dmp);
	}

	/* shift the frame history by one */
	for(j = delay; j > 0; j--) COPY(ddata[j - 1], ddata[j], framesize);
      }

      /* flush the labels still held in the delay line */
      for(i = delay; i > 0; i--) {
	dump_out[0] = ddata[i][0];
	Panic_fwrite(dump_out, sizeof(*dump_out), ndump + 1, fp_dmp);
      }
      exit(0);
    }
#endif
  }
#ifdef TC

  /* create the node output array over the BP thru time interval */
  state_op = Panic_float_2d_array(nsum + 1, nsta + nout);

  /* create the vector of phones over the BP thru time interval */
  phn_nframe_buffer = Panic_int_array(nsum);

  /**********************/
  /* START OF MAIN LOOP */
  /**********************/

  /* initialize the target outputs */
  for(i = 0; i < nout; i++) target[i] = minout;

  /* distribute the outputs and weights to the different nodes */
  Distribute((char*) m.node_op, m.length * sizeof(*m.node_op), LAST_NODEID);
  COPY(m.node_op + m.sta_ip, state_op[0], nsta);
  Distribute((char*) *m.weight, msize, LAST_NODEID);

  /* each iteration processes nsum frames on every client node followed by
     one weight update on the server node */
  for(base_frame = npass * nframe; TRUE; base_frame += nsum) {
    static float decay0, decay1, energy, local_energy, ncorrect,local_ncorrect;
    static float costh = 0.0;
    static int phn_size;
    float final_alpha = 1.0 - NTRAIN * nsum / (float) (noffset * pre_size);
    float alpha = final_alpha - (final_alpha - ainit) * exp(-npass / adecay);

    local_energy = 0.0;
    local_ncorrect = 0.0;
    decay0 = 1.0 - alpha;
    decay1 = 0.5 * decay0;

    if(nodeid == B011_NODEID) {

      /* server: fold the previous update's changes into the smoothed
	 gradient and clamp step sizes and weights */
      if(!first_after_boot) {
	float dot_product = 0.0, change_smag = 0.0, smooth_smag = 0.0;
	float seta = 0.0, max_eta = UP_LIMIT * meta, min_eta = LO_LIMIT * meta;
	float calpha = 1.0 - alpha;

	for(i = m.hidden; i < m.length; i++) {
	  float *eta_i = eta[i];

	  for(j = 0; j < MIN(i, m.sta_op); j++) {
	    seta	+= eta_i[j];
	    dot_product	+= SIGN(m.change[i][j]) * eta_i[j] * m.smooth[i][j];
	    change_smag	+= eta_i[j] * eta_i[j];
	    smooth_smag	+= m.smooth[i][j] * m.smooth[i][j];
            if(eta_i[j] > max_eta) eta_i[j] = max_eta;
            if(eta_i[j] < min_eta) eta_i[j] = min_eta;

	    /* a saturated weight is clamped and its step size is pulled
	       half way back towards the nominal step */
            if(m.weight[i][j] >  maxweight) {
	      m.weight[i][j] =  maxweight;
	      eta_i[j] = (eta_i[j] + step) / 2.0;
	    }
            if(m.weight[i][j] < -maxweight) {
	      m.weight[i][j] = -maxweight;
	      eta_i[j] = (eta_i[j] + step) / 2.0;
	    }
	    
	    m.smooth[i][j] = alpha * m.smooth[i][j] + calpha * m.change[i][j];
	    m.change[i][j] = 0.0;
	  }
	}

	/* cosine of the angle between the last update and the smoothed
	   gradient, and the new mean step size */
	costh = dot_product / sqrt(change_smag * smooth_smag + VERY_SMALL);
	meta = seta / nsize;
      }
    }
    else {

      /* clear the change matrix */
      Zero_weight_matrix(&m, m.change);

      /* step through the time interval */
      for(i = 0; i < nsum; i++) {

	/* get pointer to interval frame of data */
	uchar *curr_data = data + ((base_frame + i) % nframe) * framesize;

	/* get the frame label information -- phone and sentence end flag */
	int key = *(data + ((base_frame + i -delay+nframe)%nframe)*framesize);

	/* get the frame label */
	int index = key & TIMIT_LABEL_MASK;

	if(phnsizenorm) {

	  /* at the start of a new phone segment count its frames by
	     scanning forward to the next label change or sentence end */
	  if(index != last_phn_index || (key & TIMIT_SENT_MASK) != 0) {
	    int done = FALSE;

	    last_phn_index = index;
	    last_phn_nframe = 0;
	    for(j = base_frame + i + 1 - delay + nframe; !done; j++) {
	      int new_key = *(data + (j % nframe) * framesize);
	      int new_index = new_key & TIMIT_LABEL_MASK;

	      if(new_index != index || (new_key & TIMIT_SENT_MASK) != 0)
		done = TRUE;
	      last_phn_nframe++;
	    }
	  }
	  phn_nframe_buffer[i] = last_phn_nframe;
	}

	/* assign the external input for the current frame */
	for(j = 0; j < ninp; j++)
	  m.node_op[m.ext_ip + j] = ierftab[curr_data[j + 1]];

	/* set up the internal inputs */
	COPY(state_op[i], m.node_op + m.sta_ip, nsta);

	/* forward propagate the network */
	Forward(&m);
	norm_output(&m);

	/* save internal and external outputs */
	COPY(m.node_op + m.sta_op, state_op[i + 1], nsta + nout);
      }

      /* clear the state node delta terms */
      ZERO(m.delta + m.sta_op, nsta);

      /* back-propagate through the interval */
      for(i = nsum - 1; i >= 0; i--) {

	/* get pointer to interval frame of data */
	uchar *curr_data = data + ((base_frame + i) % nframe) * framesize;

	/* get the frame label information -- phone and sentence end flag */
	int key = *(data + ((base_frame + i -delay+nframe)%nframe)*framesize);

	/* get the frame label */
	int index = key & TIMIT_LABEL_MASK;

	/* assign the external input for the current frame */
	for(j = 0; j < ninp; j++)
	  m.node_op[m.ext_ip + j] = ierftab[curr_data[j + 1]];

	/* set up the internal inputs */
	COPY(state_op[i], m.node_op + m.sta_ip, nsta);

	/* set up the internal and external outputs */
	COPY(state_op[i + 1], m.node_op + m.sta_op, nsta + nout);

	/* check if the forward process selected the correct phone */
	if(Pick_biggest(m.node_op + m.ext_op, nout) == index) local_ncorrect++;

	/* set the target value to the true phone */
	target[index] = maxout;

	/* compute the error signal over the frame outputs, optionally
	   weighted by one over the length of the phone segment */
	if(phnsizenorm) {
	  local_energy += H_compare(&m, target) / phn_nframe_buffer[i];
	  for(j = 0; j < nout; j++) 
	    m.delta[m.ext_op + j] /= (float) phn_nframe_buffer[i];
	}
	else
	  local_energy += H_compare(&m, target);

	/* clear the target for the next frame */
	target[index] = minout;

	/* back-propagate the error signal */
	Backward_accumulate(&m, m.change);

	/* copy the delta signal inputs to outputs (reversed for BP?) */
	COPY(m.delta + m.sta_ip, m.delta + m.sta_op, nsta);
      }

      /* wrap the internal external outputs */
      COPY(state_op[nsum], state_op[0], nsta + nout);
    }

    /* accumulate the parameters from the different nodes */
    Reap_array(*m.change, nsize);
    Reap_float(&local_energy);
    Reap_float(&local_ncorrect);

    /* the last node sends the state at the first sentence boundary of the
       interval (or an empty block if there is none) back to the server */
    if(nodeid == LAST_NODEID) {
      int index = -1;
      for(i = 0; i < nsum && index == -1; i++)
	if(*(data + ((base_frame + i - delay + nframe) % nframe) * framesize)
	   & TIMIT_SENT_MASK) index = i;
      if(index == -1) ChanOutBlock(LINK1OUT, (char*) NULL, 0);
      else ChanOutBlock(LINK1OUT, (char*) state_op[index], nsta*sizeof(float));
    }
    if(nodeid == B011_NODEID) 
      ChanInBlock(LINK2IN, (char*) &m.node_op[m.sta_ip]);

    /* normalize parameters and broadcast */
    energy   = local_energy   / (NTRAIN * nsum);
    ncorrect = local_ncorrect / (NTRAIN * nsum);
    Broadcast((char*) &energy, sizeof(energy));
    Broadcast((char*) &ncorrect, sizeof(ncorrect));

    /* set the smoothing, momentum, whatever, etc terms */
    if(npass == 0) {
      senergy1  = senergy0 = energy;
      sncorrect = ncorrect;
      scosth    = costh;
    }
    else if(!first_after_boot) {
      senergy0  = (1.0 - decay0) * senergy0 + decay0 * energy;
      senergy1  = (1.0 - decay1) * senergy1 + decay1 * energy;
      sncorrect = (1.0 - decay0) * sncorrect + decay0 * ncorrect;
      scosth	= (1.0 - decay0) * scosth   + decay0 * costh;
    }
    
    /* server: grow the step size when the new change agrees in sign with
       the smoothed gradient, shrink it otherwise, then move each weight by
       its step size in the direction given by the sign of the change */
    if(nodeid == B011_NODEID) {
      for(i = m.hidden; i < m.length; i++) {
	float *weight_i	= m.weight[i];
	float *change_i	= m.change[i];
	float *smooth_i	= m.smooth[i];
	float *eta_i	= eta[i];
	
	for(j = 0; j < MIN(i, m.sta_op); j++) {
	  eta_i[j] *= SIGN(change_i[j]) == SIGN(smooth_i[j]) ? inc : dec;
	  if(change_i[j] > 0.0) weight_i[j] += eta_i[j];
	  else weight_i[j] -= eta_i[j];
	}
      }
    }

    /* distribute the model weights to all the nodes for the next iteration */
    Distribute((char*) *m.weight, msize, LAST_NODEID);

    if(nodeid == B011_NODEID) {
      FILE *fsave = (FILE*) NULL;

      /* save information in the log file */
      fprintf(fp_log,"%6.2f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.6f\n",
	      npass, alpha, meta / step, energy, senergy0, senergy1/senergy0,
	      ncorrect, sncorrect, scosth);
      fflush(fp_log);

      /* check for exit-processing status: at most once per CHECK_TIME,
	 request a save to the main weights file if the ONLINE marker exists
	 or, after FIFO_TIME, if the fifo spool test fails */
      if(!online && Time() > last_check_time + CHECK_TIME) {
	if(fexist(ONLINE) || (Time() > FIFO_TIME &&
	  system("test `ls /usr/spool/fifo | wc -l` = 1") != 0))
	  fsave = fp_wei;
	last_check_time = Time();
      }

/* room for '-', the widest possible "%02d" rendering of an int, and the
   terminating NUL */
#define NGUFF 16
      { float oldnpass = npass;
	
	npass += NTRAIN * nsum / (float) pre_size;
	
	/* every 'savet' passes write a snapshot to "<weights file>-NN" */
	if(savet > 0 && floor(oldnpass / savet) != floor(npass / savet)) {
	  char *path;
	  
	  path = malloc(strlen(argv[argc - 2]) + NGUFF);
	  if(path == NULL)
	    Panic("%s: can't allocate memory for the save path\n", *argv);
	  sprintf(path, "%s-%02d", argv[argc - 2], (int) floor(npass));

	  fsave = Panic_fopen(path, "w");
	  free(path);
	}	  
      }

      /* write header, machine and step sizes in the order they are read */
      if(fsave != NULL) {
	rewind(fsave);
	if(fwrite((char*) &nsta,	sizeof(nsta),	  1, fsave) !=1 ||
	   fwrite((char*) &npass,	sizeof(npass),    1, fsave) !=1 ||
	   fwrite((char*) &meta,	sizeof(meta),     1, fsave) !=1 ||
	   fwrite((char*) &senergy0,	sizeof(senergy0), 1, fsave) !=1 ||
	   fwrite((char*) &senergy1,	sizeof(senergy1), 1, fsave) !=1 ||
	   fwrite((char*) &sncorrect,	sizeof(sncorrect),1, fsave) !=1 ||
	   Write_machine(&m, fsave) == FALSE ||
	   fwrite((char*) *eta, 1, msize, fsave) != msize) 
	  Panic("%s: Can't write machine\n", *argv);

	if(npass > maxnpass) exit(0);

	if(fsave == fp_wei) {
	  system("$HOME/bin/fifo_repeat");
	  exit(0);
	}
	else
	  fclose(fsave);
      }
      
      /* update the processing status */
      first_after_boot = FALSE;
    }
  }
#endif 
  return(0);
}

/*
 * Return the index of the largest of the first 'size' elements of 'array'.
 * When several elements share the largest value the lowest index is
 * returned.  'array' must hold at least one element.
 */
int Pick_biggest(float *array, int size) {
  int winner = 0;
  int k = 1;

  while(k < size) {
    /* strict comparison: an equal value never displaces the current winner */
    if(array[k] > array[winner]) winner = k;
    k++;
  }
  return(winner);
}

/*
 * fread() for a multi-node program: only node B011_NODEID touches 'stream';
 * the item count and then 'size * nread' bytes of 'data' are handed to
 * Broadcast_word() / Broadcast() (presumably propagating the server's values
 * to every other node -- confirm against the Tools library).
 *
 *   data    destination buffer, at least size * nitems bytes on every node
 *   size    size of one item in bytes
 *   nitems  number of items requested
 *   stream  input stream; only used on the server node
 *
 * Returns the number of items read, as seen after the broadcast.
 */
int remote_fread(char *data, int size, int nitems, FILE *stream) {
  /* start from a defined value: nodes other than the server never assign
     nread themselves before its address is handed to Broadcast_word() */
  int nread = 0;

  if(getnodeid() == B011_NODEID) nread = fread(data, size, nitems, stream);
  Broadcast_word(&nread);
  Broadcast(data, size * nread);
  return(nread);
}

/*
 * Write a one character progress marker for step 'i' to 'stream' and flush:
 * the value i / 16 in hex when i is a multiple of 16, '_' when i is a
 * multiple of 8 (but not 16), and '.' for every other step.
 */
void ruler(int i, FILE *stream) {
  const int major = 0x10, minor = 0x08;

  if(i % major != 0) {
    if(i % minor != 0) fprintf(stream, ".");
    else fprintf(stream, "_");
  }
  else {
    fprintf(stream, "%1x", i / major);
  }
  fflush(stream);
}

/*
 * Normalise the external output nodes (indices ext_op .. length - 1) so
 * that they sum to one.  Each output is computed from the node *input*,
 * shifted by the largest input, to avoid overflow in exp(); inputs lying
 * more than log(VERY_BIG) below the largest are floored at 1 / VERY_BIG
 * before the final division by the sum.
 */
void norm_output(Machine_type *p) {
  const int first = p->ext_op, end = p->length;
  float peak, total;
  int k;

  /* find the largest node input over the external outputs */
  peak = p->node_ip[first];
  for(k = first + 1; k < end; k++) {
    if(p->node_ip[k] > peak) peak = p->node_ip[k];
  }

  /* exponentiate relative to the peak and accumulate the stored values */
  total = 0.0;
  for(k = first; k < end; k++) {
    if(peak - p->node_ip[k] < log(VERY_BIG))
      p->node_op[k] = exp(p->node_ip[k] - peak);
    else
      p->node_op[k] = 1.0 / VERY_BIG;
    total += p->node_op[k];
  }

  /* scale so that the outputs sum to one */
  for(k = first; k < end; k++) {
    p->node_op[k] /= total;
  }
}

/*
 * Report whether 'path' can be opened for reading.
 *
 *   path  name of the file to probe
 *
 * Returns 1 if fopen(path, "r") succeeds and 0 otherwise.  The probe stream
 * is closed before returning.
 */
int fexist(char *path) {
  FILE *stream = fopen(path, "r");

  /* nothing to close when the open failed; fclose(NULL) is undefined */
  if(stream == NULL) return(0);

  fclose(stream);
  return(1);
}
