/************************************
   Code for implementing the backpropagation algorithm for training
   a fully-connected MLP neural network with 1 layer of hidden units.
   Loosely translated from backprop.lisp by David Touretzky.

   Compile with the command:  cc -o backprop backprop.c -lm
   
   Justin Boyan, Oct 5 1993
*************************************/

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/*** Set up the network to solve the 4-2-4 encoder problem***/
#define n_inp 4   /* number of input units (bias unit not counted) */
#define n_hid 2   /* number of hidden units (bias unit not counted) */
#define n_out 4   /* number of output units */

#define n_pat 4   /* number of training patterns */

/* One row per training pattern: a one-hot input vector ... */
float train_in[n_pat][n_inp] = {
  {1, 0, 0, 0},
  {0, 1, 0, 0},
  {0, 0, 1, 0},
  {0, 0, 0, 1}
};

/* ... and the target vector for the same row, which for the encoder
   problem is identical to the input. */
float train_out[n_pat][n_out] = {
  {1, 0, 0, 0},
  {0, 1, 0, 0},
  {0, 0, 1, 0},
  {0, 0, 0, 1}
};
/*********/

/* learning rates: eta1 scales changes to w1, eta2 scales changes to w2 */
float eta1 = 0.1;
float eta2 = 0.1;

/* momentum coefficient: fraction of the previous epoch's weight change
   that is carried into the current one */
float alpha = 0.9;

/* R: random initial weights are drawn from [-R,R) */
float randmax = 0.5;

/* number of training epochs completed so far */
long  epoch_counter = 0;

/* total sum-squared output error accumulated during the current epoch */
float epoch_error;

/* Unit activations.  inp and hid each carry one extra trailing element
   for the bias unit, which initialize() sets to 1.0. */
float inp[n_inp+1];        /* input activations (+ bias) */
float hid0[n_hid];         /* hidden net input, before the activation fn */
float hid[n_hid+1];        /* hidden activations, after the activation fn (+ bias) */
float out0[n_out];         /* output net input, before the activation fn */
float out[n_out];          /* output activations, after the activation fn */

float target[n_out];       /* target output values for the current pattern */

/* delta (error signal) values used in the backprop computation */
float dout[n_out];
float dhid[n_hid];

/* Weight matrices, indexed [source unit][destination unit]; the last row
   of each holds the weights coming from the bias unit. */
float w1[n_inp+1][n_hid];        /* input->hidden weights */
float w2[n_hid+1][n_out];        /* hidden->output weights */
float dw1[n_inp+1][n_hid];       /* weight changes to w1 accumulated this epoch */
float dw2[n_hid+1][n_out];       /* weight changes to w2 accumulated this epoch */
float prev_dw1[n_inp+1][n_hid];  /* change applied to w1 in the previous epoch */
float prev_dw2[n_hid+1][n_out];  /* change applied to w2 in the previous epoch */

/* Kinds of activation function understood by actfn() and actfnprime():
   sigm = logistic sigmoid, line = linear (identity), gauss = Gaussian. */
enum actfntype {
  sigm,
  line,
  gauss
};

enum actfntype h_actfn = sigm;  /* activation fn used on the hidden layer */
enum actfntype o_actfn = sigm;  /* activation fn used on the output layer */

/*** Prototypes ***/

/* Square of x.  The argument is evaluated twice, so pass only expressions
   without side effects. */
#define sqr(x) ((x)*(x))

/* Uniform pseudo-random double in [lo,hi), built on rand() from <stdlib.h>.
   The divisor is RAND_MAX+1.0 rather than a hard-coded 2^31-1: rand() only
   promises values in 0..RAND_MAX, and RAND_MAX differs between platforms,
   so a fixed divisor would squeeze every result towards lo wherever
   RAND_MAX is smaller.  The +1.0 keeps the scaled fraction strictly below
   1, so hi itself is not produced in double arithmetic.
   lo is evaluated twice. */
#define rnd(lo,hi) ((lo) + ((hi)-(lo))*(rand()/(RAND_MAX+1.0)))

/* (void) makes these true prototypes, so a call with arguments is
   rejected at compile time. */
void  initialize(void);
void  train_one_epoch(void);
void  forward_pass(void);
void  backward_pass(void);
void  update_weights(void);

/***/

/* Program entry point.  Initializes the network, then trains one epoch at
   a time until either the epoch limit is reached or the total squared
   error of an epoch falls below the error goal.  Progress is printed
   every report_every epochs and a summary line is printed at the end. */
int main (void)
{
  const long   max_epochs   = 100000; /* give up after this many epochs */
  const long   report_every = 10;     /* print progress this often */
  const double error_goal   = 0.01;   /* stop once epoch_error drops below this */

  initialize();

  do {
    train_one_epoch();
    epoch_counter ++;

    if (epoch_counter % report_every == 0) {
      /* epoch_counter is a long, so it needs %ld; passing a long where
         printf expects an int (%d) is undefined behaviour */
      printf("Epoch %ld:  total error = %7.4f\n",
             epoch_counter, epoch_error);
    }
  } while (epoch_counter < max_epochs && epoch_error >= error_goal);

  printf("\nBackprop quit after epoch %ld with error %7.4f\n",
         epoch_counter, epoch_error);
  return 0;
}

void initialize()
{
  int i,h,o;
  printf("Initializing %d->%d->%d network:\n", n_inp,n_hid,n_out);
  printf("\teta = (%f,%f), alpha = %f, randmax = %f\n",
	 eta1,eta2,alpha,randmax);
  printf("\t%d training patterns\n", n_pat);

  /* initialize bias units */
  inp[n_inp] = 1.0;
  hid[n_hid] = 1.0;

  /* initialize input->hidden weights */
  for (i=0; i<n_inp+1; i++) {
    for (h=0; h<n_hid; h++) {
      w1[i][h] = rnd(-randmax,randmax);
      dw1[i][h] = 0.0;
    }
  }
  /* initialize hidden->output weights */
  for (h=0; h<n_hid+1; h++) {
    for (o=0; o<n_out; o++) {
      w2[h][o] = rnd(-randmax,randmax);
      dw2[h][0] = 0.0;
    }
  }
}

void train_one_epoch ()
{
  int i,h,o,p;
  /* clear all weight deltas */
  for (i=0; i<n_inp+1; i++) for (h=0; h<n_hid; h++) dw1[i][h]=0.0;
  for (h=0; h<n_hid+1; h++) for (o=0; o<n_out; o++) dw2[h][o]=0.0;

  epoch_error = 0.0;
  for (p=0; p<n_pat; p++) {
    for (i=0;i<n_inp;i++) inp[i] = train_in[p][i]; /* set input vector */
    forward_pass();
    for (o=0;o<n_out;o++) target[o] = train_out[p][o]; /* set target vector */
    backward_pass();
  }
  update_weights ();
}

/* applies an activation function of type g to a value */
float actfn(g,value)
enum actfntype g;
float value;
{
  if (g==line) return value;
  if (g==sigm) return 1.0 / (1.0+exp(-value));
  if (g==gauss) return exp(-sqr(value));
}

/* computes the deriv. of an act.fn. g at a value, possibly using g(value) */
float actfnprime(g,value,g_value)
enum actfntype g;
float value, g_value;
{
  if (g==line) return 1.0;
  if (g==sigm) return g_value*(1.0-g_value);
  if (g==gauss) return -2.0*value*g_value;
}

/* Propagates the current contents of inp[] through the network.  The
   caller must have filled inp[] beforehand.  On return hid0[]/hid[] hold
   the hidden units' net inputs and activations, and out0[]/out[] hold the
   same for the output units.  The bias elements inp[n_inp] and hid[n_hid]
   are read but never written here. */
void forward_pass ()
{
  int src, dst;
  float sum;

  /* input layer (plus bias) -> hidden layer */
  for (dst = 0; dst < n_hid; dst++) {
    sum = 0.0;
    for (src = 0; src <= n_inp; src++) {
      sum += inp[src] * w1[src][dst];
    }
    hid0[dst] = sum;
    hid[dst] = actfn(h_actfn, sum);
  }

  /* hidden layer (plus bias) -> output layer */
  for (dst = 0; dst < n_out; dst++) {
    sum = 0.0;
    for (src = 0; src <= n_hid; src++) {
      sum += hid[src] * w2[src][dst];
    }
    out0[dst] = sum;
    out[dst] = actfn(o_actfn, sum);
  }
}

/* Backpropagates the error for the current pattern.  Preconditions: inp[]
   holds the pattern's input, forward_pass() has been run on it, and
   target[] holds the desired outputs.  Effects: the pattern's squared
   output error is added to epoch_error, dout[] and dhid[] receive the
   error signals of the output and hidden units, and the resulting weight
   changes are added to dw1 and dw2.  The weights themselves are not
   modified here. */
void backward_pass ()
{
  int src, dst;
  float diff, back;

  /* error signal of each output unit, and this pattern's share of the
     epoch error */
  for (dst = 0; dst < n_out; dst++) {
    diff = target[dst] - out[dst];
    epoch_error += diff * diff;
    dout[dst] = diff * actfnprime(o_actfn, out0[dst], out[dst]);
  }

  /* error signal of each hidden unit: the output deltas weighted by the
     hidden->output connections (the bias unit has no delta) */
  for (src = 0; src < n_hid; src++) {
    back = 0.0;
    for (dst = 0; dst < n_out; dst++) {
      back += dout[dst] * w2[src][dst];
    }
    dhid[src] = back * actfnprime(h_actfn, hid0[src], hid[src]);
  }

  /* accumulate hidden->output weight changes, bias row included */
  for (src = 0; src <= n_hid; src++) {
    for (dst = 0; dst < n_out; dst++) {
      dw2[src][dst] += eta2 * dout[dst] * hid[src];
    }
  }

  /* accumulate input->hidden weight changes, bias row included */
  for (src = 0; src <= n_inp; src++) {
    for (dst = 0; dst < n_hid; dst++) {
      dw1[src][dst] += eta1 * dhid[dst] * inp[src];
    }
  }
}

/* Applies one epoch's worth of learning to w1 and w2.  The step taken for
   each weight is the change accumulated in dw1/dw2 plus alpha times the
   step taken in the previous epoch (momentum).  That step is stored back
   into prev_dw1/prev_dw2 for use in the next epoch. */
void update_weights()
{
  int src, dst;
  float step;

  /* input->hidden weights */
  for (src = 0; src <= n_inp; src++) {
    for (dst = 0; dst < n_hid; dst++) {
      step = dw1[src][dst] + alpha*prev_dw1[src][dst];
      prev_dw1[src][dst] = step;
      w1[src][dst] += step;
    }
  }

  /* hidden->output weights */
  for (src = 0; src <= n_hid; src++) {
    for (dst = 0; dst < n_out; dst++) {
      step = dw2[src][dst] + alpha*prev_dw2[src][dst];
      prev_dw2[src][dst] = step;
      w2[src][dst] += step;
    }
  }
}
