/****** Testing performance of fgrep *****************

Reads the simufile, collecting performance statistics for each snapshot.
Figures computed: average error per output unit, 
                  percentage of units within different error ranges 
		    (e.g. within 0.15 of the correct value),
		  percentage of output words which are closest to the correct 
                    lexicon entry (all words)
		  percentage of output words which are closest to the correct 
                    lexicon entry (counting only words which have synonyms)
		  a word by word list of #of occurrences and #of correctly
                    output words

Different error ranges can be specified within [0.0, 1.0] by changing 
  the table entries below. 
Damage resistance is tested by removing n last units from the representation, 
  values for n specified below.
In itemized_display mode the figures are computed for each test sentence 
separately. If itemized_display is off, only the average for the whole 
test set is output.

1st parameter: simufile
2nd          : testfile
     
     Risto Miikkulainen 7/13/1987 
     major perestroika 7/7/89

***********************************************************************/
  
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
  
/* sentinel that terminates the list of snapshot epochs in the simufile:
   read_params stops reading the list at the first entry >= this value */
#define snapshotend 999999999
/* compile-time switch: nonzero compiles in randomize_tokens() */
#define toks 0                          /* tokens / no */

/* max table dimensions */
#define maxrep 75              /* maximum size of the representations */
#define maxsents 2000          /* maximum number of sentences */
#define maxwordl 30            /* maximum length of input words (chars) */
#define maxinpas 9             /* maximum number of input assemblies */
#define maxoutas 9             /* maximum number of output assemblies */
#define maxsnaps 50            /* maximum number of snapshots */
#define maxphase 5             /* maximum number of phases */
/* maximum number of lexicon entries (the word tables hold maxwords+1 slots;
   the extra slot is addressed as index -1) */
#define maxwords 750

/* some parameters for testing */
/* determine_norm_nearest compares the mean component value of a vector
   against this threshold */
#define blanklength 0.15   /* a vector shorter than this is considered blank */
#define itemized_display 0     /* 1=statistics output for each sentence */
/* the next two give the sizes of the err[] and damage[] tables */
#define maxerr 1               /* # of different error ranges tested */
#define maxdam 1               /* # of degrees of damage tested */
/*#define maxdam 13*/

/* Simulation parameters and state.  nwordrep, nhidrep, seed, nphase,
   phaseends, etas and snapshots are filled in by read_params; ninpas,
   noutas, nwords, nsents and geners by read_inputs.  fildes and colors are
   not referenced by the code in this file. */
int fildes, nphase, ninpas, noutas, ninprep, noutrep, nwordrep, nhidrep, 
  nwords, phase, epoch, geners[maxsents], nsents, seed;
long phaseends[maxphase], snapshots[maxsnaps];
float colors[256][3], etas[maxphase], eta;

/* vars for collecting statistics */
/* The pointer variables are set by init_params to point one element into
   the corresponding ...array, so that index -1 is a valid subscript. */
int correctwords[maxdam],
  testwordarray[maxwords+1], *testwords,
  testwordsallarray[maxwords+1], *testwordsall,
  testwordscorrectarray[maxdam][maxwords+1], *testwordscorrect[maxdam],
  nalltok;
/* error ranges and damages tested: remember to change maxerr and maxdam also*/
/* totnerr and deltasum are per-damage accumulators reset by init_snapshot */
float err[maxerr]={0.15}, 
  damage[maxdam]={0.0},
/*  damage[maxdam]={0.0, 0.1, 0.2, 0.3, 0.4, 0.45, 0.55, 0.6, 0.7, 0.8, 0.9, 0.95, 1.0},*/
  totnerr[maxdam][maxerr], deltasum[maxdam];

/* representation indices: for each sentence, the lexicon index of the word
   in each input assembly (inpnums) and each output assembly (tchnums) */
int inpnums[maxsents][maxinpas], tchnums[maxsents][maxoutas];

/* units and weights */
/* wih is indexed [input assembly][input unit][hidden unit] and
   who is indexed [output assembly][output unit][hidden unit] */
float inprep[maxinpas][maxrep], outrep[maxoutas][maxrep],
  tchrep[maxoutas][maxrep],  hidrep[maxrep],
  wih[maxinpas][maxrep][maxrep],  who[maxoutas][maxrep][maxrep];

/* filenames */
char simufile[100], wordfile[100], inputfile[100];
/* the single stream shared by all the reading routines */
FILE *fp;

/* lexicon */
/* words is set to wordarray+1 by init_params, so words[-1] is the extra
   (blank) entry and words[0..nwords-1] are the entries read from wordfile */
struct lexicon {
  char chars[maxwordl];
  float rep[maxrep];
} wordarray[maxwords+1],*words;


/*********************  main control ************************/

int main(argc,argv)
/* Program entry point.
     argv[1]: simufile (simulation parameters followed by the snapshots)
     argv[2]: testfile (the test sentences)
   Prints a usage message and exits with status 1 when either file name
   is missing. */
int argc; char *argv[];
{
  if (argc < 3)
    {
      fprintf(stderr, "usage: %s simufile testfile\n",
              argc > 0 ? argv[0] : "test");
      exit(1);
    }
  /* the precision bounds the copy, so an overlong path cannot overrun the
     fixed-size name buffers (a truncated name then simply fails to open) */
  sprintf(simufile, "%.*s", (int) sizeof(simufile) - 1, argv[1]);
  init_params();                /* takes no arguments; it uses the global fp */
  sprintf(inputfile, "%.*s", (int) sizeof(inputfile) - 1, argv[2]);
  read_inputs();
  init_test();
  iterate_snapshots();
  exit(0);
}


int iterate_snapshots()
/* Go through the snapshots saved in simufile, collecting performance
   statistics for each of them.  Each snapshot starts with the epoch number
   and an error figure (ignored here), followed by the values that
   iterate_weights reads with readfun.  Exits with status 1 if simufile
   cannot be opened. */
{
  int readfun();
  float cume;

  fp=fopen(simufile,"r");
  if (fp==NULL)
    {
      perror(simufile);
      exit(1);
    }
  read_params(fp);

  /* Require a successful conversion.  Testing against EOF only would loop
     forever on a non-numeric token: fscanf then returns 0 and leaves the
     offending input unread. */
  while (fscanf(fp,"%d",&epoch)==1) /* read the epoch */
    {
      if (fscanf(fp, "%f", &cume)!=1)  /* read away the error */
        break;                         /* truncated snapshot */
      iterate_weights(readfun); /* read the current word reps and weights */
      srand48((long) seed); /* we want to test the same ID:s for all snapshots */
      iterate_inputs();         /* collect statistics for one epoch */
    }
  fclose(fp);
  return 0;
}


iterate_inputs()
/* go through one epoch */
{
  register int i,j;
  get_current_params();
  init_snapshot();
  print_header();
  for(i=0; i<nsents; i++)
    sentence(i);
  print_footer();
  print_words();
}


sentence(senti)
/* load the patterns of test sentence number senti and evaluate it */
int senti;
{
  register int as,u;
  float *src;

  /* (with toks on, per-sentence randomization of the token IDs would be
     done at this point; the call is currently disabled) */

  /* copy the input patterns from the lexicon */
  for(as=0; as<ninpas; as++)
    {
      src=words[inpnums[senti][as]].rep;
      for(u=0; u<ninprep; u++)
        inprep[as][u]=src[u];
    }

  /* copy the teaching patterns from the lexicon */
  for(as=0; as<noutas; as++)
    {
      src=words[tchnums[senti][as]].rep;
      for(u=0; u<noutrep; u++)
        tchrep[as][u]=src[u];
    }

  propagate_and_print(senti);
}

propagate_and_print(senti)
int senti;
{
  register int d,i,j,k;
  int nokinprep; /* upper bound for units included in the rep */
  float sigmoid();
  
  if (itemized_display)
    {
      printf(" %2d. ",geners[senti]);
      for(i=0; i<ninpas; i++) printf("%-8s", words[inpnums[senti][i]].chars); 
    }
  for(d=0; d<maxdam; d++)
    {
      nokinprep=ninprep-((int) (damage[d]*ninprep));
      forward_prop(nokinprep);
      cumulate_error(d);
      cumulate_nearest(senti,d);
      cumulate_within(d);
    }
  if (itemized_display) printf("\n");
}

/*********************  initializations ******************************/

int init_params()
/* Read the simulation parameters from simufile and set up the table
   pointers.  Exits with status 1 if simufile cannot be opened. */
{
  register int i;

  fp=fopen(simufile,"r");
  if (fp==NULL)
    {
      perror(simufile);
      exit(1);
    }
  read_params(fp);
  fclose(fp);

  /* every table is addressed through a pointer to its second element,
     which makes index -1 a valid subscript */
  testwords = testwordarray+1;
  testwordsall=testwordsallarray+1;
  for(i=0; i<maxdam; i++)
    testwordscorrect[i]=testwordscorrectarray[i]+1;
  words = wordarray+1;		/* blank word for empty display */
  return 0;
}


read_params(fp)
FILE *fp;
{
  char s[100];
  register int i;
  int temp;

  /* simulation parameters */
  fscanf(fp,"%s", wordfile); fgets(s,99,fp);
  fscanf(fp,"%s", s); fgets(s,99,fp); /* we don't care what the traininp was*/
  fscanf(fp,"%s", s); fgets(s,99,fp); /* or the colormap */
  fscanf(fp,"%s", s); fgets(s,99,fp); /* or the outdevice */
  fscanf(fp,"%s", s); fgets(s,99,fp); /* or the outdriver */
  fscanf(fp,"%d %d", &nwordrep, &nhidrep); fgets(s,99,fp);
  fscanf(fp, "%d %d %d %d", &temp, &temp, &seed, &nphase); fgets(s,99,fp);
  for(i=0; i<nphase; i++)
    fscanf(fp,"%d",&phaseends[i]); fgets(s,99,fp);
  for(i=0; i<nphase; i++)
    fscanf(fp,"%f",&etas[i]); fgets(s,99,fp);
  
  /* saving info */
  fscanf(fp,"%d", &snapshots[0]);
  for(i=0; i<maxsnaps && snapshots[i]<snapshotend; i++)
    fscanf(fp,"%d", &snapshots[i+1]);
  fgets(s,99,fp);
  ninprep=noutrep=nwordrep;
}


read_inputs()
{
  char s[100];
  register int i,j;
  int c;

  /* read the words */
  fp=fopen(wordfile,"r");
  for(i=0; fscanf(fp,"%s", words[i].chars)!=EOF; i++);
  nwords=i;
  fclose(fp);

  /* read the number of necessary assemblies */
  fp=fopen(inputfile,"r");
  fscanf(fp, "%d %d", &ninpas, &noutas); fgets(s,99,fp);

  /* read the input sentences */
  for(i=0; (c=getc(fp))!=EOF; i++)
    {
      ungetc(c, fp);
      for(j=0; j<ninpas; j++)
	fscanf(fp,"%d",&inpnums[i][j]);
      for(j=0; j<noutas; j++)
	fscanf(fp,"%d",&tchnums[i][j]);
      fscanf(fp, "%1s%d", s, &geners[i]);
      fgets(s,99,fp);
    }
  nsents=i;
  fclose(fp);
}


init_test()
/* initialize the statistics variables */
{
  register int i,j;
  
  for(i=(-1); i<nwords; i++)
    testwords[i]=i;  /* list of words which are tested */
  for(i=0; i<nsents; i++)
    for (j=0; j<noutas; j++)
      testwordsall[ tchnums[i][j] ]++; /* # of occurrences of each word */
  nalltok= noutas*nsents - /* number of words with synonyms */
	 testwordsall[-1] - testwordsall[0] - testwordsall[1] - 
	 testwordsall[2] - testwordsall[3] - testwordsall[5] -
	 testwordsall[9] - testwordsall[10] - testwordsall[11] -
	 testwordsall[12] - testwordsall[24] - testwordsall[26] -
	 testwordsall[28] - testwordsall[30] - testwordsall[31] -
	 testwordsall[32];
}


init_snapshot()
{
  register int i,j;
  for(i=0; i<maxdam; i++)
    {
      for(j=0; j<maxerr; j++)
	totnerr[i][j]=0; /* number of units within error range */
      deltasum[i]=0.0;   /* cumulative error over all units */
      correctwords[i]=0;
      for(j=(-1); j<nwords; j++)
	testwordscorrect[i][j]=0;
    }
}

/*******************   backprop  ************************************/

forward_prop(nokinprep)
/* compute hidrep and outrep from inprep; input units from nokinprep on
   are replaced by the constant 0.5 */
int nokinprep;
{
  register int as,u,h;
  float net, sigmoid();

  /* input assemblies -> hidden layer */
  for(h=0; h<nhidrep; h++)
    {
      net=0.0;
      for(as=0; as<ninpas; as++)
        {
          u=0;
          while (u<nokinprep)
            {
              net += inprep[as][u]*wih[as][u][h];
              u++;
            }
          u=nokinprep;
          while (u<ninprep)
            {
              net += 0.5*wih[as][u][h];
              u++;
            }
        }
      hidrep[h]=sigmoid(net);
    }

  /* hidden layer -> output assemblies */
  for(as=0; as<noutas; as++)
    for(u=0; u<noutrep; u++)
      {
        net=0;
        for(h=0; h<nhidrep; h++)
          net += hidrep[h]*who[as][u][h];
        outrep[as][u]=sigmoid(net);
      }
}


/*******************   collect data  ************************************/

cumulate_within(d)
/* cumulate data for counting units within error range */
int d;
{
  register int i,j,k;
  int nerr[maxerr];
  for(i=0; i<maxerr; i++) nerr[i]=0.0;
  for(i=0; i<noutas; i++)
    for(j=0;j<noutrep; j++)
      for(k=0; k<maxerr; k++)
	if (fabs(outrep[i][j]-tchrep[i][j]) < err[k]) nerr[k]++;
      
  for(i=0; i<maxerr; i++)
    {
      if (itemized_display) printf("%6.1f", 100.0*nerr[i]/(noutas*noutrep));
      totnerr[d][i] += nerr[i];
    }
}


cumulate_error(d)
/* add up the absolute error of every output unit for the current sentence
   and cumulate it into the total for damage level d */
int d;
{
  register int as,u;
  float sentsum;

  sentsum=0.0;
  as=0;
  while (as<noutas)
    {
      for(u=0; u<noutrep; u++)
        sentsum += fabs(tchrep[as][u]-outrep[as][u]);
      as++;
    }

  if (itemized_display) printf("%6.3f", sentsum/(noutas*noutrep));
  deltasum[d] += sentsum;
}

cumulate_nearest(senti,d)
/* count the output assemblies of sentence senti whose pattern is nearest
   to the correct lexicon entry, and cumulate the counts for damage level d */
int senti,d;
{
  register int as;
  int target, hits;

  hits=0;
  for(as=0; as<noutas; as++)
    {
      target=tchnums[senti][as];
      /* call determine_norm_nearest here instead to switch from
         unnormalized to normalized distances */
      if (determine_nearest(outrep[as], noutrep) != target)
        continue;
      hits++;
      testwordscorrect[d][target]++;
    }

  correctwords[d] += hits;
}

int determine_nearest(rep,nrep)
/* Return the index of the lexicon entry (the blank entry -1 included)
   whose representation is closest to rep, comparing the first nrep
   components with distance().  On equal distances the lowest index wins.
   If no distance compares below the initial bound (e.g. every distance is
   NaN), the blank entry -1 is returned rather than an uninitialized value. */
int nrep;
float rep[];
{
  int i,bestindex;
  float lbest, dist, distance();

  bestindex=(-1);
  lbest=999999999.9;
  for(i=(-1); i<nwords; i++)
    {
      dist=distance(rep,words[i].rep,nrep);
      if(dist<lbest)
	{
	  bestindex=i;
	  lbest=dist;
	}
    }
  return(bestindex);
}


float distance(v1, v2, nrep)
/* distance of two vectors */
float v1[], v2[];
int nrep;
{
  float sum=0.0;
  register int i;
  for(i=0; i<nrep; i++)
    sum += (v1[i]-v2[i])*(v1[i]-v2[i]);
  return(sqrt(sum));
}


int determine_norm_nearest(rep,nrep)
/* Like determine_nearest, but compares the vectors with norm_distance().
   Returns -1 (the blank entry) when the mean component value of rep is
   below blanklength; otherwise returns the index in 0..nwords-1 of the
   closest lexicon entry, the lowest index winning on equal distances.
   If no entry qualifies (nwords is 0, or every distance is NaN), -1 is
   returned rather than an uninitialized value. */
int nrep;
float rep[];
{
  int i,bestindex;
  float lbest, dist, sum=0.0, norm_distance();

  for(i=0; i<nrep; i++)
    sum += rep[i];
  if (sum/nrep< blanklength) return(-1);

  bestindex=(-1);
  lbest=999999999.9;
  for(i=0; i<nwords; i++)
    {
      dist=norm_distance(rep,words[i].rep,nrep);
      if(dist<lbest)
	{
	  bestindex=i;
	  lbest=dist;
	}
    }
  return(bestindex);
}


float norm_distance(v1, v2, nrep)
/* distance of two normalized vectors */
float v1[], v2[];
int nrep;
{
  float sum=0.0, sum1=0.0, sum2=0.0;
  register int i;
  for(i=0; i<nrep; i++)		/* first normalize */
    sum1 += v1[i];
  for(i=0; i<nrep; i++)
    sum2 += v2[i];
  for(i=0; i<nrep; i++)
    sum += (v1[i]/sum1-v2[i]/sum2)*(v1[i]/sum1-v2[i]/sum2);
  return(sqrt(sum));
}


/*******************   print out results  *****************************/

print_header()
/* print the heading lines for the statistics of one snapshot */
{
  register int dam,k;
  int rulerlen;

  printf("\n\nInput file: %s   Repr.size: %d   Hidden layer size: %d\n",
	 inputfile, nwordrep, nhidrep);
  printf("Phase: %-2d  Epoch: %-2d  Eta: %-4.2f  Damage%%:  ",
	 phase+1,epoch,eta);

  /* one column group for each degree of damage */
  dam=0;
  while (dam<maxdam)
    {
      printf("%-6.1f", 100.0*((int) (damage[dam]*nwordrep))/nwordrep);
      for(k=0; k<6*maxerr; k++) printf(" ");
      dam++;
    }

  /* the error ranges, repeated under each degree of damage */
  printf("\nGnrno Input %7s Within%%: %8s"," "," ");
  dam=0;
  while (dam<maxdam)
    {
      printf("%6s", " ");
      for(k=0; k<maxerr; k++) printf("%6.1f",100.0*err[k]);
      dam++;
    }
  printf("\n");

  if (itemized_display)
    {
      rulerlen=5+ninpas*8+maxdam*(maxerr*6+6);
      for(k=0; k<rulerlen; k++)
	printf("-");
      printf("\n");
    }
}

print_footer()
/* print the figures averaged over the whole test set */
{
  register int dam,r;
  int rulerlen;

  if (itemized_display)
    {
      rulerlen=5+ninpas*8+maxdam*(maxerr*6+6);
      for(r=0; r<rulerlen; r++)
	printf("-");
      printf("\n");
    }

  printf("AVERAGE %29s"," ");
  dam=0;
  while (dam<maxdam)
    {
      printf("%6.3f", deltasum[dam]/(noutas*noutrep*nsents));
      for(r=0; r<maxerr; r++)
	printf("%6.1f", 100.0*totnerr[dam][r]/(noutas*noutrep*nsents));
      dam++;
    }
  printf("\n");
}


print_words()
/* print the word totals and, for each word that occurs in the test set,
   its number of occurrences and of correct outputs */
{
  /* lexicon entries whose hits are subtracted from the total to obtain
     the count for words with synonyms */
  static int excluded[] = { -1, 0, 1, 2, 3, 5, 9, 10, 11, 12,
                            24, 26, 28, 30, 31, 32 };
  int nexcluded = sizeof(excluded)/sizeof(excluded[0]);
  register int dam,w,k;
  int correcttok;

  printf(" \nAll words:   out of %d, correct", noutas*nsents);
  for(dam=0; dam<maxdam; dam++)
    printf(" %d (%3.1f%%) ",
	   correctwords[dam], 100.0*correctwords[dam]/(noutas*nsents));

  printf(" \nSynon. words: out of %d, correct", nalltok );
  for(dam=0; dam<maxdam; dam++)
    {
      correcttok=correctwords[dam];
      for(k=0; k<nexcluded; k++)
	correcttok -= testwordscorrect[dam][ excluded[k] ];
      printf(" %d (%3.1f%%) ", correcttok,
	     100.0*correcttok/nalltok);
    }
  printf("\n");

  /* word by word list, skipping words that never occur */
  for(w=(-1); w<nwords; w++)
    {
      if (testwordsall[w]<=0)
	continue;
      printf("%10s: %2d ", words[testwords[w]].chars, testwordsall[w]);
      for(dam=0; dam<maxdam; dam++)
	printf(" %2d", testwordscorrect[dam][w]);
      printf("\n");
    }
}


/*********************** I/O etc functions *****************/ 

get_current_params()
{
  /* update the necessary simulation parameters */
  int phase;
  for(phase=nphase-1;                   /* phase */
      phase>0 && phaseends[phase-1]>=epoch;
      phase--) {}
  eta=etas[phase];			/* eta */
/*  printf("epoch=%d, phase=%d, eta=%f, time=%d\n", epoch,phase,eta,time(0));*/
}


float sigmoid(activity)
float activity;
{
  /* transform the activity to a sigmoid response between 0 and 1 */
  return(1.0/(1.0+exp(-activity)));
}


iterate_weights(dofun,par1,par2,par3,par4)
/* apply dofun to every word representation component (with par3, par4)
   and then to every input and output weight (with par1, par2), always in
   the same fixed order */
int (*dofun)();
float par1,par2,par3,par4;
{
  register int a,b,c;
  float *row;

  /* word representations */
  for(a=0; a<nwords; a++)
    {
      row=words[a].rep;
      for(b=0; b<nwordrep; b++)
	(*dofun)(row+b,par3,par4);
    }

  /* input -> hidden weights */
  for(a=0; a<ninpas; a++)
    for(b=0; b<ninprep; b++)
      {
	row=wih[a][b];
	for(c=0; c<nhidrep; c++)
	  (*dofun)(row+c,par1,par2);
      }

  /* hidden -> output weights */
  for(a=0; a<noutas; a++)
    for(b=0; b<noutrep; b++)
      {
	row=who[a][b];
	for(c=0; c<nhidrep; c++)
	  (*dofun)(row+c,par1,par2);
      }
}


int readfun(place)
/* Read the next floating point number from the global stream fp into
   *place.  Returns the fscanf result: 1 when a value was stored, 0 or EOF
   when none could be read (in which case *place is left unchanged). */
float *place;
{
  return(fscanf(fp,"%f", place));
}


#if toks
/* use this when cloning the synonymous words */
randomize_tokens()
/* draw new random values for the first two representation components of
   the listed lexicon entries */
{
  static int cloned[] = { 4, 6, 7, 8, 13, 14, 15, 16, 17, 18,
                          19, 20, 21, 22, 23, 25, 27, 29 };
  int ncloned = sizeof(cloned)/sizeof(cloned[0]);
  register int u,k;

  /* component by component, so that the random numbers are drawn in the
     same order for every word list */
  for(u=0; u<2; u++)
    for(k=0; k<ncloned; k++)
      randfun(&words[cloned[k]].rep[u], 0.0, 1.0);
}

/* use this when cloning all words */
/*randomize_tokens()
{
  register int i,j;

  for (i=4; i<nwords; i++)
    for(j=0; j<2; j++)
      randfun(&words[i].rep[j], 0.0, 1.0);
  
}*/
#endif

int randfun(place,par1,par2)
/* Store a random value par1 + par2*r in *place, where r is the next
   number from drand48().  Always returns 0. */
float *place, par1,par2;
{
  /* drand48 must be declared: without a declaration it is assumed to
     return int, and the value computed from it is garbage */
  double drand48();

  *place = par1+par2*drand48();
  return 0;
}


