# include "Tools.h"
# include "Timit.h"
# include "RM.h"

/* Number of duration bins kept per phone (second dimension of the
 * nduration / pduration tables); bin j counts runs of j frames. */
# define MAX_LENGTH		TIMIT_MAX_NFRAME_PER_ALLOPHONE
/* Default for -ninp; each frame record read from the preprocessed file is
 * ninp + 1 bytes, the first byte carrying the label bits. */
# define DEFAULT_NINP		RM1_DEFAULT_NINP
/* Additive offsets applied to the raw duration, follow (transition) and
 * occurrence counts before they are normalised, so no estimate is zero. */
# define DUR_OFFSET		0.5
# define FOL_OFFSET		0.5
# define OCC_OFFSET		0.5
/* Default for -lo_limit: fraction of a phone's total count; min_duration is
 * the first duration at which the cumulative count reaches this fraction. */
# define DEFAULT_LO_LIMIT    	(3.0 / 32.0)
/* Default for -hi_limit: fraction of a phone's total count accumulated from
 * the longest durations downwards when choosing max_duration. */
# define DEFAULT_HI_LIMIT    	(1.0 / 512.0)
/* Phone whose duration distributions are written out by -graph. */
# define EXAMPLE_PHONE		"eh"
/* Default for -phntab (phone table file handed to Read_table). */
# define DEFAULT_PHNTAB		RM1_DEFAULT_PHNTAB
/* Upper bound on the number of states given to the phone at
 * TIMIT_SILENCE_INDEX in -self_loop -half mode. */
# define MAX_SILENCE_NSTATE	3
/* Default for -nstateratio: states per phone = round(ratio * mean duration)
 * in -self_loop -half mode. */
# define DEFAULT_NSTATERATIO	0.5

/*
 * Collect phone duration and phone-to-phone transition statistics from a
 * preprocessed frame file and write the requested models to a duration file.
 *
 * The last two command-line arguments are the input (preprocessed) file and
 * the output (duration) file.  Each input record is (ninp + 1) bytes; only
 * the first byte is examined: TIMIT_LABEL_MASK selects the phone label and
 * TIMIT_SENT_MASK flags a boundary that also ends the current run.
 *
 * Binary output, in this order and only when the matching flag is given:
 *   -original          pduration table, then pfollow + min_duration
 *   -markov            pfollow + min_duration
 *   -self_loop         slprob + min_duration
 *   -apriori           occurrence priors
 *   -pfp               frame-level priors derived from nfollow row sums
 * Text output on stdout: -verbose, -sed, -histogram, -dictionary.
 * -graph writes fig_*.txt files for EXAMPLE_PHONE.
 *
 * Exits with status 1 on usage or input errors, 0 otherwise.
 */
int main(int argc, char **argv) {
  FILE   *fp_pre, *fp_dur;
  float  **pduration, **pfollow, *slprob, nstateratio;
  uchar  *frame;
  int    *noccur, *min_duration, *max_duration, **nduration, **nfollow;
  int    half, *nstate, nophn, nframe;
  int    i, j, nphone, size, length, markov, verbose, self_loop, original;
  int    curr, prev = TIMIT_SILENCE_INDEX, last = TIMIT_SILENCE_INDEX;
  char   **plist;
  double lo_fraction, hi_fraction;

  if(Scan_flag(argc, argv, "-h") == TRUE || argc <= 2 ||
     (fp_pre = Std_fopen(argv[argc - 2], "r")) == NULL ||
     (fp_dur = Std_fopen(argv[argc - 1], "w")) == NULL) {
    fprintf(stderr, "Syntax:\t%s\n", *argv);
    fprintf(stderr, "\t-pfp\n");
    fprintf(stderr, "\t-half\n");
    fprintf(stderr, "\t-nophn\n");
    fprintf(stderr, "\t-graph\n");
    fprintf(stderr, "\t-apriori\n");
    fprintf(stderr, "\t-markov\n");
    fprintf(stderr, "\t-verbose\n");
    fprintf(stderr, "\t-self_loop\n");
    fprintf(stderr, "\t-histogram\n");
    fprintf(stderr, "\t-dictionary\n");
    /* These two flags are parsed below but were missing from the usage. */
    fprintf(stderr, "\t-original\n");
    fprintf(stderr, "\t-sed\n");
    fprintf(stderr, "\t-ninp %d\n", DEFAULT_NINP);
    fprintf(stderr, "\t-lo_limit %f\n", DEFAULT_LO_LIMIT);
    fprintf(stderr, "\t-hi_limit %f\n", DEFAULT_HI_LIMIT);
    fprintf(stderr, "\t-nstateratio %f\n", DEFAULT_NSTATERATIO);
    fprintf(stderr, "\t-phntab %s\n", DEFAULT_PHNTAB);
    fprintf(stderr, "\t<-|preprocessed file>\n");
    fprintf(stderr, "\t<-|duration file>\n");
    exit(1);
  }

  plist = Read_table(Scan_string(argc, argv,"-phntab",DEFAULT_PHNTAB),&nphone);

  /* prev/last start at the silence index and are used as table indices
   * before any frame has been read, so the table must contain it. */
  if(nphone <= TIMIT_SILENCE_INDEX) {
    fprintf(stderr, "%s: phone table has only %d entries\n", *argv, nphone);
    exit(1);
  }

  half      = Scan_flag(argc, argv, "-half");
  nophn     = Scan_flag(argc, argv, "-nophn");
  self_loop = Scan_flag(argc, argv, "-self_loop");
  markov    = Scan_flag(argc, argv, "-markov");
  verbose   = Scan_flag(argc, argv, "-verbose");
  original  = Scan_flag(argc, argv, "-original");
  size      = Scan_int(argc, argv, "-ninp", DEFAULT_NINP) + 1;
  nstateratio = Scan_float(argc, argv, "-nstateratio", DEFAULT_NSTATERATIO);

  /* The limits do not depend on the phone, so parse them once. */
  lo_fraction = Scan_double(argc, argv, "-lo_limit", DEFAULT_LO_LIMIT);
  hi_fraction = Scan_double(argc, argv, "-hi_limit", DEFAULT_HI_LIMIT);

  /* A record size of zero would make fread() "succeed" forever. */
  if(size < 1) {
    fprintf(stderr, "%s: -ninp must not be negative\n", *argv);
    exit(1);
  }

  frame     = (uchar*) Panic_char_array(size);
  noccur    = Panic_int_array(nphone);
  nduration = Panic_int_2d_array(nphone, MAX_LENGTH);
  pduration = Panic_float_2d_array(nphone, MAX_LENGTH);
  nstate    = Panic_int_array(nphone);
  slprob    = Panic_float_array(nphone);
  nfollow   = Panic_int_2d_array(nphone, nphone);
  pfollow   = Panic_float_2d_array(nphone, nphone);
  min_duration = Panic_int_array(nphone);
  max_duration = Panic_int_array(nphone);

  for(i = 0; i < nphone; i++)
    noccur[i] = 0;

  /* Bin 0 is cleared as well: a boundary on the very first frame increments
   * it, and the histogram/graph output reads it.  pduration[i][0] is never
   * computed, so give it a defined value for the -original dump. */
  for(i = 0; i < nphone; i++) {
    pduration[i][0] = 0.0;
    for(j = 0; j < MAX_LENGTH; j++)
      nduration[i][j] = 0;
  }

  for(i = 0; i < nphone; i++)
    for(j = 0; j < nphone; j++)
      nfollow[i][j] = 0;

  /* Pass over the frames: count run lengths per phone (nduration), run
   * starts per phone (noccur) and frame-to-frame transitions (nfollow).
   * NOTE(review): the run still open at end of file is never added to
   * nduration -- confirm whether that is intended. */
  length = 0;
  for(nframe = 0; fread(frame, 1, size, fp_pre) == (size_t) size; nframe++) {
    curr = frame[0] & TIMIT_LABEL_MASK;
    if(curr >= nphone) {
      fprintf(stderr, "%s: frame %d: label %d is outside the phone table\n",
              *argv, nframe, curr);
      exit(1);
    }
    if(curr != last || (frame[0] & TIMIT_SENT_MASK) != 0) {
      /* Runs of MAX_LENGTH frames or more share the last valid bin. */
      nduration[last][MIN(length, MAX_LENGTH - 1)]++;
      noccur[curr]++;
      last = curr;
      length = 0;
    }
    nfollow[prev][curr]++;
    prev = curr;
    length++;
  }

  /* Every transition was 0 -> 0: the input carries no phone labels. */
  if(nframe == nfollow[0][0]) {
    fprintf(stderr, "%s: WARNING: no phone labels present\n", *argv);
    nophn = TRUE;
  }

  /* Per phone: smoothed -log duration probabilities plus the duration
   * bounds derived from the lower and upper tails of the histogram. */
  for(i = 0; i < nphone; i++) {
    float rsum;
    int lo_limit, hi_limit, sum = 0;

    for(j = 1; j < MAX_LENGTH; j++) sum += nduration[i][j];
    rsum = sum + DUR_OFFSET * (MAX_LENGTH - 1);
    for(j = 1; j < MAX_LENGTH; j++)
      pduration[i][j] = - log((nduration[i][j] + DUR_OFFSET) / rsum);

    lo_limit = lo_fraction * sum;
    hi_limit = hi_fraction * sum;

    /* Walk down from the longest bin until more than hi_limit runs have
     * been passed; the result lies in 2 .. MAX_LENGTH. */
    sum = 0;
    for(j = MAX_LENGTH - 1; j > 0 && sum <= hi_limit; j--)
      sum += nduration[i][j];
    max_duration[i] = j + 2;

    /* First duration whose cumulative count reaches lo_limit. */
    sum = 0;
    for(j = 1; j < MAX_LENGTH && (sum += nduration[i][j]) < lo_limit; j++);
    min_duration[i] = j;
  }

  if(original) {
    Panic_fwrite((char*) *pduration, sizeof(**pduration), nphone * MAX_LENGTH,
                 fp_dur);
    /* Self transitions are dropped from the follow model in this mode. */
    for(i = 0; i < nphone; i++)
      nfollow[i][i] = 0;
  }

  if(markov || original) {
    for(i = 0; i < nphone; i++) {
      float sum = 0.0;
      for(j = 0; j < nphone; j++) sum += nfollow[i][j] + FOL_OFFSET;
      for(j = 0; j < nphone; j++)
        pfollow[i][j] = - log((nfollow[i][j] + FOL_OFFSET) / sum);
    }
    Panic_fwrite((char*) *pfollow, sizeof(**pfollow), nphone * nphone, fp_dur);
    Panic_fwrite((char*) min_duration, sizeof(*min_duration), nphone, fp_dur);
  }

  if(self_loop) {
    if(!half) {
      /* Self-loop probability of the state that follows the
       * (min_duration - 1) compulsory frames. */
      for(i = 0; i < nphone; i++) {
        int sum0 = 0, sum1 = 0;

        for(j = 1; j < min_duration[i]; j++)
          sum1 += nduration[i][j];

        for(j = min_duration[i]; j < MAX_LENGTH; j++) {
          int n = j  - (min_duration[i] - 1);
          sum0 += (n - 1) * nduration[i][j];
          sum1 += n * nduration[i][j];
        }
        if(sum1 == 0 || nophn) slprob[i] = 0.5;
        else slprob[i] = (float) sum0 / (float) sum1;
      }
    }
    else {
      /* Number of states proportional to the mean duration; the self-loop
       * probability is chosen from the same mean. */
      for(i = 0; i < nphone; i++) {
        int sum0 = 0, sum1 = 0;
        float mean = 0.0;

        for(j = 1; j < MAX_LENGTH; j++) {
          sum0 += nduration[i][j];
          sum1 += j * nduration[i][j];
        }
        if(sum0 == 0 || nophn) mean = 2.0;
        else  mean = (float) sum1 / (float) sum0;

        nstate[i] = floor(nstateratio * mean + 0.5);

        if(i == TIMIT_SILENCE_INDEX && nstate[i] > MAX_SILENCE_NSTATE)
          nstate[i] = MAX_SILENCE_NSTATE;

        if(nstate[i] < 1) nstate[i] = 1;

        slprob[i] = 1.0 - (float) nstate[i] / mean;
      }

      /* Emit a shell script whose sed command repeats each multi-state
       * phone nstate times. */
      if(Scan_flag(argc, argv, "-sed")) {
        printf("#!/bin/sh\n");
        printf("sed");
        for(i = 0; i < nphone; i++) {
          if(nstate[i] > 1) {
            printf(" -e s'/ %s /", plist[i]);
            for(j = 0; j < nstate[i]; j++) printf(" %s", plist[i]);
            printf(" /g'");
            printf(" -e s'/ %s$/", plist[i]);
            for(j = 0; j < nstate[i]; j++) printf(" %s", plist[i]);
            printf("/g'");
          }
        }
        printf("\n");
      }

      /* Previously executed once with i == nphone, reading past the end of
       * plist, min_duration and slprob; report every phone instead. */
      if(verbose)
        for(i = 0; i < nphone; i++)
          printf("%s\t%d\t%f\t%f\n", plist[i], min_duration[i], slprob[i],
                 min_duration[i] - 1.0 + 1.0 / (1.0 - slprob[i]));
    }
    Panic_fwrite((char*) slprob, sizeof(*slprob), nphone, fp_dur);
    Panic_fwrite((char*) min_duration, sizeof(*min_duration), nphone, fp_dur);
  }

  if(Scan_flag(argc, argv, "-apriori")) {
    float *poccur = Panic_float_array(nphone);
    float sum = 0.0;

    if(nophn)
      for(i = 0; i < nphone; i++) poccur[i] = 1.0 / nphone;
    else {
      for(i = 0; i < nphone; i++) sum += noccur[i] + OCC_OFFSET;
      for(i = 0; i < nphone; i++) poccur[i] = (noccur[i] + OCC_OFFSET) / sum;
    }
    if(verbose)
      for(i = 0; i < nphone; i++)
        printf("%s\t%f\n", plist[i], poccur[i]);
    Panic_fwrite((char*) poccur, sizeof(*poccur), nphone, fp_dur);
    Panic_free((char*) poccur);
  }

  if(Scan_flag(argc, argv, "-pfp")) {
    /* Row sums of nfollow; named so as not to shadow the outer noccur. */
    int   *nrow   = Panic_int_array(nphone);
    float *poccur = Panic_float_array(nphone);
    float sum = 0.0;

    for(i = 0; i < nphone; i++) {
      int nsum = 0;

      for(j = 0; j < nphone; j++) nsum += nfollow[i][j];
      nrow[i] = nsum;
    }

    for(i = 0; i < nphone; i++) sum += nrow[i] + OCC_OFFSET;
    for(i = 0; i < nphone; i++) poccur[i] = (nrow[i] + OCC_OFFSET) / sum;
    if(verbose)
      for(i = 0; i < nphone; i++)
        printf("%s\t%f\n", plist[i], poccur[i]);
    Panic_fwrite((char*) poccur, sizeof(*poccur), nphone, fp_dur);
    Panic_free((char*) nrow);
    Panic_free((char*) poccur);
  }

  if(Scan_flag(argc, argv, "-histogram")) {
    int k, max = 0;

    for(k = 0; k < MAX_LENGTH; k++) {
      int sum = 0;
      for(i = 0; i < nphone; i++) sum += nduration[i][k];
      if(sum > max) max = sum;
    }

    /* Valid bins are 0 .. MAX_LENGTH - 1; bars are scaled to 64 columns and
     * left empty when there is no data at all (max == 0). */
    for(k = 0; k < MAX_LENGTH; k++) {
      int n, nstar, sum = 0;

      printf("all\t%d\t", k);
      for(i = 0; i < nphone; i++) sum += nduration[i][k];
      nstar = max > 0 ? 64 * sum / max : 0;
      for(n = 0; n < nstar; n++)
        printf("*");
      printf("\n");
    }

    for(i = 0; i < nphone; i++) {
      /* max_duration may equal MAX_LENGTH; clamp to the last valid bin. */
      int top = MIN(max_duration[i], MAX_LENGTH - 1);
      int peak = 0;

      for(k = 0; k <= top; k++)
        if(nduration[i][k] > peak) peak = nduration[i][k];

      for(k = 0; k <= top; k++) {
        int n, nstar = peak > 0 ? 64 * nduration[i][k] / peak : 0;

        printf("%s\t%d\t", plist[i], k);
        for(n = 0; n < nstar; n++)
          printf("*");
        printf("\n");
      }
    }
  }

  if(Scan_flag(argc, argv, "-dictionary"))
    for(i = 0; i < nphone; i++) {
      printf("%s", plist[i]);
      for(j = 0; j < max_duration[i]; j++)
        printf(" %s", plist[i]);
      printf("\n");
    }

  if(Scan_flag(argc, argv, "-graph")) {
    int k, top, total = 0;
    int phone = String2index(EXAMPLE_PHONE, plist, nphone);
    float sum0, sum1, prob;
    FILE *ftxt;

    /* Refuse to index the tables with a phone that is not in the list. */
    if(phone < 0 || phone >= nphone) {
      fprintf(stderr, "%s: phone \"%s\" is not in the phone table\n",
              *argv, EXAMPLE_PHONE);
      exit(1);
    }
    /* Highest bin that may be read from nduration for this phone. */
    top = MIN(max_duration[phone], MAX_LENGTH - 1);

    ftxt = Panic_fopen("fig_dur.txt", "w");
    fprintf(ftxt, "set data style linespoints\n");
    fprintf(ftxt, "set xlabel \"Phone duration/ms\"\n");
    fprintf(ftxt, "set ylabel \"p(phone duration)\"\n");
    fprintf(ftxt, "# set terminal latex\n");
    fprintf(ftxt, "plot \"fig_observed.txt\" title \"observed\", \"fig_unbounded.txt\" title \"unbounded\", \"fig_bounded.txt\" title \"bounded\"\n");
    fprintf(ftxt, "pause -1\n");
    Panic_fclose(ftxt);


    /* Observed relative frequencies. */
    ftxt = Panic_fopen("fig_observed.txt", "w");
    for(k = 0; k <= top; k++) total += nduration[phone][k];

    for(k = 0; k <= top; k++)
      fprintf(ftxt, "%d\t%f\n", k, (float) nduration[phone][k] / (float) total);
    Panic_fclose(ftxt);


    /* Geometric model with no minimum duration. */
    ftxt = Panic_fopen("fig_unbounded.txt", "w");
    sum0 = sum1 = 0.0;
    for(k = 1; k < max_duration[phone]; k++) {
      int n = k;
      sum0 += (n - 1) * nduration[phone][k];
      sum1 += n * nduration[phone][k];
    }
    prob = (float) sum0 / (float) sum1;
    fprintf(ftxt, "0	0.0\n");
    for(k = 1; k <= max_duration[phone]; k++)
      fprintf(ftxt, "%d\t%f\n", k, pow(prob, (float) k) * (1.0 - prob) / prob);
    Panic_fclose(ftxt);


    /* Geometric model that starts after the minimum duration. */
    ftxt = Panic_fopen("fig_bounded.txt", "w");
    sum0 = sum1 = 0.0;
    for(k = 1; k < min_duration[phone]; k++)
      sum1 += nduration[phone][k];

    for(k = min_duration[phone]; k <= top; k++) {
      int n = k  - (min_duration[phone] - 1);
      sum0 += (n - 1) * nduration[phone][k];
      sum1 += n * nduration[phone][k];
    }
    prob = (float) sum0 / (float) sum1;
    for(k = 0; k < min_duration[phone]; k++)
      fprintf(ftxt, "%d\t0.0\n", k);
    for(k = min_duration[phone]; k <= max_duration[phone]; k++)
      fprintf(ftxt, "%d\t%f\n", k, pow(prob, (float) (k - min_duration[phone] + 1)) * (1.0 - prob) / prob);
    Panic_fclose(ftxt);
  }
  exit(0);
}
