# include "Tools.h"
# include "Timit.h"
# include "RM.h"

/* Compile-time switch: when MKDSPLIKE is defined, main() and wave2lnpow()
   take their #ifdef MKDSPLIKE branches (see the DSP32C note in main()). */
/* # define MKDSPLIKE */

/* Each output record is one LABEL_TYPE followed by OUTPUT_TYPE features;
   main() panics at start-up unless the two types have the same size. */
# define LABEL_TYPE  int
# define OUTPUT_TYPE float

/* Buffer and table limits.  MAX_ALL_SIZE is the size in bytes of the single
   working buffer used by main() for the raw ADC bytes, the float samples
   and the output records. */
# define MAX_ALL_SIZE       8388608	/* was 831488, before that 640000 */
# define MAX_ADC_SIZE	    (MAX_ALL_SIZE / 2)
# define SIZEOF_SHORT	    2
# define MAX_PHN_SIZE       2048				/* in bytes  */
# define MAX_NPHONE         256
# define MAX_LPORDER	    128
# define NOISE_3DB_FREQ	    600.0
# define F0ALPHA	    0.95
# define POWER_SCALE	    (1.0 / 16.0)

/* Frequency range defaults and pitch-tracking constants. */
# define DEFAULT_MINF	    60.0
# define DEFAULT_MAXF	    (TIMIT_SAMPLE_RATE / 2)
# define DEFAULT_SAMPLEF    (TIMIT_SAMPLE_RATE)
# define MAX_PITCH_FREQ	    (400.0)
# define MIN_PITCH_FREQ	    (64.0)
# define GMF0		    (148.0)
# define PVOICELIMIT	    (0.4)  /* all TIMIT phones were split into voiced
				      and unvoiced - this is the Bayes
				      decision on acf[peak]/acf[0] */
/* Default phone table and file name prefixes/suffixes. */
# define DEFAULT_PHNTAB	    RM1_DEFAULT_PHNTAB
# define DEFAULT_ADC_PREFIX RM1_CDROM_PATH
# define DEFAULT_PHN_PREFIX RM1_LOCAL_PATH
# define DEFAULT_ADC_SUFFIX ".wav"
# define DEFAULT_PHN_SUFFIX ".phn"

/* Defaults for the command-line options parsed in main().  Note that
   DEFAULT_NLPBARK is the default for -nlpchan and DEFAULT_NFTBARK is the
   default for -nftchan. */
# define DEFAULT_NOISE	    0.0
# define DEFAULT_NPOWER     1
# define DEFAULT_NLPACF     0
# define DEFAULT_NLPFILT    0
# define DEFAULT_NLPBARK    0
# define DEFAULT_NLPCEPS    0
# define DEFAULT_NLPAREA    0
# define DEFAULT_NLPPPOS    0
# define DEFAULT_NLPPAMP    0
# define DEFAULT_NLPSGDS    0
# define DEFAULT_LPORDER    16
# define DEFAULT_LPSMOOTH   1.0
# define DEFAULT_NFTBARK    20
# define DEFAULT_NFTBSCP    0
# define DEFAULT_NFTPPOS    0
# define DEFAULT_NFTPAMP    0
# define DEFAULT_NFTSTDD    0
# define DEFAULT_NFTNCHN    0
# define DEFAULT_FRAMESIZE  256
# define DEFAULT_WINDOWSIZE 512
# define DEFAULT_HEADERSIZE TIMIT_HEADER_SIZE
# define DEFAULT_OFFSET     0
# define DEFAULT_COMPRESS   (1.0 / 3.0)
/* NOTE(review): NCRUNCHERS is not referenced anywhere in this file. */
# define NCRUNCHERS         64
/* Clamp an index: yields freq when freq < limit, otherwise limit - 1.
   Every argument and the whole expansion are parenthesised so that the
   macro is safe inside larger expressions.  Arguments are evaluated more
   than once, so they must be free of side effects. */
# define LIMIT(freq, limit) (((freq) < (limit)) ? (freq) : ((limit) - 1))

/* Frequency warping pair, assigned once in main() from the -log, -erb,
   -mel or -basilarmm flag (mel when none of these is given) and then used by
   every channel-based routine below.
   NOTE(review): the usage text also lists -bark, but main() has no branch
   for it, so it currently selects the mel default. */
double (*Nonlinear2Hz)(double);
double (*Hz2Nonlinear)(double);

/* Helpers defined later in this file. */
int	this_read_file(char*, char*, char*, int, char*);
int	get_phn(struct timing*, int, int, int);
void	lpc2ceps(float*, int, float*, int);
void	lpc2chan(float*, int, float*, int, float, float, float);
void	wave2zc(float*, int, float*);
void	wave2lnpow(float*, int, int, OUTPUT_TYPE*);
float	fft2halfp(float*, int);
void	fft2chan(float*, int, float*, int, float, float, float);
void	fft2f0norm(float*, int, float*, int, float, float, float, float);
int	fft2stdd(float*, int, float*, int, float, float, float);
int	spec2ceps(float*, int, float*, int);
void	lpc2sgds(float*, int, float*, int, float, float, float);
void	spec2ppos(float*, int, float*, int);
void	spec2pamp(float*, int, float*, int);
int	sprintf_dummy_phn(char*, int);
int	get_gender(char*);

int main(int argc, char **argv) {
  static FILE   *fp_dbl, *fp_pre;
  static float  *windowf, *window, *pspec, *bspec, *autoc;
  static float  compress, lpsmooth, noise, f0smooth = 0.0, minf, maxf, samplef;
  static int    i, norm, zc, f0, f0now, voice, verbose, peaksize;
  static int    nlpacf, nlpfilt, nlpchan, nlpceps, nlparea, nlpppos, nlppamp;
  static int    need_lp, need_ft, headersize, nophn, trimends, noswap,ftnonorm;
  static int	nftchan, nftbscp, nftppos, nftpamp, nftstdd, f0norm, halfp;
  static int    nphone, noutput, npower, lporder, wfdiff, nplist;
  static int    framesize, windowsize, sizeof_output, offset;
  static int    adc_size, phn_size, pre_size, nlpsgds, gender, gender_data;
  static char   **plist;
  static char   *text, *adc_prefix, *phn_prefix, *adc_suffix, *phn_suffix;
  static float  *flt_data;
  static char   base_name[MAX_NAME_SIZE];
  static char   all_data[MAX_ALL_SIZE];
  static char   phn_data[MAX_PHN_SIZE];
  static struct timing phn_time[MAX_NPHONE];

  if(sizeof(LABEL_TYPE) != sizeof(OUTPUT_TYPE))
    Panic("%s: sizeof(LABEL_TYPE) != sizeof(OUTPUT_TYPE)\n", *argv);

  if(strcmp(TOOLS_TYPE, "float") != 0)
    Panic("%s: assumes that Tools_type is float\n", *argv);

  if(Scan_flag(argc, argv, "-h") || argc <= 2 ||
     (fp_dbl = Std_fopen(argv[argc - 2], "r")) == NULL ||
     (fp_pre = Std_fopen(argv[argc - 1], "w")) == NULL) {
    FILE *stream;
    stream = Scan_flag(argc, argv, "-h") ? stdout : stderr;
    fprintf(stream, "Syntax:\t%s\n", *argv);
    fprintf(stream, "\t-verbose\n");
    fprintf(stream, "\t-norm\n");
    fprintf(stream, "\t-nophn\n");
    fprintf(stream, "\t-noswap\n");
    fprintf(stream, "\t-trimends\n");
    fprintf(stream, "\t-log\n");
    fprintf(stream, "\t-erb\n");
    fprintf(stream, "\t-mel\n");
    fprintf(stream, "\t-bark\n");
    fprintf(stream, "\t-basilarmm\n");
    fprintf(stream, "\t-nonlinear_exit\n");
    fprintf(stream, "\t-zc\n");
    fprintf(stream, "\t-f0\n");
    fprintf(stream, "\t-f0now\n");
    fprintf(stream, "\t-halfp\n");
    fprintf(stream, "\t-f0norm\n");
    fprintf(stream, "\t-voice\n");
    fprintf(stream, "\t-gender\n");
    fprintf(stream, "\t-peaksize\n");
    fprintf(stream, "\t-noise %f\n", DEFAULT_NOISE);
    fprintf(stream, "\t-npower %d\n", DEFAULT_NPOWER);
    fprintf(stream, "\t-nlpacf %d\n", DEFAULT_NLPACF);
    fprintf(stream, "\t-nlpfilt %d\n", DEFAULT_NLPFILT);
    fprintf(stream, "\t-nlpchan %d\n", DEFAULT_NLPBARK);
    fprintf(stream, "\t-nlpceps %d\n", DEFAULT_NLPCEPS);
    fprintf(stream, "\t-nlparea %d\n", DEFAULT_NLPAREA); 
    fprintf(stream, "\t-nlpppos %d\n", DEFAULT_NLPPPOS);
    fprintf(stream, "\t-nlppamp %d\n", DEFAULT_NLPPAMP);
    fprintf(stream, "\t-nlpsgds %d\n", DEFAULT_NLPSGDS);
    fprintf(stream, "\t-lporder %d\n", DEFAULT_LPORDER);
    fprintf(stream, "\t-lpsmooth %f\n", DEFAULT_LPSMOOTH);
    fprintf(stream, "\t-ftnonorm\n");
    fprintf(stream, "\t-nftchan %d\n", DEFAULT_NFTBARK);
    fprintf(stream, "\t-nftbscp %d\n", DEFAULT_NFTBSCP);
    fprintf(stream, "\t-nftppos %d\n", DEFAULT_NFTPPOS);
    fprintf(stream, "\t-nftpamp %d\n", DEFAULT_NFTPAMP);
    fprintf(stream, "\t-nftstdd %d\n", DEFAULT_NFTSTDD);
    fprintf(stream, "\t-minf %f\n", DEFAULT_MINF);
    fprintf(stream, "\t-maxf %f\n", DEFAULT_MAXF);
    fprintf(stream, "\t-samplef %f\n", DEFAULT_SAMPLEF);
    fprintf(stream, "\t-frame %d\n", DEFAULT_FRAMESIZE);
    fprintf(stream, "\t-window %d\n", DEFAULT_WINDOWSIZE);
    fprintf(stream, "\t-header %d\n", DEFAULT_HEADERSIZE);
    fprintf(stream, "\t-offset %d\n", DEFAULT_OFFSET);
    fprintf(stream, "\t-compress %f\n", DEFAULT_COMPRESS);
    fprintf(stream, "\t-phntab %s\n", DEFAULT_PHNTAB);
    fprintf(stream, "\t-adc_prefix %s\n", DEFAULT_ADC_PREFIX);
    fprintf(stream, "\t-phn_prefix %s\n", DEFAULT_PHN_PREFIX);
    fprintf(stream, "\t-adc_suffix %s\n", DEFAULT_ADC_SUFFIX);
    fprintf(stream, "\t-phn_suffix %s\n", DEFAULT_PHN_SUFFIX);
    fprintf(stream, "\t<filelist file>\n");
    fprintf(stream, "\t<preprocessed file>\n");
    exit(1);
  }

  verbose    = Scan_flag(argc, argv, "-verbose");
  norm       = Scan_flag(argc, argv, "-norm");
  nophn      = Scan_flag(argc, argv, "-nophn");
  noswap     = Scan_flag(argc, argv, "-noswap");
  trimends   = Scan_flag(argc, argv, "-trimends");
  zc         = Scan_flag(argc, argv, "-zc");
  f0         = Scan_flag(argc, argv, "-f0");
  f0now	     = Scan_flag(argc, argv, "-f0now");
  halfp      = Scan_flag(argc, argv, "-halfp");
  f0norm     = Scan_flag(argc, argv, "-f0norm");
  voice      = Scan_flag(argc, argv, "-voice");
  gender     = Scan_flag(argc, argv, "-gender");
  peaksize   = Scan_flag(argc, argv, "-peaksize");
  noise	     = Scan_double(argc, argv, "-noise", DEFAULT_NOISE);
  npower     = Scan_int(argc, argv, "-npower", DEFAULT_NPOWER);
  nlpacf     = Scan_int(argc, argv, "-nlpacf", DEFAULT_NLPACF);
  nlpfilt    = Scan_int(argc, argv, "-nlpfilt", DEFAULT_NLPFILT);
  nlpchan    = Scan_int(argc, argv, "-nlpchan", DEFAULT_NLPBARK);
  nlpceps    = Scan_int(argc, argv, "-nlpceps", DEFAULT_NLPCEPS);
  nlparea    = Scan_int(argc, argv, "-nlparea", DEFAULT_NLPAREA);
  nlpppos    = Scan_int(argc, argv, "-nlpppos", DEFAULT_NLPPPOS);
  nlppamp    = Scan_int(argc, argv, "-nlppamp", DEFAULT_NLPPAMP);
  nlpsgds    = Scan_int(argc, argv, "-nlpsgds", DEFAULT_NLPSGDS);
  lporder    = Scan_int(argc, argv, "-lporder", DEFAULT_LPORDER);
  lpsmooth   = Scan_double(argc, argv, "-lpsmooth", DEFAULT_LPSMOOTH);
  ftnonorm   = Scan_flag(argc, argv, "-ftnonorm");
  nftchan    = Scan_int(argc, argv, "-nftchan", DEFAULT_NFTBARK);
  nftbscp    = Scan_int(argc, argv, "-nftbscp", DEFAULT_NFTBSCP);
  nftppos    = Scan_int(argc, argv, "-nftppos", DEFAULT_NFTPPOS);
  nftpamp    = Scan_int(argc, argv, "-nftpamp", DEFAULT_NFTPAMP);
  nftstdd    = Scan_int(argc, argv, "-nftstdd", DEFAULT_NFTSTDD);
  minf	     = Scan_double(argc, argv, "-minf",  DEFAULT_MINF);
  maxf	     = Scan_double(argc, argv, "-maxf",  DEFAULT_MAXF);
  samplef    = Scan_double(argc, argv, "-samplef",  DEFAULT_SAMPLEF);
  framesize  = Scan_int(argc, argv, "-frame",  DEFAULT_FRAMESIZE);
  windowsize = Scan_int(argc, argv, "-window", DEFAULT_WINDOWSIZE);
  headersize = Scan_int(argc, argv, "-header", DEFAULT_HEADERSIZE);
  offset     = Scan_int(argc, argv, "-offset", DEFAULT_OFFSET);
  compress   = Scan_double(argc, argv, "-compress", DEFAULT_COMPRESS);
  adc_prefix = Scan_string(argc, argv, "-adc_prefix", DEFAULT_ADC_PREFIX);
  phn_prefix = Scan_string(argc, argv, "-phn_prefix", DEFAULT_PHN_PREFIX);
  adc_suffix = Scan_string(argc, argv, "-adc_suffix", DEFAULT_ADC_SUFFIX);
  phn_suffix = Scan_string(argc, argv, "-phn_suffix", DEFAULT_PHN_SUFFIX);
  wfdiff     = windowsize - framesize;
  if(wfdiff < 0) Panic("%s: windowsize < framesize\n", *argv);
  flt_data   = ((float*) all_data) + wfdiff / 2;

  if(Scan_flag(argc, argv, "-log")) {
    Hz2Nonlinear = log;
    Nonlinear2Hz = exp;
  }
  else if(Scan_flag(argc, argv, "-erb")) {
    Hz2Nonlinear = Hz2ErbScale;
    Nonlinear2Hz = ErbScale2Hz;
  }
  else if(Scan_flag(argc, argv, "-mel")) {
    Hz2Nonlinear = Hz2Mel;
    Nonlinear2Hz = Mel2Hz;
  }
  else if(Scan_flag(argc, argv, "-basilarmm")) {
    Hz2Nonlinear = Hz2Basilarmm;
    Nonlinear2Hz = Basilarmm2Hz;
  }
  else {
    Hz2Nonlinear = Hz2Mel;
    Nonlinear2Hz = Mel2Hz;
  }

  if(Scan_flag(argc, argv, "-nonlinear_exit")) {
    float  minchan = Hz2Nonlinear(minf), maxchan = Hz2Nonlinear(maxf);
    float  chanstep = (maxchan - minchan) / (float) nftchan;
    int    bin;

    for(bin = 0; bin < nftchan; bin++) {
      int lo = windowsize * Nonlinear2Hz(bin * chanstep + minchan)/samplef+0.5;
      int hi = windowsize * Nonlinear2Hz((bin+1)*chanstep+minchan)/samplef+0.5;

      printf("%d\t%d\t%d\t%d\n", bin, lo, hi, hi - lo);
    }
    exit(0);
  }

  plist = Read_table(Scan_string(argc,argv,"-phntab",DEFAULT_PHNTAB),&nplist);

  noutput = npower + nlpfilt + nlpchan + nlpacf + nlpceps + nlparea + nlpppos + nlppamp + nftchan + nftbscp + nftppos + nftpamp + nlpsgds + nftstdd;
  if(zc) noutput++;
  if(f0) noutput++;
  if(f0now) noutput++;
  if(halfp) noutput++;
  if(voice) noutput++;
  if(gender) noutput++;
  if(verbose) printf("noutput: %d\n", noutput);
  sizeof_output = sizeof(LABEL_TYPE) + noutput * sizeof(OUTPUT_TYPE);

  if(nftchan != 0 || nftbscp != 0 || nftppos != 0 || nftpamp != 0 || 
     f0 || f0now || halfp || voice || nftstdd != 0) need_ft = TRUE;
  else need_ft = FALSE;

  if(nlpacf != 0  || nlpfilt != 0 || nlpchan != 0 || nlpceps != 0 || 
     nlparea != 0 || nlpppos != 0 || nlppamp != 0 || nlpsgds != 0)
    need_lp = TRUE;
  else need_lp = FALSE;

  if(lporder > MAX_LPORDER)
    Panic("%s: %d exceeds MAX_LPORDER\n", *argv, lporder);

  windowf = Panic_float_hamming(windowsize);
  window  = Panic_float_array(windowsize);
  pspec   = Panic_float_array(windowsize);
  autoc   = Panic_float_array(windowsize);
  bspec   = Panic_float_array(windowsize / 2);

  while(fscanf(fp_dbl, "%s\n", base_name) == 1 && base_name[0] != '#') {
    /* read in the files and broadcast to crunchers */
    static int curr_offset, frame, nframe;
    OUTPUT_TYPE *pframe = (OUTPUT_TYPE*) all_data;

    adc_size = this_read_file(adc_prefix, base_name, adc_suffix,
			      headersize, all_data);
    if(adc_size == -1)
      Panic("Failed to read: %s%s%s\n", adc_prefix, base_name,
	    adc_suffix);

    if(!nophn) phn_size = this_read_file(phn_prefix, base_name, phn_suffix,
					 0, phn_data);
    if(phn_size == -1) {
      fprintf(stderr, "Failed to read: %s%s%s\n", phn_prefix,
	      base_name, phn_suffix);
      fprintf(stderr, "Setting -nophn\n");
      nophn = TRUE;
    }
    if(nophn) phn_size = sprintf_dummy_phn(phn_data, adc_size);

    if(adc_size > MAX_ADC_SIZE || phn_size > MAX_PHN_SIZE)
      Panic("%s: overflow when reading %s\n", *argv, base_name);
    if(gender) gender_data = get_gender(base_name);

    if(verbose) {
      printf("%d\t%s\n", adc_size, base_name);
      fflush(stdout);
    }

    /* convert adc data to flt data *in place* from 0 to adc_size - 1 */
    if(noswap)   /* can't trust swab() to work in place */
      for(i = 0; i < adc_size / 2; i += 2) {
	char tmp = all_data[i];
	all_data[i] = all_data[i + 1];
	all_data[i + 1] = tmp;
      }

    {
      short sdata;
      char *plo = (char*) &sdata;
      char *phi = plo + 1;

      for(i = adc_size / 2 - 1; i >= 0; i--) {
	uchar *ptmp = all_data + (i << 1);
	*plo = *(ptmp + 1);
	*phi = *(ptmp + 0);
	flt_data[i] = sdata;
      }
    }

    /* pad the overflow ends with wrap round */
    for(i = 0; i < wfdiff / 2; i++) {
      flt_data[-(i + 1)] = flt_data[i];
      flt_data[adc_size / 2 + i] = flt_data[adc_size / 2 - 1 - i];
    }
      
    if(norm)
      Normalise(flt_data - wfdiff / 2, adc_size / 2 + wfdiff, samplef);
    if(noise > 0.0)
      Noise(flt_data - wfdiff / 2, adc_size / 2 + wfdiff, noise,
	    NOISE_3DB_FREQ / samplef);

    /* parse the timing info */
    for(nphone = 0, text = phn_data; text < phn_data + phn_size; nphone++){
      char label[TIMIT_LABEL_SIZE];
      if(sscanf(text, "%d %d %s\n", &phn_time[nphone].start,
		&phn_time[nphone].stop, label) != 3)
	Panic("%s: has got corrupted reading %s\n", *argv, base_name);
      phn_time[nphone].index = String2index(label, plist, nplist);
      while(*text++ != '\n');
    }

    /* patch the timing info */
    phn_time[0].start = 0;
    phn_time[nphone - 1].stop = adc_size / 2;

    if(!trimends) {
      curr_offset = offset;
      nframe = (phn_time[nphone - 1].stop - offset) / framesize;
      }
    else {
      if(phn_time[0].stop > TIMIT_INIT_SILENCE)
	curr_offset = phn_time[0].stop - TIMIT_INIT_SILENCE + offset;
      else curr_offset = offset;
      if(phn_time[nphone - 1].stop - phn_time[nphone - 1].start <
	   TIMIT_QUIT_SILENCE)
	nframe = (phn_time[nphone - 1].stop - curr_offset) / framesize;
      else
	nframe = (phn_time[nphone - 1].start + TIMIT_QUIT_SILENCE - 
		  curr_offset) / framesize;
    }

    for(frame = 0; frame < nframe; frame++) {
      int    frame_index  = frame * framesize + curr_offset;
      int    window_index = frame * framesize + curr_offset - wfdiff / 2;
      float  *frame_start = flt_data + frame_index;
      float  *window_start = flt_data + window_index;
      
      { /* subtract the mean then hamming window */
	float sum = 0.0, mean;

	for(i = 0; i < windowsize; i++)
	  sum += window_start[i];
	mean = sum / windowsize;

	for(i = 0; i < windowsize; i++)
	  window[i] = windowf[i] * (window_start[i] - mean);
      }

      /* put the label at the very start of the frame */
      *((LABEL_TYPE*) pframe++) = 
	get_phn(phn_time, nphone, frame_index, framesize);

      if(gender) *pframe++ = gender_data;
	
      /* put the log of the segmental power */
#ifdef MKDSPLIKE
      wave2lnpow(window, windowsize, npower, pframe);
#else
      wave2lnpow(frame_start, framesize, npower, pframe);
#endif
      pframe += npower;

      /* put the number of zero crossings in the frame */
      if(zc) wave2zc(frame_start, framesize, pframe++);
	
      if(need_lp) {
	static float acf[MAX_LPORDER], ref[MAX_LPORDER], lpc[MAX_LPORDER];
	static float logarea[MAX_LPORDER];	
	static float lpchan[MAX_LPORDER];

	(void) lpredict(window, windowsize, acf, ref, lpc, lporder);
	
	/* put the normalised autocorrelation coefficients */
	for(i = 1; i <= nlpacf; i++)
	  *pframe++ = acf[i] / acf[0];
	    
	/* implement smoothing ref Singer, Umezalia and Itakura ICASSP90 */
	if(lpsmooth != 1.0) {
	  float scale = 1.0;

	  for(i = 1; i <= lporder; i++) {
	    scale *= lpsmooth;
	    lpc[i] *= scale;
	  }
	}

	/* put the lpc filter coefficients */
	for(i = 1; i <= nlpfilt; i++) 
	  *pframe++ = lpc[i];

	lpc2chan(lpc, lporder, lpchan, nlpchan, minf, maxf, samplef);
	if(compress > 0.0)
	  for(i = 0; i < nlpchan; i++)
	    *pframe++ = pow(lpchan[i], compress);
	else
	  for(i = 0; i < nlpchan; i++)
	    *pframe++ = log(lpchan[i]);

	/* lpc2ceps() returns ceps[1] onwards - bug fix by Andrew Tridgell */
	lpc2ceps(lpc, lporder, pframe - 1, nlpceps);
	pframe += nlpceps;

	lpc2sgds(lpc, lporder, pframe, nlpsgds, minf, maxf, samplef);
	pframe += nlpsgds;
	  
	ref2logarea(ref, logarea, nlparea);
	for(i = 1; i <= nlparea; i++)
	  *pframe++ = logarea[i];

	spec2ppos(lpchan, nlpchan, pframe, nlpppos);
	pframe += nlpppos;

	spec2pamp(lpchan, nlpchan, pframe, nlppamp);
	pframe += nlppamp;
      }

      /***** WARNING:  This corrupts window[] *****/
      if(need_ft) {
	float probvoice = 0.0, halfpfreq;
	int   halfwsize  = windowsize / 2;

	RealFFT(window, window, windowsize);
	pspec[0] = window[0] * window[0];
#ifdef MKDSPLIKE
	/* just for compatability with the DSP32C rftta() */
	for(i = 0; i < windowsize; i++) window[i] *= 2.0;
	pspec[halfwsize] = 0.0;
#else
	pspec[halfwsize] = window[1] * window[1];
#endif
	for(i = 1; i < halfwsize; i++)
	  pspec[windowsize - i] = pspec[i] =
	    window[2*i] * window[2*i] + window[2*i+1] * window[2*i+1];

	halfpfreq = fft2halfp(pspec, halfwsize);
	if(halfp) *pframe++ = halfpfreq;
	    
	if(f0 || f0now || voice || f0norm) {
	  int   best_posn;

	  for(i = 0; i < windowsize; i++) autoc[i] = pspec[i];
	  RealFFT(autoc, autoc, windowsize);
	  for(i = 0; i < halfwsize; i++) autoc[i] = autoc[2 * i];

	  best_posn = samplef / MAX_PITCH_FREQ;
	  for(i = samplef / MAX_PITCH_FREQ + 1; i < halfwsize; i++)
	    if(autoc[i] > autoc[best_posn]) best_posn = i;

	  probvoice = autoc[best_posn] / (autoc[0] + VERY_SMALL);
	    
	  if(probvoice > PVOICELIMIT)
	    f0smooth = F0ALPHA * f0smooth + 
	      (1.0 - F0ALPHA) * log(samplef / (GMF0 * best_posn));
	    
	  if(f0) *pframe++ = f0smooth;
	  if(f0now) *pframe++ = log(samplef / (GMF0 * best_posn));
	  if(voice) *pframe++ = probvoice;
	}

	if(f0norm) {
	  if(probvoice > PVOICELIMIT)
	    fft2f0norm(pspec, halfwsize, bspec, MAX(nftchan, nftbscp + 1),
		       minf, maxf, samplef, f0smooth);
	  else
	    fft2f0norm(pspec, halfwsize, bspec, MAX(nftchan, nftbscp + 1),
		       minf, maxf, samplef, 0.0);
	}
	else
	  fft2chan(pspec, halfwsize, bspec, MAX(nftchan, nftbscp + 1), minf,
		   maxf, samplef);

	spec2ceps(bspec, MAX(nftchan, nftbscp + 1), pframe, nftbscp);
	pframe += nftbscp;

	if(!ftnonorm) {
	  float sum = VERY_SMALL;
	  for(i = 0; i < nftchan; i++) sum += bspec[i];
	  for(i = 0; i < nftchan; i++) bspec[i] /= sum;
	}

	if(compress > 0.0)
	  for(i = 0; i < nftchan; i++)
	    *pframe++ = pow(bspec[i], compress);
	else
	  for(i = 0; i < nftchan; i++)
	    *pframe++ = log(bspec[i]);
	  
	spec2ppos(bspec, nftchan, pframe, nftppos);
	pframe += nftppos;

	spec2pamp(bspec, nftchan, pframe, nftpamp);
	pframe += nftpamp;

	pframe += fft2stdd(pspec,halfwsize,pframe,nftstdd,minf,maxf,samplef);
      }
    }
    *((LABEL_TYPE*) (pframe - (noutput + 1))) |= TIMIT_SENT_MASK;

    pre_size = (char*) pframe - all_data;
    if(fwrite(all_data, 1, pre_size, fp_pre) != pre_size)
      Panic("%s: failed writing %s after reading %s\n", *argv,
	    argv[argc - 1], base_name);
  }
  return(0);
}

/*
 * Read the file named prefix+base+suffix into data[], skipping the first
 * headersize bytes.
 *
 * Returns the number of bytes stored, or -1 when the file cannot be opened
 * or is shorter than headersize.  Seek, read and close errors are left to
 * the Panic_* wrappers.
 *
 * NOTE: the capacity of data[] is not known here; the caller is responsible
 * for providing a buffer large enough for the whole file.
 */
int this_read_file(char *prefix, char *base, char *suffix, int headersize,
		   char *data) {
  FILE *stream;
  int   file_size;
  char *file_name;

  file_name = Panic_malloc(strlen(prefix) + strlen(base) + strlen(suffix) + 1);
  (void) strcpy(file_name, prefix);
  (void) strcat(file_name, base);
  (void) strcat(file_name, suffix);

  stream = fopen(file_name, "r");
  /* the name is only needed for the open: release it on both paths (it
     used to leak whenever the open failed) */
  Panic_free(file_name);
  if(stream == NULL) return(-1);

  /* the size is the distance from the end of the header to end of file */
  Panic_fseek(stream, (long) 0, SEEK_END);
  file_size = ftell(stream) - headersize;
  if(file_size < 0) {
    /* shorter than its header: report it as unreadable rather than hand a
       negative count to the reader */
    Panic_fclose(stream);
    return(-1);
  }

  Panic_fseek(stream, (long) headersize, SEEK_SET);
  Panic_fread(data, 1, file_size, stream);
  Panic_fclose(stream);
  return(file_size);
}

/*
 * Pick the phone label for the sample range [start, start + size).
 *
 * phn[0 .. nphone-1] holds the segment timings in order.  The silence index
 * is returned when the range is not completely inside the labelled record.
 * Otherwise the label is chosen from the segments overlapping the range:
 * the only one, the larger share of two, or the second of three or more.
 */
int get_phn(struct timing *phn, int nphone, int start, int size) {
  int stop = start + size;
  int first, last, next, span;

  /* first segment that has not finished by the start of the range */
  for(first = 0; first < nphone && phn[first].stop <= start; first++);

  /* off either end of the record: silence */
  if(first == nphone || phn[0].start > start || phn[nphone-1].stop < stop)
    return(TIMIT_SILENCE_INDEX);

  /* one past the last segment that begins before the end of the range */
  for(next = first; next < nphone && phn[next].start < stop; next++);

  if(phn[next - 1].start >= stop) last = nphone;
  else last = next - 1;

  span = last - first;

  /* this should never exist.. (and then carries on as for one segment) */
  if(span == -1) Panic("Bye Bye");

  /* a single segment overlaps with this time */
  if(span == -1 || span == 0) return(phn[first].index);

  /* two segments overlap, choose the largest */
  if(span == 1) {
    if(phn[first].stop - start > stop - phn[last].start)
      return(phn[first].index);
    return(phn[last].index);
  }

  /* three or more segments overlap */
  return(phn[first + 1].index);
}

/*
   Equations found in ASSP April 1981, pp255
*/

#define MAX_ABS_CEPS 4.0
/*
 * Convert predictor coefficients to cepstral coefficients by recursion.
 *
 * Both arrays are 1-based: lpc[1..nlpc] is read and ceps[1..nceps] is
 * written; element 0 of either array is never touched.
 *
 *   n <= nlpc:  ceps[n] = lpc[n] + (1/n) * sum_{k=1}^{n-1}    k ceps[k] lpc[n-k]
 *   n >  nlpc:  ceps[n] =          (1/n) * sum_{k=n-nlpc}^{n-1} k ceps[k] lpc[n-k]
 *
 * The second sum has to include every predictor coefficient lpc[1..nlpc].
 * It used to start one term late and so dropped lpc[nlpc]; for a single
 * pole a this gave ceps[2] = 0 instead of a^2 / 2.
 *
 * Finally every coefficient is clipped to +/- MAX_ABS_CEPS.
 */
void lpc2ceps(float *lpc, int nlpc, float *ceps, int nceps) {
  int n;

  for(n = 1; n <= nlpc && n <= nceps; n++) {
    float sum = 0.0;
    int   k;

    for(k = 1; k < n; k++) sum += k * ceps[k] * lpc[n - k];
    ceps[n] = lpc[n] + sum / n;
  }

  /* be wary of these interpolated values */
  for(n = nlpc + 1; n <= nceps; n++) {
    float sum = 0.0;
    int   k;

    /* n - k runs from nlpc down to 1 */
    for(k = n - nlpc; k < n; k++) sum += k * ceps[k] * lpc[n - k];
    ceps[n] = sum / n;
  }

  /* very occasionally the above can go unstable, fudge if this happens */
  for(n = 1; n <= nceps; n++) {
    if(ceps[n] >  MAX_ABS_CEPS) ceps[n] =  MAX_ABS_CEPS;
    if(ceps[n] < -MAX_ABS_CEPS) ceps[n] = -MAX_ABS_CEPS;
  }
}

/*
 * Evaluate the LP polynomial at the centre of nchan channels spaced evenly
 * on the current non-linear frequency scale between minf and maxf.
 *
 * chan[c] receives |1 - sum_j lpc[j] exp(-i w j)|^2 (lpc[] is 1-based,
 * lpc[1..nlpc]) and the values are then rescaled so that they sum to nchan.
 */
void lpc2chan(float *lpc, int nlpc, float *chan, int nchan, float minf, float maxf, float samplef) {
  float  lowest = Hz2Nonlinear(minf), highest = Hz2Nonlinear(maxf);
  float  width = (highest - lowest) / (float) nchan;
  float  energy = 1.0e-36, gain;
  int    c, tap;

  for(c = 0; c < nchan; c++) {
    /* channel centre as an angular frequency in radians per sample */
    float omega = 2.0 * M_PI * Nonlinear2Hz((c + 0.5)*width+lowest)/samplef;
    float imag = 0.0, real = 1.0;

    for(tap = 1; tap <= nlpc; tap++) {
      imag -= sin(omega * tap) * lpc[tap];
      real -= cos(omega * tap) * lpc[tap];
    }

    chan[c] = imag * imag + real * real;
    energy += chan[c];
  }

  /* the tiny starting value of energy keeps this division finite */
  gain = nchan / energy;
  for(c = 0; c < nchan; c++) chan[c] *= gain;
}

/*
 * Count the sign changes between neighbouring samples of frame[0..size-1]
 * and store the count, as a float, in *pzc.
 */
void wave2zc(float *frame, int size, float *pzc) {
  int crossings = 0;
  int k;

  /* the order of the comparisons does not matter, so walk backwards */
  for(k = size - 1; k >= 1; k--) {
    if(SIGN(frame[k]) != SIGN(frame[k - 1]))
      crossings++;
  }

  *pzc = crossings;
}

/*
 * Write npow log-power values for frame[0..size-1] to out[0..npow-1].
 *
 * The frame is split into npow sub-frames of size / npow samples; each
 * value is log(variance + 1) of one sub-frame.
 *
 * The variance is taken over the samples actually summed.  It used to be
 * divided by the full frame size, which is only right for npow == 1.
 * A sub-frame with no samples (npow > size) yields log(1) = 0.
 */
void wave2lnpow(float *frame, int size, int npow, OUTPUT_TYPE *out) {
  int   i, j;
  int   sublen;

  if(npow <= 0) return;		/* nothing requested */
  sublen = size / npow;

  for(i = 0; i < npow; i++) {
    float *subframe = frame + (i * size) / npow;
    float sum1 = 0.0, sum2 = 0.0, variance = 0.0;

    for(j = 0; j < sublen; j++) {
      sum1 += subframe[j];
      sum2 += subframe[j] * subframe[j];
    }

    if(sublen > 0)
      variance = (sum2 - sum1 * sum1 / sublen) / sublen;

    /* used to use pow(sd, compress) but variations in the amplifier gain
    result in multiplicative changes, this way they result in additive
    changes which should be more easily compensated for. */

#ifdef MKDSPLIKE
    *out++ = POWER_SCALE * log(variance + 1.0);
#else
    *out++ = log(variance + 1.0);
#endif
  }
}

/*
 * Find the position below which half of the total of pspec[0..size-1] lies.
 *
 * Returns that position as a fraction of size, interpolated linearly
 * inside the bin where the running total crosses the half-way mark.
 * A spectrum whose total is zero (or not a number) returns 0.0; it used to
 * read pspec[-1] and divide by it.
 */
float fft2halfp(float *pspec, int size) {
  float total, limit;
  int   i;

  total = 0.0;
  for(i = 0; i < size; i++) total += pspec[i];
  limit = total / 2.0;

  /* accumulate again until half the power has been passed; the bound on i
     keeps the search inside the array whatever the data */
  total = 0.0;
  for(i = 0; i < size && total < limit; i++) total += pspec[i];

  /* nothing accumulated: there is no crossing bin to interpolate in */
  if(i == 0) return(0.0);

  return(((float) i - (total - limit) / pspec[i - 1]) / (float) size);
}

/*
 * Sum the power spectrum pspec[0..size-1] into nchan channels spaced evenly
 * on the current non-linear frequency scale between minf and maxf.
 * Channel edges are rounded to the nearest FFT bin and bin numbers are
 * clamped to size - 1 by LIMIT().
 */
void fft2chan(float *pspec, int size, float *bspec, int nchan, float minf, float maxf, float samplef) {
  float  lowest = Hz2Nonlinear(minf), highest = Hz2Nonlinear(maxf);
  float  width = (highest - lowest) / (float) nchan;
  int    chan;

  for(chan = 0; chan < nchan; chan++) {
    int first = size * Nonlinear2Hz((chan+0)*width+lowest)/(samplef/2.0)+0.5;
    int end   = size * Nonlinear2Hz((chan+1)*width+lowest)/(samplef/2.0)+0.5;
    int k = first;

    bspec[chan] = 0.0;
    while(k < end) {
      bspec[chan] += pspec[LIMIT(k, size)];
      k++;
    }
  }
}

/*
 * As fft2chan(), but with the channel edges scaled by exp(lnf0 / 3) and
 * with fractional bins at both edges of a channel weighted in, instead of
 * rounding the edges to whole FFT bins.
 *
 * When both edges of a channel fall inside the same FFT bin the channel is
 * just the covered fraction (fhi - flo) of that bin.  The general formula
 * below would add the two edge fractions of that one bin and so count a
 * whole extra bin.
 */
void fft2f0norm(float *pspec, int size, float *bspec, int nchan, float minf, float maxf, float samplef, float lnf0) {
  float  fs = exp(lnf0 / 3.0);
  float  minchan = Hz2Nonlinear(fs * minf), maxchan = Hz2Nonlinear(fs * maxf);
  float  chanstep = (maxchan - minchan) / (float) nchan;
  int    i, bin;

  for(bin = 0; bin < nchan; bin++) {
    /* channel edges as fractional FFT bin numbers */
    float flo = size * Nonlinear2Hz((bin + 0) *chanstep+minchan)/(samplef/2.0);
    float fhi = size * Nonlinear2Hz((bin + 1) *chanstep+minchan)/(samplef/2.0);
    int ilo = (int) floor(flo + 1.0);	/* first bin boundary above flo */
    int ihi = (int) floor(fhi);		/* last bin boundary at or below fhi */

    if(ihi < ilo) {
      /* both edges in bin ihi (== ilo - 1) */
      bspec[bin] = (fhi - flo) * pspec[LIMIT(ihi, size)];
    }
    else {
      /* part of the lowest bin, the whole bins, part of the highest bin */
      bspec[bin]  = ((float) ilo - flo) * pspec[LIMIT(ilo - 1, size)];
      for(i = ilo; i < ihi; i++) bspec[bin] += pspec[LIMIT(i, size)];
      bspec[bin] += (fhi - (float) ihi) * pspec[LIMIT(ihi, size)];
    }
  }
}

/*
 * For each of nftstdd channels spaced evenly on the current non-linear
 * frequency scale between minf and maxf, store the standard deviation of
 * the power spectrum bins in the channel divided by their mean.
 *
 * A channel that covers no bins, or whose mean is zero, yields 0.0 rather
 * than a division by zero.  A variance that rounds to a small negative
 * number is treated as zero, and bin numbers are clamped with LIMIT() as
 * in fft2chan().
 *
 * Returns nftstdd, the number of values written to pframe[].
 */
int fft2stdd(float *pspec, int size, float *pframe, int nftstdd, float minf, float maxf, float samplef) {
  float  minchan = Hz2Nonlinear(minf), maxchan = Hz2Nonlinear(maxf);
  float  chanstep = (maxchan - minchan) / (float) nftstdd;
  int    i, bin;

  for(bin = 0; bin < nftstdd; bin++) {
    int lo = size * Nonlinear2Hz(bin * chanstep + minchan) /  (samplef / 2.0);
    int hi = size * Nonlinear2Hz((bin + 1)*chanstep+minchan)/ (samplef / 2.0);
    int count = hi - lo;
    float sum1 = 0.0, sum2 = 0.0, mean, variance;

    if(count <= 0) {
      pframe[bin] = 0.0;
      continue;
    }

    for(i = lo; i < hi; i++) {
      float power = pspec[LIMIT(i, size)];

      sum1 += power;
      sum2 += power * power;
    }

    mean = sum1 / count;
    variance = sum2 / count - mean * mean;
    if(variance < 0.0) variance = 0.0;

    if(mean != 0.0) pframe[bin] = sqrt(variance) / mean;
    else pframe[bin] = 0.0;
  }

  return(nftstdd);
}

/*
 * Cosine transform of the log of spec[0..nspec-1].
 *
 * ceps[0..nceps-1] receive coefficients 1..nceps; the 0th coefficient is
 * not put out.  The log-spectrum scratch buffer is kept between calls and
 * reallocated only when nspec changes.  Returns nceps.
 */
int spec2ceps(float *spec, int nspec, float *ceps, int nceps) {
  static int   nlndata = 0;
  static float *lndata = NULL;
  int order, chan;

  if(nceps == 0) return(nceps);

  /* (re)size the scratch buffer */
  if(nlndata != nspec) {
    if(nlndata != 0) Panic_free((char*) lndata);
    lndata = (float*) Panic_malloc(nspec * sizeof(*lndata));
    nlndata = nspec;
  }

  /* VERY_SMALL keeps the log finite for empty channels */
  for(chan = 0; chan < nspec; chan++)
    lndata[chan] = log(spec[chan] + VERY_SMALL);

  for(order = 0; order < nceps; order++) {
    float sum = 0.0;

    for(chan = 0; chan < nspec; chan++)
      sum += lndata[chan] * cos(M_PI * (chan + 0.5) * (order + 1) / nspec);
    ceps[order] = sum;
  }

  return(nceps);
}

/******************************************************************************
The following is taken from:

"Low Bit rate Quantisation of the Smoothed Group Delay Spectrum for Speech
Recognition", Harald SINGER, Taizo UMEZAKI and Fumitada ITAKURA, ICASSP-90,
volume 2, page 761.
******************************************************************************/

/*
 * Smoothed group delay spectrum of the LP model in nsgds channels spaced
 * evenly on the current non-linear frequency scale between minf and maxf:
 *
 *   sgds[i] = (1 / hband) * sum_{k=1}^{nlpc} sin(k hband) c[k] cos(k cfreq)
 *
 * where c[k] are the LP cepstra from lpc2ceps(), cfreq is the channel
 * centre and hband the channel half width, both in radians per sample.
 *
 * Changes from the earlier version:
 *  - the cepstra are kept 1-based, exactly as lpc2ceps() writes them, so
 *    that c[k] is paired with quefrency k.  Previously they were stored
 *    shifted down by one while the sum still used the unshifted k, which
 *    dropped c[1] and mispaired every other term.  It also formed the
 *    out-of-bounds pointer pceps - 1.
 *  - frequencies are normalised by the samplef argument, as in lpc2chan(),
 *    instead of the fixed TIMIT_SAMPLE_RATE.
 *
 * Nothing is written when nsgds <= 0.
 */
void lpc2sgds(float *lpc, int nlpc, float *sgds, int nsgds, float minf, float maxf, float samplef) {
  if(nsgds > 0) {
    static int    nceps = 0;
    static float *pceps = NULL;
    float  minchan = Hz2Nonlinear(minf), maxchan = Hz2Nonlinear(maxf);
    float  chanstep = (maxchan - minchan) / (float) nsgds;
    int    i, k;

    /* scratch buffer for c[0..nlpc]; element 0 is unused */
    if(nceps != nlpc) {
      if(nceps != 0) Panic_free((char*) pceps);
      pceps = (float*) Panic_malloc((nlpc + 1) * sizeof(*pceps));
      nceps = nlpc;
    }

    pceps[0] = 0.0;
    lpc2ceps(lpc, nlpc, pceps, nceps);

    for(i = 0; i < nsgds; i++) {
      float cfreq = 2.0 * M_PI * Nonlinear2Hz((i + 0.5) * chanstep + minchan) /
                 samplef;
      /* lower edge minus upper edge, so negative; the sign cancels between
         sin(k * hband) and the final division */
      float hband = M_PI * (Nonlinear2Hz(i * chanstep + minchan) -
		 Nonlinear2Hz((i + 1) * chanstep + minchan)) / samplef;
      float sum = 0.0;

      for(k = 1; k <= nceps; k++)
        sum += sin(k * hband) * pceps[k] * cos(k * cfreq);
      sgds[i] = sum / hband;
    }
  }
}

/*
 * Locate up to npeak local maxima of spec[0..nspec-1], scanning upwards.
 *
 * A parabola is fitted through each maximum and its two neighbours and the
 * position of the parabola's top, in fractional bins, is stored in ppos[].
 * Slots for which no peak was found are set to 0.0.
 */
void spec2ppos(float *spec, int nspec, float *ppos, int npeak) {
  int bin = 1, count = 0;

  while(bin < nspec - 1 && count < npeak) {
    if(spec[bin] > spec[bin - 1] && spec[bin] >= spec[bin + 1]) {
      /* parabola y = curve * x * x + slope * x + centre, x = 0 at bin */
      float centre = spec[bin];
      float slope  = (spec[bin + 1] - spec[bin - 1]) / 2.0;
      float curve  = spec[bin - 1] + slope - centre;
      float shift  = -slope / (2.0 * curve);

      ppos[count] = bin + shift;
      count++;
    }
    bin++;
  }

  while(count < npeak) {
    ppos[count] = 0.0;
    count++;
  }
}

/*
 * Companion of spec2ppos(): for the same (up to npeak) local maxima of
 * spec[0..nspec-1], store the height of the fitted parabola at its top in
 * pamp[].  Slots for which no peak was found are set to 0.0.
 */
void spec2pamp(float *spec, int nspec, float *pamp, int npeak) {
  int bin = 1, count = 0;

  while(bin < nspec - 1 && count < npeak) {
    if(spec[bin] > spec[bin - 1] && spec[bin] >= spec[bin + 1]) {
      /* parabola y = curve * x * x + slope * x + centre, x = 0 at bin */
      float centre = spec[bin];
      float slope  = (spec[bin + 1] - spec[bin - 1]) / 2.0;
      float curve  = spec[bin - 1] + slope - centre;
      float shift  = -slope / (2.0 * curve);

      pamp[count] = curve * shift * shift + slope * shift + centre;
      count++;
    }
    bin++;
  }

  while(count < npeak) {
    pamp[count] = 0.0;
    count++;
  }
}

/*
 * Write a one-line timing table into data[]: a single "h#" segment running
 * from sample 0 to the last sample of an ADC file of `size` bytes.
 * Returns the length of the text written, not counting the terminator.
 */
int sprintf_dummy_phn(char *data, int size) {
  /* sprintf() reports the number of characters it wrote, which is exactly
     the length of the resulting string */
  return(sprintf(data, "0\t%d\th#\n", size / SIZEOF_SHORT));
}

/*
 * Derive the speaker gender from a base name: 1 for female, 0 for male.
 *
 * TIMIT is identified by starting with "dr" and name[4] holds 'm' or 'f'.
 * Any other name is reported through Panic().  The length is checked first
 * so that name[4] is never read beyond the end of a short string.
 */
int get_gender(char *name) {
  int known = strlen(name) > 4 && strncmp(name, "dr", 2) == 0 &&
              (name[4] == 'm' || name[4] == 'f');

  if(!known)
    Panic("get_gender: Can't decide on: %s\n", name);

  return((known && name[4] == 'f') ? 1 : 0);
}
