/* ----------------------------------------------------------------------------- 
   include file for Lotec, a cheap, low-quality speech recognizer 

   Nigel Ward, University of Tokyo, started July 1993. 
   ----------------------------------------------------------------------------- */

#include <math.h>
#include <stdio.h> 
#include <sys/file.h> 
#include <errno.h>

#include <multimedia/audio_hdr.h>
#include <multimedia/audio_filehdr.h>
#include <multimedia/ulaw2linear.h>

/* note: FFT requires the sample number to be a power of two */
/* Declarations of routines whose definitions are not part of this header.
   The parameter lists are intentionally left unspecified (old-style
   declarators), because the real signatures are not visible from here. */

/* Return type is spelled out: the original relied on implicit int, which
   was removed from the language in C99 and is rejected by newer compilers. */
extern int RealFFT();

/* fopen() itself is declared by <stdio.h>, which this header includes, so
   the redundant old-style redeclaration has been dropped (under C23 an empty
   parameter list means (void) and would clash with the real prototype). */

/* File-opening helpers; each one yields a FILE pointer. */
extern FILE *fopen_with_check();
extern FILE *fopen_out_file();
extern FILE *fopen_or_exit();
extern FILE *la_open_file_check_header();
extern FILE *wh_open_file_check_header();

/* ----------------------------------------------------------------------------- */

/* --- overall input limits ------------------------------------------------ */
#define MAX_SECONDS             4
#define SAMPLING_RATE           8000    /* samples per second */
#define MAX_SAMPLES             (MAX_SECONDS * SAMPLING_RATE)   /* 32000 */
#define SAMPLES_PER_MS          (SAMPLING_RATE / 1000)          /* 8 */

/* --- analysis frames ----------------------------------------------------- */
#define SAMPLES_PER_FRAME       256     /* a power of two */
#define FRAME_SPACING           10      /* milliseconds */
#define SAMPLES_BETWEEN_FRAMES  (SAMPLING_RATE * FRAME_SPACING / 1000)      /* 80 */
#define MAX_FRAMES              (MAX_SAMPLES / SAMPLES_BETWEEN_FRAMES)      /* 400 */
#define FFT_NPOINTS             (SAMPLES_PER_FRAME / 2)     /* number of hz points output by FFT */
#define MS_PER_FRAME            (1000 * SAMPLES_PER_FRAME / SAMPLING_RATE)  /* 32 */

/* --- pixel scale (presumably for on-screen display; confirm against users) */
#define MS_PER_PIXEL            5
#define SAMPLES_PER_PIXEL       (SAMPLES_PER_MS * MS_PER_PIXEL)             /* 40 */

/* Feature-vector and table size limits. */
#define FB_NBINS 8                /* presumably the filterbank bin count -- confirm against users */
#define DEFAULT_NBINS 8
#define MAXBINS 20                /* upper bound on bins (features) stored per frame */
#define MAX_NTEMPLATES  60        
#define MAX_INFO_STRING_LEN 100   /* size in chars of the info/feature-type strings */

#define MAX_PATH_LEN 128              

/* Boolean constants; guarded so that a header which has already defined
   them does not trigger a redefinition diagnostic. */
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif

/* Parenthesized so the negative literal stays a single operand wherever the
   macro is expanded. */
#define NOT_FOUND (-1)

#define NORMAL 0   /* exit codes */
/* NOTE(review): STRANGE has the same value as NORMAL, so an exit with STRANGE
   is indistinguishable from success.  Left unchanged because callers may
   depend on the current exit status -- confirm whether nonzero was intended. */
#define STRANGE 0

#define MAX_WORD_LEN  50   /* characters per word (ie, per label) */
#define MAX_LABELS    20   /* max words per input utterance */

#define DEFAULT_THRESHOLD 1.8   /* for signal detection */

/* ----------------------------------------------------------------------------- */
/* Path helpers, defined outside this header; each returns a char pointer.
   Judging by the names they build .fb and .wh file paths -- confirm against
   the definitions, including who owns the returned storage. */
extern char *make_fb_path();
extern char *make_wh_path();

/* ----------------------------------------------------------------------------- */
/* TODO: it might be nice to have a structure corresponding to a .au file.
   It would consist of data_array, nsamples, and info_string. */


/* ----------------------------------------------------------------------------- */
/* the internal data structure for a featurized version of a speech sample.
   Corresponds to an .fb file */
/* Featurized speech sample: descriptive header fields, per-frame feature
   data, and working fields that are only used during matching.
   The arrays are sized for the worst case (MAX_FRAMES frames, MAXBINS bins);
   presumably only the first nframes rows and nbins columns are meaningful --
   confirm against the code that fills the structure. */
struct signature { 
  char  info_string[MAX_INFO_STRING_LEN];   /* free-form descriptive text */
  char  feature_type[MAX_INFO_STRING_LEN];  /* i.e., filterbank or melcepstrum */
  int   nbins;                  /* features extracted per frame */
  int   nframes;                /* number of frames (presumably those filled in below -- confirm) */
  int   frame_period;           /* milliseconds between frames */
  int   frame_shift;            /* samples between frames */
  int   logp;                   /* log of energy, or just plain energy
                                   (presumably a flag selecting which -- confirm) */

  float bins[MAX_FRAMES][MAXBINS];   /* feature values, indexed [frame][bin] */
  float energy[MAX_FRAMES];          /* one energy value per frame */

  /* below this point is used in match only */
  float hyp_score[MAX_FRAMES];  /* unnormalized w.r.t length (unlike best_score) */
  float best_score;             /* score at best_offset */
  float cutoff;                 /* for pruning */
  int   best_offset;            /* offset at which best_score was obtained */
  float length_factor;          /* NOTE(review): meaning not evident from this header;
                                   presumably related to length normalization -- confirm */
} ;

/* ----------------------------------------------------------------------------- */
/* used by chopper, judge, and showmatch */
/* this structure also used for the veridical labels,
   although score and word_validp are meaningless then */
/* corresponds to one line of a .wh file */
/* start, end, and len are in milliseconds, usually */
/* One word hypothesis: a label plus its position in time, a score, and a
   validity flag.  When the structure holds a veridical (reference) label
   instead of a hypothesis, score and word_validp carry no meaning. */
struct whyp {
  char label[MAX_WORD_LEN];   /* the word; at most MAX_WORD_LEN chars of storage */
  int start;        /* usually in milliseconds */
  int end;           /* usually in milliseconds */
  int len;           /* usually in milliseconds; presumably end - start -- confirm */

  float score;       /* meaningless for veridical labels */
  int word_validp;   /* TRUE or FALSE */
};

/* ----------------------------------------------------------------------------- */
