/* file = util.c */
/* Nigel Ward, University of Tokyo, April 1994 */ 
/* contents:
   general file i/o
   i/o for .au files 
   i/o for .fe files 
   i/o for .la files 
   i/o for .wh files 
   word label manipulations
   beep
   */

/* ----------------------------------------------------------------------------- */
#include "cheap.h"
/* ----------------------------------------------------------------------------- */

/* Identification strings, one per file format.  The writers in this file
   emit the matching string as the first line of the file, and the readers
   hand it to good_file_format() to verify that first line. */
static char wh_id_string[] = "<Speech_data_file_in_wh_format>";
static char fe_id_string[] = "<Speech_data_file_in_new_fe_format>";
static char la_id_string[] = "<Speech_label_file_in_la_format>";

/*=============================================================================*/

/* Builds new_path from path by replacing path's extension (if any) with
   `extension'.  For example, given path = lola.au, lola.wh, lola.fe, or lola,
   and extension = ".fe", new_path becomes lola.fe.
   extension must include its leading '.'; it may have any length.
   new_path must be large enough for the stripped path, the extension and the
   terminating '\0'; no bounds checking is done here.
   No meaningful value is returned. */
int make_new_path(new_path, path, extension)
     char *new_path, *path, *extension;
{
  strip_off_extension(path, new_path);
  /* append the whole extension, whatever its length, and let strcat supply
     the terminating '\0' */
  strcat(new_path, extension);
}

/*-----------------------------------------------------------------------------*/
/* Copies filename into `new' with its extension removed.
   The extension is everything from the last '.' of the final path component
   onwards.  A '.' at index 0 is never treated as the start of an extension,
   and the search stops at the last '/', so dots in directory names
   (e.g. "../lola" or "dir.d/lola") are left alone.
   `new' must have room for strlen(filename) + 1 characters.
   No meaningful value is returned. */
int strip_off_extension(filename, new)
     char *filename; char *new;
{
  int i;
  strcpy(new, filename);
  /* scan backwards from the last character; give up at a directory separator */
  for (i = (int) strlen(new) - 1; i > 0 && new[i] != '/'; i--) {
    if (new[i] == '.') {
      new[i] = '\0';
      break; } } }

/*-----------------------------------------------------------------------------*/
/* Stores in filename the last component of pathstring with its extension
   removed.  Example: converts /home/sanpon/nigel/speech/test/lola.au to lola.
   Everything up to and including the last '/' is dropped (also when that
   slash is the first character), then the text from the last '.' onwards is
   removed unless that '.' is the first character of the component.
   filename must have room for the component plus the terminating '\0'.
   No meaningful value is returned. */
int basefilename(filename, pathstring)
     char *filename, *pathstring;
{
  char *base, *dot;

  /* strip off all directories, if any */
  base = strrchr(pathstring, '/');
  if (base != NULL)
    base = base + 1;
  else
    base = pathstring;
  strcpy(filename, base);

  /* strip off the extension, working directly in the caller's buffer so no
     fixed-size temporary is needed */
  dot = strrchr(filename, '.');
  if (dot != NULL && dot != filename)
    *dot = '\0';
}

/* ----------------------------------------------------------------------------- */
/* Opens file_name with fopen() in the given mode.
   Returns the stream, or NULL after printing a message (with errno) to
   stderr when the open fails. */
FILE *fopen_with_check(file_name, mode_string)      char *file_name, *mode_string;
{ FILE *in_fp;
  in_fp = fopen(file_name, mode_string);
  /* compare the pointer itself: casting it to int can discard significant
     bits where pointers are wider than int */
  if (in_fp == NULL) {
    fprintf(stderr, "  couldn't open file `%s' (errno = %d) \n", file_name, errno);
    return(NULL); }
  return(in_fp); }

/* ----------------------------------------------------------------------------- */
/* Opens file_name with fopen() in the given mode.
   Returns the stream; when the open fails, prints a message (with errno) to
   stderr and terminates the program with exit(STRANGE). */
FILE *fopen_or_exit(file_name, mode_string)      char *file_name, *mode_string;
{ FILE *in_fp;
  in_fp = fopen(file_name, mode_string);
  /* compare the pointer itself: casting it to int can discard significant
     bits where pointers are wider than int */
  if (in_fp == NULL) {
    fprintf(stderr, "Can't open file `%s' (errno = %d) ... exiting \n",
	    file_name, errno);
    exit(STRANGE); }
  return(in_fp); }


/*-----------------------------------------------------------------------------*/
/* Opens filename read-only with open().
   Returns the descriptor from open(); when it is negative, a message with
   errno is printed on stdout first and the negative value is still returned. */
int open_rdonly_with_check(filename)      char *filename; 
{
  int descriptor = open(filename, O_RDONLY);

  if (descriptor >= 0)
    return descriptor;

  printf("sorry, can't open file `%s' (error no=%d). \n", filename, errno);
  return descriptor;
}

/*-----------------------------------------------------------------------------*/
/* Opens for writing the file named like path but carrying `extension'
   (the name is built by make_new_path).  Returns whatever
   fopen_with_check returns for that name. */
FILE *fopen_out_file(path, extension)      char *path, *extension;
{
  char out_name[MAX_PATH_LEN];
  FILE *out_fp;

  make_new_path(out_name, path, extension);
  out_fp = fopen_with_check(out_name, "w");
  return out_fp;
}

/* ----------------------------------------------------------------------------- */
/* Reads the first line from in_fp and compares its first whitespace-delimited
   word with id_string.
   Returns 1 when they match (in_fp stays open, positioned after that line).
   Otherwise reports the mismatch on stderr (filename is used only in that
   message), closes in_fp and returns 0.  An empty file or a blank first line
   counts as a mismatch. */
int good_file_format(in_fp, id_string, filename)   
     FILE *in_fp; char *id_string, *filename;
{
  char misc_string[100], file_id_string[100];

  /* start with an empty id so a failed read or a blank line compares as a
     mismatch rather than leaving the buffer uninitialized */
  file_id_string[0] = '\0';
  if (fgets(misc_string, sizeof(misc_string), in_fp) != NULL)
    (void) sscanf(misc_string, "%s", file_id_string);

  if (strcmp(file_id_string, id_string) != 0) {
    fprintf(stderr, " expected      `%s' \n", id_string);
    fprintf(stderr, " but file had  `%s' (file=`%s')\n", file_id_string, filename);
    fclose(in_fp);
    return(0); }
  return(1); }


/*=============================================================================*/
/* Opens /dev/audio write-only and returns the descriptor.
   If open() returns -1, prints a message with errno on stdout and
   terminates the program with exit(STRANGE). */
int open_audio_write()
{
  int fd = open("/dev/audio", O_WRONLY);

  if (audio_open_ok(fd))
    return(fd);

  printf("Can't open /dev/audio (errorno=%d).\n", errno);
  exit(STRANGE);
}

/*-----------------------------------------------------------------------------*/
/* Plays `bytes' bytes of raw_data, starting at offset start_byte, by writing
   them to the device opened by open_audio_write().  The result of write()
   is deliberately ignored. */
play_audio_data(raw_data, start_byte, bytes)
     char *raw_data; int start_byte, bytes;
{
  int device;
  char *first_byte;

  device = open_audio_write();
  first_byte = raw_data + start_byte;
  (void) write(device, first_byte, bytes);
  close(device);
}

/* ----------------------------------------------------------------------------- */
/* Reads a headered audio (.au) file.
     filename     file to read
     header       receives the file header.  NOTE(review): it is passed by
                  value, so the caller's copy is not updated -- confirm
                  whether callers rely on it
     info_string  receives the bytes between the fixed header and the data
                  (hdr_size - sizeof(header) of them; the caller's buffer
                  must be large enough, no bound is checked here)
     raw_data     receives at most MAX_SAMPLES bytes of sample data
   Returns the result of the read() of the sample data.
   Terminates with exit(STRANGE) when the file cannot be opened, is shorter
   than a header, or lacks the audio magic number: headered files are
   insisted upon, since raw2audio makes adding a header easy.
   NOTE(review): header fields are used exactly as read, with no byte-order
   conversion -- confirm the files are in host byte order. */
int read_au_into_array(filename, header, info_string, raw_data)
     char *filename;
     Audio_filehdr header;
     char *info_string;
     char raw_data[];
{
  int read_size, samples_to_read, fd;
  long info_size;

  fd = open_rdonly_with_check(filename);
  if (fd < 0)
    exit(STRANGE);      /* open_rdonly_with_check already said why */

  /* a short read leaves part of the header unset, so treat it like a bad
     magic number */
  read_size = read(fd, &header, sizeof(header));
  if (read_size != (int) sizeof(header) || header.magic != AUDIO_FILE_MAGIC) {
    fprintf(stderr, " note: wrong magic number: not a headered audio file\n" );  
    exit(STRANGE);
  }

  /* skip the read when hdr_size claims less than the fixed header: a
     negative length must not reach read() */
  info_size = (long) header.hdr_size - (long) sizeof(header);
  if (info_size > 0)
    read_size = read(fd, info_string, info_size);

  if (header.data_size <= MAX_SAMPLES)
    {samples_to_read = header.data_size;}
  else
    {samples_to_read = MAX_SAMPLES;
     fprintf(stderr, "\n long audio file!: only reading %d samples\n",
	     samples_to_read);}
  read_size = read (fd, raw_data, samples_to_read);
  close(fd); 
  return(read_size);
}


/*-----------------------------------------------------------------------------*/
/* Writes an audio file header to descriptor out_fd: the fixed Audio_filehdr
   followed by info_string padded with zero bytes.
   The padded info field is 8 * (1 + strlen(info_string) / 8) bytes long, so
   it is a multiple of 8 and always holds at least one terminating zero.
   The results of write() are ignored; no meaningful value is returned. */
int write_audio_hdr(out_fd, info_string, data_size, encoding, sample_rate, channels)
     int out_fd; char *info_string; int data_size, encoding, sample_rate, channels;
{
  static char zero_pad[8];      /* static storage, hence all zero bytes */
  Audio_filehdr	out_hdr;  int info_len, out_string_len;
  out_hdr.magic = AUDIO_FILE_MAGIC;
  info_len = strlen(info_string);
  /* round up to the next multiple of 8 bytes */
  out_string_len = 8 * (1 + info_len / 8);
  out_hdr.hdr_size     = sizeof(out_hdr) + out_string_len;
  out_hdr.data_size    = data_size;
  out_hdr.encoding     = encoding;
  out_hdr.sample_rate  = sample_rate;
  out_hdr.channels     = channels;
  (void) write(out_fd, (char *) &out_hdr, sizeof(out_hdr)); 
  /* write the string and its padding separately: info_string itself is only
     guaranteed to hold info_len + 1 bytes, so the padding (1 to 8 bytes)
     must not be read from it */
  (void) write(out_fd, info_string, info_len); 
  (void) write(out_fd, zero_pad, out_string_len - info_len); 
}


/*=============================================================================*/
/* Writes the header of a .fe file (in the format expected by match.c) to fp:
   the fe id line, one "keyword: value" line per field of *sig_ptr, and a
   final "data:" line.  An empty info_string is written as the template name
   "unknown".  No meaningful value is returned. */
int write_fe_header(fp, sig_ptr)   FILE* fp;    struct signature *sig_ptr;
{
  fprintf(fp, "%s\n", fe_id_string);
  fprintf(fp, "number_of_frames: %d \n", sig_ptr->nframes);
  /* test for the empty string with the character '\0', not the pointer
     constant NULL */
  if(sig_ptr->info_string[0] == '\0')
    fprintf(fp, "template_name: %s \n", "unknown");
  else
    fprintf(fp, "template_name: %s \n", sig_ptr->info_string);
  fprintf(fp, "features_per_frame: %d \n", sig_ptr->nbins);
  fprintf(fp, "features_from: %s \n", sig_ptr->feature_type);
  fprintf(fp, "frame_period: %d \n", sig_ptr->frame_period);
  fprintf(fp, "log: %d \n", sig_ptr->logp);
  fprintf(fp, "data:\n");
}

/* ----------------------------------------------------------------------------- */
/* write the featurized version of every frame of *s_ptr to the output file:
   for each frame its number, its nbins bin values, and its energy */
write_fe_data(out_fp, s_ptr)
     FILE *out_fp;      struct signature *s_ptr;    
{
  int f, b;

  f = 0;
  while (f < s_ptr->nframes) {
    write_fe_frame_number(out_fp, f);
    b = 0;
    while (b < s_ptr->nbins) {
      write_fe_bin(out_fp, s_ptr->bins[f][b]);
      b++;
    }
    write_fe_energy(out_fp, s_ptr->energy[f]);
    f++;
  }

  /* push the data out now; not strictly necessary */
  fflush(out_fp);
}

/* starts a frame line: the word "frame" and the frame index */
write_fe_frame_number(out_fp, frame)       FILE *out_fp; int frame;
{
  static char frame_format[] = "frame %3d  ";

  (void) fprintf(out_fp, frame_format, frame);
}

/* writes one bin value with four decimals, a space on either side */
write_fe_bin(out_fp, value)       FILE *out_fp; float value;
{
  static char bin_format[] = " %7.4f ";

  (void) fprintf(out_fp, bin_format, value);
}

/* ends a frame line: the "ttl:" tag, the energy value, and a newline */
write_fe_energy(out_fp, value)       FILE *out_fp; float value;
{
  static char energy_format[] = " ttl: %8.4f\n";

  (void) fprintf(out_fp, energy_format, value);
}


/* ----------------------------------------------------------------------------- */
/* Reads a .fe file from fp into *tptr.
   The stream must be positioned at the start of the file: the id line is
   checked first, then "keyword: value" header lines are read up to the
   "data:" line, then frames of the form
       frame XX   XX XX XX XX  ttl: XX
   are read until end of file or until MAX_FRAMES frames have been stored.
   frame_period defaults to FRAME_SPACING when the header does not give it.
   Returns TRUE on success, with tptr->nframes set to the number of frames
   read; returns FALSE on a bad id line, a header that ends before "data:",
   or malformed frame data.  fp is not closed here (good_file_format closes
   it when the id line is wrong).
   NOTE(review): header values are copied into tptr->feature_type and
   tptr->info_string with strcpy; they are at most 49 characters long here,
   but confirm those fields are that large. */
int read_fe_into_sig(fp, tptr)  
  FILE *fp;
  struct signature *tptr;
{
  int frame, bin, status, fields, frame_number;
  float temp, energy;
  char misc_string[100], keyword[50], info[50];
  tptr->frame_period = FRAME_SPACING;             /* default */

  if (!good_file_format(fp, fe_id_string, "")) {
    fprintf(stderr,"        apparently not a fe; skipping it. \n "); 
    return(FALSE);} 

  /* grab the information from the header.  Don't check for missing fields,
     since au2fe etc are trustworthy */
  for (;;) {
    /* without this check a header lacking `data:' would loop forever */
    if (fgets(misc_string, sizeof(misc_string), fp) == NULL) {
      fprintf(stderr,"match: fe header ended before `data:' \n");
      return(FALSE); }
    keyword[0] = '\0';
    info[0] = '\0';
    /* field widths keep long words from overrunning the 50-byte buffers */
    fields = sscanf(misc_string, " %49s %49s", keyword, info);
    if (fields < 1)
      continue;                 /* blank line: nothing to interpret */
    if (strcmp(keyword, "number_of_frames:") == 0)
      { /* ignore it, since we get frame count from actually reading */ }
    else if (strcmp(keyword, "features_from:") == 0) {
      strcpy(tptr->feature_type,info);  }
    else if (strcmp(keyword, "features_per_frame:") == 0) {
      tptr->nbins = atoi(info);  }
    else if (strcmp(keyword, "template_name:") == 0) {
      strcpy(tptr->info_string, info);  }
    else if (strcmp(keyword, "frame_period:") == 0) {
      tptr->frame_period = atoi(info);   }
    else if (strcmp(keyword, "log:") == 0) {
      tptr->logp = atoi(info);   }
    else if (strcmp(keyword, "data:") == 0) {
      /* stop processing header and start reading data */
      break;  }
    else {fprintf(stderr,"match: ignoring unknown fe header field: `%s'\n", keyword);}
  }

  /* read in the frames, each of form ``frame XX   XX XX XX XX  ttl: XX'' */
  for (frame = 0; frame < MAX_FRAMES; frame++) {
    /* the frame number in the file is read only to step past it; it must be
       scanned into an int to match %d */
    status = fscanf(fp, " frame %d ", &frame_number);
    if (status == EOF) {
      /* normal end of file after the last frame */
      tptr->nframes = frame;
      return(TRUE); }
    else if (status == 0) 
      {fprintf(stderr,"match: failed to find frame start. \n"); return(FALSE);}

    for (bin = 0; bin < tptr->nbins; bin++) {
      status = fscanf(fp, " %f ", &temp);
      if (status == EOF  || status == 0) 
	{fprintf(stderr,"match: !! fscanf returned %d (frame %d, bin %d) \n",
		 status, frame, bin);
	 return(FALSE);}
      tptr->bins[frame][bin] = temp; }

    status = fscanf(fp, " ttl: %f ", &energy);
    if (status != 1)
      {fprintf(stderr,"match: !! failed to read ttl (frame %d) \n", frame);
       return(FALSE);}
    tptr->energy[frame] = energy; 
  }

  /* reached only when the file holds MAX_FRAMES frames or more: keep what
     was stored and record the count */
  tptr->nframes = frame;
  fprintf(stderr,"match: stopped reading after %d frames \n", frame);
  return(TRUE);
}

/*=============================================================================*/
/* writes the .la id string, followed by a newline, to out_fp */
write_la_file_header(out_fp)       FILE *out_fp;
{
  fputs(la_id_string, out_fp);
  fputc('\n', out_fp);
}

/* writes one label line to out_fp: the label, its start and its end,
   separated by single spaces */
write_la_entry(out_fp, label, start, end)
     FILE *out_fp;  char *label;  int start, end;
{
  static char entry_format[] = "%s %d %d\n";

  (void) fprintf(out_fp, entry_format, label, start, end);
}


/* Opens for reading the .la file that goes with sister_filename (same name,
   extension replaced by ".la") and checks its id line.
   Returns the stream, positioned after the id line, or NULL when the file
   cannot be opened or its id line is wrong (good_file_format has then
   already closed the stream). */
FILE *la_open_file_check_header(sister_filename)    char *sister_filename;
{
  FILE *in_fp;
  char la_file_name[MAX_PATH_LEN];

  make_new_path(la_file_name, sister_filename, ".la");
  in_fp = fopen_with_check(la_file_name, "r");
  /* compare the pointer itself: casting it to int can discard significant
     bits where pointers are wider than int */
  if (in_fp == NULL) return (NULL);
  else if (good_file_format(in_fp, la_id_string, la_file_name))
    return (in_fp);
  else return(NULL); }


/*-----------------------------------------------------------------------------*/
int read_label_file(filename, labels)    char *filename;   struct whyp labels[];
{
  FILE *label_fp;   int nlabels, status, exit_flag;

  label_fp = (FILE *) la_open_file_check_header(filename);
  if (label_fp == 0) {return(0);}

  nlabels = 0;
  exit_flag = FALSE;
  while(!exit_flag) {
    status = fscanf(label_fp, "%s %d %d ",
		    &labels[nlabels].label,  /* & is redundant here */
		    &labels[nlabels].start, 
		    &labels[nlabels].end);

    if (status == EOF)
      {exit_flag = TRUE;}
    else if (status == 0)
      {fprintf(stderr, " *** read failed\n"); exit_flag = TRUE;}
    else {
      labels[nlabels].len = labels[nlabels].end - labels[nlabels].start;
      nlabels++; }
  }
  fclose(label_fp);
  return(nlabels);
}


/* Like read_label_file, but looks for the file in directory dirname.
   An empty dirname means filename is used as given.
   Returns the number of labels read; returns 0, after a message on stderr,
   when dirname/filename does not fit in MAX_PATH_LEN characters. */
int read_label_file_from_dir(dirname, filename, labels)
  char *dirname, *filename;  struct whyp labels[];
{
  char new_filename[MAX_PATH_LEN];
  if (strcmp(dirname, "") == 0) 
    return(read_label_file(filename, labels));

  /* dirname + '/' + filename + '\0' must fit in the buffer */
  if (strlen(dirname) + 1 + strlen(filename) + 1 > sizeof(new_filename)) {
    fprintf(stderr, " *** path `%s/%s' is too long\n", dirname, filename);
    return(0); }

  strcpy(new_filename, dirname);
  strcat(new_filename, "/");
  strcat(new_filename, filename);
  return(read_label_file(new_filename, labels));
}

/*=============================================================================*/
/* Reads the .wh file that goes with filename (same name, extension replaced
   by ".wh").
     nframes_ptr  receives the header's number_of_frames value (0 when the
                  header does not give one)
     nwh_ptr      receives the number of hypotheses stored in hyps[]
     hyps         receives the hypotheses: the label is clipped by
                  clip_word_name, start and end are multiplied by
                  FRAME_SPACING, and len is set to end - start
   Terminates with exit(STRANGE) when the file cannot be opened, has the
   wrong id line, or its header ends before the "data:" line.  Reading of
   hypotheses stops at end of file, at the first line that does not parse,
   or once MAX_NTEMPLATES entries are stored.
   No meaningful value is returned. */
int read_wh_file(filename, nframes_ptr, nwh_ptr, hyps)
     char *filename;
     int *nframes_ptr, *nwh_ptr; 
     struct whyp hyps[MAX_NTEMPLATES];
{
  int nwh, nframes;
  int status;
  char misc_string[100], raw_label[MAX_WORD_LEN], keyword[50], info[50];
  char data_format[40];
  char wh_filename[MAX_PATH_LEN];
  FILE *wh_fp;

  make_new_path(wh_filename, filename, ".wh");
  wh_fp = fopen_with_check(wh_filename, "r");
  if (wh_fp == NULL)
    {fprintf(stderr, "couldn't open wh file `%s'\n", wh_filename); 
     exit(STRANGE);}
  if (!good_file_format(wh_fp, wh_id_string, wh_filename)) {exit(STRANGE);}
  
  nframes = 0;          /* in case the header has no number_of_frames: line */
  for (;;) {
    /* without this check a header lacking `data:' would loop forever */
    if (fgets(misc_string, sizeof(misc_string), wh_fp) == NULL) {
      fprintf(stderr, "*** wh file `%s' ended before `data:'\n", wh_filename);
      exit(STRANGE); }
    keyword[0] = '\0';
    info[0] = '\0';
    /* field widths keep long words from overrunning the 50-byte buffers */
    if (sscanf(misc_string, " %49s %49s", keyword, info) < 1)
      continue;                 /* blank line: nothing to interpret */
    if (strcmp(keyword, "number_of_frames:") == 0)
      {nframes = atoi(info);}
    else if (strcmp(keyword, "input_info_string:") == 0)
      {}    /* ignore the info string */
    else if (strcmp(keyword, "frame_period:") == 0)
      {}    /* ignore the frame period */
    else if (strcmp(keyword, "features_from:") == 0)
      {}    /* ignore the feature type */
    else if (strcmp(keyword, "data:") == 0) {
      /* stop processing header and start reading data */
      break; }
    else {fprintf(stderr,"*** ignoring unknown wh header field: `%s'\n", keyword);}
  }

  /* same format as before, " %s %d %d %f ) ", but with the label width
     limited to what raw_label can hold */
  sprintf(data_format, " %%%ds %%d %%d %%f ) ", (int) (MAX_WORD_LEN - 1));

  nwh = 0;
  for (;;) {
    if (nwh >= MAX_NTEMPLATES) {
      /* the array is full; complain only if the file really has more */
      if (fscanf(wh_fp, "%*s") != EOF)
	fprintf(stderr, "too many hypotheses; only %d read in \n", nwh);
      break; }

    status = fscanf(wh_fp, data_format,
		    raw_label, &hyps[nwh].start, &hyps[nwh].end, &hyps[nwh].score);
    if (status == EOF)
      break;
    if (status != 4) {
      fprintf(stderr, "fscanf returned %d; something's wrong\n",status);
      fprintf(stderr, "%d templates read in \n", nwh);
      break;}

    clip_word_name(hyps[nwh].label,raw_label);
    hyps[nwh].end   = hyps[nwh].end * FRAME_SPACING;
    hyps[nwh].start = hyps[nwh].start * FRAME_SPACING;
    hyps[nwh].len = hyps[nwh].end - hyps[nwh].start;
    nwh++;
  }
  *nwh_ptr = nwh;
  *nframes_ptr = nframes;

  fclose(wh_fp);
}


/*-----------------------------------------------------------------------------*/
/* Writes the header of a .wh file to file_ptr: the wh id line, the frame
   count, info string, feature type and frame period of *input_ptr as
   "keyword: value" lines, and a final "data:" line. */
write_wh_file_hdr(file_ptr, input_ptr)
     FILE *file_ptr;  struct signature *input_ptr;
{
  FILE *out = file_ptr;
  struct signature *sig = input_ptr;

  /* id line first, so readers can recognize the format */
  fprintf(out, "%s\n", wh_id_string);

  /* header fields */
  fprintf(out, "number_of_frames: %d\n", sig->nframes);
  fprintf(out, "input_info_string: %s\n", sig->info_string);
  fprintf(out, "features_from: %s \n", sig->feature_type);
  fprintf(out, "frame_period: %d \n", sig->frame_period);

  /* marks the end of the header */
  fputs("data:\n", out);
}

/* Writes one hypothesis line to file_ptr: the label `string', its start,
   its end (start + length) and its score.
   No meaningful value is returned. */
int write_wh_file_line(file_ptr, string, length, start, score)
     FILE *file_ptr;    /* must be declared: an undeclared parameter is an int */
     char *string; int length; int start; float score;
{
  fprintf(file_ptr,  " %s  %d %d  %f \n",  string, start, start + length, score);
}

/*=============================================================================*/
/* Searches the veridical list (labels, holding nlabels entries) for word.
   Returns the index of the first entry whose label equals word, or
   NOT_FOUND when there is none.
   Assumes each word appears only once per sentence. */
lookup_correct(word, labels, nlabels)
     char *word; struct whyp labels[MAX_LABELS]; int nlabels;
{
  int idx = 0;

  while (idx < nlabels) {
    if (!strcmp(word, labels[idx].label))
      return idx;
    idx++;
  }
  return NOT_FOUND;
}

/*-----------------------------------------------------------------------------*/
/* extracts "john" from "john-fm-jms", "john-xxx-yyy" etc:
   copies raw_label into output up to, but not including, the first '-',
   space, or end of string.  At most MAX_WORD_LEN - 1 characters are copied
   and output is always '\0'-terminated, so output needs room for
   MAX_WORD_LEN characters.  No meaningful value is returned. */ 
int clip_word_name(output, raw_label)   char *output;  char *raw_label;
{
  int i;  char this_char;

  /* stop one short of MAX_WORD_LEN to leave room for the terminator */
  for (i = 0; i < MAX_WORD_LEN - 1; i++) {
    this_char = raw_label[i];
    if (this_char  == '-' || this_char == '\0' || this_char == ' ')
      break;
    output[i] = this_char; }
  output[i] = '\0'; }


/*=============================================================================*/
/* #define BEEP_LEN (SAMPLING_RATE / 50) */    /* fiftieth of a second */
#define BEEP_LEN 16  /* was 32 */
/* guitar 6th string (open E) should have frequency 329.6 Hz */
#define NOTE_FREQ  329.6  /* hertz */ 
#define AMPLITUDE  2. 
/* Writes BEEP_LEN samples of a NOTE_FREQ sine wave, converted with
   audio_s2u, to /dev/audio.
   actually a click, not a beep, 
   however, if it's short enough, it's not objectionable to the ear.
   Failures to open or fully write the device are reported on stderr.
   No meaningful value is returned.
   NOTE(review): with AMPLITUDE 2. the (short) cast leaves only the sample
   values -1, 0 and 1 -- confirm that such a quiet signal is intended. */
int beep()
{
  int out_fd;                   /* descriptor from open(), not a stdio stream */
  char data[BEEP_LEN];    
  int i, bytes_written;

  /* fill the whole buffer; the condition has to be `<' for the loop to run */
  for (i = 0; i < BEEP_LEN; i++) {
    data[i] = audio_s2u( (short) (AMPLITUDE *
				  sin( (double)
				      (2 * M_PI * NOTE_FREQ * i / SAMPLING_RATE))));}
  out_fd = open("/dev/audio", O_WRONLY); 
  if (out_fd < 0) {
    fprintf(stderr, "couldn't open /dev/audio to beep \n"); }
  else {
    /* only touch the device when the open succeeded */
    bytes_written = write(out_fd, data, BEEP_LEN);
    if (bytes_written  < BEEP_LEN) fprintf(stderr, "couldn't write full beep \n");
    close(out_fd); }
}

/* ============================================================================= */
