/***********************************************************************/
/*  This file contains the main loop to generate .par files to be
    processed by one of the klatt programs.

    Aaron Smith
    Brown University
    20 April 1991

    This version uses a second order diff eq to do smoothing between
    segments.
*/
#include <stdio.h>
#include <curses.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "proto.h"
#include "phonemes.h"
#include "features.h"
#include "gen.h"

/* Output buffer of parameter frames.  gen_pause() and generate() append to
   it at index total_frame_count; main() hands it to play_it(). */
genpar_t Frames[MAX_FRAMES];
/* In this file only the disabled (#if 0) branch of gen_pause() refers to
   this object. */
genpar_t LastTarget;

/* Two-slot working values for the current segment.  Slot 0 is START and
   slot 1 is END (see set_params()).  set_params() loads f1..av from the
   phoneme tables; f0 is seeded by generate().  generate() copies slot 0
   into each frame it writes itself.
   NOTE(review): these are not static, so they are presumably shared with
   the gen_*() routines defined in other files - confirm before renaming. */
float f0[2];
float f1[2];
float f2[2];
float f3[2];
float bw1[2];
float bw2[2];
float bw3[2];
float av[2];
/* Per-frame increments, (slot 1 - slot 0) / length, computed by set_params()
   for vowels and added to slot 0 by generate() when interpolating. */
static float s_f0;
static float s_f1;
static float s_f2;
static float s_f3;
static float s_bw1;
static float s_bw2;
static float s_bw3;
static float s_av;
/* Single-valued parameters for the current segment, set by set_params()
   and copied into frames by generate(). */
float fnp;
float fnz;
float af;
float ab;
float a1;
float a2;
float a3;
float a4;
float a5;
float a6;
float asp;
/* Number of frames currently stored in Frames; reset to 0 at the start of
   generate(). */
int total_frame_count;

/* Reset to 0 for every list entry in generate() and set to -1000 when
   lEND_LIST is seen.  It is not read anywhere in this file. */
int f0_change = 0;

void
save_file(Frames,frame_count,file)
genpar_ptr Frames;
int frame_count;
FILE * file;
{
 int i;
 for (i = 0; i < frame_count; i++)
  {
   fprintf(file, " %d  %d %d %d  %d %d %d  %d %d %d %d %d %d %d %d %d %d %d %d %d\n",
           Frames[i].f0,
           Frames[i].f1, Frames[i].f2, Frames[i].f3,
           Frames[i].bw1, Frames[i].bw2, Frames[i].bw3,
           Frames[i].fnz, Frames[i].fnp, Frames[i].av,
           Frames[i].tlt, Frames[i].asp, Frames[i].af,
           Frames[i].a1, Frames[i].a2, Frames[i].a3,
           Frames[i].a4, Frames[i].a5, Frames[i].a6,
           Frames[i].ab);
  }
}

/* Play frame_count frames from Frames: the frames are written with
   save_file() to the scratch file "tmp.par" in the current directory and
   the external command "sklatt tmp.par" is then run on that file.

   If the scratch file cannot be created or flushed, or the command cannot
   be started, a diagnostic is printed with perror(); in the first two cases
   the command is not run at all.
*/
/*TODO clumsy - can't we use a pipe or something ? */
void
play_it(Frames,frame_count)
genpar_ptr Frames;
int frame_count;
{
 FILE *temp = fopen("tmp.par", "w+");
 if (temp == NULL)
  {
   /* e.g. read-only or full working directory */
   perror("tmp.par");
   return;
  }
 save_file(Frames,frame_count, temp);
 /* fclose() flushes, so a write error may only surface here */
 if (fclose(temp) != 0)
  {
   perror("tmp.par");
   return;
  }
 if (system("sklatt tmp.par") == -1)
  perror("sklatt");
}

/* Append t / 10 (integer division) pause frames to Frames, advancing
   total_frame_count for each one.  Every pause frame has av, tlt, asp, af,
   a1..a6 and ab set to 0 and fixed placeholder values in the remaining
   fields.  Nothing is appended when t / 10 is zero or negative.

   Frames has a fixed size; once it is full the rest of the pause is dropped
   and a warning is written to stderr instead of writing past the array.
*/
void
gen_pause(t)
int t;
{
 /* Frames is a real array at file scope, so sizeof gives its capacity */
 const int capacity = (int) (sizeof Frames / sizeof Frames[0]);
 int k;
 t /= 10;
 for (k = 0; k < t && total_frame_count < capacity; k++)
  {
#if 0
   /* disabled variant: hold the last target and only mute the sources */
   Frames[total_frame_count].f0  = LastTarget.f0;
   Frames[total_frame_count].f1  = LastTarget.f1;
   Frames[total_frame_count].f2  = LastTarget.f2;
   Frames[total_frame_count].f3  = LastTarget.f3;
   Frames[total_frame_count].bw1 = LastTarget.bw1;
   Frames[total_frame_count].bw2 = LastTarget.bw2;
   Frames[total_frame_count].bw3 = LastTarget.bw3;
   Frames[total_frame_count].fnz = LastTarget.fnz;
   Frames[total_frame_count].fnp = LastTarget.fnp;
   Frames[total_frame_count].av  = 0;
   Frames[total_frame_count].tlt = LastTarget.tlt;
   Frames[total_frame_count].asp = 0;
   Frames[total_frame_count].af  = 0;
   Frames[total_frame_count].a1  = LastTarget.a1;
   Frames[total_frame_count].a2  = LastTarget.a2;
   Frames[total_frame_count].a3 = LastTarget.a3;
   Frames[total_frame_count].a4 = LastTarget.a4;
   Frames[total_frame_count].a5 = LastTarget.a5;
   Frames[total_frame_count].a6 = LastTarget.a6;
   Frames[total_frame_count].ab = LastTarget.ab;
#else
   Frames[total_frame_count].f0 = 100;
   Frames[total_frame_count].f1 = 100;
   Frames[total_frame_count].f2 = 200;
   Frames[total_frame_count].f3 = 300;
   Frames[total_frame_count].bw1 = 100;
   Frames[total_frame_count].bw2 = 200;
   Frames[total_frame_count].bw3 = 300;
   Frames[total_frame_count].fnz = 200;
   Frames[total_frame_count].fnp = 200;
   Frames[total_frame_count].av = 0;
   Frames[total_frame_count].tlt = 0;
   Frames[total_frame_count].asp = 0;
   Frames[total_frame_count].af = 0;
   Frames[total_frame_count].a1 = 0;
   Frames[total_frame_count].a2 = 0;
   Frames[total_frame_count].a3 = 0;
   Frames[total_frame_count].a4 = 0;
   Frames[total_frame_count].a5 = 0;
   Frames[total_frame_count].a6 = 0;
   Frames[total_frame_count].ab = 0;
#endif
   total_frame_count++;
  }
 if (k < t)
  fprintf(stderr, "gen_pause: frame buffer full, pause truncated\n");
}

/* Load the working parameters for one phoneme from the phoneme tables.

   type    table selector as returned by translate_phone()
   off     row within that table; must be 0 <= off < the table's row count
   length  number of frames the segment will occupy

   Returns 1 when the caller should interpolate from slot 0 towards slot 1
   (vowels only), otherwise 0.

   fnp and fnz default to 250 and are overridden only for nasals.

   For a vowel both slots (START = 0, END = 1) of f1..bw3 are filled from
   the table, END holding the diphthong target, and the per-frame increments
   s_* are set to (slot 1 - slot 0) / length.  The increments are set to 0
   when length is 0 so that no division by zero takes place.
   NOTE(review): a vowel without a diphthong presumably has identical START
   and END rows in the table - confirm in features.c.

   For every other type only slot 0 is written; slot 1 and the s_*
   increments keep whatever the previous call left in them.  The pseudo
   vowel case additionally leaves af untouched.
   NOTE(review): Sonorants[off][7] and Nasals[off][8] use literal column
   numbers where the other tables use ASP / AV - confirm they agree.

   An out-of-range off or an unknown type is a programming error and
   terminates the program with abort().
*/
int
set_params(type, off, length)
int type;
int off;
float length;
{
 int return_val = 0;

 fnp = 250;
 fnz = 250;

 switch (type)
  {
   case VOWEL_TYPE:
    {
     if (off < 0 || off >= NUM_VOWELS)
      abort();
     asp = ab = af = 0;
     a1 = a2 = a3 = a4 = a5 = a6 = 60;
     av[0] = VOWEL_AV;
     av[1] = VOWEL_AV;
     /* START = 0, END = 1 */
     f1[START] = (float) Vowels[off][START][F1];
     f1[END] = (float) Vowels[off][END][F1];
     f2[START] = (float) Vowels[off][START][F2];
     f2[END] = (float) Vowels[off][END][F2];
     f3[START] = (float) Vowels[off][START][F3];
     f3[END] = (float) Vowels[off][END][F3];
     bw1[START] = (float) Vowels[off][START][BW1];
     bw1[END] = (float) Vowels[off][END][BW1];
     bw2[START] = (float) Vowels[off][START][BW2];
     bw2[END] = (float) Vowels[off][END][BW2];
     bw3[START] = (float) Vowels[off][START][BW3];
     bw3[END] = (float) Vowels[off][END][BW3];

     if (length != 0)
      {
       s_f0 = (f0[1] - f0[0]) / length;
       s_f1 = (f1[1] - f1[0]) / length;
       s_f2 = (f2[1] - f2[0]) / length;
       s_f3 = (f3[1] - f3[0]) / length;
       s_bw1 = (bw1[1] - bw1[0]) / length;
       s_bw2 = (bw2[1] - bw2[0]) / length;
       s_bw3 = (bw3[1] - bw3[0]) / length;
       s_av = (av[1] - av[0]) / length;
      }
     else
      {
       /* a zero-frame segment is never stepped, so any slope would do;
          use 0 rather than dividing by zero */
       s_f0 = s_f1 = s_f2 = s_f3 = 0;
       s_bw1 = s_bw2 = s_bw3 = 0;
       s_av = 0;
      }
     return_val = 1;
     break;
    }
   case SONORANT_TYPE:
    {
     if (off < 0 || off >= NUM_SONORANTS)
      abort();
     a1 = a2 = a3 = a4 = a5 = a6 = 60;
     ab = 0;
     af = 0;
     asp = (float) Sonorants[off][7];
     av[0] = (float) Sonorants[off][AV];
     f1[0] = (float) Sonorants[off][F1];
     f2[0] = (float) Sonorants[off][F2];
     f3[0] = (float) Sonorants[off][F3];
     bw1[0] = (float) Sonorants[off][BW1];
     bw2[0] = (float) Sonorants[off][BW2];
     bw3[0] = (float) Sonorants[off][BW3];
     fnp = 250.0;
     fnz = 250.0;
     break;
    }
   case NASAL_TYPE:
    {
     if (off < 0 || off >= NUM_NASALS)
      abort();
     a1 = a2 = a3 = a4 = a5 = a6 = 60;
     ab = af = asp = 0;
     av[0] = (float) Nasals[off][8];
     f1[0] = (float) Nasals[off][F1];
     f2[0] = (float) Nasals[off][F2];
     f3[0] = (float) Nasals[off][F3];
     bw1[0] = (float) Nasals[off][BW1];
     bw2[0] = (float) Nasals[off][BW2];
     bw3[0] = (float) Nasals[off][BW3];
     fnp = (float) Nasals[off][FNP];
     fnz = (float) Nasals[off][FNZ];
     break;
    }
   case FRICATIVE_TYPE:
    {
     if (off < 0 || off >= NUM_FRICATIVES)
      abort();
     a1 = 0;
     a2 = (float) Fricatives[off][A2];
     a3 = (float) Fricatives[off][A3];
     a4 = (float) Fricatives[off][A4];
     a5 = (float) Fricatives[off][A5];
     a6 = (float) Fricatives[off][A6];
     ab = (float) Fricatives[off][AB];

     av[0] = (float) Fricatives[off][AV];
     f1[0] = (float) Fricatives[off][F1];
     f2[0] = (float) Fricatives[off][F2];
     f3[0] = (float) Fricatives[off][F3];
     bw1[0] = (float) Fricatives[off][BW1];
     bw2[0] = (float) Fricatives[off][BW2];
     bw3[0] = (float) Fricatives[off][BW3];
     asp = (float) Fricatives[off][ASP];
     af = (float) Fricatives[off][AF];
     break;
    }
   case PLOSIVE_TYPE:
    {
     if (off < 0 || off >= NUM_PLOSIVES)
      abort();
     a1 = 0;
     a2 = (float) Plosives[off][A2];
     a3 = (float) Plosives[off][A3];
     a4 = (float) Plosives[off][A4];
     a5 = (float) Plosives[off][A5];
     a6 = (float) Plosives[off][A6];
     ab = (float) Plosives[off][AB];

     av[0] = (float) Plosives[off][AV];
     f1[0] = (float) Plosives[off][F1];
     f2[0] = (float) Plosives[off][F2];
     f3[0] = (float) Plosives[off][F3];
     bw1[0] = (float) Plosives[off][BW1];
     bw2[0] = (float) Plosives[off][BW2];
     bw3[0] = (float) Plosives[off][BW3];
     asp = (float) Plosives[off][ASP];
     af = (float) Plosives[off][AF];
     break;
    }
   case AFFRICATE_TYPE:
    {
     if (off < 0 || off >= NUM_AFFRICATES)
      abort();
     a1 = 0;
     a2 = (float) Affricates[off][A2];
     a3 = (float) Affricates[off][A3];
     a4 = (float) Affricates[off][A4];
     a5 = (float) Affricates[off][A5];
     a6 = (float) Affricates[off][A6];
     ab = (float) Affricates[off][AB];

     av[0] = (float) Affricates[off][AV];
     f1[0] = (float) Affricates[off][F1];
     f2[0] = (float) Affricates[off][F2];
     f3[0] = (float) Affricates[off][F3];
     bw1[0] = (float) Affricates[off][BW1];
     bw2[0] = (float) Affricates[off][BW2];
     bw3[0] = (float) Affricates[off][BW3];
     asp = (float) Affricates[off][ASP];
     af = (float) Affricates[off][AF];
     break;
    }
   case PSUEDO_VOWEL_TYPE:
    {
     if (off < 0 || off >= NUM_PSEUDO_VOWELS)
      abort();
     a1 = 0;
     a2 = (float) Psuedo_vowels[off][A2];
     a3 = (float) Psuedo_vowels[off][A3];
     a4 = (float) Psuedo_vowels[off][A4];
     a5 = (float) Psuedo_vowels[off][A5];
     a6 = (float) Psuedo_vowels[off][A6];
     ab = (float) Psuedo_vowels[off][AB];

     av[0] = (float) Psuedo_vowels[off][AV];
     f1[0] = (float) Psuedo_vowels[off][F1];
     f2[0] = (float) Psuedo_vowels[off][F2];
     f3[0] = (float) Psuedo_vowels[off][F3];
     bw1[0] = (float) Psuedo_vowels[off][BW1];
     bw2[0] = (float) Psuedo_vowels[off][BW2];
     bw3[0] = (float) Psuedo_vowels[off][BW3];
     asp = (float) Psuedo_vowels[off][ASP];
     break;
    }
   default:
    abort();
    break;
  }
 return return_val;
}

/*
  Map a phoneme code to the table that holds its parameters.

  This seems to be required only because the parameters of the different
  types are stored differently.  See the comment in features.c.

  The actual generate() uses bits in Features to decide on the "type".

  ph      phoneme code
  offset  receives the row of ph within its table, i.e. ph minus the first
          code of the matching range, or -1 when ph has no table

  Returns VOWEL_TYPE, SONORANT_TYPE, NASAL_TYPE, FRICATIVE_TYPE,
  PLOSIVE_TYPE, AFFRICATE_TYPE or PSUEDO_VOWEL_TYPE according to the range
  ph falls in.  Codes below VOWELS_START or at/after ALL_END, and any code
  inside that span that no range claims, yield LEXICAL_TYPE with *offset
  set to -1, so the function never returns an indeterminate value.
*/
int
translate_phone(ph,offset)
int ph;
int *offset;

{
 if (ph < VOWELS_START || ph >= ALL_END)
  {
   *offset = -1;
   return LEXICAL_TYPE;
  }
 else if (ph <= VOWELS_END)
  {
   *offset = ph - VOWELS_START;
   return VOWEL_TYPE;
  }
 else if (ph <= SONORANTS_END)
  {
   *offset = ph - SONORANTS_START;
   return SONORANT_TYPE;
  }
 else if (ph <= NASALS_END)
  {
   *offset = ph - NASALS_START;
   return NASAL_TYPE;
  }
 else if (ph <= FRICATIVES_END)
  {
   *offset = ph - FRICATIVES_START;
   return FRICATIVE_TYPE;
  }
 else if (ph <= PLOSIVES_END)
  {
   *offset = ph - PLOSIVES_START;
   return PLOSIVE_TYPE;
  }
 else if (ph <= AFFRICATES_END)
  {
   *offset = ph - AFFRICATES_START;
   return AFFRICATE_TYPE;
  }
 else if (ph <= PSUEDO_VOWELS_END)
  {
   *offset = ph - PSUEDO_VOWELS_START;
   return PSUEDO_VOWEL_TYPE;
  }

 /* between PSUEDO_VOWELS_END and ALL_END: no table claims this code */
 *offset = -1;
 return LEXICAL_TYPE;
}

void
generate(ph_list,count)
int *ph_list;
int count;
{
 /* was 10 */
 int dur_list[120];
 unsigned long prev_feat = 0;
 int new_word = 1;

 int i;
 a1 = 0;

 f0[0] = 1300;     /* inital F0 */
 f0[1] = 1300;
 total_frame_count = 0;

 /* calculate the length of each segment */
 segdur(ph_list, dur_list, count);

 for (i = 0; i < count; i++)
  {int phone = ph_list[i];
   f0_change = 0;
   if (phone >= VOWELS_START && phone < ALL_END)
    {
     unsigned long features = Features[phone];
     int off;
     /* translate to array type and offset */
     int type                = translate_phone(phone, &off);
     int next_phone          = (i < count) ? ph_list[i + 1] : lEND_LIST;
     unsigned long next_feat = Features[next_phone];
     int length = dur_list[i] / 10;
     int interp = set_params(type, off, (float) length);

     /* generate according to phoneme type */
     if (features & PLOSI)
      {
       gen_plosive(phone, dur_list[i], prev_feat, next_feat, next_phone);
      }
     else if (features & FRICA)
      {
       gen_fricative(phone, dur_list[i], prev_feat, next_feat, next_phone);
      }
     else if (features & VOWEL)
      {
       gen_vowel(phone, dur_list[i], prev_feat, next_feat, next_phone);
      }
     else if (features & SONOR)
      {
       /* also seems to include NASALs */
       gen_sonorant(phone, dur_list[i], prev_feat, next_feat, next_phone);
      }
     else
      {int j;
       /* PSEUDO_VOWEL(s) get here */
       if (phone != pAXP)
        {
         printf("%s is one of the odd ones\n", Phoneme_name[phone]);
        }
       for (j = 0; j < length; j++)
        {
         Frames[total_frame_count].f0 = (int) f0[0];
         Frames[total_frame_count].f1 = (int) f1[0];
         Frames[total_frame_count].f2 = (int) f2[0];
         Frames[total_frame_count].f3 = (int) f3[0];
         Frames[total_frame_count].bw1 = (int) bw1[0];
         Frames[total_frame_count].bw2 = (int) bw2[0];
         Frames[total_frame_count].bw3 = (int) bw3[0];
         Frames[total_frame_count].fnz = (int) fnz;
         Frames[total_frame_count].fnp = (int) fnp;
         Frames[total_frame_count].av = (int) av[0];
         Frames[total_frame_count].tlt = 0;
         Frames[total_frame_count].asp = (int) asp;
         Frames[total_frame_count].af = (int) af;
         Frames[total_frame_count].a1 = (int) a1;
         Frames[total_frame_count].a2 = (int) a2;
         Frames[total_frame_count].a3 = (int) a3;
         Frames[total_frame_count].a4 = (int) a4;
         Frames[total_frame_count].a5 = (int) a5;
         Frames[total_frame_count].a6 = (int) a6;
         Frames[total_frame_count].ab = (int) ab;

#if 0
         /* the following is a hack to give the F0 some life */
         if (j < (total_frame_count / 3))
          f0[0] += 10;
         else
          f0[0] -= 10;
#endif

         if (interp)
          {
           f0[0] += s_f0;
           f1[0] += s_f1;
           f2[0] += s_f2;
           f3[0] += s_f3;
           bw1[0] += s_bw1;
           bw2[0] += s_bw2;
           bw3[0] += s_bw3;
           av[0] += s_av;
          }
         total_frame_count++;
        }
      }
     new_word  = 0;
     prev_feat = features;
    }
   else if (phone == lEND_LIST)
    {
     /* something arround end of last word did this,
        need to resolve what it is supposed to do
     */
     f0_change = -1000;
     gen_pause(WORD_BREAK_TIME/5);
    }
   else if (phone == lWORD_BREAK)
    {
     gen_pause(WORD_BREAK_TIME);
     prev_feat = 0;
     new_word  = 1;
    }
   else
    {
     /* don't do anything at all for lSYLLABLE */
    }
  }
}

/*TODO How about being able to generate from command line arguments ?
  Need to pass volume & port down to hplay()
*/

/* Program entry point.  Phoneme lists are fetched with read_phonemes() one
   after another; each list is turned into frames by generate() and played
   with play_it().  The loop ends when read_phonemes() returns 0, after
   which the scratch file "tmp.par" is removed.  The command line arguments
   are not used.  Always returns 0.
*/
int
main(argc,argv)
int argc;
char **argv;
{
 int phonemes[120];
 int n_read;

 for (;;)
  {
   n_read = read_phonemes(phonemes);
   if (!n_read)
    break;
   generate(phonemes, n_read);
   play_it(Frames,total_frame_count);
  }
 unlink("tmp.par");
 return 0;
}
