# include "Tools.h"
# include "Timit.h"
# include "RM.h"

# define DEFAULT_ABIAS		1.0
# define DEFAULT_PBIAS		1.0
# define DEFAULT_WBIAS_NOG	12.0
# define DEFAULT_WBIAS_WPG	10.0
# define DEFAULT_MAXLOAD	8.0
# define DEFAULT_SLP		RM1_DEFAULT_DUR_OFF
# define DEFAULT_FRAME		256
# define UNASSIGNED_DIST	(1 << 29)
# define MAX_GLOBAL_DIST	(1 << 28)
# define OLD_MAX_NNODE		15828 	/* must be less than MAX_SHORT */
# define MAX_NNODE		22749 	/* must be less than MAX_SHORT */
# define OLD_MAX_NHEAD		15828
# define MAX_NHEAD		22749
# define END_OF_TREE		(-1)
# define DEFAULT_WPLIST		RM1_DEFAULT_WPLIST
# define EOW_MASK		(1 << 15)

# define DEFAULT_ACT_PREFIX	RM1_LOCAL_PATH
# define DEFAULT_PHN_PREFIX	RM1_LOCAL_PATH
# define DEFAULT_WRD_PREFIX	RM1_LOCAL_PATH
# define DEFAULT_ACT_SUFFIX	".lna"
# define DEFAULT_PHN_SUFFIX	".Phn"
# define DEFAULT_WRD_SUFFIX_NOG	".wno"
# define DEFAULT_WRD_SUFFIX_WPG	".wwp"
# define DEFAULT_PHNTAB		RM1_DEFAULT_PHNTAB

# define NOREALPAIN
# define REALPAIN_MAXFRAME	512
# define REALPAIN_MAXNSILENCE	32

/* # define WANTBOUNDARIES */

/* One node of the pronunciation tree built by add_word(). */
typedef struct node_struct {
  struct node_struct *next;  /* next alternative in the same sibling list    */
  int    down;               /* head_stack index of the child list, or
                                END_OF_TREE when there is none               */
  int    phone;              /* phone index carried by this node             */
  int    word;               /* word index ending here, else RM_NOTAWORD     */
} node_type;

/* Forward declarations of the helpers defined after main(). */

/* Load a pronunciation dictionary and build the node tree; returns the word
   name table and stores the word count through the int* argument. */
char  **read_dict(char*, int*, int, int, char**, int);
/* Load the word-pair successor lists from a file. */
short **read_wplist(char*, int);
/* Build successor lists in which word i is followed only by word i + 1. */
short **forced_wplist(int);
/* Trace back through the best_from table and write the result as phone
   labels, word labels, or a single line of words respectively. */
void  print_phn(ushort**, int*, char**, int, int*, int, int, FILE*);
void  print_wrd(ushort**, int*, char**, int, int*, int, int, int, FILE*);
void  print_nist(ushort**, int*, char**, int, int*, int, int, int, FILE*);
/* Debug dumps of the tree and of the successor lists. */
void  print_tree(node_type*, char**, char**);
void  print_wplist(short**, char**, int);

/* Storage for the pronunciation tree, filled in by read_dict()/add_word(). */
static int	   nnode = 0;                 /* entries of node_stack in use */
static node_type   node_stack[MAX_NNODE];     /* pool that nodes are taken from */
/* nroot: number of root lists (one per word when a tree is built, else 1);
   nhead: entries of head_stack in use (the roots come first). */
static int         nroot, nhead = 0;
static node_type   *head_stack[MAX_NHEAD];    /* first node of each sibling list */
/* For a child list h (h >= nroot): node_stack index of the node whose `down`
   field is h.  add_word() only sets it for lists it creates itself. */
static int	   head2node[MAX_NHEAD];

/*
 * Decoder driver.
 *
 * The last command line argument names a file containing one base name per
 * line.  For every base name the activation file (act_prefix + base name +
 * act_suffix) is loaded, a dynamic-programming search over the dictionary
 * tree is run frame by frame, and the best path is written out as phone
 * labels (-phn), word labels (-wrd) and/or appended to a NIST style
 * transcription file (-nist).
 *
 * Several -print_*_exit options dump an intermediate table and exit.
 * The function never returns normally: it leaves through exit().
 */
int main(int argc, char **argv) {
  FILE  *fdbl, *fnist;
  float abias, pbias, *selfprob;
#ifdef MAXLOAD
  float maxload;
#endif
  int	*slp, *cslp, *min_delay, *prior, *boundarymap;
  int	**emissiondd,    **global_rootdd,    **global_nodedd,   **from_node;
  int   **emissionddBUF, **global_rootddBUF, **global_nodeddBUF,**from_nodeBUF;
  node_type *wpgram_last_node;
  int   sentence_init_index = RM_NOTAWORD, sentence_quit_index = RM_NOTAWORD;
  int 	frame, nframe, nboundary, print_silence;
  int	i, j, k, phn, wrd, nphone, nword, wbias,  max_min_delay, modbase;
  int   debug, verbose, wpgram, forced, usingtree, conv2lna, nist, nskip = 0;
  char  *act_prefix, *phn_prefix, *wrd_prefix;
  char  *act_suffix, *phn_suffix, *wrd_suffix;
  char  basename[MAXPATHLEN];
  ushort **best_from;
  short **wplist;
  uchar **data;
  char  **plist, **wlist;
  
  /* Print the usage summary on -h, on a missing argument, or when the list
     of files (last argument) cannot be opened. */
  if(Scan_flag(argc, argv, "-h") || argc < 2 ||
     (fdbl = Std_fopen(argv[argc - 1], "r")) == NULL) {
    fprintf(stderr, "Syntax:\t%s\n", *argv);
    fprintf(stderr, "\t-phn\n");
    fprintf(stderr, "\t-wrd\n");
    fprintf(stderr, "\t-debug\n");
    fprintf(stderr, "\t-bounds\n");
    fprintf(stderr, "\t-forced\n");
    fprintf(stderr, "\t-wpgram\n");
    fprintf(stderr, "\t-verbose\n");
    fprintf(stderr, "\t-conv2lna\n");
    fprintf(stderr, "\t-print_silence\n");
    fprintf(stderr, "\t-no_optional_silence\n");
    fprintf(stderr, "\t-print_wplist_exit\n");
    fprintf(stderr, "\t-print_duration_exit\n");
    fprintf(stderr, "\t-print_dictionary_exit\n");
    fprintf(stderr, "\t-frame %d\n", DEFAULT_FRAME);
    fprintf(stderr, "\t-abias %f\n", DEFAULT_ABIAS);
    fprintf(stderr, "\t-pbias %f\n", DEFAULT_PBIAS);
    fprintf(stderr, "\t-wbias <%f|%f>\n", DEFAULT_WBIAS_NOG,DEFAULT_WBIAS_WPG);
#ifdef MAXLOAD
    fprintf(stderr, "\t-maxload %f\n", DEFAULT_MAXLOAD);
#endif
    fprintf(stderr, "\t-slp %s\n", DEFAULT_SLP);
    fprintf(stderr, "\t-dict <%s|%s>\n", RM1_DEFAULT_RMDICT, RM1_DEFAULT_RMDICT_WPG);
    fprintf(stderr, "\t-wplist %s\n", DEFAULT_WPLIST);
    fprintf(stderr, "\t-phntab %s\n", DEFAULT_PHNTAB);
    fprintf(stderr, "\t-act_prefix %s\n", DEFAULT_ACT_PREFIX);
    fprintf(stderr, "\t-phn_prefix %s\n", DEFAULT_PHN_PREFIX);
    fprintf(stderr, "\t-wrd_prefix %s\n", DEFAULT_WRD_PREFIX);
    fprintf(stderr, "\t-act_suffix %s\n", DEFAULT_ACT_SUFFIX);
    fprintf(stderr, "\t-phn_suffix %s\n", DEFAULT_PHN_SUFFIX);
    fprintf(stderr, "\t-wrd_suffix <%s|%s>\n", DEFAULT_WRD_SUFFIX_NOG,
	    DEFAULT_WRD_SUFFIX_WPG);
    fprintf(stderr, "\t-nist <nist output file>\n");
    fprintf(stderr, "\t<-|list of files file>\n");
    exit(1);
  }

  /* Collect the command line options. */
  phn	   = Scan_flag(argc, argv, "-phn");
  wrd	   = Scan_flag(argc, argv, "-wrd");
  nist 	   = Scan_flag(argc, argv, "-nist");
  debug	   = Scan_flag(argc, argv, "-debug");
  forced   = Scan_flag(argc, argv, "-forced");
  wpgram   = Scan_flag(argc, argv, "-wpgram");
  verbose  = Scan_flag(argc, argv, "-verbose");
  conv2lna = Scan_flag(argc, argv, "-conv2lna");
  frame	   = Scan_int(argc, argv, "-frame", DEFAULT_FRAME);
  abias    = Scan_double(argc, argv, "-abias", DEFAULT_ABIAS);
  pbias    = Scan_double(argc, argv, "-pbias", DEFAULT_PBIAS);
#ifdef MAXLOAD
  maxload  = Scan_double(argc, argv, "-maxload", DEFAULT_MAXLOAD);
#endif
  act_prefix = Scan_string(argc, argv, "-act_prefix", DEFAULT_ACT_PREFIX);
  phn_prefix = Scan_string(argc, argv, "-phn_prefix", DEFAULT_PHN_PREFIX);
  wrd_prefix = Scan_string(argc, argv, "-wrd_prefix", DEFAULT_WRD_PREFIX);
  act_suffix = Scan_string(argc, argv, "-act_suffix", DEFAULT_ACT_SUFFIX);
  phn_suffix = Scan_string(argc, argv, "-phn_suffix", DEFAULT_PHN_SUFFIX);
  print_silence = Scan_flag(argc, argv, "-print_silence");

  /* -nist: if the output file already exists, count its lines and skip that
     many entries of the file list so an interrupted run can be resumed; then
     open the file for appending, unbuffered. */
  if(nist) {
    int ctmp, i;
    if(strcmp(Scan_string(argc, argv, "-nist", "-"), "-") != 0 &&
       (fnist = fopen(Scan_string(argc, argv, "-nist", ""), "r") )!= NULL) {
	while((ctmp = getc(fnist)) != EOF)
	  if(ctmp == '\n') nskip++;
	Panic_fclose(fnist);
	/* NOTE(review): "%s" is unbounded; a base name longer than
	   MAXPATHLEN - 1 would overflow basename. */
	for(i = 0; i < nskip; i++)
	  if(fscanf(fdbl, "%s\n", basename) != 1)
	    Panic("%s: Problem skipping processed files\n", *argv);
      }
    fnist = Std_fopen(Scan_string(argc, argv, "-nist", "-"), "a");
    (void) setbuf(fnist, NULL);
  }
  if(verbose) Panic_fprintf_args(argc, argv, stdout);
  plist = Read_table(Scan_string(argc, argv,"-phntab",DEFAULT_PHNTAB),&nphone);

  /* Read the -slp file: nphone self-loop probabilities (float), then nphone
     minimum delays (int), then nphone priors (float). */
  { FILE *fslp = Panic_fopen(Scan_string(argc, argv, "-slp", DEFAULT_SLP),"r");
    
    slp       = Panic_int_array(nphone);
    cslp      = Panic_int_array(nphone); 
    min_delay = Panic_int_array(nphone);
    prior     = Panic_int_array(nphone);
    selfprob  = Panic_float_array(nphone);

    Panic_fread((char*) selfprob, nphone, sizeof(float), fslp);

    /* slp: scaled -log of staying in the phone; cslp: scaled -log of
       leaving it. */
    for(i = 0; i < nphone; i++) {
      slp[i]  = - LNPROB_FLOAT2INT * log(selfprob[i] + VERY_SMALL);
      cslp[i] = - LNPROB_FLOAT2INT * log(1.0 - selfprob[i]);
      if(debug) printf("%s\t%f\n", plist[i], selfprob[i]);
    }

    Panic_fread((char*) min_delay, nphone, sizeof(*min_delay), fslp);
    max_min_delay = 0;
    for(i = 0; i < nphone; i++)
      if(max_min_delay < min_delay[i]) 
	max_min_delay = min_delay[i];
    /* Number of rows kept in each circular history buffer. */
    modbase = max_min_delay + 1;

    /* The format's %s needs the program name as its argument. */
    if(Scan_flag(argc, argv, "-bounds") && modbase != 2)
      Panic("%s: can't cope with -bounds and non-unity minimum durations\n",
	    *argv);

    {
      float *fltprior = Panic_float_array(nphone);
      Panic_fread((char*) fltprior, nphone, sizeof(float), fslp);

      for(i = 0; i < nphone; i++)
	prior[i] = pbias * LNPROB_FLOAT2INT * log(fltprior[i]); /* inverted! */

      Panic_free((char*) fltprior);
    }

    Panic_fclose(fslp);
  }

  /* -print_duration_exit: tabulate the duration model per phone and stop. */
  if(Scan_flag(argc, argv, "-print_duration_exit")) {
    printf("phone\tmin\tloop\ttotal\tround\tnew\told\n");
    for(i = 0; i < nphone; i++) {
      int max = 0.5 * (min_delay[i] - 1 + 1.0/(1.0 - selfprob[i])) + 0.5;
      printf("%s\t%d\t%4.2f\t%4.2f\t%d\t%d\t%d\n", plist[i], min_delay[i] - 1, 
	     1.0 / (1.0 - selfprob[i]),
	     min_delay[i] - 1 + 1.0 / (1.0 - selfprob[i]),
	     (int) floor(min_delay[i] - 1 + 1.0 / (1.0 - selfprob[i]) + 0.5),
	     max, min_delay[i]);
    }
    exit(0);
  }

  /* A per-word tree (one root list per word) is built for -forced and
     -wpgram; otherwise all words share root list 0.  The word-pair grammar
     also selects different defaults for the suffix, bias and dictionary. */
  usingtree = forced || wpgram;
  if(!wpgram) {
    wrd_suffix = Scan_string(argc, argv, "-wrd_suffix",DEFAULT_WRD_SUFFIX_NOG);
    wbias =LNPROB_FLOAT2INT*Scan_double(argc, argv,"-wbias",DEFAULT_WBIAS_NOG);
    wlist = read_dict(Scan_string(argc, argv, "-dict", RM1_DEFAULT_RMDICT),
		      &nword, usingtree,
		      !Scan_flag(argc, argv, "-no_optional_silence"),
		      plist, nphone);
  }
  else {
    wrd_suffix = Scan_string(argc, argv, "-wrd_suffix",DEFAULT_WRD_SUFFIX_WPG);
    wbias =LNPROB_FLOAT2INT*Scan_double(argc, argv,"-wbias",DEFAULT_WBIAS_WPG);
    wlist = read_dict(Scan_string(argc, argv, "-dict", RM1_DEFAULT_RMDICT_WPG),
		      &nword, usingtree,
		      !Scan_flag(argc, argv, "-no_optional_silence"),
		      plist, nphone);
  }
  if(Scan_flag(argc, argv, "-print_dictionary_exit")) {
    for(i = 0; i < nroot; i++) print_tree(head_stack[i], wlist, plist);
    exit(0);
  }
  sentence_init_index = String2index("@INIT", wlist, nword);
  sentence_quit_index = String2index("@QUIT", wlist, nword);
  /* Starting at list @QUIT, follow `down` links until a node carrying a word
     is reached.
     NOTE(review): this runs in every mode and assumes "@QUIT" was found and
     that head_stack[sentence_quit_index] is a valid list -- confirm for the
     non-tree case, where only root list 0 is set up by read_dict(). */
  for(wpgram_last_node = head_stack[sentence_quit_index];
      wpgram_last_node->word == RM_NOTAWORD;
      wpgram_last_node = head_stack[wpgram_last_node->down]) {
  }

  /* Successor lists: chained for -forced, read from file for -wpgram. */
  if(forced) wplist = forced_wplist(nword);
  else if(wpgram) wplist = read_wplist(Scan_string(argc, argv, "-wplist",
						   DEFAULT_WPLIST), nword);
  else wplist = NULL;

  if(Scan_flag(argc, argv, "-print_wplist_exit")) {
    if(wplist == NULL)
      printf("wplist == NULL\n");
    else
      for(i = 0; i < nword; i++) {
	short *wplist_word = wplist[i];
    
	while((j = *wplist_word++) != RM_NOTAWORD)
	  printf("%s\t%s\n", wlist[i], wlist[j]);
      }
    exit(0);
  }

  if(debug) printf("# nnode: %d\tnhead: %d\n", nnode, nhead);
  if(debug) printf("modbase: %d\n", modbase);

  /* The *BUF arrays own the storage (modbase rows each); the arrays without
     the suffix are row pointers that are re-aimed at every frame. */
  emissiondd    = (int**) Panic_pointer_array(modbase);
  global_nodedd = (int**) Panic_pointer_array(modbase);
  global_rootdd = (int**) Panic_pointer_array(modbase);
  from_node	= (int**) Panic_pointer_array(modbase);
  RETRY(((emissionddBUF    = Int_2d_array(modbase, nphone)) == NULL));
  RETRY(((global_rootddBUF = Int_2d_array(modbase, nhead))  == NULL));
  RETRY(((global_nodeddBUF = Int_2d_array(modbase, nnode))  == NULL));
  RETRY(((from_nodeBUF     = Int_2d_array(modbase, nhead))  == NULL));

  /* Main loop: one iteration per utterance.  Without REALPAIN the utterances
     are the base names listed in fdbl; with REALPAIN frames come from stdin.
     NOTE(review): "%s" is unbounded here as well. */
#ifndef REALPAIN
  while(fscanf(fdbl, "%s\n", basename) == 1) {
#else
  nframe = nboundary = REALPAIN_MAXFRAME;
  RETRY(((data = Uchar_2d_array(1, nphone + 1)) == NULL));
  RETRY(((boundarymap = Int_array(nboundary + 1))== NULL));
  for(i = 0; i < nframe + 1; i++) boundarymap[i] = i;
  RETRY(((best_from = Ushort_2d_array(nboundary, nnode))== NULL));

  while(!feof(stdin)) {
#endif
    FILE *fact;

#ifndef REALPAIN
#ifdef MAXLOAD
    Nap(maxload);
#endif

    /* Load the whole activation file: nphone + 1 bytes per frame. */
    fact = Panic_fopen_3bits(act_prefix, basename, act_suffix, "r");
    Panic_fseek(fact, 0, 2);
    nframe = ftell(fact) / (nphone + 1);
    rewind(fact);

    RETRY(((data = Uchar_2d_array(nframe, nphone + 1)) == NULL));

    Panic_fread(*data, nphone + 1, nframe, fact);
    Panic_fclose(fact);

#ifdef WANTBOUNDARIES
    if(!Scan_flag(argc, argv, "-bounds"))
      for(i = 0; i < nframe; i++) data[i][0] |= TIMIT_BOUNDARY_MASK;

    for(i = 0, nboundary = 0; i < nframe; i++)
      if(data[i][0] & TIMIT_BOUNDARY_MASK) nboundary++;
#else
    nboundary = nframe;
#endif

    /* boundarymap[b] is the first frame of segment b; without
       WANTBOUNDARIES every frame is its own segment. */
    RETRY(((boundarymap = Int_array(nboundary + 1)) == NULL));
    RETRY(((best_from = Ushort_2d_array(nboundary, nnode)) == NULL));
    boundarymap[0] = 0;

#ifdef WANTBOUNDARIES
    for(i = 0, j = 1; i < nframe; i++)
      if(data[i][0] & TIMIT_BOUNDARY_MASK) boundarymap[j++] = i + 1;
#else
    for(i = 0, j = 1; i < nframe; i++) boundarymap[j++] = i + 1;
#endif

#endif
    /* Reset all history buffers for the new utterance. */
    for(i = 0; i < modbase; i++)
      for(j = 0; j < nphone; j++)
	emissionddBUF[i][j] = UNASSIGNED_DIST;

    for(i = 0; i < modbase; i++)
      for(j = 0; j < nnode; j++)
	global_nodeddBUF[i][j] = UNASSIGNED_DIST;

    for(i = 0; i < modbase; i++)
      for(j = 0; j < nroot; j++) {
	global_rootddBUF[i][j] = UNASSIGNED_DIST;
	from_nodeBUF[i][j] = (head_stack[sentence_init_index] - node_stack)
	  | EOW_MASK;
/*
	from_nodeBUF[i][j] = NULL;
*/
      }
    /* Only the start root has zero cost before the first frame. */
    if(wpgram) global_rootddBUF[1][sentence_init_index] = 0;
    else global_rootddBUF[1][0] = 0;
    
    /* Forward pass, one iteration per segment. */
#ifndef REALPAIN
    for(i = 0; i < nboundary; i++) {
#else    
    for(i = 0; i < nboundary && fread(*data, nphone + 1, 1, stdin) == 1 &&
	!(data[0][0] & TIMIT_SENT_MASK); i++) {
#endif
      ushort *best_from_i = best_from[i];

      /* Rotate the row pointers: the buffer row seen at index j on this
	 step is seen at index j + 1 on the next one, so index 0 is the row
	 written now and index 1 the row written on the previous step. */
      for(j = 0; j < modbase; j++) {
	int wrappedj     = (modbase - (i % modbase) + j) % modbase;
	emissiondd[j]    = emissionddBUF[wrappedj];
	global_rootdd[j] = global_rootddBUF[wrappedj];
	global_nodedd[j] = global_nodeddBUF[wrappedj];
	from_node[j]	 = from_nodeBUF[wrappedj];
      }

      /* Emission cost of this segment for every phone goes in row 1; the
	 same amount is added to rows 2..modbase-1. */
      for(j = 0; j < nphone; j++) {
	emissiondd[1][j] = (boundarymap[i + 1] - boundarymap[i] - 1) * slp[j];

	for(k = boundarymap[i]; k < boundarymap[i + 1]; k++) {
#ifndef REALPAIN
	  int curr_data = data[k][j + 1];
#else
	  int curr_data = data[0][j + 1];
#endif
	  if(conv2lna) curr_data = - LNPROB_FLOAT2INT * 
	    log((curr_data + 1.0 / nphone) / MAX_UCHAR);
        
	  emissiondd[1][j] += floor(abias * (curr_data + prior[j]) + 0.5);
	}
	for(k = 2; k < modbase; k++)
	  emissiondd[k][j] += emissiondd[1][j];
      }

      for(j = 0; j < nroot; j++)
	global_rootdd[0][j] = UNASSIGNED_DIST;

      /* Update every node of every sibling list. */
      for(j = 0; j < nhead; j++) {
	node_type *pnode = head_stack[j];

	while(pnode != NULL) {
	  int k = pnode - node_stack;
	  int curr_phone = pnode->phone;
	  int curr_slp   =  slp[curr_phone];
	  int curr_cslp  = cslp[curr_phone];
	  int curr_min_delay = min_delay[curr_phone];
	  int opt1, optn, last_node;

	  /* optn: cost of entering this node curr_min_delay steps ago,
	     either from a root (word start) or from the parent node. */
	  if(j < nroot) {
	    int last_phone;
	    last_node = from_node[curr_min_delay][j];
	    last_phone = node_stack[last_node & ~EOW_MASK].phone;
            if(curr_phone == last_phone)
	      optn = global_rootdd[curr_min_delay][j] - curr_cslp + curr_slp +
		emissiondd[curr_min_delay][curr_phone];
	    else
	      optn = global_rootdd[curr_min_delay][j] +
		emissiondd[curr_min_delay][curr_phone];
#ifdef FEB15
	    if(curr_phone == last_phone)
	      optn = global_rootdd[curr_min_delay][j] +
		emissiondd[curr_min_delay][curr_phone];
	    else
	      optn = global_rootdd[curr_min_delay][j] - cslp[last_phone] +
		curr_slp + emissiondd[curr_min_delay][curr_phone];
#endif
#ifdef OLD
	    optn = global_rootdd[curr_min_delay][j] + 
	      emissiondd[curr_min_delay][curr_phone];
#endif
	  }
	  else {
	    last_node = head2node[j];
	    optn = global_nodedd[curr_min_delay][last_node] +
	           emissiondd[curr_min_delay][curr_phone] +
		   cslp[node_stack[last_node].phone];
	  }
	    
	  /* opt1: cost of staying in this node for one more step.  Keep the
	     cheaper option and remember where it came from. */
	  if((opt1 = global_nodedd[1][k] +emissiondd[1][curr_phone]+curr_slp) <
	      optn) {
	    global_nodedd[0][k] = opt1;
	    best_from_i[k] = k;
	  }
	  else {
	    global_nodedd[0][k] = optn;
	    best_from_i[k] = last_node;
	  }

	  /* A node that ends a word offers its cost (plus the leave cost and
	     the word bias) to the roots that may follow it. */
	  if(pnode->word != RM_NOTAWORD) {
	    int poss_node_dist_word = global_nodedd[0][k] + curr_cslp + wbias;

	    if(usingtree) {
	      register int n;
	      short *wplist_word = wplist[pnode->word];

	      while((n = *wplist_word++) != RM_NOTAWORD) {
		if(poss_node_dist_word < global_rootdd[0][n]) {
		  global_rootdd[0][n] = poss_node_dist_word;
		  from_node[0][n] = k | EOW_MASK;
		}
	      }
	    }
	    else if(poss_node_dist_word < global_rootdd[0][0]) {
	      global_rootdd[0][0] = poss_node_dist_word;
	      from_node[0][0] = k | EOW_MASK;
	    }
	  }
	  pnode = pnode->next;
	}
      }
    }

    /* Pick the node the traceback starts from and write the results. */
    { int best_final_node;
      FILE *fphn= stdout, *fwrd = stdout;

      if(forced) best_final_node = head_stack[nword - 1] - node_stack;
      else if(wpgram) 
	best_final_node = wpgram_last_node - node_stack;
      else {
	for(j = 0, k = 1; k < nroot; k++)
	  if(global_rootdd[0][k] < global_rootdd[0][j]) j = k;
	best_final_node = from_node[0][j] & ~EOW_MASK;
      }

      /* NOTE(review): divides by nframe; an empty activation file would
	 divide by zero here. */
      if(verbose) printf("%s\t%d\t%d\t%d\n", basename, nframe, nboundary,
			 global_nodedd[0][best_final_node] / nframe);

      if(global_nodedd[0][best_final_node] > MAX_GLOBAL_DIST)
	Panic("%s: No valid parse found\n", *argv);

/* NOTE i == nboundary */

#ifndef REALPAIN
      if(phn) fphn = Panic_fopen_3bits(phn_prefix, basename, phn_suffix, "w");
      if(wrd) fwrd = Panic_fopen_3bits(wrd_prefix, basename, wrd_suffix, "w");
#endif
      if(phn) print_phn(best_from, min_delay, plist, frame, boundarymap,
			i - 1, best_final_node, fphn);
      if(wrd) print_wrd(best_from, min_delay, wlist, frame, boundarymap,
			i - 1, best_final_node, print_silence, fwrd);
      if(nist) {
	char *speaker, *sentence;
	int k;

	print_nist(best_from, min_delay, wlist, frame, boundarymap,
		   i - 1, best_final_node, print_silence, fnist);

	/* Derive "(SPEAKER-SENTENCE)" from the base name: the sentence is
	   the last path component, the speaker is the part of the previous
	   component before its last '_'.
	   NOTE(review): the backward scans have no lower bound; a base name
	   lacking the expected '/' and '_' characters would read before the
	   start of the buffer. */
	k = strlen(basename);
	while(basename[k] != '/') k--;
	sentence = basename + k + 1;
	basename[k] = '\0';
	while(basename[k] != '_') k--;
	basename[k] = '\0';
	while(basename[k] != '/') k--;
	speaker = basename + k + 1;
	for(k = 0; k < strlen(sentence); k++) sentence[k]=toupper(sentence[k]);
	for(k = 0; k < strlen(speaker); k++)  speaker[k] =toupper(speaker[k]);
	fprintf(fnist, "(%s-%s)\n", speaker, sentence);
      }
#ifndef REALPAIN
      if(phn) Panic_fclose(fphn);
      if(wrd) Panic_fclose(fwrd);
#endif
    }
    /* Per-utterance buffers are released before the next file is read. */
#ifndef REALPAIN
    Panic_free((char*) data);
    Panic_free((char*) best_from);
    Panic_free((char*) boundarymap);
#endif
  }
  exit(0);
}

char **read_dict(char *filename, int *pnword, int buildtree, int optsilence,
		 char **plist, int nphone) {
  void	add_word(node_type**, int, int, int*);
  FILE	*fp_txt;
  int   *phonev;
  int   textsize, linesize, word, nsym, maxnsym, i;
  char  **wlist, *text, *line, *string;

  fp_txt = Panic_fopen(filename, "r");
  Panic_fseek(fp_txt, (long) 0, 2);
  textsize = ftell(fp_txt);
  text = Panic_malloc(textsize + 1);
  Panic_fseek(fp_txt, (long) 0, 0);
  Panic_fread(text, 1, textsize, fp_txt);
  Panic_fclose(fp_txt);

  /* add a new line at the end if the file isn't already terninated with one */
  if(text[textsize - 1] != '\n') text[textsize++ - 1] = '\n';

  /* go through and calculate the number of words and the maximum number of
     phonemes per word */
  *pnword = 0;
  maxnsym = nsym = 0;
  for(i = 0; i < textsize; i++) {
    if(text[i] == ' ') nsym++;
    if(text[i] == '\n') {
      (*pnword)++;
      if(nsym > maxnsym) maxnsym = nsym;
      nsym = 0;
    }
  }

  nhead = nroot = (buildtree) ? *pnword : 1;
  wlist = (char**) Panic_pointer_array(*pnword);
  phonev = Panic_int_array(maxnsym + 1);
  for(i = 0; i < nhead; i++) head_stack[i] = NULL;

  for(line = text, word = 0; line < text + textsize; line += linesize, word++){
    for(linesize = 0; line[linesize] != '\n' && 
	line + linesize < text + textsize; linesize++);
    line[linesize++] = '\0';

    wlist[word] = strtok(line, " ");
    for(i = 0; (string = strtok(NULL, " ")) != NULL; i++)
      phonev[i] = String2index(string, plist, nphone);

    phonev[i] = TIMIT_SILENCE_INDEX;
    if(buildtree) {
      add_word(&head_stack[word], word, i, phonev);
      if(optsilence) add_word(&head_stack[word], word, i + 1, phonev);
    }
    else {
      add_word(&head_stack[0], word, i, phonev);
      if(optsilence) add_word(&head_stack[0], word, i + 1, phonev);
    }     
  }
  return(wlist);
}

/*
 * Enter one pronunciation into the tree.
 *
 * pproot  address of the head pointer of the sibling list to start in
 * word    word index to store on the node of the final phone
 * phonec  number of phones in phonev
 * phonev  phone indices of the pronunciation
 *
 * One tree level is handled per loop iteration.  Nodes are taken from
 * node_stack and child lists from head_stack; Panic() is called when either
 * pool is exhausted or when the final node already carries a word.
 */
void add_word(node_type **pproot, int word, int phonec, int *phonev) {
  node_type **link = pproot;

  for(;;) {
    const int final_phone = (phonec == 1);
    node_type *cur;

    /* Look along the sibling list for a node with this phone.  For the
       final phone only a node that does not yet carry a word will do. */
    for(; *link != NULL; link = &((*link)->next)) {
      if((*link)->phone != phonev[0]) continue;
      if(!final_phone || (*link)->word == RM_NOTAWORD) break;
    }

    /* Nothing suitable: append a fresh node at the end of the list. */
    if(*link == NULL) {
      if(nnode >= MAX_NNODE) Panic("add_word: node_stack overflow\n");
      else {
	node_type *fresh = &(node_stack[nnode]);

	nnode++;
	fresh->next  = NULL;
	fresh->down  = END_OF_TREE;
	fresh->phone = phonev[0];
	fresh->word  = RM_NOTAWORD;
	*link = fresh;
      }
    }
    cur = *link;

    /* Last phone: label the node with the word and stop. */
    if(!(phonec > 1)) {
      if(cur->word == RM_NOTAWORD) cur->word = word;
      else Panic("add_word: already a word: %d\n", cur->word);
      return;
    }

    /* More phones follow: make sure the node has a child list ... */
    if(cur->down == END_OF_TREE) {
      if(nhead >= MAX_NHEAD) Panic("add_word: head_stack overflow\n");
      else {
	head2node[nhead] = cur - node_stack;
	cur->down = nhead;
	head_stack[nhead] = NULL;
	nhead++;
      }
    }

    /* ... and continue one level down with the remaining phones. */
    link = &head_stack[cur->down];
    phonec--;
    phonev++;
  }
}

/*
 * Dump a sibling list and everything below it to stdout.
 *
 * For every node the phone name is printed, followed by the word name and a
 * newline if the node ends a word, followed by the dump of its child list.
 * The end of every list is marked with "< ".
 *
 * pnode  first node of the list to print (may be NULL)
 * wlist  word name table
 * plist  phone name table
 */
void print_tree(node_type *pnode, char **wlist, char **plist) {
  while(pnode != NULL) {
    printf("%s ", plist[pnode->phone]);
    if(pnode->word != RM_NOTAWORD) printf("%s\n", wlist[pnode->word]);
    /* A node without children has down == END_OF_TREE (-1), which must not
       be used as a head_stack index; print it as an empty list instead. */
    if(pnode->down != END_OF_TREE)
      print_tree(head_stack[pnode->down], wlist, plist);
    else
      printf("< ");
    pnode = pnode->next;
  }
  printf("< ");
}

/*
 * Build the successor lists for forced alignment: word i may only be
 * followed by word i + 1, and the last word has no successor.
 *
 * nword  number of words
 *
 * Returns an nword x 2 array; each row is terminated by RM_NOTAWORD.
 */
short **forced_wplist(int nword) {
  int   i;
  short **wplist;

  wplist = Panic_short_2d_array(nword, 2);

  for(i = 0; i < nword - 1; i++) {
    wplist[i][0] = i + 1;
    wplist[i][1] = RM_NOTAWORD;
  }
  /* With no words there is no last row to terminate. */
  if(nword > 0) wplist[nword - 1][0] = RM_NOTAWORD;
  return(wplist);
}

/*
 * Load the word-pair successor lists.
 *
 * The file is read as a flat array of shorts holding nword consecutive
 * lists, each terminated by RM_NOTAWORD.
 *
 * filename  file to read
 * nword     number of lists expected
 *
 * Returns an array of nword pointers, one to the start of each list.  The
 * pointers refer to the buffer holding the file contents, which therefore
 * stays allocated.  Panics if the file ends before nword lists were found.
 */
short **read_wplist(char *filename, int nword) {
  FILE *fwplist = Panic_fopen(filename, "r");
  int  filesize, i;
  short **wplist, *wpdata, *wpend;

  wplist = (short**) Panic_pointer_array(nword);
  Panic_fseek(fwplist, (long) 0, 2);
  filesize = ftell(fwplist);
  if(filesize < 0)
    Panic("read_wplist: cannot determine size of %s\n", filename);
  rewind(fwplist);
  wpdata = (short*) Panic_malloc(filesize);
  Panic_fread((char*) wpdata, filesize, 1, fwplist);
  Panic_fclose(fwplist);

  /* One past the last complete short in the buffer. */
  wpend = wpdata + filesize / sizeof(short);

  for(i = 0; i < nword; i++) {
    wplist[i] = wpdata;
    /* Skip to just after this list's terminator, never reading past the
       data that was actually loaded. */
    do {
      if(wpdata >= wpend)
	Panic("read_wplist: %s ends before list %d is terminated\n",
	      filename, i);
    } while(*wpdata++ != RM_NOTAWORD);
  }
  return(wplist);
}

/*
 * Print every allowed word pair as "first<TAB>second", one pair per line.
 *
 * wplist  per-word successor lists, each terminated by RM_NOTAWORD
 * wlist   word name table
 * nword   number of words
 */
void print_wplist(short **wplist, char **wlist, int nword) {
  int first;

  for(first = 0; first < nword; first++) {
    const short *succ = wplist[first];

    while(*succ != RM_NOTAWORD) {
      printf("%s\t%s\n", wlist[first], wlist[*succ]);
      succ++;
    }
  }
}

/*
 * Trace the best path backwards from (end, node) and write one line per
 * phone, "start<TAB>end<TAB>phone", in chronological order.
 *
 * best_from    per-step, per-node predecessor table filled in by main()
 * min_delay    per-phone delay subtracted when stepping to the predecessor
 * plist        phone name table
 * frame        factor the boundary positions are multiplied by on output
 * boundarymap  step index -> boundary position
 * end          last step occupied by `node`
 * node         node_stack index of the node to report
 * fphn         output stream
 */
void print_phn(ushort **best_from, int *min_delay, char **plist, int frame,
	       int *boundarymap, int end, int node, FILE *fphn) {
  const int phone = node_stack[node].phone;
  int step = end;
  int pred;

  /* Walk back over the steps in which the node simply followed itself. */
  for(;;) {
    int entry = best_from[step][node];

    pred = entry & ~EOW_MASK;
    if(pred != node || (entry & EOW_MASK)) break;
    step--;
  }
  step -= min_delay[phone];

  /* Earlier phones are printed first so the output is in time order. */
  if(step >= 0)
    print_phn(best_from, min_delay, plist, frame, boundarymap, step, pred,
	      fphn);
  fprintf(fphn, "%d\t%d\t%s\n", frame * boundarymap[step + 1],
	  frame * boundarymap[end + 1], plist[phone]);
}

/*
 * Trace the best path backwards from (end, node) and write one line per
 * word, "start<TAB>end<TAB>word", in chronological order.  Words whose name
 * starts with '@' are only written when print_silence is non-zero.
 *
 * best_from      per-step, per-node predecessor table filled in by main()
 * min_delay      per-phone delay subtracted when stepping to a predecessor
 * wlist          word name table
 * frame          factor the boundary positions are multiplied by on output
 * boundarymap    step index -> boundary position
 * end            last step of the word
 * node           node_stack index of the node that ends the word
 * print_silence  non-zero to include '@' words
 * fwrd           output stream
 */
void print_wrd(ushort **best_from, int *min_delay, char **wlist, int frame,
	       int *boundarymap, int end, int node, int print_silence,
	       FILE *fwrd) {
  const int word = node_stack[node].word;
  int step = end;
  int pred;
  int crossed_word;

  /* Go back node by node until a predecessor entry flagged with EOW_MASK is
     found, i.e. until the start of this word has been crossed. */
  do {
    int entry;

    for(;;) {
      entry = best_from[step][node];
      pred = entry & ~EOW_MASK;
      if(pred != node || (entry & EOW_MASK)) break;
      step--;
    }
    crossed_word = entry & EOW_MASK;
    step -= min_delay[node_stack[node].phone];
    node = pred;
  } while(!crossed_word);

  /* Earlier words first. */
  if(step >= 0)
    print_wrd(best_from, min_delay, wlist, frame, boundarymap, step, pred,
	      print_silence, fwrd);

  if(!print_silence && wlist[word][0] == '@')
    return;
  fprintf(fwrd, "%d\t%d\t%s\n", frame * boundarymap[step + 1],
          frame * boundarymap[end + 1], wlist[word]);
}

/*
 * Trace the best path backwards from (end, node) and write the recognised
 * words on one line, each followed by a space, in chronological order.
 * Words whose name starts with '@' are only written when print_silence is
 * non-zero.  frame and boundarymap are only passed on to the recursive call.
 *
 * best_from      per-step, per-node predecessor table filled in by main()
 * min_delay      per-phone delay subtracted when stepping to a predecessor
 * wlist          word name table
 * end            last step of the word
 * node           node_stack index of the node that ends the word
 * print_silence  non-zero to include '@' words
 * fwrd           output stream
 */
void print_nist(ushort **best_from, int *min_delay, char **wlist, int frame,
	       int *boundarymap, int end, int node, int print_silence, FILE *fwrd) {
  const int word = node_stack[node].word;
  int step = end;
  int pred;
  int crossed_word;

  /* Go back node by node until a predecessor entry flagged with EOW_MASK is
     found, i.e. until the start of this word has been crossed. */
  do {
    int entry;

    for(;;) {
      entry = best_from[step][node];
      pred = entry & ~EOW_MASK;
      if(pred != node || (entry & EOW_MASK)) break;
      step--;
    }
    crossed_word = entry & EOW_MASK;
    step -= min_delay[node_stack[node].phone];
    node = pred;
  } while(!crossed_word);

  /* Earlier words first. */
  if(step >= 0)
    print_nist(best_from, min_delay, wlist, frame, boundarymap, step, pred,
	       print_silence, fwrd);

  if(!print_silence && wlist[word][0] == '@')
    return;
  fprintf(fwrd, "%s ", wlist[word]);
}
