/* file = judge.c */ 
/* given a word spotter output (a .wh file) and a
   veridical labeling (a .la file), judges the quality of the .wh file */
/* Nigel Ward, University of Tokyo, April 1994 */


/* the average of alignment_score and word_score gives "alignment-based-goodness".
   This is an approximation to the quantity that determines how well
   ch4 (PEP's current construction spotter) does */
/* overlap_score * word_score gives "product".
   This is an approximation to the quantity that determines how well
   PEP's timeline-based computations exploit the word hypotheses */

/*-----------------------------------------------------------------------------*/
#include "cheap.h"

/* Non-zero when per-hypothesis scoring details should be printed;
   main() sets it to TRUE when exactly one input file is given. */
static int verbose;
/* Number of .wh files named on the command line. */
static int nfiles;
/* Index into file_list of the file currently being judged. */
static int active_file;
/* Input file names, copied from argv by main(). */
static char file_list[100][MAX_PATH_LEN];
static char label_dir[MAX_PATH_LEN];   /* directory in which to seek .la files */

/* Veridical labels for the current file, filled by read_label_file_from_dir. */
static struct whyp labels[MAX_LABELS];
/* Word hypotheses for the current file, filled by read_wh_file. */
static struct whyp hyps[MAX_NTEMPLATES];
/* Frame count reported by read_wh_file; do_wh_file substitutes a guess
   when the file reports 0. */
static int nframes;
/* Count returned by read_label_file_from_dir for the current file. */
static int nlabels;
static int nwh;            /* number of word hypotheses */
/* Sum, over the files judged so far, of each file's normalized avg product. */
static float grand_total;

/* Sum of the (normalized) scores of all hypotheses in the current file. */
static float cum_word_score;
/* cum_word_score / nwh: the score a hypothesis would get "on average". */
static float exp_word_score;
/* Sum of the scores of those hypotheses whose word was found in the labels. */
static float cum_correct_word_score;
static int ncorrect;      /* number of hypothesis which are for words truly in input */
/* Sums, over the correct hypotheses, of the alignment score and of the
   alignment-based goodness. */
static float cum_alignment_score, cum_alignment_based_goodness; 
/* Sums, over the correct hypotheses, of the scaled overlap and of the
   scaled expected overlap. */
static float cum_overlap, cum_expected_overlap;
/* Sums, over the correct hypotheses, of the product (overlap * score) and of
   its expected value. */
static float cum_product, cum_exp_product;


/*-----------------------------------------------------------------------------*/
/* Convert the distances held in hyps[0..nwh-1].score to scores: each score
   becomes (smallest distance in the file) / (own distance), so the best
   hypothesis scores 1 and, assuming distances are non-negative, every score
   lies in the range 0 to 1.
   A distance of exactly 0 is mapped straight to 1; dividing would give 0/0.
   Does nothing when there are no hypotheses.  Always returns 0. */
/* note that the normalization algorithm is probably something which should be
   judged, rather than being built into judge */
int normalize_scores()
{
  int i;
  float min_dist;

  if (nwh <= 0)
    return(0);        /* hyps[0] holds no data for this file */

  min_dist = hyps[0].score;
  for (i = 1; i < nwh; i++) {
    if (hyps[i].score < min_dist)
      min_dist = hyps[i].score;
  }

  for (i = 0; i < nwh; i++) {
    if (hyps[i].score == 0)
      hyps[i].score = 1.0;     /* perfect match; avoids 0/0 */
    else
      hyps[i].score = min_dist / hyps[i].score;
  }
  return(0);
}

/*-----------------------------------------------------------------------------*/
/* Map an alignment distance onto a score:
   a distance of 0 scores 1, a distance of HALF_SCORE_AT scores .5, and the
   score keeps shrinking toward 0 as the distance grows.
   Other mapping functions may be worth considering. */
#define HALF_SCORE_AT 100.0 /* milliseconds */
float score_alignment(raw_distance)
     int raw_distance;    /* distance in ms */
{
  double denominator;

  denominator = HALF_SCORE_AT + raw_distance;
  return(HALF_SCORE_AT / denominator);
}

/*-----------------------------------------------------------------------------*/
/* Rough estimate (only an approximation) of the overlap expected by chance:
   the mean of the two lengths, truncated to an integer, is squared and
   divided by the input length.
   Also prints a warning when one length is more than 5 times the other. */
float random_expected_overlap(hyp_len, real_len, input_len, hyp_label)
     int hyp_len, real_len, input_len;
     char *hyp_label;
{
  int mean_len;
  int hyp_far_shorter, hyp_far_longer;

  hyp_far_shorter = (hyp_len * 5 < real_len);
  hyp_far_longer = (real_len * 5 < hyp_len);
  if (hyp_far_shorter || hyp_far_longer) {
    printf("   warning: %s implausible: hypothesis length is %d, label length is %d ***\n",
           hyp_label, hyp_len, real_len);
  }

  mean_len = (int) ((hyp_len + real_len) / 2.0);
  return((mean_len * mean_len) / (float) input_len);
}


/*-----------------------------------------------------------------------------*/
/* Scale an overlap by the largest overlap the two words could have,
   i.e. divide it by the longer of the two lengths.
   A consequence is that short words must be matched more accurately to
   reach the same score. */
float scale_overlap(overlap, len1, len2)
     int overlap, len1, len2;
{
  int longer;

  longer = len1;
  if (len2 >= len1)
    longer = len2;
  return(overlap / (float) longer);
}

/*-----------------------------------------------------------------------------*/
/* Length of the intersection of the intervals (start1, end1) and
   (start2, end2): the earlier of the two ends minus the later of the two
   starts.  Returns 0 when the intervals are disjoint or merely touch, and
   also when an interval is inverted (end before start), so the result is
   never negative. */
int compute_overlap(start1, end1, start2, end2)
     int start1, end1, start2, end2;
{
  int latest_start, earliest_end;

  latest_start = (start1 > start2) ? start1 : start2;
  earliest_end = (end1 < end2) ? end1 : end2;

  if (earliest_end <= latest_start)
    return(0);
  return(earliest_end - latest_start);
}


/* ============================================================================= */
int compute_scores();    /* defined later in this file; called below */

/* Judge the file file_list[active_file]: read its labels from label_dir and
   its word hypotheses, then normalize and score them.
   The file is skipped, with a message, when no labels or no hypotheses are
   available.  Always returns 0. */
int do_wh_file()
{
  printf("\nFor %s: --------------------------------------\n",
	 file_list[active_file]);

  nlabels = read_label_file_from_dir(label_dir, file_list[active_file], labels);
  if (nlabels <= 0) {
    printf( "  !missing, invalid, or empty label file --- skipping file `%s'\n",
	    file_list[active_file]);
    return(0);
  }

  read_wh_file(file_list[active_file], &nframes, &nwh, hyps);
  if (verbose) printf("  %d frames, %d word-hypotheses \n", nframes, nwh);
  if (nwh <= 0) {
    /* nothing to normalize or average; scoring would divide by zero */
    printf("  !no word hypotheses --- skipping file `%s'\n",
	   file_list[active_file]);
    return(0);
  }

  if (nframes == 0) {
    /* wh files created by rec2wh don't have frame counts, so guess */
    /* NOTE(review): compute_one_score multiplies nframes by FRAME_SPACING
       before comparing it with word lengths, so if label times are already
       in ms this guess is too large by that factor --- confirm the units */
    nframes = labels[nlabels - 1].end;
    printf("** guessing that nframes is approx %d \n", nframes);
  }

  normalize_scores();
  compute_scores();
  return(0);
}

/*-----------------------------------------------------------------------------*/
/* cum refers to scores cumulative over all whyps in a file */
/* exp refers to expected random value for scores */ 
compute_scores()
{
  int i; 
  /* static float cum_overlap, cum_product, cum_exp_product; */
  float avg_product, avg_exp_product;
  float normalized_avg_product;

  cum_correct_word_score = 0.0;
  cum_alignment_score = 0.0;
  cum_alignment_based_goodness = 0.0;
  cum_expected_overlap = 0.0;
  cum_word_score = 0.0;
  ncorrect = 0;
  cum_product = 0.0;
  cum_exp_product = 0.0;
  
  for (i = 0; i < nwh; i++) {
    cum_word_score += hyps[i].score; }
  exp_word_score = cum_word_score / nwh;

  if (verbose)
    printf("\n             match_sc (e);  overlap_sc (e);   product (e); | al_sc, al_gd\n");
  for (i = 0; i < nwh; i++) {
    compute_one_score(&hyps[i]); }
  if (verbose) printf("\n");
  printf("  avg overlap score = %5.2f      (expected %5.2f)\n",
	 cum_overlap / ncorrect, cum_expected_overlap / ncorrect);

  /* percent of evidence that is for words actually present */
  printf("  match score correctness ratio = %5.3f   (expected is  = %5.3f)\n",
	 cum_correct_word_score / cum_word_score, ncorrect / (float) nwh);

  avg_product = cum_product / nwh;
  avg_exp_product = cum_exp_product / nwh;
  printf("  avg product = %6.3f  (expected = %6.3f)\n",  avg_product, avg_exp_product);

  /* normalize to the range 0 to 1;
     (perhaps should do this by taking value/expected ratio instead) */
  normalized_avg_product = (1. - avg_exp_product) * avg_product;
  printf("  >> %.3f << = normalized avg product \n", normalized_avg_product);
  grand_total += normalized_avg_product;

  printf("          | avg alignment score = %5.2f ;   avg alignment goodness = %5.2f \n",
	 cum_alignment_score / ncorrect,   cum_alignment_based_goodness / nwh );
}


/*-----------------------------------------------------------------------------*/
compute_one_score(hyp_ptr)        struct whyp *hyp_ptr;
{
  float match_score;
  int pointer;
  int alignment_distance;   float alignment_score, alignment_based_goodness;
  int overlap;   float scaled_overlap, expected_overlap, scaled_exp_overlap;
  float product, exp_product;

  match_score = hyp_ptr->score;
  pointer = lookup_correct(hyp_ptr->label, labels, nlabels);
  if (pointer == NOT_FOUND) {
    if (verbose) { 
      printf("`%10s': %4.2f   (not in input)\n",  hyp_ptr->label, match_score); } }
  else {
    ncorrect++;

    alignment_distance = abs(hyp_ptr->start - labels[pointer].start)
      + abs(hyp_ptr->end - labels[pointer].end);
    alignment_score = score_alignment(alignment_distance);
    alignment_based_goodness = .5 * (alignment_score + match_score);
    
    overlap = compute_overlap(hyp_ptr->start, hyp_ptr->end,
			      labels[pointer].start, labels[pointer].end);
    scaled_overlap = scale_overlap(overlap, hyp_ptr->len, labels[pointer].len); 
    expected_overlap = random_expected_overlap(hyp_ptr->len, labels[pointer].len,
					       nframes * FRAME_SPACING, hyp_ptr->label);
    scaled_exp_overlap = scale_overlap((int) expected_overlap, hyp_ptr->len,
				       labels[pointer].len);
    product = scaled_overlap * match_score;
    exp_product = exp_word_score * scaled_exp_overlap;
    
    if (verbose)
      printf("`%10s': %4.2f (%4.2f);  %4.2f (%4.2f);     %5.3f (%5.3f) | %4.2f  %4.2f\n",
	     hyp_ptr->label,
	     match_score,
	     exp_word_score,
	     scaled_overlap,
	     scaled_exp_overlap,
	     product,
	     exp_product,
	     alignment_score,
	     alignment_based_goodness);
    cum_correct_word_score += match_score;
    cum_alignment_based_goodness += alignment_based_goodness;
    cum_alignment_score += alignment_score;
    cum_expected_overlap += scaled_exp_overlap;
    cum_overlap += scaled_overlap;
    cum_product += product;
    cum_exp_product += exp_product;
  } }

/*-----------------------------------------------------------------------------*/
/* Print the command-line synopsis on stderr.  Always returns 0. */
int judge_usage()
{
  fprintf(stderr, "usage: \n");
  fprintf(stderr, "   judge [-l label_directory] [input_files.wh]+\n");
  fprintf(stderr, "Options \n");
  fprintf(stderr, "  -l <dir>   specify directory to use for .la files\n");
  return(0);
}

/*-----------------------------------------------------------------------------*/
main(argc,argv)      int argc;      char *argv[];
{
  int i, first_wh_file;

  /* should parse arguments in a more standard way */
  /* handle -l option */
  if (argc > 1 && argv[1][0] == '-' &&  argv[1][1] == 'l') {
    strcpy(label_dir, argv[2]);
    first_wh_file = 3; }
  else {
    strcpy(label_dir,"");
    first_wh_file = 1; }
  
  nfiles = argc - first_wh_file;
  if (nfiles == 0) {
    fprintf(stderr, "please specify one or more .wh files\n");
    judge_usage();
    exit(STRANGE);}
  else if (nfiles == 1) 
    verbose = TRUE;     /* if only one file, show details of scoring */
  for (i = 0; i < nfiles; i++)
    strcpy(file_list[i], argv[first_wh_file + i]);
  for(active_file = 0; active_file < nfiles; active_file++)
    (void) do_wh_file();
  if (nfiles > 1)
    printf("\n >>>> %6.3f <<<< (grand average over all .wh files) \n",
	   grand_total / nfiles);
}

/*-----------------------------------------------------------------------------*/

