Main Page   Compound List   File List   Compound Members   File Members  

validate.c

Go to the documentation of this file.
00001 
00002 /*=====================================================================
00003                 =======   COPYRIGHT NOTICE   =======
00004 Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
00005 Ronald Rosenfeld and Philip Clarkson.
00006 
00007 All rights reserved.
00008 
00009 This software is made available for research purposes only.  It may be
00010 redistributed freely for this purpose, in full or in part, provided
00011 that this entire copyright notice is included on any copies of this
00012 software and applications and derivations thereof.
00013 
00014 This software is provided on an "as is" basis, without warranty of any
00015 kind, either expressed or implied, as to any matter including, but not
00016 limited to warranty of fitness of purpose, or merchantability, or
00017 results obtained from use of this software.
00018 ======================================================================*/
00019 
00020 #include "evallm.h"
00021 #include <stdlib.h>
00022 
00026 void validate(ng_t *ng,
00027               arpa_lm_t *arpa_ng,
00028               char **words,
00029               flag backoff_from_unk_inc,
00030               flag backoff_from_unk_exc,
00031               flag backoff_from_ccs_inc,
00032               flag backoff_from_ccs_exc,
00033               flag arpa_lm,
00034               char *fb_list_filename) {
00035 
00036 
00037   int *context;
00038   id__t *short_context;
00039   int dummy1;
00040   int dummy2;
00041   int i;
00042   fb_info *fb_list;
00043   double prob_so_far;
00044   flag found_unk_wrongly;
00045   int n;
00046 
00047   if (arpa_lm) {
00048     n = arpa_ng->n;
00049   }
00050   else {
00051     n = ng->n;
00052   }
00053 
00054   if (arpa_lm) {
00055     fb_list = gen_fb_list(arpa_ng->vocab_ht,
00056                           arpa_ng->vocab_size,
00057                           arpa_ng->vocab,
00058                           arpa_ng->context_cue,
00059                           backoff_from_unk_inc,
00060                           backoff_from_unk_exc,
00061                           backoff_from_ccs_inc,
00062                           backoff_from_ccs_exc,
00063                           fb_list_filename);
00064   }
00065   else {
00066     fb_list = gen_fb_list(ng->vocab_ht,
00067                           ng->vocab_size,
00068                           ng->vocab,
00069                           ng->context_cue,
00070                           backoff_from_unk_inc,
00071                           backoff_from_unk_exc,
00072                           backoff_from_ccs_inc,
00073                           backoff_from_ccs_exc,
00074                           fb_list_filename);
00075   }
00076   
00077   context = (int *) rr_malloc(sizeof(int)*(n-1));
00078   short_context = (id__t *) rr_malloc(sizeof(id__t)*(n-1));
00079   
00080   found_unk_wrongly = 0;
00081 
00082   for (i=0;i<=n-2;i++) {
00083     if (arpa_lm) {
00084       if (sih_lookup(arpa_ng->vocab_ht,words[i],&context[i]) == 0) {
00085         if (arpa_ng->vocab_type == CLOSED_VOCAB) {
00086           fprintf(stderr,"Error : %s is not in the vocabulary, and this is a closed \nvocabulary model.\n",words[i]);
00087           found_unk_wrongly = 1;
00088         }
00089         else {
00090           fprintf(stderr,"Warning : %s is an unknown word.\n",words[i]);
00091         }
00092       }
00093       if (context[i] > 65535) {
00094         quit(-1,"Error : returned value from sih_lookup is too high.\n");
00095       }
00096       else {
00097         short_context[i] = context[i];
00098       }
00099     }
00100     else {
00101       if (sih_lookup(ng->vocab_ht,words[i],&context[i]) == 0) {
00102         if (ng->vocab_type == CLOSED_VOCAB) {
00103           fprintf(stderr,"Error : %s is not in the vocabulary, and this is a closed \nvocabulary model.\n",words[i]);
00104           found_unk_wrongly = 1;
00105         }
00106         else {
00107           fprintf(stderr,"Warning : %s is an unknown word.\n",words[i]);
00108         }
00109       }
00110       if (context[i] > 65535) {
00111         quit(-1,"Error : returned value from sih_lookup is too high.\n");
00112       }
00113       else {
00114         short_context[i] = context[i];
00115       }
00116     }
00117   }
00118 
00119   /* Map down from context array to short_context array */
00120   /* sih_lookup requires the array to be ints, but prob_so_far
00121      requires short ints. */
00122 
00123   if (!found_unk_wrongly) {
00124 
00125     prob_so_far = 0.0;
00126     
00127     if (arpa_lm) {
00128       for (i=arpa_ng->first_id;i<=arpa_ng->vocab_size;i++) {
00129         prob_so_far += calc_prob_of(i,
00130                                     short_context,
00131                                     n-1,
00132                                     ng,
00133                                     arpa_ng,
00134                                     fb_list,
00135                                     &dummy1,
00136                                     &dummy2,
00137                                     arpa_lm);
00138       }
00139 
00140     }
00141     else {
00142       for (i=ng->first_id;i<=ng->vocab_size;i++) {
00143         prob_so_far += calc_prob_of(i,
00144                                     short_context,
00145                                     n-1,
00146                                     ng,
00147                                     arpa_ng,
00148                                     fb_list,
00149                                     &dummy1,
00150                                     &dummy2,
00151                                     arpa_lm);
00152       }
00153     }
00154     
00155     printf("Sum of P( * | ");
00156     for (i=0;i<=n-2;i++) {
00157       printf("%s ",words[i]);
00158     }
00159     printf(") = %f\n",prob_so_far);
00160     
00161   }
00162 
00163   free(context);
00164   free(fb_list);
00165 
00166 }

Generated on Tue Dec 21 13:54:46 2004 by doxygen 1.2.18