calc_prob_of.c Source File

00001 
00002 /*=====================================================================
00003                 =======   COPYRIGHT NOTICE   =======
00004 Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
00005 Ronald Rosenfeld and Philip Clarkson.
00006 
00007 All rights reserved.
00008 
00009 This software is made available for research purposes only.  It may be
00010 redistributed freely for this purpose, in full or in part, provided
00011 that this entire copyright notice is included on any copies of this
00012 software and applications and derivations thereof.
00013 
00014 This software is provided on an "as is" basis, without warranty of any
00015 kind, either expressed or implied, as to any matter including, but not
00016 limited to warranty of fitness of purpose, or merchantability, or
00017 results obtained from use of this software.
00018 ======================================================================*/
00019 
00025 #include <stdlib.h>
00026 #include "evallm.h"
00027 #include "idngram2lm.h"
00028 
00029 double calc_prob_of(id__t sought_word,
00030                     id__t *context,
00031                     int context_length,
00032                     ng_t *ng,
00033                     arpa_lm_t *arpa_ng,
00034                     fb_info *fb_list,
00035                     int *bo_case,
00036                     int *acl,
00037                     flag arpa_lm) {
00038 
00039   int i;
00040   flag exc_back_off;
00041   int most_recent_fb;
00042   int actual_context_length;
00043   id__t *sought_ngram;
00044   double prob;
00045 
00046   exc_back_off = 0;
00047 
00048   if (arpa_lm) {
00049     if (sought_word == 0 && arpa_ng->vocab_type == CLOSED_VOCAB) {
00050       quit(-1,"Error : Cannot generate probability for <UNK> since this is a closed \nvocabulary model.\n");
00051     }   
00052   }
00053   else {
00054     if (sought_word == 0 && ng->vocab_type == CLOSED_VOCAB) {
00055       quit(-1,"Error : Cannot generate probability for <UNK> since this is a closed \nvocabulary model.\n");
00056     }
00057   }
00058 
00059   most_recent_fb = -1;
00060   
00061   /* Find most recent word in the forced back-off list */
00062   
00063   for (i=context_length-1;i>=0;i--) {
00064 
00065     if (fb_list[context[i]].backed_off) {
00066       most_recent_fb = i;
00067       if (fb_list[context[i]].inclusive) {
00068         exc_back_off = 0;
00069       }
00070       else {
00071         exc_back_off = 1;
00072       }
00073       i = -2;
00074     }
00075 
00076   }
00077   
00078   actual_context_length = context_length - most_recent_fb -1;
00079 
00080   if (!exc_back_off && most_recent_fb != -1) {
00081     actual_context_length++;
00082   }
00083 
00084   sought_ngram = (id__t *) rr_malloc(sizeof(id__t)*(actual_context_length+1));
00085 
00086   for (i=0;i<=actual_context_length-1;i++) {
00087     if (exc_back_off) {
00088       sought_ngram[i] = context[i+most_recent_fb+1];
00089     }
00090     else {
00091       if (most_recent_fb == -1) {
00092         sought_ngram[i] = context[i+most_recent_fb+1];
00093       }
00094       else {
00095         sought_ngram[i] = context[i+most_recent_fb];
00096       }
00097     }
00098   }
00099   sought_ngram[actual_context_length] = sought_word;
00100 
00101 
00102   if (arpa_lm) {
00103     arpa_bo_ng_prob(actual_context_length,
00104                     sought_ngram,
00105                     arpa_ng,
00106                     2,       /* Verbosity */
00107                     &prob,
00108                     bo_case);
00109   }
00110   else {
00111     bo_ng_prob(actual_context_length,
00112                sought_ngram,
00113                ng,
00114                2,       /* Verbosity */
00115                &prob,
00116                bo_case);
00117   }
00118 
00119   *acl = actual_context_length;
00120 
00121   free(sought_ngram);
00122   
00123   return(prob);
00124 
00125 }
00126