00001 /*===================================================================== 00002 ======= COPYRIGHT NOTICE ======= 00003 Copyright (C) 1996, Carnegie Mellon University, Cambridge University, 00004 Ronald Rosenfeld and Philip Clarkson. 00005 00006 All rights reserved. 00007 00008 This software is made available for research purposes only. It may be 00009 redistributed freely for this purpose, in full or in part, provided 00010 that this entire copyright notice is included on any copies of this 00011 software and applications and derivations thereof. 00012 00013 This software is provided on an "as is" basis, without warranty of any 00014 kind, either expressed or implied, as to any matter including, but not 00015 limited to warranty of fitness of purpose, or merchantability, or 00016 results obtained from use of this software. 00017 ======================================================================*/ 00018 00019 00031 #include <stdio.h> 00032 #include "rr_libs/general.h" 00033 #include "ngram.h" 00034 #include "idngram2lm.h" 00035 00036 void calc_mem_req(ng_t *ng,flag is_ascii) { 00037 00038 ngram current_ngram; 00039 ngram previous_ngram; 00040 count_t *ng_count; 00041 int i,j; 00042 00043 current_ngram.id_array = (id__t *) rr_malloc(sizeof(id__t)*ng->n); 00044 previous_ngram.id_array = (id__t *) rr_malloc(sizeof(id__t)*ng->n); 00045 00046 ng_count = (count_t *) rr_calloc(ng->n,sizeof(count_t)); 00047 00048 current_ngram.n = ng->n; 00049 00050 rewind(ng->id_gram_fp); 00051 00052 while (!rr_feof(ng->id_gram_fp)) { 00053 for (i=0;i<=ng->n-1;i++) { 00054 previous_ngram.id_array[i]=current_ngram.id_array[i]; 00055 } 00056 get_ngram(ng->id_gram_fp,¤t_ngram,is_ascii); 00057 for (i=0;i<=ng->n-1;i++) { 00058 if (current_ngram.id_array[i] != previous_ngram.id_array[i]) { 00059 for (j=i;j<=ng->n-1;j++) { 00060 if (j>0) { 00061 if (ng_count[j] > ng->cutoffs[j-1]) { 00062 ng->table_sizes[j]++; 00063 } 00064 } 00065 ng_count[j] = current_ngram.count; 00066 } 00067 i=ng->n; 00068 } 00069 else { 00070 ng_count[i] += current_ngram.count; 00071 } 00072 } 00073 } 00074 00075 for (i=1;i<=ng->n-1;i++) { 00076 if (ng_count[i] > ng->cutoffs[i-1]) { 00077 ng->table_sizes[i]++; 00078 } 00079 } 00080 00081 /* Add a fudge factor, as problems can crop up with having to 00082 cut-off last few n-grams. */ 00083 00084 for (i=1;i<=ng->n-1;i++) { 00085 ng->table_sizes[i]+=10; 00086 } 00087 00088 rr_iclose(ng->id_gram_fp); 00089 ng->id_gram_fp = rr_iopen(ng->id_gram_filename); 00090 00091 } 00092 00093 00094 00095 00096 00097 00098