Main Page   Compound List   File List   Compound Members   File Members  

write_lms.c

Go to the documentation of this file.
00001 
00002 /*=====================================================================
00003                 =======   COPYRIGHT NOTICE   =======
00004 Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
00005 Ronald Rosenfeld and Philip Clarkson.
00006 
00007 All rights reserved.
00008 
00009 This software is made available for research purposes only.  It may be
00010 redistributed freely for this purpose, in full or in part, provided
00011 that this entire copyright notice is included on any copies of this
00012 software and applications and derivations thereof.
00013 
00014 This software is provided on an "as is" basis, without warranty of any
00015 kind, either expressed or implied, as to any matter including, but not
00016 limited to warranty of fitness of purpose, or merchantability, or
00017 results obtained from use of this software.
00018 ======================================================================*/
00019 
00020 #include <stdio.h>
00021 #include <math.h>
00022 #include <stdlib.h>
00023 #include "pc_libs/pc_general.h"
00024 #include "idngram2lm.h"
00025 #include "rr_libs/mips_swap.h"
00026 #include "rr_libs/general.h"
00027 #include "ngram.h"
00028 
00029 
00030 #define BBO_FILE_VERSION 970314
00031 
00032 void write_arpa_lm(ng_t *ng,int verbosity) {
00033 
00034 /* This is the format introduced and first used by Doug Paul.
00035    Optionally use a given symbol for the UNK word (id==0).
00036 */
00070   int *current_pos;
00071   int *end_pos;
00072   int i;
00073   int j;
00074   double log_10_of_e = 1.0 / log(10.0);
00075 
00076   /* HEADER */
00077 
00078   pc_message(verbosity,1,"ARPA-style %d-gram will be written to %s\n",ng->n,ng->arpa_filename);
00079 
00080   fprintf(ng->arpa_fp,"#############################################################################\n");
00081   fprintf(ng->arpa_fp,"## Copyright (c) 1996, Carnegie Mellon University, Cambridge University,\n");
00082   fprintf(ng->arpa_fp,"## Ronald Rosenfeld and Philip Clarkson\n");
00083   fprintf(ng->arpa_fp,"#############################################################################\n");
00084   fprintf(ng->arpa_fp,"=============================================================================\n");
00085   fprintf(ng->arpa_fp,"===============  This file was produced by the CMU-Cambridge  ===============\n");
00086   fprintf(ng->arpa_fp,"===============     Statistical Language Modeling Toolkit     ===============\n"); 
00087   fprintf(ng->arpa_fp,"=============================================================================\n");
00088   fprintf(ng->arpa_fp,"This is a %d-gram language model, based on a vocabulary of %d words,\n",ng->n,ng->vocab_size);
00089   fprintf(ng->arpa_fp,"  which begins \"%s\", \"%s\", \"%s\"...\n",ng->vocab[1],ng->vocab[2],ng->vocab[3]);
00090   if (ng->vocab_type == CLOSED_VOCAB) {
00091     fprintf(ng->arpa_fp,"This is a CLOSED-vocabulary model\n");
00092     fprintf(ng->arpa_fp,"  (OOVs eliminated from training data and are forbidden in test data)\n");
00093   }
00094   else {
00095     if (ng->vocab_type == OPEN_VOCAB_1) {
00096       fprintf(ng->arpa_fp,"This is an OPEN-vocabulary model (type 1)\n");
00097       fprintf(ng->arpa_fp,"  (OOVs were mapped to UNK, which is treated as any other vocabulary word)\n");
00098     }
00099     else {
00100       if (ng->vocab_type == OPEN_VOCAB_2) {
00101         fprintf(ng->arpa_fp,"This is an OPEN-vocabulary model (type 2)\n");
00102         fprintf(ng->arpa_fp,"  (%.2f of the unigram discount mass was allocated to OOVs)\n",ng->oov_fraction); 
00103       }
00104     }
00105   }
00106   
00107   switch (ng->discounting_method) {
00108   case GOOD_TURING:
00109     fprintf(ng->arpa_fp,"Good-Turing discounting was applied.\n");
00110     for (i=1;i<=ng->n;i++) {
00111       fprintf(ng->arpa_fp,"%d-gram frequency of frequency : ",i);
00112       for (j=1;j<=ng->fof_size[i-1]-1;j++) {
00113         fprintf(ng->arpa_fp,"%d ",ng->freq_of_freq[i-1][j]);
00114       }
00115       fprintf(ng->arpa_fp,"\n");
00116     }
00117     for (i=1;i<=ng->n;i++) {
00118       fprintf(ng->arpa_fp,"%d-gram discounting ratios : ",i);
00119       for (j=1;j<=ng->disc_range[i-1];j++) {
00120         fprintf(ng->arpa_fp,"%.2f ",ng->gt_disc_ratio[i-1][j]);
00121       }
00122       fprintf(ng->arpa_fp,"\n");
00123     }
00124     break;
00125   case LINEAR:
00126     fprintf(ng->arpa_fp,"Linear discounting was applied.\n");
00127     for (i=1;i<=ng->n;i++) {
00128       fprintf(ng->arpa_fp,"%d-gram discounting ratio : %g\n",i,ng->lin_disc_ratio[i-1]);
00129     }
00130     break;
00131   case ABSOLUTE:
00132     fprintf(ng->arpa_fp,"Absolute discounting was applied.\n");
00133     for (i=1;i<=ng->n;i++) {
00134       fprintf(ng->arpa_fp,"%d-gram discounting constant : %g\n",i,ng->abs_disc_const[i-1]);
00135     }
00136     break;
00137   case WITTEN_BELL:
00138     fprintf(ng->arpa_fp,"Witten Bell discounting was applied.\n");
00139     break;
00140 }
00141 
00142 
00143   fprintf(ng->arpa_fp,"This file is in the ARPA-standard format introduced by Doug Paul.\n");
00144   fprintf(ng->arpa_fp,"\n");
00145   fprintf(ng->arpa_fp,"p(wd3|wd1,wd2)= if(trigram exists)           p_3(wd1,wd2,wd3)\n");
00146   fprintf(ng->arpa_fp,"                else if(bigram w1,w2 exists) bo_wt_2(w1,w2)*p(wd3|wd2)\n");
00147   fprintf(ng->arpa_fp,"                else                         p(wd3|w2)\n");
00148   fprintf(ng->arpa_fp,"\n");
00149   fprintf(ng->arpa_fp,"p(wd2|wd1)= if(bigram exists) p_2(wd1,wd2)\n");
00150   fprintf(ng->arpa_fp,"            else              bo_wt_1(wd1)*p_1(wd2)\n");
00151   fprintf(ng->arpa_fp,"\n");
00152   fprintf(ng->arpa_fp,"All probs and back-off weights (bo_wt) are given in log10 form.\n");
00153   fprintf(ng->arpa_fp,"\n");
00154   fprintf(ng->arpa_fp,"Data formats:\n");
00155   fprintf(ng->arpa_fp,"\n");
00156   fprintf(ng->arpa_fp,"Beginning of data mark: \\data\\\n");
00157 
00158   for (i=1;i<=ng->n;i++) {
00159     fprintf(ng->arpa_fp,"ngram %d=nr            # number of %d-grams\n",i,i);
00160   }
00161   fprintf(ng->arpa_fp,"\n");
00162   for (i=1;i<=ng->n;i++) {
00163     fprintf(ng->arpa_fp,"\\%d-grams:\n",i);
00164     fprintf(ng->arpa_fp,"p_%d     ",i);
00165     for (j=1;j<=i;j++) {
00166       fprintf(ng->arpa_fp,"wd_%d ",j);
00167     }
00168     if (i == ng->n) {
00169       fprintf(ng->arpa_fp,"\n");
00170     }
00171     else {
00172       fprintf(ng->arpa_fp,"bo_wt_%d\n",i);
00173     }
00174   }  
00175 
00176   fprintf(ng->arpa_fp,"\n");
00177   fprintf(ng->arpa_fp,"end of data mark: \\end\\\n");
00178   fprintf(ng->arpa_fp,"\n");
00179 
00180   fprintf(ng->arpa_fp,"\\data\\\n");
00181   fprintf(ng->arpa_fp,"ngram 1=%d\n",1+ng->vocab_size-ng->first_id);
00182   for (i=1;i<=ng->n-1;i++) {
00183     fprintf(ng->arpa_fp,"ngram %d=%d\n",i+1,ng->num_kgrams[i]);
00184   }
00185 
00186   /* Print unigram info */
00187 
00188   fprintf(ng->arpa_fp,"\n\\1-grams:\n");
00189 
00190   for (i=ng->first_id; i<=ng->vocab_size;i++) {
00191     
00192     double log10_uniprob;
00193     double log10_alpha;
00194     
00195     log10_uniprob = ng->uni_log_probs[i]*log_10_of_e;
00196 
00197     if (ng->uni_probs[i]<=0.0) {
00198       log10_uniprob = -99.999;
00199     }
00200     
00201     if (ng->four_byte_alphas) {
00202       if (ng->bo_weight4[0][i] > 0.0) {
00203         log10_alpha = log10(ng->bo_weight4[0][i]);
00204       }
00205       else {
00206         log10_alpha = -99.999;
00207       }
00208     }
00209     else {
00210 
00211       if (double_alpha(ng->bo_weight[0][i],
00212                        ng->alpha_array,
00213                        ng->size_of_alpha_array,
00214                        65535 - ng->out_of_range_alphas,
00215                        ng->min_alpha,
00216                        ng->max_alpha) > 0.0) {
00217         log10_alpha = log10(double_alpha(ng->bo_weight[0][i],
00218                                          ng->alpha_array,
00219                                          ng->size_of_alpha_array,
00220                                          65535 - ng->out_of_range_alphas,
00221                                          ng->min_alpha,
00222                                          ng->max_alpha));
00223       }
00224       else {
00225         log10_alpha = -99.999;
00226       }
00227 
00228     }
00229 
00230     if (ng->n>1) {
00231       fprintf(ng->arpa_fp,"%.4f %s\t%.4f\n",
00232               log10_uniprob,ng->vocab[i],log10_alpha);
00233     }
00234     else {
00235       fprintf(ng->arpa_fp,"%.4f %s\n",
00236               log10_uniprob,ng->vocab[i]);
00237     }
00238 
00239   }
00240 
00241   current_pos = (int *) rr_malloc(ng->n*sizeof(int));
00242   end_pos = (int *) rr_malloc(ng->n*sizeof(int)); 
00243 
00244 
00245   /* Print 2-gram, ... (n-1)-gram info. */
00246 
00247   for (i=1;i<=ng->n-1;i++) {
00248 
00249     /* Print out the (i+1)-gram */
00250 
00251 
00252     int current_table;
00253     int j;
00254 
00255     int ngcount;
00256     int marg_count;
00257     double discounted_ngcount;
00258     
00259     double ngprob;
00260     double log_10_ngprob;
00261     double ngalpha;
00262     double log_10_ngalpha;
00263 
00264     /* Initialise variables for the sake of warning-free compilation */
00265     
00266     discounted_ngcount = 0.0;
00267     log_10_ngalpha = 0.0;
00268 
00269     fprintf(ng->arpa_fp,"\n\\%d-grams:\n",i+1);
00270 
00271     /* Go through the n-gram list in order */
00272     
00273     for (j=0;j<=ng->n-1;j++) {
00274       current_pos[j] = 0;
00275       end_pos[j] = 0;
00276     }
00277 
00278     for (current_pos[0]=ng->first_id;
00279          current_pos[0]<=ng->vocab_size;
00280          current_pos[0]++) {
00281       
00282       if (return_count(ng->four_byte_counts,
00283                        ng->count_table[0], 
00284                        ng->marg_counts,
00285                        ng->marg_counts4,
00286                        current_pos[0]) > 0) {
00287     
00288         current_table = 1;
00289       
00290         if (current_pos[0] == ng->vocab_size) {
00291           end_pos[1] = ng->num_kgrams[1]-1;
00292         }
00293         else {
00294           end_pos[1] = get_full_index(ng->ind[0][current_pos[0]+1],
00295                                       ng->ptr_table[0],
00296                                       ng->ptr_table_size[0],
00297                                       current_pos[0]+1)-1;
00298         }
00299 
00300         while (current_table > 0) {
00301 
00302           if (current_table == i) {
00303 
00304             if (current_pos[i] <= end_pos[i]) {
00305 
00306               ngcount = return_count(ng->four_byte_counts,
00307                                      ng->count_table[i],
00308                                      ng->count[i],
00309                                      ng->count4[i],
00310                                      current_pos[i]);
00311             
00312               if (i==1) {
00313                 marg_count = return_count(ng->four_byte_counts,
00314                                           ng->count_table[0], 
00315                                           ng->marg_counts,
00316                                           ng->marg_counts4,
00317                                           current_pos[0]);
00318               }
00319               else {
00320                 marg_count = return_count(ng->four_byte_counts,
00321                                      ng->count_table[i-1],
00322                                      ng->count[i-1],
00323                                      ng->count4[i-1],
00324                                      current_pos[i-1]);
00325               }
00326 
00327               switch (ng->discounting_method) {
00328               case GOOD_TURING:
00329                 if (ngcount <= ng->disc_range[i]) {
00330                   discounted_ngcount = ng->gt_disc_ratio[i][ngcount] * ngcount;
00331                 }
00332                 else {
00333                   discounted_ngcount = ngcount;
00334                 }
00335                 break;
00336               case ABSOLUTE:
00337                 discounted_ngcount =  ngcount - ng->abs_disc_const[i];
00338                 break;
00339               case LINEAR:
00340                 discounted_ngcount = ng->lin_disc_ratio[i]*ngcount; 
00341                 break;
00342               case WITTEN_BELL:
00343                 discounted_ngcount = ( ((double) marg_count * ngcount) /
00344                   (marg_count + num_of_types(i-1,current_pos[i-1],ng)));
00345                 break;
00346               }
00347 
00348               ngprob = (double) discounted_ngcount / marg_count;
00349 
00350               if (ngprob > 1.0) {
00351                 fprintf(stderr,
00352                         "discounted_ngcount = %f marg_count = %d %d %d %d\n",
00353                        discounted_ngcount,marg_count,current_pos[0],
00354                        current_pos[1],current_pos[2]);
00355                 quit(-1,"Error : probablity of ngram is greater than one.\n");
00356               }
00357 
00358               if (ngprob > 0.0) {
00359                 log_10_ngprob = log10(ngprob);
00360               }
00361               else {
00362                 log_10_ngprob = -99.999;
00363               }
00364 
00365 
00366               if (i <= ng->n-2) {
00367                 if (ng->four_byte_alphas) {
00368                   ngalpha = ng->bo_weight4[i][current_pos[i]];
00369                 }
00370                 else {
00371                   ngalpha = double_alpha(ng->bo_weight[i][current_pos[i]],
00372                                          ng->alpha_array,
00373                                          ng->size_of_alpha_array,
00374                                          65535 - ng->out_of_range_alphas,
00375                                          ng->min_alpha,
00376                                          ng->max_alpha);
00377                 }
00378                 if (ngalpha > 0.0) {
00379                   log_10_ngalpha = log10(ngalpha);
00380                 }
00381                 else {
00382                   log_10_ngalpha = -99.999;
00383                 }
00384               }
00385 
00386               fprintf(ng->arpa_fp,"%.4f ",log_10_ngprob);
00387               fprintf(ng->arpa_fp,"%s ",ng->vocab[current_pos[0]]);
00388               for (j=1;j<=i;j++) {
00389                 fprintf(ng->arpa_fp,"%s ",ng->vocab[ng->word_id[j][current_pos[j]]]);
00390               }
00391               if (i <= ng->n-2) {
00392                 fprintf(ng->arpa_fp,"%.4f\n",log_10_ngalpha);
00393               }         
00394               else {
00395                 fprintf(ng->arpa_fp,"\n");
00396               }
00397               current_pos[i]++;
00398             }
00399             else {
00400               current_table--;
00401               if (current_table > 0) {
00402                 current_pos[current_table]++;
00403               }
00404             }
00405           }
00406           else {
00407             
00408             if (current_pos[current_table] <= end_pos[current_table]) {
00409               current_table++;
00410               if (current_pos[current_table-1] == ng->num_kgrams[current_table-1]-1) {
00411                 end_pos[current_table] = ng->num_kgrams[current_table]-1;
00412               }
00413               else {
00414                 end_pos[current_table] = get_full_index(ng->ind[current_table-1][current_pos[current_table-1]+1],ng->ptr_table[current_table-1],ng->ptr_table_size[current_table-1],current_pos[current_table-1]+1)-1;
00415               }
00416             }
00417             else {
00418               current_table--;
00419               if (current_table > 0) {
00420                 current_pos[current_table]++;
00421               }
00422             }
00423           }
00424         }
00425       }
00426     }
00427   } 
00428 
00429   free(current_pos);
00430   free(end_pos);
00431 
00432 
00433   fprintf(ng->arpa_fp,"\n\\end\\\n");
00434 
00435   rr_oclose(ng->arpa_fp);
00436 
00437 } 
00438  
00439 void write_bin_lm(ng_t *ng,int verbosity) {
00440     
00441   int l_chunk;
00442   int from_rec;
00443   int i;
00444   int j;
00445 
00446   pc_message(verbosity,1,"Binary %d-gram language model will be written to %s\n",ng->n,ng->bin_filename);
00447   
00448   ng->version = BBO_FILE_VERSION;
00449 
00450   /* Scalar parameters */
00451 
00452   rr_fwrite(&ng->version,sizeof(int),1,ng->bin_fp,"version");
00453   rr_fwrite(&ng->n,sizeof(unsigned short),1,ng->bin_fp,"n");
00454 
00455   rr_fwrite(&ng->vocab_size,sizeof(unsigned short),1,ng->bin_fp,"vocab_size");
00456   rr_fwrite(&ng->no_of_ccs,sizeof(unsigned short),1,ng->bin_fp,"no_of_ccs");
00457   rr_fwrite(&ng->vocab_type,sizeof(unsigned short),1,ng->bin_fp,"vocab_type");
00458 
00459   rr_fwrite(&ng->count_table_size,sizeof(count_ind_t),1,
00460             ng->bin_fp,"count_table_size");
00461   rr_fwrite(&ng->discounting_method,sizeof(unsigned short),1,
00462             ng->bin_fp,"discounting_method");
00463 
00464   rr_fwrite(&ng->min_alpha,sizeof(double),
00465             1,ng->bin_fp,"min_alpha");
00466   rr_fwrite(&ng->max_alpha,sizeof(double),
00467             1,ng->bin_fp,"max_alpha");
00468   rr_fwrite(&ng->out_of_range_alphas,sizeof(unsigned short),
00469             1,ng->bin_fp,"out_of_range_alphas");
00470   rr_fwrite(&ng->size_of_alpha_array,sizeof(unsigned short),
00471             1,ng->bin_fp,"size_of_alpha_array");  
00472 
00473   rr_fwrite(&ng->n_unigrams,sizeof(int),1,ng->bin_fp,"n_unigrams");
00474   rr_fwrite(&ng->zeroton_fraction,sizeof(double),1,
00475             ng->bin_fp,"zeroton_fraction");
00476   rr_fwrite(&ng->oov_fraction,sizeof(double),1,
00477             ng->bin_fp,"oov_fraction");
00478   rr_fwrite(&ng->four_byte_counts,sizeof(flag),1,
00479             ng->bin_fp,"four_byte_counts");
00480 
00481   rr_fwrite(&ng->four_byte_alphas,sizeof(flag),1,
00482             ng->bin_fp,"four_byte_alphas");
00483 
00484   rr_fwrite(&ng->first_id,sizeof(unsigned short),1,
00485             ng->bin_fp,"first_id");
00486 
00487   /* Short and shortish arrays */
00488 
00489   sih_val_write_to_file(ng->vocab_ht,ng->bin_fp,ng->bin_filename,0);
00490 
00491   /* (ng->vocab is not stored in file - will be derived from ng->vocab_ht) */
00492 
00493   if (ng->four_byte_counts) {
00494     rr_fwrite(ng->marg_counts4,sizeof(int),
00495               ng->vocab_size+1,ng->bin_fp,"marg_counts");
00496   }
00497   else {
00498     rr_fwrite(ng->marg_counts,sizeof(count_ind_t),
00499               ng->vocab_size+1,ng->bin_fp,"marg_counts");
00500   }
00501 
00502   rr_fwrite(ng->alpha_array,sizeof(double),
00503             ng->size_of_alpha_array,ng->bin_fp,"alpha_array");
00504 
00505   if (!ng->four_byte_counts) {
00506     for (i=0;i<=ng->n-1;i++) {
00507       rr_fwrite(ng->count_table[i],sizeof(count_t),
00508                 ng->count_table_size+1,ng->bin_fp,"count_table");
00509     } 
00510   }
00511 
00512   /* Could write count_table as one block, but better to be safe and
00513      do it in chunks. For motivation, see comments about writing tree
00514      info. */
00515 
00516   rr_fwrite(ng->ptr_table_size,sizeof(unsigned short),
00517             ng->n,ng->bin_fp,"ptr_table_size");
00518 
00519   for (i=0;i<=ng->n-1;i++) {
00520     rr_fwrite(ng->ptr_table[i],sizeof(ptr_tab_t),
00521               ng->ptr_table_size[i],ng->bin_fp,"ptr_table");
00522   }
00523   
00524   /* Unigram statistics */
00525 
00526   rr_fwrite(ng->uni_probs,sizeof(uni_probs_t),ng->vocab_size+1,
00527             ng->bin_fp,"uni_probs");
00528   rr_fwrite(ng->uni_log_probs,sizeof(uni_probs_t),ng->vocab_size+1,
00529             ng->bin_fp,"uni_log_probs");
00530   rr_fwrite(ng->context_cue,sizeof(flag),ng->vocab_size+1,
00531             ng->bin_fp,"context_cue");
00532 
00533   
00534   rr_fwrite(ng->cutoffs,sizeof(cutoff_t),ng->n,ng->bin_fp,"cutoffs");
00535 
00536   switch (ng->discounting_method) {
00537   case GOOD_TURING:
00538     rr_fwrite(ng->fof_size,sizeof(unsigned short),ng->n,ng->bin_fp,"fof_size");
00539     rr_fwrite(ng->disc_range,sizeof(unsigned short),ng->n,
00540               ng->bin_fp,"disc_range");
00541     for (i=0;i<=ng->n-1;i++) {
00542       rr_fwrite(ng->freq_of_freq[i],sizeof(int),
00543                 ng->fof_size[i]+1,ng->bin_fp,"freq_of_freq");
00544     }    
00545     for (i=0;i<=ng->n-1;i++) {
00546       rr_fwrite(ng->gt_disc_ratio[i],sizeof(disc_val_t),
00547                 ng->disc_range[i]+1,ng->bin_fp,"gt_disc_ratio");
00548     }    
00549   case WITTEN_BELL:
00550     break;
00551   case LINEAR:
00552     rr_fwrite(ng->lin_disc_ratio,sizeof(disc_val_t),
00553                 ng->n,ng->bin_fp,"lin_disc_ratio");
00554     break;
00555   case ABSOLUTE:
00556     rr_fwrite(ng->abs_disc_const,sizeof(double),
00557               ng->n,ng->bin_fp,"abs_disc_const");
00558     break;
00559   }
00560 
00561   /* Tree information */
00562 
00563   /* Unigram stuff first, since can be dumped all in one go */
00564 
00565   rr_fwrite(ng->num_kgrams,sizeof(int),ng->n,ng->bin_fp,"num_kgrams");
00566 
00567   if (ng->four_byte_counts) {
00568     rr_fwrite(ng->count4[0],sizeof(int),ng->vocab_size+1,
00569               ng->bin_fp,"unigram counts");
00570   }
00571   else {
00572     rr_fwrite(ng->count[0],sizeof(count_ind_t),ng->vocab_size+1,
00573               ng->bin_fp,"unigram counts");
00574   }
00575 
00576   if (ng->four_byte_alphas) {
00577     rr_fwrite(ng->bo_weight4[0],sizeof(four_byte_t),ng->vocab_size+1,
00578               ng->bin_fp,"unigram backoff weights");
00579   }
00580   else {
00581     rr_fwrite(ng->bo_weight[0],sizeof(bo_weight_t),ng->vocab_size+1,
00582               ng->bin_fp,"unigram backoff weights");
00583   }
00584 
00585   if (ng->n > 1) {
00586     rr_fwrite(ng->ind[0],sizeof(index__t),ng->vocab_size+1,
00587               ng->bin_fp,"unigram -> bigram pointers");
00588   }
00589 
00590   /* Write the rest of the tree structure in chunks, otherwise the
00591       kernel buffers are too big. */
00592 
00593   /* Need to do byte swapping */
00594 
00595   for (i=1;i<=ng->n-1;i++) {
00596     for (j=0;j<=ng->num_kgrams[i];j++) {
00597       SWAPHALF(&ng->word_id[i][j]);
00598     }
00599     if (ng->four_byte_counts) {
00600       for (j=0;j<=ng->num_kgrams[i];j++) {
00601         SWAPWORD(&ng->count4[i][j]);
00602       }
00603     }
00604     else {
00605       for (j=0;j<=ng->num_kgrams[i];j++) {
00606         SWAPHALF(&ng->count[i][j]);
00607       }
00608     }
00609   }
00610 
00611   for (i=1;i<=ng->n-2;i++) {
00612     for (j=0;j<=ng->num_kgrams[i];j++) {
00613       if (ng->four_byte_alphas) {
00614         SWAPWORD(&ng->bo_weight4[i][j]);
00615       }
00616       else {
00617         SWAPHALF(&ng->bo_weight[i][j]);
00618       }
00619     }
00620     for (j=0;j<=ng->num_kgrams[i];j++) {
00621       SWAPHALF(&ng->ind[i][j]);
00622     }
00623   }
00624 
00625   for (i=1;i<=ng->n-1;i++) {
00626 
00627     from_rec = 0;
00628     l_chunk = 100000;
00629     while(from_rec < ng->num_kgrams[i]) {
00630       if (from_rec+l_chunk > ng->num_kgrams[i]) {
00631         l_chunk = ng->num_kgrams[i] - from_rec;
00632       }
00633       rr_fwrite(&ng->word_id[i][from_rec],1,sizeof(id__t)*l_chunk,ng->bin_fp,"word ids");
00634       from_rec += l_chunk;
00635     }
00636    
00637   }
00638 
00639   for (i=1;i<=ng->n-1;i++) {
00640 
00641     from_rec = 0;
00642     l_chunk = 100000;
00643     while(from_rec < ng->num_kgrams[i]) {
00644       if (from_rec+l_chunk > ng->num_kgrams[i]) {
00645         l_chunk = ng->num_kgrams[i] - from_rec;
00646       }
00647       if (ng->four_byte_counts) {
00648         rr_fwrite(&ng->count4[i][from_rec],1,sizeof(int)*l_chunk,ng->bin_fp,"counts");
00649       }
00650       else {
00651         rr_fwrite(&ng->count[i][from_rec],1,sizeof(count_ind_t)*l_chunk,ng->bin_fp,"counts");
00652       }
00653       from_rec += l_chunk;
00654     }
00655     
00656   }
00657 
00658   for (i=1;i<=ng->n-2;i++) {
00659 
00660     from_rec = 0;
00661     l_chunk = 100000;
00662     while(from_rec < ng->num_kgrams[i]) {
00663       if (from_rec+l_chunk > ng->num_kgrams[i]) {
00664         l_chunk = ng->num_kgrams[i] - from_rec;
00665       }
00666       if (ng->four_byte_alphas) {
00667         rr_fwrite(&ng->bo_weight4[i][from_rec],1,sizeof(four_byte_t)*l_chunk,
00668                   ng->bin_fp,"backoff weights");
00669       }
00670       else {
00671         rr_fwrite(&ng->bo_weight[i][from_rec],1,sizeof(bo_weight_t)*l_chunk,
00672                   ng->bin_fp,"backoff weights");
00673       }
00674       from_rec += l_chunk;
00675     }
00676   }
00677 
00678   for (i=1;i<=ng->n-2;i++) {
00679 
00680 
00681     from_rec = 0;
00682     l_chunk = 100000;
00683     while(from_rec < ng->num_kgrams[i]) {
00684       if (from_rec+l_chunk > ng->num_kgrams[i]) {
00685         l_chunk = ng->num_kgrams[i] - from_rec;
00686       }
00687       rr_fwrite(&ng->ind[i][from_rec],1,sizeof(index__t)*l_chunk,ng->bin_fp,
00688                 "indices");
00689       from_rec += l_chunk;
00690     }
00691 
00692   }
00693 
00694   rr_oclose(ng->bin_fp);
00695 
00696   /* Swap back */
00697 
00698   for (i=1;i<=ng->n-1;i++) {
00699     for (j=0;j<=ng->num_kgrams[i];j++) {
00700       SWAPHALF(&ng->word_id[i][j]);
00701     }
00702     if (ng->four_byte_counts) {
00703       for (j=0;j<=ng->num_kgrams[i];j++) {
00704         SWAPWORD(&ng->count4[i][j]);
00705       }
00706     }
00707     else {
00708       for (j=0;j<=ng->num_kgrams[i];j++) {
00709         SWAPHALF(&ng->count[i][j]);
00710       }
00711     }
00712   }
00713 
00714   for (i=1;i<=ng->n-2;i++) {
00715     for (j=0;j<=ng->num_kgrams[i];j++) {
00716       if (ng->four_byte_alphas) {
00717         SWAPWORD(&ng->bo_weight4[i][j]);
00718       }
00719       else {
00720         SWAPHALF(&ng->bo_weight[i][j]);
00721       }
00722 
00723     }
00724     for (j=0;j<=ng->num_kgrams[i];j++) {
00725       SWAPHALF(&ng->ind[i][j]);
00726     }
00727   }
00728   
00729 }
00730 
00731 
00732 
00733 
00734 
00735 

Generated on Tue Dec 21 13:54:46 2004 by doxygen1.2.18