00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <stdio.h>
00021 #include <math.h>
00022 #include <stdlib.h>
00023 #include "pc_libs/pc_general.h"
00024 #include "idngram2lm.h"
00025 #include "rr_libs/mips_swap.h"
00026 #include "rr_libs/general.h"
00027 #include "ngram.h"
00028
00029
00030 #define BBO_FILE_VERSION 970314 /* Version stamp: write_bin_lm() stores it in ng->version and writes it as the first field of the binary LM file. */
00031
00032 void write_arpa_lm(ng_t *ng,int verbosity) {
00033
00034
00035
00036
00070 int *current_pos;
00071 int *end_pos;
00072 int i;
00073 int j;
00074 double log_10_of_e = 1.0 / log(10.0);
00075
00076
00077
00078 pc_message(verbosity,1,"ARPA-style %d-gram will be written to %s\n",ng->n,ng->arpa_filename);
00079
00080 fprintf(ng->arpa_fp,"#############################################################################\n");
00081 fprintf(ng->arpa_fp,"## Copyright (c) 1996, Carnegie Mellon University, Cambridge University,\n");
00082 fprintf(ng->arpa_fp,"## Ronald Rosenfeld and Philip Clarkson\n");
00083 fprintf(ng->arpa_fp,"#############################################################################\n");
00084 fprintf(ng->arpa_fp,"=============================================================================\n");
00085 fprintf(ng->arpa_fp,"=============== This file was produced by the CMU-Cambridge ===============\n");
00086 fprintf(ng->arpa_fp,"=============== Statistical Language Modeling Toolkit ===============\n");
00087 fprintf(ng->arpa_fp,"=============================================================================\n");
00088 fprintf(ng->arpa_fp,"This is a %d-gram language model, based on a vocabulary of %d words,\n",ng->n,ng->vocab_size);
00089 fprintf(ng->arpa_fp," which begins \"%s\", \"%s\", \"%s\"...\n",ng->vocab[1],ng->vocab[2],ng->vocab[3]);
00090 if (ng->vocab_type == CLOSED_VOCAB) {
00091 fprintf(ng->arpa_fp,"This is a CLOSED-vocabulary model\n");
00092 fprintf(ng->arpa_fp," (OOVs eliminated from training data and are forbidden in test data)\n");
00093 }
00094 else {
00095 if (ng->vocab_type == OPEN_VOCAB_1) {
00096 fprintf(ng->arpa_fp,"This is an OPEN-vocabulary model (type 1)\n");
00097 fprintf(ng->arpa_fp," (OOVs were mapped to UNK, which is treated as any other vocabulary word)\n");
00098 }
00099 else {
00100 if (ng->vocab_type == OPEN_VOCAB_2) {
00101 fprintf(ng->arpa_fp,"This is an OPEN-vocabulary model (type 2)\n");
00102 fprintf(ng->arpa_fp," (%.2f of the unigram discount mass was allocated to OOVs)\n",ng->oov_fraction);
00103 }
00104 }
00105 }
00106
00107 switch (ng->discounting_method) {
00108 case GOOD_TURING:
00109 fprintf(ng->arpa_fp,"Good-Turing discounting was applied.\n");
00110 for (i=1;i<=ng->n;i++) {
00111 fprintf(ng->arpa_fp,"%d-gram frequency of frequency : ",i);
00112 for (j=1;j<=ng->fof_size[i-1]-1;j++) {
00113 fprintf(ng->arpa_fp,"%d ",ng->freq_of_freq[i-1][j]);
00114 }
00115 fprintf(ng->arpa_fp,"\n");
00116 }
00117 for (i=1;i<=ng->n;i++) {
00118 fprintf(ng->arpa_fp,"%d-gram discounting ratios : ",i);
00119 for (j=1;j<=ng->disc_range[i-1];j++) {
00120 fprintf(ng->arpa_fp,"%.2f ",ng->gt_disc_ratio[i-1][j]);
00121 }
00122 fprintf(ng->arpa_fp,"\n");
00123 }
00124 break;
00125 case LINEAR:
00126 fprintf(ng->arpa_fp,"Linear discounting was applied.\n");
00127 for (i=1;i<=ng->n;i++) {
00128 fprintf(ng->arpa_fp,"%d-gram discounting ratio : %g\n",i,ng->lin_disc_ratio[i-1]);
00129 }
00130 break;
00131 case ABSOLUTE:
00132 fprintf(ng->arpa_fp,"Absolute discounting was applied.\n");
00133 for (i=1;i<=ng->n;i++) {
00134 fprintf(ng->arpa_fp,"%d-gram discounting constant : %g\n",i,ng->abs_disc_const[i-1]);
00135 }
00136 break;
00137 case WITTEN_BELL:
00138 fprintf(ng->arpa_fp,"Witten Bell discounting was applied.\n");
00139 break;
00140 }
00141
00142
00143 fprintf(ng->arpa_fp,"This file is in the ARPA-standard format introduced by Doug Paul.\n");
00144 fprintf(ng->arpa_fp,"\n");
00145 fprintf(ng->arpa_fp,"p(wd3|wd1,wd2)= if(trigram exists) p_3(wd1,wd2,wd3)\n");
00146 fprintf(ng->arpa_fp," else if(bigram w1,w2 exists) bo_wt_2(w1,w2)*p(wd3|wd2)\n");
00147 fprintf(ng->arpa_fp," else p(wd3|w2)\n");
00148 fprintf(ng->arpa_fp,"\n");
00149 fprintf(ng->arpa_fp,"p(wd2|wd1)= if(bigram exists) p_2(wd1,wd2)\n");
00150 fprintf(ng->arpa_fp," else bo_wt_1(wd1)*p_1(wd2)\n");
00151 fprintf(ng->arpa_fp,"\n");
00152 fprintf(ng->arpa_fp,"All probs and back-off weights (bo_wt) are given in log10 form.\n");
00153 fprintf(ng->arpa_fp,"\n");
00154 fprintf(ng->arpa_fp,"Data formats:\n");
00155 fprintf(ng->arpa_fp,"\n");
00156 fprintf(ng->arpa_fp,"Beginning of data mark: \\data\\\n");
00157
00158 for (i=1;i<=ng->n;i++) {
00159 fprintf(ng->arpa_fp,"ngram %d=nr # number of %d-grams\n",i,i);
00160 }
00161 fprintf(ng->arpa_fp,"\n");
00162 for (i=1;i<=ng->n;i++) {
00163 fprintf(ng->arpa_fp,"\\%d-grams:\n",i);
00164 fprintf(ng->arpa_fp,"p_%d ",i);
00165 for (j=1;j<=i;j++) {
00166 fprintf(ng->arpa_fp,"wd_%d ",j);
00167 }
00168 if (i == ng->n) {
00169 fprintf(ng->arpa_fp,"\n");
00170 }
00171 else {
00172 fprintf(ng->arpa_fp,"bo_wt_%d\n",i);
00173 }
00174 }
00175
00176 fprintf(ng->arpa_fp,"\n");
00177 fprintf(ng->arpa_fp,"end of data mark: \\end\\\n");
00178 fprintf(ng->arpa_fp,"\n");
00179
00180 fprintf(ng->arpa_fp,"\\data\\\n");
00181 fprintf(ng->arpa_fp,"ngram 1=%d\n",1+ng->vocab_size-ng->first_id);
00182 for (i=1;i<=ng->n-1;i++) {
00183 fprintf(ng->arpa_fp,"ngram %d=%d\n",i+1,ng->num_kgrams[i]);
00184 }
00185
00186
00187
00188 fprintf(ng->arpa_fp,"\n\\1-grams:\n");
00189
00190 for (i=ng->first_id; i<=ng->vocab_size;i++) {
00191
00192 double log10_uniprob;
00193 double log10_alpha;
00194
00195 log10_uniprob = ng->uni_log_probs[i]*log_10_of_e;
00196
00197 if (ng->uni_probs[i]<=0.0) {
00198 log10_uniprob = -99.999;
00199 }
00200
00201 if (ng->four_byte_alphas) {
00202 if (ng->bo_weight4[0][i] > 0.0) {
00203 log10_alpha = log10(ng->bo_weight4[0][i]);
00204 }
00205 else {
00206 log10_alpha = -99.999;
00207 }
00208 }
00209 else {
00210
00211 if (double_alpha(ng->bo_weight[0][i],
00212 ng->alpha_array,
00213 ng->size_of_alpha_array,
00214 65535 - ng->out_of_range_alphas,
00215 ng->min_alpha,
00216 ng->max_alpha) > 0.0) {
00217 log10_alpha = log10(double_alpha(ng->bo_weight[0][i],
00218 ng->alpha_array,
00219 ng->size_of_alpha_array,
00220 65535 - ng->out_of_range_alphas,
00221 ng->min_alpha,
00222 ng->max_alpha));
00223 }
00224 else {
00225 log10_alpha = -99.999;
00226 }
00227
00228 }
00229
00230 if (ng->n>1) {
00231 fprintf(ng->arpa_fp,"%.4f %s\t%.4f\n",
00232 log10_uniprob,ng->vocab[i],log10_alpha);
00233 }
00234 else {
00235 fprintf(ng->arpa_fp,"%.4f %s\n",
00236 log10_uniprob,ng->vocab[i]);
00237 }
00238
00239 }
00240
00241 current_pos = (int *) rr_malloc(ng->n*sizeof(int));
00242 end_pos = (int *) rr_malloc(ng->n*sizeof(int));
00243
00244
00245
00246
00247 for (i=1;i<=ng->n-1;i++) {
00248
00249
00250
00251
00252 int current_table;
00253 int j;
00254
00255 int ngcount;
00256 int marg_count;
00257 double discounted_ngcount;
00258
00259 double ngprob;
00260 double log_10_ngprob;
00261 double ngalpha;
00262 double log_10_ngalpha;
00263
00264
00265
00266 discounted_ngcount = 0.0;
00267 log_10_ngalpha = 0.0;
00268
00269 fprintf(ng->arpa_fp,"\n\\%d-grams:\n",i+1);
00270
00271
00272
00273 for (j=0;j<=ng->n-1;j++) {
00274 current_pos[j] = 0;
00275 end_pos[j] = 0;
00276 }
00277
00278 for (current_pos[0]=ng->first_id;
00279 current_pos[0]<=ng->vocab_size;
00280 current_pos[0]++) {
00281
00282 if (return_count(ng->four_byte_counts,
00283 ng->count_table[0],
00284 ng->marg_counts,
00285 ng->marg_counts4,
00286 current_pos[0]) > 0) {
00287
00288 current_table = 1;
00289
00290 if (current_pos[0] == ng->vocab_size) {
00291 end_pos[1] = ng->num_kgrams[1]-1;
00292 }
00293 else {
00294 end_pos[1] = get_full_index(ng->ind[0][current_pos[0]+1],
00295 ng->ptr_table[0],
00296 ng->ptr_table_size[0],
00297 current_pos[0]+1)-1;
00298 }
00299
00300 while (current_table > 0) {
00301
00302 if (current_table == i) {
00303
00304 if (current_pos[i] <= end_pos[i]) {
00305
00306 ngcount = return_count(ng->four_byte_counts,
00307 ng->count_table[i],
00308 ng->count[i],
00309 ng->count4[i],
00310 current_pos[i]);
00311
00312 if (i==1) {
00313 marg_count = return_count(ng->four_byte_counts,
00314 ng->count_table[0],
00315 ng->marg_counts,
00316 ng->marg_counts4,
00317 current_pos[0]);
00318 }
00319 else {
00320 marg_count = return_count(ng->four_byte_counts,
00321 ng->count_table[i-1],
00322 ng->count[i-1],
00323 ng->count4[i-1],
00324 current_pos[i-1]);
00325 }
00326
00327 switch (ng->discounting_method) {
00328 case GOOD_TURING:
00329 if (ngcount <= ng->disc_range[i]) {
00330 discounted_ngcount = ng->gt_disc_ratio[i][ngcount] * ngcount;
00331 }
00332 else {
00333 discounted_ngcount = ngcount;
00334 }
00335 break;
00336 case ABSOLUTE:
00337 discounted_ngcount = ngcount - ng->abs_disc_const[i];
00338 break;
00339 case LINEAR:
00340 discounted_ngcount = ng->lin_disc_ratio[i]*ngcount;
00341 break;
00342 case WITTEN_BELL:
00343 discounted_ngcount = ( ((double) marg_count * ngcount) /
00344 (marg_count + num_of_types(i-1,current_pos[i-1],ng)));
00345 break;
00346 }
00347
00348 ngprob = (double) discounted_ngcount / marg_count;
00349
00350 if (ngprob > 1.0) {
00351 fprintf(stderr,
00352 "discounted_ngcount = %f marg_count = %d %d %d %d\n",
00353 discounted_ngcount,marg_count,current_pos[0],
00354 current_pos[1],current_pos[2]);
00355 quit(-1,"Error : probablity of ngram is greater than one.\n");
00356 }
00357
00358 if (ngprob > 0.0) {
00359 log_10_ngprob = log10(ngprob);
00360 }
00361 else {
00362 log_10_ngprob = -99.999;
00363 }
00364
00365
00366 if (i <= ng->n-2) {
00367 if (ng->four_byte_alphas) {
00368 ngalpha = ng->bo_weight4[i][current_pos[i]];
00369 }
00370 else {
00371 ngalpha = double_alpha(ng->bo_weight[i][current_pos[i]],
00372 ng->alpha_array,
00373 ng->size_of_alpha_array,
00374 65535 - ng->out_of_range_alphas,
00375 ng->min_alpha,
00376 ng->max_alpha);
00377 }
00378 if (ngalpha > 0.0) {
00379 log_10_ngalpha = log10(ngalpha);
00380 }
00381 else {
00382 log_10_ngalpha = -99.999;
00383 }
00384 }
00385
00386 fprintf(ng->arpa_fp,"%.4f ",log_10_ngprob);
00387 fprintf(ng->arpa_fp,"%s ",ng->vocab[current_pos[0]]);
00388 for (j=1;j<=i;j++) {
00389 fprintf(ng->arpa_fp,"%s ",ng->vocab[ng->word_id[j][current_pos[j]]]);
00390 }
00391 if (i <= ng->n-2) {
00392 fprintf(ng->arpa_fp,"%.4f\n",log_10_ngalpha);
00393 }
00394 else {
00395 fprintf(ng->arpa_fp,"\n");
00396 }
00397 current_pos[i]++;
00398 }
00399 else {
00400 current_table--;
00401 if (current_table > 0) {
00402 current_pos[current_table]++;
00403 }
00404 }
00405 }
00406 else {
00407
00408 if (current_pos[current_table] <= end_pos[current_table]) {
00409 current_table++;
00410 if (current_pos[current_table-1] == ng->num_kgrams[current_table-1]-1) {
00411 end_pos[current_table] = ng->num_kgrams[current_table]-1;
00412 }
00413 else {
00414 end_pos[current_table] = get_full_index(ng->ind[current_table-1][current_pos[current_table-1]+1],ng->ptr_table[current_table-1],ng->ptr_table_size[current_table-1],current_pos[current_table-1]+1)-1;
00415 }
00416 }
00417 else {
00418 current_table--;
00419 if (current_table > 0) {
00420 current_pos[current_table]++;
00421 }
00422 }
00423 }
00424 }
00425 }
00426 }
00427 }
00428
00429 free(current_pos);
00430 free(end_pos);
00431
00432
00433 fprintf(ng->arpa_fp,"\n\\end\\\n");
00434
00435 rr_oclose(ng->arpa_fp);
00436
00437 }
00438
00439 void write_bin_lm(ng_t *ng,int verbosity) {
00440
00441 int l_chunk;
00442 int from_rec;
00443 int i;
00444 int j;
00445
00446 pc_message(verbosity,1,"Binary %d-gram language model will be written to %s\n",ng->n,ng->bin_filename);
00447
00448 ng->version = BBO_FILE_VERSION;
00449
00450
00451
00452 rr_fwrite(&ng->version,sizeof(int),1,ng->bin_fp,"version");
00453 rr_fwrite(&ng->n,sizeof(unsigned short),1,ng->bin_fp,"n");
00454
00455 rr_fwrite(&ng->vocab_size,sizeof(unsigned short),1,ng->bin_fp,"vocab_size");
00456 rr_fwrite(&ng->no_of_ccs,sizeof(unsigned short),1,ng->bin_fp,"no_of_ccs");
00457 rr_fwrite(&ng->vocab_type,sizeof(unsigned short),1,ng->bin_fp,"vocab_type");
00458
00459 rr_fwrite(&ng->count_table_size,sizeof(count_ind_t),1,
00460 ng->bin_fp,"count_table_size");
00461 rr_fwrite(&ng->discounting_method,sizeof(unsigned short),1,
00462 ng->bin_fp,"discounting_method");
00463
00464 rr_fwrite(&ng->min_alpha,sizeof(double),
00465 1,ng->bin_fp,"min_alpha");
00466 rr_fwrite(&ng->max_alpha,sizeof(double),
00467 1,ng->bin_fp,"max_alpha");
00468 rr_fwrite(&ng->out_of_range_alphas,sizeof(unsigned short),
00469 1,ng->bin_fp,"out_of_range_alphas");
00470 rr_fwrite(&ng->size_of_alpha_array,sizeof(unsigned short),
00471 1,ng->bin_fp,"size_of_alpha_array");
00472
00473 rr_fwrite(&ng->n_unigrams,sizeof(int),1,ng->bin_fp,"n_unigrams");
00474 rr_fwrite(&ng->zeroton_fraction,sizeof(double),1,
00475 ng->bin_fp,"zeroton_fraction");
00476 rr_fwrite(&ng->oov_fraction,sizeof(double),1,
00477 ng->bin_fp,"oov_fraction");
00478 rr_fwrite(&ng->four_byte_counts,sizeof(flag),1,
00479 ng->bin_fp,"four_byte_counts");
00480
00481 rr_fwrite(&ng->four_byte_alphas,sizeof(flag),1,
00482 ng->bin_fp,"four_byte_alphas");
00483
00484 rr_fwrite(&ng->first_id,sizeof(unsigned short),1,
00485 ng->bin_fp,"first_id");
00486
00487
00488
00489 sih_val_write_to_file(ng->vocab_ht,ng->bin_fp,ng->bin_filename,0);
00490
00491
00492
00493 if (ng->four_byte_counts) {
00494 rr_fwrite(ng->marg_counts4,sizeof(int),
00495 ng->vocab_size+1,ng->bin_fp,"marg_counts");
00496 }
00497 else {
00498 rr_fwrite(ng->marg_counts,sizeof(count_ind_t),
00499 ng->vocab_size+1,ng->bin_fp,"marg_counts");
00500 }
00501
00502 rr_fwrite(ng->alpha_array,sizeof(double),
00503 ng->size_of_alpha_array,ng->bin_fp,"alpha_array");
00504
00505 if (!ng->four_byte_counts) {
00506 for (i=0;i<=ng->n-1;i++) {
00507 rr_fwrite(ng->count_table[i],sizeof(count_t),
00508 ng->count_table_size+1,ng->bin_fp,"count_table");
00509 }
00510 }
00511
00512
00513
00514
00515
00516 rr_fwrite(ng->ptr_table_size,sizeof(unsigned short),
00517 ng->n,ng->bin_fp,"ptr_table_size");
00518
00519 for (i=0;i<=ng->n-1;i++) {
00520 rr_fwrite(ng->ptr_table[i],sizeof(ptr_tab_t),
00521 ng->ptr_table_size[i],ng->bin_fp,"ptr_table");
00522 }
00523
00524
00525
00526 rr_fwrite(ng->uni_probs,sizeof(uni_probs_t),ng->vocab_size+1,
00527 ng->bin_fp,"uni_probs");
00528 rr_fwrite(ng->uni_log_probs,sizeof(uni_probs_t),ng->vocab_size+1,
00529 ng->bin_fp,"uni_log_probs");
00530 rr_fwrite(ng->context_cue,sizeof(flag),ng->vocab_size+1,
00531 ng->bin_fp,"context_cue");
00532
00533
00534 rr_fwrite(ng->cutoffs,sizeof(cutoff_t),ng->n,ng->bin_fp,"cutoffs");
00535
00536 switch (ng->discounting_method) {
00537 case GOOD_TURING:
00538 rr_fwrite(ng->fof_size,sizeof(unsigned short),ng->n,ng->bin_fp,"fof_size");
00539 rr_fwrite(ng->disc_range,sizeof(unsigned short),ng->n,
00540 ng->bin_fp,"disc_range");
00541 for (i=0;i<=ng->n-1;i++) {
00542 rr_fwrite(ng->freq_of_freq[i],sizeof(int),
00543 ng->fof_size[i]+1,ng->bin_fp,"freq_of_freq");
00544 }
00545 for (i=0;i<=ng->n-1;i++) {
00546 rr_fwrite(ng->gt_disc_ratio[i],sizeof(disc_val_t),
00547 ng->disc_range[i]+1,ng->bin_fp,"gt_disc_ratio");
00548 }
00549 case WITTEN_BELL:
00550 break;
00551 case LINEAR:
00552 rr_fwrite(ng->lin_disc_ratio,sizeof(disc_val_t),
00553 ng->n,ng->bin_fp,"lin_disc_ratio");
00554 break;
00555 case ABSOLUTE:
00556 rr_fwrite(ng->abs_disc_const,sizeof(double),
00557 ng->n,ng->bin_fp,"abs_disc_const");
00558 break;
00559 }
00560
00561
00562
00563
00564
00565 rr_fwrite(ng->num_kgrams,sizeof(int),ng->n,ng->bin_fp,"num_kgrams");
00566
00567 if (ng->four_byte_counts) {
00568 rr_fwrite(ng->count4[0],sizeof(int),ng->vocab_size+1,
00569 ng->bin_fp,"unigram counts");
00570 }
00571 else {
00572 rr_fwrite(ng->count[0],sizeof(count_ind_t),ng->vocab_size+1,
00573 ng->bin_fp,"unigram counts");
00574 }
00575
00576 if (ng->four_byte_alphas) {
00577 rr_fwrite(ng->bo_weight4[0],sizeof(four_byte_t),ng->vocab_size+1,
00578 ng->bin_fp,"unigram backoff weights");
00579 }
00580 else {
00581 rr_fwrite(ng->bo_weight[0],sizeof(bo_weight_t),ng->vocab_size+1,
00582 ng->bin_fp,"unigram backoff weights");
00583 }
00584
00585 if (ng->n > 1) {
00586 rr_fwrite(ng->ind[0],sizeof(index__t),ng->vocab_size+1,
00587 ng->bin_fp,"unigram -> bigram pointers");
00588 }
00589
00590
00591
00592
00593
00594
00595 for (i=1;i<=ng->n-1;i++) {
00596 for (j=0;j<=ng->num_kgrams[i];j++) {
00597 SWAPHALF(&ng->word_id[i][j]);
00598 }
00599 if (ng->four_byte_counts) {
00600 for (j=0;j<=ng->num_kgrams[i];j++) {
00601 SWAPWORD(&ng->count4[i][j]);
00602 }
00603 }
00604 else {
00605 for (j=0;j<=ng->num_kgrams[i];j++) {
00606 SWAPHALF(&ng->count[i][j]);
00607 }
00608 }
00609 }
00610
00611 for (i=1;i<=ng->n-2;i++) {
00612 for (j=0;j<=ng->num_kgrams[i];j++) {
00613 if (ng->four_byte_alphas) {
00614 SWAPWORD(&ng->bo_weight4[i][j]);
00615 }
00616 else {
00617 SWAPHALF(&ng->bo_weight[i][j]);
00618 }
00619 }
00620 for (j=0;j<=ng->num_kgrams[i];j++) {
00621 SWAPHALF(&ng->ind[i][j]);
00622 }
00623 }
00624
00625 for (i=1;i<=ng->n-1;i++) {
00626
00627 from_rec = 0;
00628 l_chunk = 100000;
00629 while(from_rec < ng->num_kgrams[i]) {
00630 if (from_rec+l_chunk > ng->num_kgrams[i]) {
00631 l_chunk = ng->num_kgrams[i] - from_rec;
00632 }
00633 rr_fwrite(&ng->word_id[i][from_rec],1,sizeof(id__t)*l_chunk,ng->bin_fp,"word ids");
00634 from_rec += l_chunk;
00635 }
00636
00637 }
00638
00639 for (i=1;i<=ng->n-1;i++) {
00640
00641 from_rec = 0;
00642 l_chunk = 100000;
00643 while(from_rec < ng->num_kgrams[i]) {
00644 if (from_rec+l_chunk > ng->num_kgrams[i]) {
00645 l_chunk = ng->num_kgrams[i] - from_rec;
00646 }
00647 if (ng->four_byte_counts) {
00648 rr_fwrite(&ng->count4[i][from_rec],1,sizeof(int)*l_chunk,ng->bin_fp,"counts");
00649 }
00650 else {
00651 rr_fwrite(&ng->count[i][from_rec],1,sizeof(count_ind_t)*l_chunk,ng->bin_fp,"counts");
00652 }
00653 from_rec += l_chunk;
00654 }
00655
00656 }
00657
00658 for (i=1;i<=ng->n-2;i++) {
00659
00660 from_rec = 0;
00661 l_chunk = 100000;
00662 while(from_rec < ng->num_kgrams[i]) {
00663 if (from_rec+l_chunk > ng->num_kgrams[i]) {
00664 l_chunk = ng->num_kgrams[i] - from_rec;
00665 }
00666 if (ng->four_byte_alphas) {
00667 rr_fwrite(&ng->bo_weight4[i][from_rec],1,sizeof(four_byte_t)*l_chunk,
00668 ng->bin_fp,"backoff weights");
00669 }
00670 else {
00671 rr_fwrite(&ng->bo_weight[i][from_rec],1,sizeof(bo_weight_t)*l_chunk,
00672 ng->bin_fp,"backoff weights");
00673 }
00674 from_rec += l_chunk;
00675 }
00676 }
00677
00678 for (i=1;i<=ng->n-2;i++) {
00679
00680
00681 from_rec = 0;
00682 l_chunk = 100000;
00683 while(from_rec < ng->num_kgrams[i]) {
00684 if (from_rec+l_chunk > ng->num_kgrams[i]) {
00685 l_chunk = ng->num_kgrams[i] - from_rec;
00686 }
00687 rr_fwrite(&ng->ind[i][from_rec],1,sizeof(index__t)*l_chunk,ng->bin_fp,
00688 "indices");
00689 from_rec += l_chunk;
00690 }
00691
00692 }
00693
00694 rr_oclose(ng->bin_fp);
00695
00696
00697
00698 for (i=1;i<=ng->n-1;i++) {
00699 for (j=0;j<=ng->num_kgrams[i];j++) {
00700 SWAPHALF(&ng->word_id[i][j]);
00701 }
00702 if (ng->four_byte_counts) {
00703 for (j=0;j<=ng->num_kgrams[i];j++) {
00704 SWAPWORD(&ng->count4[i][j]);
00705 }
00706 }
00707 else {
00708 for (j=0;j<=ng->num_kgrams[i];j++) {
00709 SWAPHALF(&ng->count[i][j]);
00710 }
00711 }
00712 }
00713
00714 for (i=1;i<=ng->n-2;i++) {
00715 for (j=0;j<=ng->num_kgrams[i];j++) {
00716 if (ng->four_byte_alphas) {
00717 SWAPWORD(&ng->bo_weight4[i][j]);
00718 }
00719 else {
00720 SWAPHALF(&ng->bo_weight[i][j]);
00721 }
00722
00723 }
00724 for (j=0;j<=ng->num_kgrams[i];j++) {
00725 SWAPHALF(&ng->ind[i][j]);
00726 }
00727 }
00728
00729 }
00730
00731
00732
00733
00734
00735