interpolate.c Source File

00001 
00002 
00003 /*=====================================================================
00004                 =======   COPYRIGHT NOTICE   =======
00005 Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
00006 Ronald Rosenfeld and Philip Clarkson.
00007 
00008 All rights reserved.
00009 
00010 This software is made available for research purposes only.  It may be
00011 redistributed freely for this purpose, in full or in part, provided
00012 that this entire copyright notice is included on any copies of this
00013 software and applications and derivations thereof.
00014 
00015 This software is provided on an "as is" basis, without warranty of any
00016 kind, either expressed or implied, as to any matter including, but not
00017 limited to warranty of fitness of purpose, or merchantability, or
00018 results obtained from use of this software.
00019 ======================================================================*/
00020 
00021 /* copyright (C) Roni Rosenfeld, 1990, 1991, 1992, 1993 */
00022 /* Edited by Kristie Seymore, 4/16/97 */
00023 /* Edited further by Philip Clarkson April 1997, in order to provide
00024    consistency with the rest of the toolkit */
00025 
00051 #include <stdio.h>
00052 #include <strings.h>
00053 #include <math.h>
00054 #include <stdlib.h>
00055 #include "rr_libs/general.h"
00056 #include "pc_libs/pc_general.h"
00057 #include "toolkit.h"
00058 #define  ITEM_T  float
00059 #define  ITEM_FORMAT "%f"
00060 #define  MCAPTION 20
00061 
/*
 * Remove one entry from a command-line argument vector.
 *
 *   pargc  - in/out: number of entries in argv; decremented when an entry
 *            is removed.
 *   argv   - the argument vector; every entry after the removed one is
 *            shifted down by one position.
 *   rm_cnt - index of the entry to remove (despite the name it is an
 *            index, not a count).
 *
 * An index outside 0 .. *pargc-1 is ignored, so the argument count can never
 * drift away from the real contents of argv.  After a removal the slot just
 * past the new last entry is set to NULL, keeping the vector terminated the
 * way the argv handed to main() is.
 */
void updateArgs( int *pargc, char **argv, int rm_cnt )
{
  int i ;

  /* nothing to remove: leave both the count and the vector untouched */
  if( rm_cnt < 0 || rm_cnt >= *pargc ) return ;

  /* update the argument count */
  (*pargc)-- ;

  /* update the command line */
  for( i = rm_cnt ; i < *pargc ; i++ ) argv[i] = argv[i+1] ;

  /* the old last slot is now stale; terminate the vector there */
  argv[*pargc] = NULL ;
}
00073 
00074 void eval(double *sum_logprobs, double **fractions, int *tag_of, int *n_in_tag,
00075           double *prob_components, double **lambdas, ITEM_T **model_probs,
00076           int ntags, int from_item, int to_item, int nmodels, char **captions, 
00077           double *p_new_pp, int iter_no, double old_pp, int verbosity, 
00078           FILE *probs_fp) {
00079   int     itag, iitem, tag, imodel;
00080   double  total_prob, total_logprobs, new_pp;
00081 
00082   for (itag=0; itag<ntags; itag++) {
00083      sum_logprobs[itag] = 0.0;
00084      for (imodel=0; imodel<nmodels; imodel++) {
00085         fractions[imodel][itag] = 0.0;
00086      }
00087   }
00088   for (iitem=from_item; iitem<=to_item; iitem++) {
00089     tag = tag_of[iitem];
00090     total_prob = 0.0;
00091     for (imodel=0; imodel<nmodels; imodel++) {
00092       prob_components[imodel] =
00093         lambdas[imodel][tag] * model_probs[imodel][iitem];
00094       total_prob += prob_components[imodel];
00095     }
00096     for (imodel=0; imodel<nmodels; imodel++) {
00097       fractions[imodel][tag] += prob_components[imodel] / total_prob;
00098     }
00099     sum_logprobs[tag] += log(total_prob);
00100     pc_message(verbosity,3,"    item #%d (tag %d): ",iitem,tag);
00101     pc_message(verbosity,4,"\n        probs:  ");
00102     for (imodel=0; imodel<nmodels; imodel++) {
00103       pc_message(verbosity,4,"%.4f ",model_probs[imodel][iitem]);
00104     }
00105     pc_message(verbosity,4,"\n        comps:  ");
00106     for (imodel=0; imodel<nmodels; imodel++) {
00107       pc_message(verbosity,4,"%.4f ",prob_components[imodel]);
00108     }
00109     pc_message(verbosity,4,"\n        fracts: ");
00110     for (imodel=0; imodel<nmodels; imodel++) {
00111       pc_message(verbosity,4,"%.4f ",fractions[imodel][tag]);
00112     }
00113     pc_message(verbosity,4,"\n    ");
00114 
00115     pc_message(verbosity,3,
00116                "total_prob=%.4f  logprob=%.3f  sum_logprob[%d]=%.3f\n",
00117                total_prob, log(total_prob), tag, sum_logprobs[tag]);
00118 
00119     if (probs_fp) {
00120       fprintf(probs_fp,"%g\n",total_prob);
00121     }
00122   }
00123   pc_message(verbosity,2,"\n");
00124   total_logprobs = 0.0;
00125   for (itag=0; itag<ntags; itag++) {
00126     total_logprobs += sum_logprobs[itag];
00127     pc_message(verbosity,2,"%.*s weights: ",MCAPTION,captions[itag]);
00128     for (imodel=0; imodel<nmodels; imodel++) {
00129       pc_message(verbosity,2,"%.3f  ",lambdas[imodel][itag]);
00130     }
00131     pc_message(verbosity,2,"(%4d items)",n_in_tag[itag]);
00132     if (n_in_tag[itag]==0) {
00133       pc_message(verbosity,2,"\n");
00134     }
00135     else {
00136       pc_message(verbosity,2," --> PP=%f\n",
00137                  exp( -sum_logprobs[itag] / n_in_tag[itag]));
00138     }
00139   }
00140   new_pp = exp(-total_logprobs/(to_item-from_item+1));
00141   pc_message(verbosity,2,"\t\t\t=============>  TOTAL PP = %g",new_pp);
00142   if (iter_no>1) {
00143     pc_message(verbosity,2," (down %.4f)\n",(1.0-(new_pp/old_pp)));
00144   }
00145   else {
00146     pc_message(verbosity,2,"\n");
00147   }
00148   *p_new_pp = new_pp;
00149 }
00150 
00151 
00152 void main (int argc, char **argv) {
00153 
00154   int use_tags=0; 
00155   int use_captions=0; 
00156   int pure_test_mode=0; 
00157   int first_part = 0;
00158   int default_lambdas=1; 
00159   int verbosity=1; 
00160   int n_test_items=0; 
00161   int n_train_items=0; 
00162   int cv=0;
00163   int Mprobs = 60000; 
00164   int write_lambdas = 0;
00165   double stop_ratio = 0.999;
00166 
00167   static char *rname = "interpolate";
00168 
00169   char *tags_filename; 
00170   char *captions_filename; 
00171   char *lambdas_filename; 
00172   char *write_lambdas_filename;
00173   FILE *tags_fp; 
00174   FILE *captions_fp; 
00175   FILE *lambdas_fp; 
00176   FILE *probs_fp=NULL; 
00177   FILE *write_lambdas_fp = NULL;
00178 
00179   char   **model_filenames;      /* model_filenames[model]            */
00180   FILE   **model_fps;            /*       model_fps[model]            */
00181   Boolean *model_fixed_lambdas;  /*model_fixed_lambdas[model]         */
00182   ITEM_T **model_probs;          /*           probs[model][item]      */
00183   int    *tag_of;                /*              tag_of[item]         */
00184   double *prob_components;       /* prob_components[model]            */
00185   double **lambdas;              /*         lambdas[model][tag]       */
00186   double **fractions;            /*       fractions[model][tag]       */
00187   double *sum_logprobs;          /*           sum_logprobs[tag]       */
00188   int    *n_train_in_tag;        /*            n_in_tag[tag]          */
00189   int    *n_test_in_tag;         /*            n_in_tag[tag]          */
00190   int    nmodels=0; 
00191   int imodel; 
00192   int ntags; 
00193   int itag; 
00194   int tag; 
00195   int nitems; 
00196   int iitem;
00197   int iter_no;
00198   int half_point = 0; 
00199   int iter_num; 
00200   int first_test_items=0; 
00201   int second_test_items=0;
00202   double old_pp=0.0; 
00203   double new_pp; 
00204   double test_pp; 
00205   float dummyf;
00206   double first_part_pp=0.0; 
00207   double second_part_pp=0.0; 
00208   double total_pp; 
00209   double sum_logprob_1; 
00210   double sum_logprob_2;
00211   double total_logprob;
00212   char   **captions;
00213   FILE   *fp;
00214   ITEM_T *pitem;
00215   int    scanfrc; 
00216   int nnewitems;
00217   int temp_test_items;
00218   char *write_fprobs_filename;
00219   int i;
00220 
00221   /* Allocate memory for model data */
00222 
00223   model_filenames = (char **) rr_malloc(argc * (sizeof(char *)));
00224   model_fixed_lambdas = (Boolean *) rr_malloc(argc * (sizeof(Boolean)));
00225 
00226   /* Process command line */
00227 
00228   report_version(&argc,argv);
00229 
00230   if (argc == 1 || pc_flagarg(&argc,argv,"-help")) {
00231 
00232     fprintf(stderr,"Usage : interpolate +[-] model1.fprobs +[-] model2.fprobs ... \n");
00233     fprintf(stderr,"        [ -test_all | -test_first n | -test_last n | -cv ]\n");
00234     fprintf(stderr,"        [ -tags .tags ]\n");
00235     fprintf(stderr,"        [ -captions .captions ]\n");
00236     fprintf(stderr,"        [ -out_lambdas .lambdas ]\n");
00237     fprintf(stderr,"        [ -in_lambdas .lambdas ]\n");
00238     fprintf(stderr,"        [ -stop_ratio 0.999 ]\n");
00239     fprintf(stderr,"        [ -probs .fprobs ]\n");
00240     fprintf(stderr,"        [ -max_probs 6000000 ]\n");
00241     exit(1);
00242 
00243   }
00244 
00245   /* Grab all the model filename */
00246 
00247   i = 0;
00248   while (i<argc) {
00249 
00250     if (argv[i][0]=='+') {
00251       model_fixed_lambdas[nmodels] = (argv[i][1]=='-');
00252       model_filenames[nmodels++] = salloc(argv[i+1]);
00253       updateArgs( &argc, argv, i+1 ) ;
00254       updateArgs( &argc, argv, i ) ;
00255     }
00256     else {
00257       i++;
00258     }
00259   }
00260 
00261   /* Now process all the other switches */
00262 
00263   verbosity = pc_intarg(&argc,argv,"-verbosity",DEFAULT_VERBOSITY);
00264 
00265   pure_test_mode = pc_flagarg(&argc,argv,"-test_all");
00266 
00267   n_test_items = pc_intarg(&argc,argv,"-test_first",-1);
00268   if (n_test_items != -1) {
00269     first_part = 1;
00270   }
00271 
00272   temp_test_items = pc_intarg(&argc,argv,"-test_last",-1);
00273   if (n_test_items != -1 && temp_test_items != -1) {
00274     quit(-1,"Error : Cannot specify both -test_last and -test_first options.\n");
00275   }
00276 
00277   if (temp_test_items != -1) {
00278     n_test_items = temp_test_items;
00279     first_part = 0;
00280   }
00281 
00282   if (n_test_items == -1) {
00283     n_test_items = 0;
00284   }
00285 
00286   cv = pc_flagarg(&argc,argv,"-cv");
00287 
00288   tags_filename = salloc(pc_stringarg(&argc,argv,"-tags",""));
00289   if (strcmp(tags_filename,"")) {
00290     use_tags = 1;
00291   }
00292 
00293   captions_filename = salloc(pc_stringarg(&argc,argv,"-captions",""));
00294   if (strcmp(captions_filename,"")) {
00295     use_captions = 1;
00296   }
00297 
00298   if (use_captions && !use_tags) {
00299     pc_message(verbosity,1,"Warning - captions file specified, but no tags file.\n");
00300   }
00301 
00302   lambdas_filename = salloc(pc_stringarg(&argc,argv,"-in_lambdas",""));
00303   if (strcmp(lambdas_filename,"")) {
00304     default_lambdas = 0;
00305   }
00306   else {
00307     default_lambdas = 1;
00308   }
00309 
00310   write_lambdas_filename = salloc(pc_stringarg(&argc,argv,"-out_lambdas",""));
00311   if (strcmp(write_lambdas_filename,"")) {
00312     write_lambdas = 1;
00313   }
00314 
00315   stop_ratio = pc_doublearg(&argc,argv,"-stop_ratio",0.999);
00316 
00317   write_fprobs_filename = salloc(pc_stringarg(&argc,argv,"-probs",""));
00318   if (strcmp(write_fprobs_filename,"")) {
00319     if (n_test_items > 0 || cv) {
00320       probs_fp = rr_oopen(write_fprobs_filename);
00321     }
00322     else {
00323       pc_message(verbosity,2,"Warning : -write option ignored, as none of the data is used for testing.\n");
00324     }
00325   }
00326 
00327   Mprobs = pc_intarg(&argc,argv,"-max_probs",6000000);
00328 
00329   pc_report_unk_args(&argc,argv,verbosity);
00330 
00331   if (nmodels==0) quit(-1,"%s: no models specified\n",rname);
00332   if (pure_test_mode && default_lambdas)
00333     quit(-1,"%s: in pure test mode, initial lambdas must be supplied\n",rname);
00334   if (stop_ratio<0.0 || stop_ratio >1.0)
00335      quit(-1,"%s: illegal stop_ratio (%f) - must be a fraction\n",
00336               rname,stop_ratio);
00337 
00338   if (cv && pure_test_mode) {
00339     quit(-1,"%s : Error - cannot specify both -cv and -test_all.\n",rname);
00340   }
00341 
00342   if (cv && n_test_items != 0) {
00343     quit(-1,"%s : Error - cannot specify both -cv and -test_first or -test_last.\n",
00344          rname);
00345   }
00346 
00347   if (pure_test_mode && n_test_items != 0) {
00348     quit(-1,"%s : Error - cannot specify both -test_all and -test_first or -test_last.\n",
00349          rname);
00350   }
00351 
00352 
00353   model_fps       = (FILE   **) rr_malloc(nmodels * sizeof(FILE *));
00354   model_probs     = (ITEM_T **) rr_malloc(nmodels * sizeof(ITEM_T *));
00355   lambdas         = (double **) rr_malloc(nmodels * sizeof(double *));
00356   fractions       = (double **) rr_malloc(nmodels * sizeof(double *));
00357   prob_components = (double *)  rr_malloc(nmodels * sizeof(double));
00358 
00359   nitems = -1;
00360 
00361   pc_message(verbosity,2,"%s : Reading the probability streams....",rname);
00362   fflush(stderr);
00363   
00364   for (imodel=0; imodel<nmodels; imodel++) {
00365      model_fps[imodel] = rr_iopen(model_filenames[imodel]);
00366      model_probs[imodel] = (ITEM_T *) rr_malloc((Mprobs+1)*sizeof(ITEM_T));
00367 
00368      /* read in the models probabilities */
00369      fp=model_fps[imodel];
00370      pitem=model_probs[imodel];
00371      nnewitems = 0;
00372      for (iitem=0; iitem<Mprobs+1; iitem++) {
00373         if ((scanfrc=fscanf(fp,ITEM_FORMAT,pitem++)) != 1) break;
00374         nnewitems++;
00375      }
00376      if (nnewitems>Mprobs) quit(-1,
00377         "%s: more than %d probs on %s\n",rname,Mprobs,model_filenames[imodel]);
00378      if (imodel==0) nitems = nnewitems;
00379      else if (nnewitems != nitems)
00380         quit(-1,"%s: model '%s' has %d probs, but model '%s' has %d probs\n",
00381           rname,model_filenames[0],nitems,model_filenames[imodel],nnewitems);
00382 
00383      fclose(model_fps[imodel]);
00384   }
00385 
00386   pc_message(verbosity,2,"Done.\n");
00387   fflush(stderr);
00388 
00389   if (n_test_items >= nitems)
00390      quit(-1,"%s: \"-test_last %d\" was specified, but there are only %d items\n",
00391               rname, n_test_items, nitems);
00392   if (pure_test_mode) n_test_items=nitems;
00393 
00394   if (cv) half_point = (int) (nitems/2);
00395 
00396   if (write_lambdas == 1) {
00397       write_lambdas_fp = rr_oopen(write_lambdas_filename);
00398   }
00399 
00400   for (iter_num = 1; iter_num <= 2; iter_num++) {
00401 
00402      if (cv && iter_num == 1) {
00403         n_test_items = nitems - half_point;
00404         first_part = 0;
00405      }
00406      if (cv && iter_num == 2) {
00407         n_test_items = half_point;
00408         first_part = 1;
00409      }
00410 
00411      n_train_items = nitems - n_test_items;
00412      if (n_train_items>0 && n_test_items>0) {
00413        if (first_part) {
00414          pc_message(verbosity,2,
00415          "%s: %d models will be interpolated using the last %d data items\n",
00416                     rname, nmodels, n_train_items);
00417          pc_message(verbosity,2,
00418          "    The first %d data items will be used for testing\n",
00419                     n_test_items);
00420        }
00421        else {
00422          pc_message(verbosity,2,
00423          "%s: %d models will be interpolated using the first %d data items\n",
00424                  rname, nmodels, n_train_items);
00425          pc_message(verbosity,2,
00426          "    The last %d data items will be used for testing\n",
00427          n_test_items);
00428        }
00429      }
00430      else {
00431        if (n_train_items>0) {
00432          pc_message(verbosity,2,
00433                     "%s: %d models will be interpolated using %d data items\n",
00434                     rname, nmodels, n_train_items);
00435        }
00436        else {
00437          if (n_test_items>0) {
00438            pc_message(verbosity,2,
00439                       "%s: %d models will be tested using %d data items\n",
00440                       rname, nmodels, n_test_items);
00441          }
00442 
00443          else {
00444            if (cv) {
00445              pc_message(verbosity,2,
00446              "%s: %d models will be tested using cross validation\n",
00447                         rname, nmodels);
00448            }
00449          }
00450        }
00451      }
00452      if (!default_lambdas)
00453        pc_message(verbosity,2,"%s: %sweights will be read from \"%s\"\n",
00454           rname,(n_train_items ? "initial " : ""),
00455           (strcmp(lambdas_filename,"-")==0) ? "stdin" : lambdas_filename);
00456      for (imodel=0; imodel<nmodels; imodel++) {
00457        if (model_fixed_lambdas[imodel])
00458          pc_message(verbosity,2,"%s: weights of '%s' will be fixed\n",
00459                     rname, model_filenames[imodel]);
00460      }
00461 
00462      /* read the tags file, or set all tags to 1 */
00463      tag_of = (int *) rr_malloc(nitems * sizeof(int));
00464      if (use_tags) {
00465         int maxtag = -1;
00466         tags_fp = rr_iopen(tags_filename);
00467         pc_message(verbosity,2,"tags will be taken from \"%s\"\n",
00468                    tags_filename);
00469         for (iitem=0; iitem<nitems; iitem++) {
00470            if (fscanf(tags_fp,"%d",&tag_of[iitem]) != 1)
00471               quit(-1,"%s: problem reading %dth tag from %s\n",
00472                        rname, iitem, tags_filename);
00473            if ((tag_of[iitem]<0))
00474               quit(-1,"%s: illegal tag (%d)\n", rname, tag_of[iitem]);
00475            if (tag_of[iitem]>maxtag) maxtag = tag_of[iitem];
00476         }
00477         if (fscanf(tags_fp,"%d",&tag_of[iitem]) != EOF)
00478            quit(-1,"%s: %s contains more than %d items\n",
00479                  rname, tags_filename, nitems);
00480         ntags = maxtag+1;
00481         pc_message(verbosity,2,"%s: data is partitioned into %d tags\n", 
00482                    rname, ntags);
00483      }
00484      else {
00485         ntags = 1;
00486         for (iitem=0; iitem<nitems; iitem++)  tag_of[iitem] = 0;
00487         pc_message(verbosity,2,"%s: a single tag is used for all the data\n",
00488                    rname);
00489      }
00490 
00491      /* fill in the CAPTIONS strings */
00492 
00493      captions = (char **) rr_malloc(ntags*sizeof(char *));
00494      for (tag=0; tag<ntags; tag++)
00495          captions[tag] = (char *) rr_malloc((MCAPTION+1)*sizeof(char));
00496      if (use_captions) {
00497         char line[81];
00498         int len;
00499         captions_fp = rr_iopen(captions_filename);
00500         pc_message(verbosity,2,"captions will be taken from \"%s\"\n",
00501                    captions_filename);
00502 
00503         for (tag=0; tag<ntags; tag++) {
00504           if (fgets(line,80,captions_fp) == NULL) {
00505             quit(-1,"Error reading from captions file.\n");
00506           }
00507               len=strlen(line); line[len-1]='\0'; len--; /* remove the '\n' */
00508 
00509               sprintf(captions[tag],"%.*s",MCAPTION,line);
00510               strncat(captions[tag],"                    ",MCAPTION-len);
00511         } 
00512      }
00513      else {
00514        for (tag=0; tag<ntags; tag++) {
00515          sprintf(captions[tag],"          TAG %d",tag);
00516          strncat(captions[tag],"                    ",
00517                  MCAPTION-strlen(captions[tag]));
00518        }
00519      }
00520 
00521 
00522      /* Allocate rest of arrays */
00523      n_train_in_tag  = (int *) rr_calloc(ntags,sizeof(int));
00524      n_test_in_tag  = (int *) rr_calloc(ntags,sizeof(int));
00525      for (iitem=0; iitem<n_train_items; iitem++) {
00526        n_train_in_tag[tag_of[iitem]]++;
00527      }
00528      for (iitem=n_train_items; iitem<nitems; iitem++) {
00529        n_test_in_tag[tag_of[iitem]]++;
00530      }
00531      sum_logprobs = (double *) rr_malloc(ntags * sizeof(double));
00532      for (imodel=0; imodel<nmodels; imodel++) {
00533         lambdas[imodel]   = (double *) rr_malloc(ntags*sizeof(double));
00534         fractions[imodel] = (double *) rr_malloc(ntags*sizeof(double));
00535      }
00536 
00537      /* Initialize the weights (lambdas) */
00538      if (default_lambdas) {
00539         for (itag=0; itag<ntags; itag++)
00540            for (imodel=0; imodel<nmodels; imodel++)
00541              lambdas[imodel][itag] = 1.0 / nmodels;
00542      }
00543      else {
00544         lambdas_fp = rr_iopen(lambdas_filename);
00545         if (strcmp(lambdas_filename,"-")==0)
00546           fprintf(stderr,"Enter initial weights, by tag order\n");
00547         for (itag=0; itag<ntags; itag++) {
00548            double sum_lambdas = 0.0;
00549            for (imodel=0; imodel<nmodels; imodel++) {
00550               if (fscanf(lambdas_fp,"%lf",&lambdas[imodel][itag])!=1)
00551                  quit(-1,"%s: problems reading from '%s'\n",
00552                       rname,lambdas_filename);
00553               sum_lambdas += lambdas[imodel][itag];
00554            }
00555            if (fabs(1.0-sum_lambdas) > 1e-8)
00556               quit(-1,"%s: weights for tag #%d sum to %g, not to 1\n",
00557                        rname, itag, sum_lambdas);
00558         }
00559         if (fscanf(lambdas_fp,"%f",&dummyf) != EOF)
00560            quit(-1,"%s: too many numbers found in '%s'\n", 
00561                 rname, lambdas_filename);
00562         rr_iclose(lambdas_fp);
00563      }
00564 
00565      /* TRAINING: iterate the EM step */
00566      new_pp = 10e98;
00567      iter_no = 1;
00568      while (n_train_items>0 &&
00569             (iter_no==1 || (new_pp/old_pp < stop_ratio))) {
00570         old_pp = new_pp;
00571 
00572         /* re-estimate lambdas before all but the first iteration */
00573         if (iter_no > 1) {
00574            for (itag=0; itag<ntags; itag++) {
00575               double total_nonfixed_lambdas = 0.0;
00576               double total_nonfixed_fractions = 0.0;
00577               if (n_train_in_tag[itag] <= 0) continue;
00578               for (imodel=0; imodel<nmodels; imodel++) {
00579                  if (!model_fixed_lambdas[imodel]) {
00580                     total_nonfixed_lambdas += lambdas[imodel][itag];
00581                     total_nonfixed_fractions += fractions[imodel][itag];
00582                  }
00583               }
00584               for (imodel=0; imodel<nmodels; imodel++) {
00585                  if (!model_fixed_lambdas[imodel]) {
00586                     lambdas[imodel][itag] =
00587                              (fractions[imodel][itag] / n_train_in_tag[itag]);
00588                     /* correct s.t. the lambdas sum to 
00589                        'total_nonfixed_lambdas' */
00590                     lambdas[imodel][itag] *=
00591                         (total_nonfixed_lambdas /
00592                               (total_nonfixed_fractions/n_train_in_tag[itag]));
00593                  }
00594               }
00595            }
00596         }
00597         if (first_part) {     /* Train on last part and test on first part */
00598            eval(sum_logprobs, fractions, tag_of, n_train_in_tag,
00599                 prob_components, lambdas, model_probs,
00600                 ntags, n_test_items, nitems-1, nmodels, captions, &new_pp,
00601                 iter_no, old_pp, verbosity, NULL);
00602            iter_no++;
00603         }
00604         else {  /* Train on first part and test on last part */
00605            eval(sum_logprobs, fractions, tag_of, n_train_in_tag,
00606                 prob_components, lambdas, model_probs,
00607                 ntags, 0, n_train_items-1, nmodels, captions, &new_pp,
00608                 iter_no, old_pp, verbosity, NULL);
00609            iter_no++;
00610         }
00611      } /* e.o. while loop */
00612      /* (we avoid reestimating lambda after the last iteration, so that
00613         the PP we reported be accurate) */
00614 
00615      /* If training was done, write the weights to stdout as well */
00616      if (n_train_items>0) {
00617         fprintf(stderr,"\n");
00618         for (itag=0; itag<ntags; itag++) {
00619            for (imodel=0; imodel<nmodels; imodel++)
00620               printf("%12.10f ",lambdas[imodel][itag]);
00621            printf("\n");
00622         }
00623         fflush(stdout);
00624      }
00625      if (write_lambdas == 1) {
00626         for (itag=0; itag<ntags; itag++) {
00627            for (imodel=0; imodel<nmodels; imodel++)
00628               fprintf(write_lambdas_fp, "%s   %12.10f\n",
00629                       model_filenames[imodel],lambdas[imodel][itag]);
00630            printf("\n");
00631         }
00632      }
00633 
00634      if (n_test_items>0) {
00635         fprintf(stderr,"\nNOW TESTING ...\n");
00636         if (first_part) { /* Train on last part and test on first part */
00637            eval(sum_logprobs, fractions, tag_of, n_test_in_tag,
00638                 prob_components, lambdas, model_probs,
00639                 ntags, 0, n_test_items-1, nmodels,captions, &test_pp,
00640                 1, 0.0, verbosity, probs_fp);
00641            fprintf(stderr,"\n");
00642 
00643         }
00644         else {                  /* Train on first part and test on last part */
00645            eval(sum_logprobs, fractions, tag_of, n_test_in_tag,
00646                 prob_components, lambdas, model_probs,
00647                 ntags, n_train_items, nitems-1, nmodels,captions, &test_pp,
00648                 1, 0.0, verbosity, probs_fp);
00649            fprintf(stderr,"\n");
00650         }
00651      }
00652 
00653 
00654      if (iter_num == 1) {
00655         first_part_pp = test_pp;
00656         first_test_items = n_test_items;
00657      }
00658      else if (iter_num == 2) {
00659         second_part_pp = test_pp;
00660         second_test_items = n_test_items;
00661      }
00662 
00663      /* Free all memory allocated in the loop */
00664      free (tag_of);
00665      for (tag=0; tag<ntags; tag++)
00666          free (captions[tag]);
00667      free (captions);
00668      free (n_train_in_tag);
00669      free (n_test_in_tag);
00670      free (sum_logprobs);
00671      for (imodel=0; imodel<nmodels; imodel++) {
00672         free (lambdas[imodel]);
00673         free (fractions[imodel]);
00674      }
00675 
00676      if (!cv) { break; }
00677 
00678    }
00679 
00680    if (write_lambdas == 1) {
00681       fclose(write_lambdas_fp);
00682    }
00683 
00684    /* In cross-validation mode, calc total PP */
00685    if (cv) {
00686       sum_logprob_1 = -log(first_part_pp) * first_test_items;
00687       sum_logprob_2 = -log(second_part_pp) * second_test_items;
00688       total_logprob = sum_logprob_1 + sum_logprob_2;
00689       total_pp = exp(-total_logprob/nitems);
00690 
00691       fprintf(stderr, "Two-way cross validation: \n");
00692       fprintf(stderr, "     First half PP = %f\n", second_part_pp);
00693       fprintf(stderr, "     Second half PP = %f\n", first_part_pp);
00694       fprintf(stderr, "     =====> Total PP = %f\n", total_pp);
00695 
00696    }
00697 
00698    if (n_test_items>0) exit((int) test_pp);
00699 }
00700 
00701