Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

SimpleKLRetMethod.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  *
00003  *  Original source copyright (c) 2001, Carnegie Mellon University.
00004  *  See copyright.cmu for details.
00005  *  Modifications copyright (c) 2002, University of Massachusetts.
00006  *  See copyright.umass for details.
00007  *
00008  *==========================================================================
00009 */
00010 
00011 
00012 #ifndef _SIMPLEKLRETMETHOD_HPP
00013 #define _SIMPLEKLRETMETHOD_HPP
00014 
00015 #include <cmath>
00016 #include "UnigramLM.hpp"
00017 #include "ScoreFunction.hpp"
00018 #include "SimpleKLDocModel.hpp"
00019 #include "TextQueryRep.hpp"
00020 #include "TextQueryRetMethod.hpp"
00021 #include "Counter.hpp"
00022 #include "DocUnigramCounter.hpp"
00023 
00025 
00026 class SimpleKLQueryModel : public ArrayQueryRep {
00027 public:
00029   SimpleKLQueryModel(TextQuery &qry, Index &dbIndex) : ArrayQueryRep(dbIndex.termCountUnique()+1, qry, dbIndex), qm(NULL), ind(dbIndex), colKLComputed(false) {
00030   }
00031 
00033   SimpleKLQueryModel(Index &dbIndex) : ArrayQueryRep(dbIndex.termCountUnique()+1), qm(NULL), ind(dbIndex), colKLComputed(false) {
00034     startIteration();
00035     while (hasMore()) {
00036       QueryTerm *qt = nextTerm();
00037       setCount(qt->id(), 0);
00038       delete qt;
00039     }
00040 
00041   }
00042 
00043 
00044   virtual ~SimpleKLQueryModel(){ if (qm) delete qm;}
00045 
00046 
00048 
00055   virtual void interpolateWith(UnigramLM &qModel, double origModCoeff, int howManyWord, double prSumThresh=1, double prThresh=0);
00056   virtual double scoreConstant() {
00057     return totalCount();
00058   }
00059   
00061   virtual void load(istream &is);
00062 
00064   virtual void save(ostream &os);
00065 
00067   virtual void clarity(ostream &os);
00069   virtual double clarity();
00070 
00071 
00073   double colDivergence() {
00074     if (colKLComputed) {
00075       return colKL;
00076     } else {
00077       colKLComputed = true;
00078       double d=0;
00079       startIteration();
00080       while (hasMore()) {
00081         QueryTerm *qt=nextTerm();
00082         double pr = qt->weight()/(double)totalCount();
00083         double colPr = (ind.termCount(qt->id())+1)/(double)(ind.termCount()+ind.termCountUnique()); // Laplace smoothing, same as in SimpleKLRetMethod
00084         d += pr*log(pr/colPr);
00085         delete qt;
00086         
00087       }
00088       colKL=d;
00089       return d;
00090     }
00091   }
00092 
00093 
00094 
00096   double KLDivergence(UnigramLM &refMod) {
00097     double d=0;
00098     startIteration();
00099     while (hasMore()) {
00100       QueryTerm *qt=nextTerm();
00101       double pr = qt->weight()/(double)totalCount();
00102       d += pr*log(pr/refMod.prob(qt->id()));
00103       delete qt;
00104     }
00105     return d;
00106   }
00107 
00108 
00109 
00110 protected:
00111 
00112   double colKL;
00113   bool colKLComputed;
00114 
00115   IndexedRealVector *qm;
00116   Index &ind;
00117 };
00118 
00119 
00120 
00122 
00137 class SimpleKLScoreFunc : public ScoreFunction {
00138 public:
00139 
00140   virtual double matchedTermWeight(QueryTerm *qTerm, TextQueryRep *qRep, DocInfo *info, DocumentRep *dRep) { 
00141     return (qTerm->weight()*log(dRep->termWeight(qTerm->id(),info)));
00142   }
00143 
00145   virtual double adjustedScore(double origScore, TextQueryRep *qRep, DocumentRep *dRep) {
00146     SimpleKLQueryModel *qm = (SimpleKLQueryModel *)qRep;
00147     // dynamic_cast<SimpleKLQueryModel *>qRep;
00148     SimpleKLDocModel *dm = (SimpleKLDocModel *)dRep;
00149       // dynamic_cast<SimpleKLDocModel *>dRep;
00150 
00152 
00154     // this is the original query likelihood scoring formula
00155     //  return (origScore+log(dm->scoreConstant())*qm->scoreConstant());
00156 
00158     // This is the normalized query-likelihood, i.e., cross-entropy
00159     // assert(qm->scoreConstant()!=0);
00160     // return (origScore/qm->scoreConstant() + log(dm->scoreConstant()));
00161 
00163     // This is the exact (negative) KL-divergence value, i.e., -D(Mq||Md)
00164     assert(qm->scoreConstant()!=0);
00165     return (origScore/qm->scoreConstant() + log(dm->scoreConstant())
00166             - qm->colDivergence());
00167 
00168 
00169   }
00170 };
00171 
00172 
00173 
00174 
00176 
00177 
00178 class SimpleKLRetMethod : public TextQueryRetMethod {
00179 public:
00180 
00182   SimpleKLRetMethod(Index &dbIndex, const char *supportFileName, ScoreAccumulator &accumulator);
00183   virtual ~SimpleKLRetMethod();
00184   
00185   virtual TextQueryRep *computeTextQueryRep(TextQuery &qry) {
00186     return (new SimpleKLQueryModel(qry, ind));
00187   }
00188   
00189   virtual DocumentRep *computeDocRep(int docID);
00190   
00191 
00192   virtual ScoreFunction *scoreFunc() {
00193     return (scFunc);
00194   }
00195   
00196 
00197   virtual void updateTextQuery(TextQueryRep &origRep, DocIDSet &relDocs);
00198 
00199   void setDocSmoothParam(SimpleKLParameter::DocSmoothParam &docSmthParam);
00200   void setQueryModelParam(SimpleKLParameter::QueryModelParam &queryModParam);
00201 
00202 protected:
00203 
00205   double *mcNorm; 
00206   
00208   double *docProbMass; 
00210   int *uniqueTermCount; 
00212   UnigramLM *collectLM; 
00214   DocUnigramCounter *collectLMCounter; 
00216   SimpleKLScoreFunc *scFunc; 
00217 
00219 
00220 
00221   void computeMixtureFBModel(SimpleKLQueryModel &origRep, DocIDSet & relDocs);
00223   void computeDivMinFBModel(SimpleKLQueryModel &origRep, DocIDSet &relDocs);
00225   void computeMarkovChainFBModel(SimpleKLQueryModel &origRep, DocIDSet &relDocs) ;
00227   void computeRM1FBModel(SimpleKLQueryModel &origRep, DocIDSet & relDocs);
00229   void computeRM2FBModel(SimpleKLQueryModel &origRep, DocIDSet & relDocs);
00231 
00232   SimpleKLParameter::DocSmoothParam docParam;
00233   SimpleKLParameter::QueryModelParam qryParam;
00234 
00235 };
00236 
00237 
00238 inline  void SimpleKLRetMethod::setDocSmoothParam(SimpleKLParameter::DocSmoothParam &docSmthParam)
00239 {
00240   docParam = docSmthParam;
00241 }
00242 
00243 inline  void SimpleKLRetMethod::setQueryModelParam(SimpleKLParameter::QueryModelParam &queryModParam)
00244 {
00245   qryParam = queryModParam;
00246 }
00247 
00248 #endif /* _SIMPLEKLRETMETHOD_HPP */
00249 
00250 
00251 
00252 
00253 
00254 
00255 
00256 

Generated on Tue Nov 25 11:26:46 2003 for Lemur Toolkit by doxygen 1.2.18