00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _SIMPLEKLRETMETHOD_HPP
00013 #define _SIMPLEKLRETMETHOD_HPP
00014
00015 #include <cmath>
00016 #include "UnigramLM.hpp"
00017 #include "ScoreFunction.hpp"
00018 #include "SimpleKLDocModel.hpp"
00019 #include "TextQueryRep.hpp"
00020 #include "TextQueryRetMethod.hpp"
00021 #include "Counter.hpp"
00022 #include "DocUnigramCounter.hpp"
00023
00025
00026 class SimpleKLQueryModel : public ArrayQueryRep {
00027 public:
00029 SimpleKLQueryModel(TextQuery &qry, Index &dbIndex) : ArrayQueryRep(dbIndex.termCountUnique()+1, qry, dbIndex), qm(NULL), ind(dbIndex), colKLComputed(false) {
00030 }
00031
00033 SimpleKLQueryModel(Index &dbIndex) : ArrayQueryRep(dbIndex.termCountUnique()+1), qm(NULL), ind(dbIndex), colKLComputed(false) {
00034 startIteration();
00035 while (hasMore()) {
00036 QueryTerm *qt = nextTerm();
00037 setCount(qt->id(), 0);
00038 delete qt;
00039 }
00040
00041 }
00042
00043
00044 virtual ~SimpleKLQueryModel(){ if (qm) delete qm;}
00045
00046
00048
00055 virtual void interpolateWith(UnigramLM &qModel, double origModCoeff, int howManyWord, double prSumThresh=1, double prThresh=0);
00056 virtual double scoreConstant() {
00057 return totalCount();
00058 }
00059
00061 virtual void load(istream &is);
00062
00064 virtual void save(ostream &os);
00065
00067 virtual void clarity(ostream &os);
00069 virtual double clarity();
00070
00071
00073 double colDivergence() {
00074 if (colKLComputed) {
00075 return colKL;
00076 } else {
00077 colKLComputed = true;
00078 double d=0;
00079 startIteration();
00080 while (hasMore()) {
00081 QueryTerm *qt=nextTerm();
00082 double pr = qt->weight()/(double)totalCount();
00083 double colPr = (ind.termCount(qt->id())+1)/(double)(ind.termCount()+ind.termCountUnique());
00084 d += pr*log(pr/colPr);
00085 delete qt;
00086
00087 }
00088 colKL=d;
00089 return d;
00090 }
00091 }
00092
00093
00094
00096 double KLDivergence(UnigramLM &refMod) {
00097 double d=0;
00098 startIteration();
00099 while (hasMore()) {
00100 QueryTerm *qt=nextTerm();
00101 double pr = qt->weight()/(double)totalCount();
00102 d += pr*log(pr/refMod.prob(qt->id()));
00103 delete qt;
00104 }
00105 return d;
00106 }
00107
00108
00109
00110 protected:
00111
00112 double colKL;
00113 bool colKLComputed;
00114
00115 IndexedRealVector *qm;
00116 Index &ind;
00117 };
00118
00119
00120
00122
00137 class SimpleKLScoreFunc : public ScoreFunction {
00138 public:
00139
00140 virtual double matchedTermWeight(QueryTerm *qTerm, TextQueryRep *qRep, DocInfo *info, DocumentRep *dRep) {
00141 return (qTerm->weight()*log(dRep->termWeight(qTerm->id(),info)));
00142 }
00143
00145 virtual double adjustedScore(double origScore, TextQueryRep *qRep, DocumentRep *dRep) {
00146 SimpleKLQueryModel *qm = (SimpleKLQueryModel *)qRep;
00147
00148 SimpleKLDocModel *dm = (SimpleKLDocModel *)dRep;
00149
00150
00152
00154
00155
00156
00158
00159
00160
00161
00163
00164 assert(qm->scoreConstant()!=0);
00165 return (origScore/qm->scoreConstant() + log(dm->scoreConstant())
00166 - qm->colDivergence());
00167
00168
00169 }
00170 };
00171
00172
00173
00174
00176
00177
00178 class SimpleKLRetMethod : public TextQueryRetMethod {
00179 public:
00180
00182 SimpleKLRetMethod(Index &dbIndex, const char *supportFileName, ScoreAccumulator &accumulator);
00183 virtual ~SimpleKLRetMethod();
00184
00185 virtual TextQueryRep *computeTextQueryRep(TextQuery &qry) {
00186 return (new SimpleKLQueryModel(qry, ind));
00187 }
00188
00189 virtual DocumentRep *computeDocRep(int docID);
00190
00191
00192 virtual ScoreFunction *scoreFunc() {
00193 return (scFunc);
00194 }
00195
00196
00197 virtual void updateTextQuery(TextQueryRep &origRep, DocIDSet &relDocs);
00198
00199 void setDocSmoothParam(SimpleKLParameter::DocSmoothParam &docSmthParam);
00200 void setQueryModelParam(SimpleKLParameter::QueryModelParam &queryModParam);
00201
00202 protected:
00203
00205 double *mcNorm;
00206
00208 double *docProbMass;
00210 int *uniqueTermCount;
00212 UnigramLM *collectLM;
00214 DocUnigramCounter *collectLMCounter;
00216 SimpleKLScoreFunc *scFunc;
00217
00219
00220
00221 void computeMixtureFBModel(SimpleKLQueryModel &origRep, DocIDSet & relDocs);
00223 void computeDivMinFBModel(SimpleKLQueryModel &origRep, DocIDSet &relDocs);
00225 void computeMarkovChainFBModel(SimpleKLQueryModel &origRep, DocIDSet &relDocs) ;
00227 void computeRM1FBModel(SimpleKLQueryModel &origRep, DocIDSet & relDocs);
00229 void computeRM2FBModel(SimpleKLQueryModel &origRep, DocIDSet & relDocs);
00231
00232 SimpleKLParameter::DocSmoothParam docParam;
00233 SimpleKLParameter::QueryModelParam qryParam;
00234
00235 };
00236
00237
00238 inline void SimpleKLRetMethod::setDocSmoothParam(SimpleKLParameter::DocSmoothParam &docSmthParam)
00239 {
00240 docParam = docSmthParam;
00241 }
00242
00243 inline void SimpleKLRetMethod::setQueryModelParam(SimpleKLParameter::QueryModelParam &queryModParam)
00244 {
00245 qryParam = queryModParam;
00246 }
00247
00248 #endif
00249
00250
00251
00252
00253
00254
00255
00256