00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _SIMPLEKLRETMETHOD_HPP
00014 #define _SIMPLEKLRETMETHOD_HPP
00015
00016 #include <cmath>
00017 #include "UnigramLM.hpp"
00018 #include "ScoreFunction.hpp"
00019 #include "SimpleKLDocModel.hpp"
00020 #include "TextQueryRep.hpp"
00021 #include "TextQueryRetMethod.hpp"
00022 #include "Counter.hpp"
00023 #include "DocUnigramCounter.hpp"
00024
00026
00027 class SimpleKLQueryModel : public ArrayQueryRep {
00028 public:
00030 SimpleKLQueryModel(TextQuery &qry, Index &dbIndex) : ArrayQueryRep(dbIndex.termCountUnique()+1, qry, dbIndex), qm(NULL), ind(dbIndex) {
00031 }
00032
00034 SimpleKLQueryModel(Index &dbIndex) : ArrayQueryRep(dbIndex.termCountUnique()+1), qm(NULL), ind(dbIndex) {
00035 startIteration();
00036 while (hasMore()) {
00037 QueryTerm *qt = nextTerm();
00038 setCount(qt->id(), 0);
00039 delete qt;
00040 }
00041
00042 }
00043
00044
00045 virtual ~SimpleKLQueryModel(){ if (qm) delete qm;}
00046
00047
00049
00056 virtual void interpolateWith(UnigramLM &qModel, double origModCoeff, int howManyWord, double prSumThresh=1, double prThresh=0);
00057 virtual double scoreConstant() {
00058 return totalCount();
00059 }
00060
00062 virtual void load(istream &is);
00063
00065 virtual void save(ostream &os);
00066
00067 private:
00068 IndexedRealVector *qm;
00069 Index &ind;
00070 };
00071
00072
00073
00075
00090 class SimpleKLScoreFunc : public ScoreFunction {
00091 public:
00092
00093 virtual double matchedTermWeight(QueryTerm *qTerm, TextQueryRep *qRep, DocInfo *info, DocumentRep *dRep) {
00094 return (qTerm->weight()*log(dRep->termWeight(qTerm->id(),info)));
00095 }
00096
00098 virtual double adjustedScore(double origScore, TextQueryRep *qRep, DocumentRep *dRep) {
00099 SimpleKLQueryModel *qm = (SimpleKLQueryModel *)qRep;
00100
00101 SimpleKLDocModel *dm = (SimpleKLDocModel *)dRep;
00102
00103 return (origScore+log(dm->scoreConstant())*qm->scoreConstant());
00104 }
00105 };
00106
00107
00109
00110
00111 class SimpleKLRetMethod : public TextQueryRetMethod {
00112 public:
00113
00115 SimpleKLRetMethod(Index &dbIndex, const char *supportFileName, ScoreAccumulator &accumulator);
00116 virtual ~SimpleKLRetMethod();
00117
00118 virtual TextQueryRep *computeTextQueryRep(TextQuery &qry) {
00119 return (new SimpleKLQueryModel(qry, ind));
00120 }
00121
00122 virtual DocumentRep *computeDocRep(int docID);
00123
00124
00125 virtual ScoreFunction *scoreFunc() {
00126 return (scFunc);
00127 }
00128
00129
00130 virtual void updateTextQuery(TextQueryRep &origRep, DocIDSet &relDocs);
00131
00132 void setDocSmoothParam(SimpleKLParameter::DocSmoothParam &docSmthParam);
00133 void setQueryModelParam(SimpleKLParameter::QueryModelParam &queryModParam);
00134
00135 protected:
00136
00138 double *mcNorm;
00139
00141 double *docProbMass;
00143 int *uniqueTermCount;
00145 UnigramLM *collectLM;
00147 DocUnigramCounter *collectLMCounter;
00149 SimpleKLScoreFunc *scFunc;
00150
00152
00153
00154 void computeMixtureFBModel(SimpleKLQueryModel &origRep, DocIDSet & relDocs);
00156 void computeDivMinFBModel(SimpleKLQueryModel &origRep, DocIDSet &relDocs);
00158 void computeMarkovChainFBModel(SimpleKLQueryModel &origRep, DocIDSet &relDocs) ;
00160
00161 SimpleKLParameter::DocSmoothParam docParam;
00162 SimpleKLParameter::QueryModelParam qryParam;
00163
00164 };
00165
00166
00167 inline void SimpleKLRetMethod::setDocSmoothParam(SimpleKLParameter::DocSmoothParam &docSmthParam)
00168 {
00169 docParam = docSmthParam;
00170 }
00171
00172 inline void SimpleKLRetMethod::setQueryModelParam(SimpleKLParameter::QueryModelParam &queryModParam)
00173 {
00174 qryParam = queryModParam;
00175 }
00176
00177 #endif
00178
00179
00180
00181
00182
00183
00184
00185