00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _CORIRETMETHOD_HPP
00014 #define _CORIRETMETHOD_HPP
00015
00016
00017 #include "UnigramLM.hpp"
00018 #include "SimpleKLDocModel.hpp"
00019 #include "TextQueryRetMethod.hpp"
00020 #include "Param.hpp"
00021 #include <math.h>
00022
00023
00024
00025 class CORIQueryRep : public ArrayQueryRep {
00026 public:
00027 CORIQueryRep(TextQuery & qry, Index & dbIndex);
00028 virtual ~CORIQueryRep() {}
00029
00030 protected:
00031 Index & ind;
00032 };
00033
00034 class CORIDocRep : public DocumentRep {
00035 public:
00036 CORIDocRep(int docID, Index & dbIndex, double * cwRatio,
00037 double TFfact = 150, double TFbase = 50,
00038 SimpleKLDocModel * smoother = NULL,
00039 UnigramLM * collectLM = NULL);
00040 virtual ~CORIDocRep() { }
00041 virtual double termWeight(int termID, DocInfo * info);
00042
00043 virtual double scoreConstant() { return 0; }
00044
00045 private:
00046
00047 Index & ind;
00048
00049 int * cwCounts;
00050
00051 SimpleKLDocModel * dfSmooth;
00052 UnigramLM * collLM;
00053
00054 double c05;
00055 double idiv;
00056 double tnorm;
00057 };
00058
00059
00060
00061 class CORIRetMethod : public TextQueryRetMethod {
00062 public:
00063
00064 CORIRetMethod(Index & dbIndex, ScoreAccumulator &accumulator,
00065 String cwName,
00066 SimpleKLDocModel ** smoothers = NULL,
00067 UnigramLM * collectLM = NULL);
00068 ~CORIRetMethod() { delete scFunc; delete [] cwRatio; }
00069
00070 virtual TextQueryRep * computeTextQueryRep(TextQuery & qry) {
00071 return new CORIQueryRep(qry, ind);
00072 }
00073 virtual DocumentRep * computeDocRep(int docID) {
00074 if (dfSmooth != NULL) {
00075 return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline, dfSmooth[docID], collLM);
00076 }
00077 return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline);
00078 }
00079 virtual ScoreFunction * scoreFunc() {
00080 return scFunc;
00081 }
00082
00083 virtual void scoreCollection(QueryRep &qry, IndexedRealVector &results);
00084
00085 virtual void updateTextQuery(TextQueryRep &qryRep, DocIDSet &relDocs) { }
00086
00087 void setTFFactor(double tf) { tffactor = tf; }
00088 void setTFBaseline(double tf) { tfbaseline = tf; }
00089
00090 protected:
00091
00092 ScoreFunction * scFunc;
00093 SimpleKLDocModel ** dfSmooth;
00094 UnigramLM * collLM;
00095
00096 double * cwRatio;
00097 double tffactor;
00098 double tfbaseline;
00099
00100 };
00101
00102 class CORIScoreFunc : public ScoreFunction {
00103 public:
00104 CORIScoreFunc(Index & index) : ind(index) {
00105 double dc = ind.docCount();
00106 c05 = dc + 0.5;
00107 idiv = log(dc + 1) / 0.6;
00108
00109 }
00110
00111 virtual double adjustedScore(double origScore, TextQueryRep * qRep,
00112 DocumentRep * dRep) {
00113 if (qr != qRep) {
00114 qr = qRep;
00115
00116 qRep->startIteration();
00117 rmax = 0;
00118 double qw = 0;
00119 while (qRep->hasMore()) {
00120 int qtid = qRep->nextTerm()->id();
00121
00122 rmax += (log(c05 / ind.docCount(qtid)) / idiv);
00123 }
00124 }
00125 return (origScore / rmax);
00126 }
00127
00128 private:
00129 Index & ind;
00130
00131 TextQueryRep * qr;
00132 double rmax;
00133
00134 double c05;
00135 double idiv;
00136 };
00137
00138
00139 #endif