00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 #ifndef _TFIDFRETMETHOD_HPP
00015 #define _TFIDFRETMETHOD_HPP
00016 
00017 #include "TextQueryRetMethod.hpp"
00018 
/// Parameter types and default values used by the TFIDF retrieval method.
namespace TFIDFParameter {

  /// Selector for the term-frequency weighting scheme.
  /// NOTE(review): the exact formula behind each enumerator lives in the
  /// out-of-line weighting functions, not in this header.
  enum TFMethod {
    RAWTF = 0,
    LOGTF = 1,
    BM25  = 2
  };

  /// TF weighting configuration: the scheme plus two BM25 constants.
  struct WeightParam {
    TFMethod tf;    ///< which TF weighting scheme to apply
    double bm25K1;  ///< presumably the k1 argument of TFIDFRetMethod::BM25TF -- confirm in the .cpp
    double bm25B;   ///< presumably the b argument of TFIDFRetMethod::BM25TF -- confirm in the .cpp
  };

  /// Feedback configuration consumed by TFIDFRetMethod::setFeedbackParam.
  struct FeedbackParam {
    int howManyTerms;  ///< NOTE(review): looks like a cap on the number of feedback terms -- confirm
    double posCoeff;   ///< NOTE(review): looks like the weight given to feedback terms -- confirm
  };

  // Default parameter values.
  // These are `static` (internal linkage) and non-const, so every translation
  // unit that includes this header gets its own private, writable copy.
  static double defaultDocK1 = 1;
  static double defaultDocB = 0.5;
  static double defaultQryK1 = 1;
  static double defaultQryB = 0;
  static int defaultHowManyTerms = 50;
  static double defaultPosCoeff = 0.5;
}
00039 
00041 class TFIDFQueryRep : public ArrayQueryRep {
00042 public:
00043   TFIDFQueryRep(TextQuery &qry, Index &dbIndex, double *idfValue, TFIDFParameter::WeightParam ¶m);
00044 
00045   virtual ~TFIDFQueryRep() {}
00046 
00047   double queryTFWeight(const double rawTF);
00048 protected:
00049   TFIDFParameter::WeightParam &prm;
00050   double *idf;
00051   Index &ind;
00052 };
00053 
00055 class TFIDFDocRep : public DocumentRep {
00056 public:
00057   TFIDFDocRep(int docID, Index &dbIndex, double *idfValue,
00058               TFIDFParameter::WeightParam ¶m) : 
00059     DocumentRep(docID), ind(dbIndex), prm(param), idf(idfValue) {
00060   }
00061   virtual ~TFIDFDocRep() { }
00062   virtual double termWeight(int termID, DocInfo *info) { 
00063     return (idf[termID]*docTFWeight(info->termCount())); 
00064   }
00065   virtual double scoreConstant() { return 0;}
00066 
00067   double docTFWeight(const double rawTF);
00068 private:
00069 
00070   Index & ind;
00071   TFIDFParameter::WeightParam &prm;
00072   double *idf;
00073 };
00074 
00075 
00077 
00078 class TFIDFRetMethod : public TextQueryRetMethod {
00079 public:
00080 
00081   TFIDFRetMethod(Index &dbIndex, ScoreAccumulator &accumulator);
00082   virtual ~TFIDFRetMethod() {delete [] idfV; delete scFunc;}
00083 
00084   virtual TextQueryRep *computeTextQueryRep(TextQuery &qry) {
00085     return (new TFIDFQueryRep(qry, ind, idfV, qryTFParam));
00086   }
00087 
00088   virtual DocumentRep *computeDocRep(int docID) { 
00089     return (new TFIDFDocRep(docID, ind, idfV, docTFParam));
00090   }
00091   virtual ScoreFunction *scoreFunc() {
00092     return (scFunc);
00093   }
00094 
00095 
00096   virtual void updateTextQuery(TextQueryRep &qryRep, DocIDSet &relDocs);
00097 
00098   void setDocTFParam(TFIDFParameter::WeightParam &docTFWeightParam);
00099 
00100   void setQueryTFParam(TFIDFParameter::WeightParam &queryTFWeightParam);
00101 
00102   void setFeedbackParam(TFIDFParameter::FeedbackParam &feedbackParam);
00103 
00104   static double BM25TF(const double rawTF, const double k1, const double b, 
00105                        const double docLen, const double avgDocLen);
00106 
00107 protected:
00108   double *idfV;
00109   ScoreFunction *scFunc;
00110   
00112 
00113 
00114   TFIDFParameter::WeightParam qryTFParam;
00115   TFIDFParameter::WeightParam docTFParam;
00116   TFIDFParameter::FeedbackParam fbParam;
00117 
00119 
00120 };
00121 
00122 
00123 inline void TFIDFRetMethod::setDocTFParam(TFIDFParameter::WeightParam &docTFWeightParam)
00124 {
00125   docTFParam = docTFWeightParam;
00126 }
00127 
00128 
00129 
00130 inline void TFIDFRetMethod::setQueryTFParam(TFIDFParameter::WeightParam &queryTFWeightParam)
00131 {
00132   qryTFParam = queryTFWeightParam;
00133 }
00134 
00135 
00136 inline void TFIDFRetMethod::setFeedbackParam(TFIDFParameter::FeedbackParam &feedbackParam)
00137 {
00138   fbParam = feedbackParam;
00139 }
00140 
00141 
00142 
00143 inline double TFIDFRetMethod ::BM25TF(const double rawTF, const double k1, const double b, 
00144                      const double docLen, const  double avgDocLen)
00145 {
00146   double x= rawTF+k1*(1-b+b*docLen/avgDocLen);
00147   return (k1*rawTF/x);
00148 }
00149 
00150 
00151 
00152 #endif 
00153 
00154 
00155 
00156 
00157 
00158 
00159