00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _STRUCTQRYDOCREP_HPP
00013 #define _STRUCTQRYDOCREP_HPP
00014 #include "DocumentRep.hpp"
00015 #include <cmath>
00016
00019 class StructQryDocRep : public DocumentRep {
00020 public:
00022 StructQryDocRep(int docID, double *idfValue, int docLength, int docCount,
00023 double docLengthAverage, double db) :
00024 DocumentRep(docID), did(docID), idf(idfValue), end(docLength),
00025 docEnd(docLength), size(docLength), start(0),
00026 dla(docLengthAverage), defaultBelief(db) {
00027 oneMinusDB = 1 - defaultBelief;
00028 denom = log(docCount + 1.0);
00029 numer = docCount + 0.5;
00030 }
00032 virtual ~StructQryDocRep() {}
00034 virtual double termWeight(int termID, DocInfo *info) { return 0;}
00036 virtual double termWeight(int termID, double dtf, int df) {
00037 if (idf)
00038 return beliefScore(dtf, idf[termID]);
00039 else
00040 return beliefScore(dtf, computeIdfScore(df));
00041 }
00043 virtual double scoreConstant() { return 0;}
00044
00046 void startPassageIteration(int sz) {
00047 size = sz;
00048 increment = size/2;
00049 start = 0;
00050 end = size < docEnd ? size : docEnd;
00051 }
00053 bool hasMorePassage() {
00054
00055 return(start < docEnd);
00056 }
00057
00059 void nextPassage() {
00060 if(start + increment < docEnd)
00061 start += increment;
00062 else
00063 start = docEnd;
00064 end = (start + size) < docEnd ? (start + size) : docEnd;
00065 }
00066
00069 double computeIdfScore(double df) {
00070 return log(numer/df)/denom;
00071 }
00072
00074 double beliefScore(double df, double idf) {
00075 return (defaultBelief + oneMinusDB
00076 * (df / (df + 0.5 + 1.5* ((end - start)/dla))) * idf);
00077 }
00078
00080 int did;
00082 int start;
00084 int end;
00085
00086 private:
00088 double *idf;
00090 int size;
00092 int increment;
00094 int docEnd;
00096 double dla;
00098 double numer, denom;
00100 double defaultBelief, oneMinusDB;
00101 };
00102 #endif