00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _STRUCTQRYDOCREP_HPP
00013 #define _STRUCTQRYDOCREP_HPP
00014 #include "DocumentRep.hpp"
00017 class StructQryDocRep : public DocumentRep {
00018 public:
00020 StructQryDocRep(int docID, double *idfValue, int docLength, int docCount,
00021 double docLengthAverage, double db) :
00022 DocumentRep(docID), did(docID), idf(idfValue), end(docLength),
00023 docEnd(docLength), size(docLength), start(0),
00024 dla(docLengthAverage), defaultBelief(db) {
00025 oneMinusDB = 1 - defaultBelief;
00026 denom = log(docCount + 1.0);
00027 numer = docCount + 0.5;
00028 }
00030 virtual ~StructQryDocRep() {}
00032 virtual double termWeight(int termID, DocInfo *info) { return 0;}
00034 virtual double termWeight(int termID, double dtf, int df) {
00035 if (idf)
00036 return beliefScore(dtf, idf[termID]);
00037 else
00038 return beliefScore(dtf, computeIdfScore(df));
00039 }
00041 virtual double scoreConstant() { return 0;}
00042
00044 void startPassageIteration(int sz) {
00045 size = sz;
00046 increment = size/2;
00047 start = 0;
00048 end = size < docEnd ? size : docEnd;
00049 }
00051 bool hasMorePassage() {
00052
00053 return(start < docEnd);
00054 }
00055
00057 void nextPassage() {
00058 if(start + increment < docEnd)
00059 start += increment;
00060 else
00061 start = docEnd;
00062 end = (start + size) < docEnd ? (start + size) : docEnd;
00063 }
00064
00067 double computeIdfScore(double df) {
00068 return log(numer/df)/denom;
00069 }
00070
00072 double beliefScore(double df, double idf) {
00073 return (defaultBelief + oneMinusDB
00074 * (df / (df + 0.5 + 1.5* ((end - start)/dla))) * idf);
00075 }
00076
00078 int did;
00080 int start;
00082 int end;
00083
00084 private:
00086 double *idf;
00088 int size;
00090 int increment;
00092 int docEnd;
00094 double dla;
00096 double numer, denom;
00098 double defaultBelief, oneMinusDB;
00099 };
00100 #endif