00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _STRUCTQRYDOCREP_HPP
00013 #define _STRUCTQRYDOCREP_HPP
00014
00015 #include "DocumentRep.hpp"
00016 #include "InvFPTermList.hpp"
00017 #include "InvFPIndex.hpp"
00018 #include "Counter.hpp"
00019
00022 class DocTermsCounter : public ArrayCounter <int> {
00023 public:
00025 DocTermsCounter(int docID, InvFPIndex &indx) :
00026 ind(indx), ArrayCounter<int>(indx.termCountUnique()+1)
00027 {
00028 TermInfoList *tList = indx.termInfoList(docID);
00029 TermInfo *info;
00030 tList->startIteration();
00031 while (tList->hasMore()) {
00032 info = tList->nextEntry();
00033 incCount(info->id(), info->count());
00034 }
00035 delete tList;
00036 }
00038 DocTermsCounter(int docID, InvFPIndex &indx, int start, int end) :
00039 ind(indx), ArrayCounter<int>(indx.termCountUnique()+1)
00040 {
00041 TermInfoList *dTerms=indx.termInfoListSeq(docID);
00042 InvFPTerm *term;
00043 dTerms->startIteration();
00044 while (dTerms->hasMore()) {
00045 term = (InvFPTerm *) dTerms->nextEntry();
00046 if(term->position()>start) {
00047 if(term->position() > end)
00048 break;
00049 incCount(term->id(), 1);
00050 }
00051 }
00052 delete dTerms;
00053 }
00054
00055 virtual ~DocTermsCounter() {}
00056
00057 protected:
00058 InvFPIndex &ind;
00059 };
00060
00063 class StructQryDocRep : public DocumentRep {
00064 public:
00066 StructQryDocRep(int docID, InvFPIndex &dbIndex, double *idfValue) :
00067 DocumentRep(docID), did(docID), ind(dbIndex), idf(idfValue) {
00068 start=0;
00069 end=dbIndex.docLength(docID);
00070 size=end;
00071 docEnd=end;
00072 docTermFrq = new DocTermsCounter(docID, dbIndex);
00073 }
00074
00077 StructQryDocRep(int docID, InvFPIndex &dbIndex, double *idfValue, int bg, int nd) :
00078 DocumentRep(docID), did(docID), ind(dbIndex), idf(idfValue), start(bg), end(nd) {
00079 size=nd-bg+1;
00080 docEnd=dbIndex.docLength(docID);
00081 maxScore=0;
00082 offset=0;
00083 docTermFrq = new DocTermsCounter(docID, dbIndex, bg, nd);
00084 }
00085
00086 virtual ~StructQryDocRep() { delete docTermFrq; }
00088 virtual double termWeight(int termID, DocInfo *info);
00090 virtual double scoreConstant() { return 0;}
00091
00092 virtual void startPassageIteration();
00093 virtual bool hasMorePassage();
00095 virtual void nextPassage();
00098 virtual double computeIdfScore(double df);
00100 virtual double beliefScore(double df, double idf);
00102 virtual double passageLength() {return end-start;};
00104 int did;
00105 InvFPIndex & ind;
00106
00107 double *idf;
00109 int start;
00111 int end;
00113 int size;
00115 int increment;
00117 int docEnd;
00119 int offset;
00121 double maxScore;
00122 DocTermsCounter *docTermFrq;
00123 };
00124 #endif