00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef _STRUCTQUERYREP_HPP
00016 #define _STRUCTQUERYREP_HPP
00017
00018 #include "RetrievalMethod.hpp"
00019 #include "Document.hpp"
00020 #include "DocumentRep.hpp"
00021 #include "StructQryDocRep.hpp"
00022 #include "IndexedReal.hpp"
00023 #include "Index.hpp"
00024 #include "Counter.hpp"
00025 #include "WeightedIDSet.hpp"
00026 #include "FreqVector.hpp"
00027 #include "ScoreAccumulator.hpp"
00028
00029
00030
00031
00032
00034 class StructQuery : public Query {
00035 public:
00036 StructQuery(Document &doc) : d(doc) {}
00037 virtual ~StructQuery() {}
00038
00039 virtual char *id() { return d.getID();}
00040 virtual void startTermIteration() { d.startTermIteration();}
00041 virtual bool hasMore() { return d.hasMore();}
00042 virtual TokenTerm *nextTerm() { return d.nextTerm();}
00043 protected:
00044 Document &d;
00045 };
00046
00047
00048
00049
00050
00051
00052
00053
00054 class QueryNode;
00055
00059 class QnList {
00060 public:
00061 QnList() {}
00062 virtual ~QnList();
00064 virtual void startIteration() { i=0; }
00066 virtual bool hasMore() { return (i < qnList.size()); }
00068 virtual QueryNode * nextNode(){QueryNode *qn=qnList[i++]; return qn; }
00070 virtual QueryNode * getNode(int i){QueryNode *qn=qnList[i]; return qn; }
00072 virtual int size(){return qnList.size();}
00074 virtual void push_back(QueryNode *qn){qnList.push_back(qn);}
00075
00076 protected:
00078 int i;
00080 vector<QueryNode *> qnList;
00081 };
00082
00086 class ProxInfo {
00087 public:
00088 ProxInfo() : id(0),count(0),size(0),nextPos(0),posList(NULL),next(NULL) {}
00089 ~ProxInfo() {
00090 delete [](posList);
00091 }
00093 int id;
00095 int count;
00097 int size;
00100 int nextPos;
00102 int *posList;
00104 ProxInfo *next;
00105 };
00106
00111 class QueryNode {
00112 public:
00114 QueryNode(int id, double weight) :
00115 w(weight),it(id),ch(NULL),entries(0),nextDoc(0),dCnt(0),
00116 proxList(NULL),dList(NULL) {}
00118 QueryNode() : w(1),it(0),ch(NULL),entries(0),nextDoc(0),dCnt(0),
00119 proxList(NULL),dList(NULL){}
00121 QueryNode(double weight) : w(weight),it(0),ch(NULL),entries(0),
00122 nextDoc(0),dCnt(0),proxList(NULL),dList(NULL) {}
00124 ~QueryNode() {
00125 delete[](dList);
00126 while(proxList) {
00127 ProxInfo *prx=proxList->next;
00128 delete (proxList);
00129 proxList = prx;
00130 }
00131 delete(ch);
00132 }
00134 virtual QnList *children() { return ch;}
00136 virtual int id() { return it;}
00138 virtual double eval(DocumentRep *dRep) = 0;
00140 virtual double weight() { return w; }
00141
00142 virtual void setWeight(double wt) { w=wt; }
00146 virtual double proximityScore(DocumentRep *dR, double defaultScore) {
00147 StructQryDocRep *dRep = (StructQryDocRep *)dR;
00148 int tf=0;
00149 double score;
00150 ProxInfo *prx;
00151 InvFPIndex &ind = dRep->ind;
00152 if(dRep->did < nextDoc) {
00153 return defaultScore;
00154 }
00155 while(dRep->did > nextDoc) {
00156 prx=proxList->next;
00157 delete (proxList);
00158 proxList = prx;
00159 if(!proxList)
00160 nextDoc = ind.docCount();
00161 else
00162 nextDoc = proxList->id;
00163 }
00164 if(dRep->did == nextDoc && proxList) {
00165 double idf = dRep->computeIdfScore(dCnt);
00166 if(ind.docLength(dRep->did) == dRep->size) {
00167
00168 tf = proxList->count;
00169 } else {
00170
00171 for(int k=0; k<proxList->count; k++)
00172 if(proxList->posList[k]> dRep->start &&
00173 proxList->posList[k]<=dRep->end)
00174 tf++;
00175 }
00176 score = dRep->beliefScore(tf, idf);
00177 prx = proxList->next;
00178 delete (proxList);
00179 proxList = prx;
00180 if(!proxList)
00181 nextDoc = ind.docCount();
00182 else
00183 nextDoc = proxList->id;
00184 } else
00185 score = defaultScore;
00186 return score;
00187 }
00189 int dCnt;
00191 bool *dList;
00193 QnList *ch;
00195 ProxInfo * proxList;
00197 int entries;
00199 int nextDoc;
00200
00201 protected:
00203 int it;
00205 double w;
00206 };
00207
00212 class StructQueryRep : public QueryRep {
00213 public:
00215 StructQueryRep(StructQuery &qry, InvFPIndex &dbIndex, double dbelief=0);
00216
00217 virtual ~StructQueryRep() {
00218 delete(TopNode);
00219 }
00221 virtual QnList * getChildren(StructQuery &qry);
00223 virtual QnList * getWeightedChildren(StructQuery &qry);
00225 virtual QnList * getProxChildren(StructQuery &qry);
00227 virtual QueryNode * getQryNode(StructQuery &qry, TokenTerm *tok, double w);
00229 virtual QueryNode * getProxQryNode(StructQuery &qry, TokenTerm *tok);
00231 virtual QueryNode * topnode() {return TopNode;}
00233 virtual void setTopnode(QueryNode *qn) {TopNode=qn;}
00235 virtual void copyDocList(DocInfoList *dl, QueryNode *qn);
00237 virtual void unionDocList(QueryNode *qn);
00239 virtual void intersectDocList(QueryNode *qn);
00241 virtual void termOffsetList(QueryNode *qn, int did);
00243 virtual bool foundOrderedProx(int bpos, int wsize, QnList *cl, int ith);
00245 virtual void orderedProxList(QueryNode *qn);
00247 virtual void unorderedProxList(QueryNode *qn);
00249 virtual bool findUnorderedWin(QueryNode *cqn, QnList *cl, int winSize);
00251 virtual void synonymProxList(QueryNode *qn);
00252
00253 protected:
00255 QueryNode *TopNode;
00257 QueryNode *qStack[100];
00259 int TopqStack;
00261 double dw;
00263 InvFPIndex &ind;
00264 };
00265
00268 class SumQnode : public QueryNode {
00269 public:
00270 SumQnode(double wt) {}
00271 virtual ~SumQnode() {}
00274 virtual double eval(DocumentRep *dRep) {
00275 double count=0;
00276 double sum=0;
00277 QueryNode *qn;
00278 ch->startIteration();
00279 while(ch->hasMore()) {
00280 qn =ch->nextNode();
00281 count++;
00282 sum +=(double) qn->eval(dRep);
00283 }
00284 return sum/count;
00285 }
00286 };
00287
00290 class WsumQnode : public QueryNode {
00291 public:
00292 WsumQnode(double w) {}
00293 virtual ~WsumQnode() {}
00296 virtual double eval(DocumentRep *dRep) {
00297 double sum=0;
00298 double total_wt=0;
00299 QueryNode *qn;
00300 double wt;
00301 ch->startIteration();
00302 while(ch->hasMore()) {
00303 qn =ch->nextNode();
00304 wt = qn->weight();
00305 total_wt +=wt;
00306 sum += wt * (double) qn->eval(dRep);
00307 }
00308 if(total_wt>0)
00309 sum /= total_wt;
00310
00311 return sum;
00312 }
00313 };
00314
00315
00318 class AndQnode : public QueryNode {
00319 public:
00320 AndQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00321 virtual ~AndQnode() {}
00325 virtual double eval(DocumentRep *dRep) {
00326 double prod=1;
00327 QueryNode *qn;
00328 double wt;
00329 ch->startIteration();
00330 while(ch->hasMore()) {
00331 qn =ch->nextNode();
00332 wt = (double) qn->eval(dRep);
00333 if(wt > dw)
00334 prod *= wt;
00335 else
00336 prod *= dw;
00337 }
00338 return prod;
00339 }
00340 protected:
00342 double dw;
00343 };
00344
00348 class OrQnode : public QueryNode {
00349 public:
00350 OrQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00351 virtual ~OrQnode() {}
00354 virtual double eval(DocumentRep *dRep) {
00355 double prod=1.0;
00356 QueryNode *qn;
00357 double wt;
00358 ch->startIteration();
00359 while(ch->hasMore()) {
00360 qn =ch->nextNode();
00361 wt = (double) qn->eval(dRep);
00362 if(wt > dw)
00363 prod *= (1.0-wt);
00364 }
00365 return (1.0-prod);
00366 }
00367 protected:
00369 double dw;
00370 };
00371
00374 class NotQnode : public QueryNode {
00375 public:
00376 NotQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00377 virtual ~NotQnode() {}
00379 virtual double eval(DocumentRep *dRep) {
00380
00381 QueryNode *qn;
00382 ch->startIteration();
00383 qn =ch->nextNode();
00384 return (1.0-(double) qn->eval(dRep));
00385 }
00386 protected:
00388 double dw;
00389 };
00390
00393 class MaxQnode : public QueryNode {
00394 public:
00395 MaxQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00396 virtual ~MaxQnode() {}
00399 virtual double eval(DocumentRep *dRep) {
00400 double mx=dw;
00401 QueryNode *qn;
00402 double wt;
00403 ch->startIteration();
00404 while(ch->hasMore()) {
00405 qn =ch->nextNode();
00406 wt = (double) qn->eval(dRep);
00407 if(wt > mx)
00408 mx= wt;
00409 }
00410 return mx;
00411 }
00412 protected:
00414 double dw;
00415
00416 };
00417
00421 class BandQnode : public QueryNode {
00422 public:
00423 BandQnode(double dbelief, double w) : dw(dbelief), QueryNode(w) {}
00424 virtual ~BandQnode() {}
00428 virtual double eval(DocumentRep *dRep) {
00429 double prod=1;
00430 QueryNode *qn;
00431 double wt;
00432 ch->startIteration();
00433 while(ch->hasMore()) {
00434 qn =ch->nextNode();
00435 wt = (double) qn->eval(dRep);
00436 if(wt > dw)
00437 prod *= wt;
00438 else
00439 return 0;
00440 }
00441 return prod;
00442 }
00443 protected:
00445 double dw;
00446 };
00447
00452 class BandNotQnode : public QueryNode {
00453 public:
00454 BandNotQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00455 virtual ~BandNotQnode() {}
00459 virtual double eval(DocumentRep *dRep) {
00460 double child1Wt;
00461 double child2Wt;
00462
00463 QueryNode *qn;
00464 ch->startIteration();
00465 qn =ch->nextNode();
00466 child1Wt = (double) qn->eval(dRep);
00467 qn =ch->nextNode();
00468 child2Wt = (double) qn->eval(dRep);
00469 if(child2Wt > dw)
00470 return 0;
00471 else
00472 return (child1Wt * (1.0-child2Wt));
00473 }
00474 protected:
00475 double dw;
00476
00477 };
00478
00479
00484 class FiltRejQnode : public QueryNode {
00485 public:
00486 FiltRejQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00487 virtual ~FiltRejQnode() {}
00492 virtual double eval(DocumentRep *dRep) {
00493 double child1Wt;
00494 double child2Wt;
00495
00496 QueryNode *qn;
00497 ch->startIteration();
00498 qn =ch->nextNode();
00499 child1Wt = (double) qn->eval(dRep);
00500 qn =ch->nextNode();
00501 child2Wt = (double) qn->eval(dRep);
00502 if(child1Wt > dw && child2Wt <= dw)
00503 return child1Wt;
00504 else
00505 return dw;
00506 }
00507 protected:
00509 double dw;
00510
00511 };
00512
00513
00518 class FiltReqQnode : public QueryNode {
00519 public:
00520 FiltReqQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00521 virtual ~FiltReqQnode() {}
00525 virtual double eval(DocumentRep *dRep) {
00526 double child1Wt;
00527 double child2Wt;
00528
00529 QueryNode *qn;
00530 ch->startIteration();
00531 qn =ch->nextNode();
00532 child1Wt = (double) qn->eval(dRep);
00533 qn =ch->nextNode();
00534 child2Wt = (double) qn->eval(dRep);
00535 if(child1Wt > dw && child2Wt > dw)
00536 return child1Wt;
00537 else
00538 return dw;
00539 }
00540 protected:
00542 double dw;
00543
00544 };
00545
00547 class TermQnode : public QueryNode {
00548 public:
00549 TermQnode(int id, double weight) : QueryNode(id, weight) {}
00550 virtual ~TermQnode() {}
00552 virtual double eval(DocumentRep *dRep) {
00553 return(dRep->termWeight(it, NULL));
00554 }
00555 };
00556
00559 class OdnQNode : public QueryNode {
00560 public:
00561 OdnQNode(int size, double w, double d): winSize(size), dw(d),
00562 QueryNode(w) {}
00563 virtual ~OdnQNode() {}
00565 virtual double eval(DocumentRep *dRep) {
00566 return proximityScore(dRep, dw);
00567 }
00569 int winSize;
00570 protected:
00572 double dw;
00573 };
00574
00577 class UwnQNode : public QueryNode {
00578 public:
00579 UwnQNode(int size, double w, double d): winSize(size), dw(d),
00580 QueryNode(w) {}
00581 virtual ~UwnQNode() {}
00583 virtual double eval(DocumentRep *dRep) {
00584 return proximityScore(dRep, dw);
00585 }
00587 int winSize;
00588 protected:
00590 double dw;
00591 };
00592
00593
00597 class PassageQNode : public QueryNode {
00598 public:
00599 PassageQNode(int size, double w) : winSize(size), QueryNode(w) {}
00600 virtual ~PassageQNode() {}
00605 virtual double eval(DocumentRep *dR) {
00606 StructQryDocRep *dRep= (StructQryDocRep *)dR;
00607 int dl = dRep->docEnd;
00608 dRep->increment = winSize/2;
00609 dRep->startPassageIteration();
00610 while(dRep->hasMorePassage()) {
00611 StructQryDocRep *psgRep=
00612 new StructQryDocRep(dRep->did,dRep->ind,dRep->idf,
00613 dRep->start,dRep->end);
00614 double score=0;
00615 QueryNode *child;
00616 ch->startIteration();
00617 while(ch->hasMore()) {
00618 child =ch->nextNode();
00619 score +=(double) child->eval(psgRep);
00620 }
00621 delete psgRep;
00622 if(score > dRep->maxScore) {
00623 dRep->maxScore=score;
00624 dRep->offset=dRep->start;
00625 }
00626 dRep->nextPassage();
00627 }
00628 return dRep->maxScore/(double)winSize;
00629 }
00630 protected:
00632 int winSize;
00633 };
00634
00635
00638 class SynQNode : public QueryNode {
00639 public:
00640 SynQNode(double w, double d) : dw(d), QueryNode(w) {}
00641 virtual ~SynQNode() {}
00643 virtual double eval(DocumentRep *dRep) {
00644 return proximityScore(dRep, dw);
00645 }
00646 protected:
00648 double dw;
00649 };
00650
00651 #endif
00652