Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

StructQueryRep.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 /*
00012   author: fff
00013 */
00014 
00015 #ifndef _STRUCTQUERYREP_HPP
00016 #define _STRUCTQUERYREP_HPP
00017 
00018 #include "RetrievalMethod.hpp"
00019 #include "Document.hpp"
00020 #include "DocumentRep.hpp"
00021 #include "StructQryDocRep.hpp"
00022 #include "IndexedReal.hpp"
00023 #include "Index.hpp"
00024 #include "Counter.hpp"
00025 #include "WeightedIDSet.hpp"
00026 #include "FreqVector.hpp"
00027 #include "ScoreAccumulator.hpp"
00028 
00029 //------------------------------------------------------------
00030 //      Abstract Interface for Structured Query 
00031 //------------------------------------------------------------
00032 
00034 class StructQuery : public Query {
00035 public:
00036   StructQuery(Document &doc) : d(doc) {}
00037   virtual ~StructQuery() {}
00038 
00039   virtual char *id() { return d.getID();}
00040   virtual void startTermIteration() { d.startTermIteration();}
00041   virtual bool hasMore() { return d.hasMore();}
00042   virtual TokenTerm *nextTerm() { return d.nextTerm();}
00043 protected:
00044   Document &d;
00045 };
00046 
00047 
00048 
00049 //------------------------------------------------------------
00050 //      Abstract Interface for StructQuery Representation 
00051 //------------------------------------------------------------
00052 
00053 // forward declaration
00054 class QueryNode;
00055 
00059 class QnList {
00060 public:
00061   QnList() {}
00062   virtual ~QnList();
00064   virtual void startIteration() {  i=0; }
00066   virtual bool hasMore() {  return (i < qnList.size()); }
00068   virtual QueryNode * nextNode(){QueryNode *qn=qnList[i++]; return qn; }
00070   virtual QueryNode * getNode(int i){QueryNode *qn=qnList[i]; return qn; }
00072   virtual int size(){return qnList.size();}
00074   virtual void push_back(QueryNode *qn){qnList.push_back(qn);}
00075 
00076 protected:
00078   int i;
00080   vector<QueryNode *> qnList;
00081 };
00082 
00086 class ProxInfo {
00087 public: 
00088   ProxInfo() : id(0),count(0),size(0),nextPos(0),posList(NULL),next(NULL) {}
00089   ~ProxInfo() {
00090     delete [](posList);
00091   }
00093   int id;
00095   int count;
00097   int size;
00100   int nextPos;
00102   int *posList;
00104   ProxInfo *next;
00105 };
00106 
00111 class QueryNode {
00112 public:
00114   QueryNode(int id, double weight) : 
00115     w(weight),it(id),ch(NULL),entries(0),nextDoc(0),dCnt(0),
00116     proxList(NULL),dList(NULL) {}
00118   QueryNode() : w(1),it(0),ch(NULL),entries(0),nextDoc(0),dCnt(0),
00119                 proxList(NULL),dList(NULL){}
00121   QueryNode(double weight) : w(weight),it(0),ch(NULL),entries(0),
00122                              nextDoc(0),dCnt(0),proxList(NULL),dList(NULL) {}
00124   ~QueryNode() {
00125     delete[](dList);
00126     while(proxList) {
00127       ProxInfo *prx=proxList->next;
00128       delete (proxList);
00129       proxList = prx;
00130     }
00131     delete(ch);
00132   }
00134   virtual QnList *children() { return ch;}
00136   virtual int id() { return it;}
00138   virtual double eval(DocumentRep *dRep) = 0;
00140   virtual double weight() { return w; }
00141   // update weight
00142   virtual void setWeight(double wt) { w=wt; }
00146   virtual double proximityScore(DocumentRep *dR, double defaultScore) {
00147     StructQryDocRep *dRep = (StructQryDocRep *)dR;
00148     int tf=0;
00149     double score;
00150     ProxInfo *prx;
00151     InvFPIndex &ind = dRep->ind;
00152     if(dRep->did < nextDoc) {
00153       return defaultScore;
00154     }
00155     while(dRep->did > nextDoc) {
00156       prx=proxList->next;
00157       delete (proxList);
00158       proxList = prx;
00159       if(!proxList)
00160         nextDoc = ind.docCount();
00161       else
00162         nextDoc = proxList->id;
00163     }
00164     if(dRep->did == nextDoc && proxList) {
00165       double idf = dRep->computeIdfScore(dCnt);
00166       if(ind.docLength(dRep->did) == dRep->size) {
00167         // compute proximity score for whole doc
00168         tf = proxList->count;
00169       } else {
00170         // compute proximity score for a passage
00171         for(int k=0; k<proxList->count; k++)
00172           if(proxList->posList[k]> dRep->start && 
00173              proxList->posList[k]<=dRep->end)
00174             tf++;
00175       }
00176       score = dRep->beliefScore(tf, idf);
00177       prx = proxList->next;
00178       delete (proxList);
00179       proxList = prx;
00180       if(!proxList)
00181         nextDoc = ind.docCount();
00182       else
00183         nextDoc = proxList->id;
00184     } else
00185       score = defaultScore;
00186     return score;
00187   }
00189   int dCnt;
00191   bool *dList;
00193   QnList *ch;
00195   ProxInfo * proxList;
00197   int entries;
00199   int nextDoc;
00200 
00201 protected:
00203   int it;
00205   double w; 
00206 };
00207 
00212 class StructQueryRep : public QueryRep {
00213 public:
00215   StructQueryRep(StructQuery &qry, InvFPIndex &dbIndex, double dbelief=0);
00216 
00217   virtual ~StructQueryRep() {
00218     delete(TopNode);
00219   }
00221   virtual QnList * getChildren(StructQuery &qry);
00223   virtual QnList * getWeightedChildren(StructQuery &qry);
00225   virtual QnList * getProxChildren(StructQuery &qry);
00227   virtual QueryNode * getQryNode(StructQuery &qry, TokenTerm *tok, double w);
00229   virtual QueryNode * getProxQryNode(StructQuery &qry, TokenTerm *tok);
00231   virtual QueryNode * topnode() {return TopNode;}
00233   virtual void setTopnode(QueryNode *qn) {TopNode=qn;}
00235   virtual void copyDocList(DocInfoList *dl, QueryNode *qn);
00237   virtual void unionDocList(QueryNode *qn);
00239   virtual void intersectDocList(QueryNode *qn);
00241   virtual void termOffsetList(QueryNode *qn, int did);
00243   virtual bool foundOrderedProx(int bpos, int wsize, QnList *cl, int ith);
00245   virtual void orderedProxList(QueryNode *qn);
00247   virtual void unorderedProxList(QueryNode *qn);
00249   virtual bool findUnorderedWin(QueryNode *cqn, QnList *cl, int winSize);
00251   virtual void synonymProxList(QueryNode *qn);
00252 
00253 protected:
00255   QueryNode *TopNode;
00257   QueryNode *qStack[100];
00259   int TopqStack;
00261   double dw;
00263   InvFPIndex &ind;
00264 };
00265 
00268 class SumQnode : public QueryNode {
00269 public:
00270   SumQnode(double wt) {}
00271   virtual ~SumQnode() {}
00274   virtual double eval(DocumentRep *dRep) {
00275     double count=0;
00276     double sum=0;
00277     QueryNode *qn;
00278     ch->startIteration();
00279     while(ch->hasMore()) {
00280       qn =ch->nextNode();
00281       count++;
00282       sum +=(double) qn->eval(dRep);
00283     }
00284     return sum/count;
00285   }
00286 };
00287 
00290 class WsumQnode : public QueryNode {
00291 public:
00292   WsumQnode(double w) {}
00293   virtual ~WsumQnode() {}
00296   virtual double eval(DocumentRep *dRep) {
00297     double sum=0;
00298     double total_wt=0;
00299     QueryNode *qn;
00300     double wt;
00301     ch->startIteration();
00302     while(ch->hasMore()) {
00303       qn =ch->nextNode();
00304       wt = qn->weight();
00305       total_wt +=wt;
00306       sum += wt * (double) qn->eval(dRep);
00307     }
00308     if(total_wt>0) // normalized by the sum of the weights
00309       sum /= total_wt;
00310     // the belief is scaled by the weight 'w' of itself.
00311     return sum;
00312   }
00313 };
00314 
00315 
00318 class AndQnode : public QueryNode {
00319 public:
00320   AndQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00321   virtual ~AndQnode() {}
00325   virtual double eval(DocumentRep *dRep) {
00326     double prod=1;
00327     QueryNode *qn;
00328     double wt;
00329     ch->startIteration();
00330     while(ch->hasMore()) {
00331       qn =ch->nextNode();
00332       wt = (double) qn->eval(dRep);
00333       if(wt > dw)
00334         prod *= wt;
00335       else
00336         prod *= dw;
00337     }
00338     return prod;
00339   }
00340 protected:
00342   double dw;
00343 };
00344 
00348 class OrQnode : public QueryNode {
00349 public:
00350   OrQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00351   virtual ~OrQnode() {}
00354   virtual double eval(DocumentRep *dRep) {
00355     double prod=1.0;
00356     QueryNode *qn;
00357     double wt;
00358     ch->startIteration();
00359     while(ch->hasMore()) {
00360       qn =ch->nextNode();
00361       wt = (double) qn->eval(dRep);
00362       if(wt > dw)
00363         prod *= (1.0-wt);
00364     }
00365     return (1.0-prod);
00366   }
00367 protected:
00369   double dw;
00370 };
00371 
00374 class NotQnode : public QueryNode {
00375 public:
00376   NotQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00377   virtual ~NotQnode() {}
00379   virtual double eval(DocumentRep *dRep) {
00380     // inverting the belief in the only child node
00381     QueryNode *qn;
00382     ch->startIteration();
00383     qn =ch->nextNode();
00384     return (1.0-(double) qn->eval(dRep));
00385   }
00386 protected:
00388   double dw;
00389 };
00390 
00393 class MaxQnode : public QueryNode {
00394 public:
00395   MaxQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00396   virtual ~MaxQnode() {}
00399   virtual double eval(DocumentRep *dRep) {
00400     double mx=dw;
00401     QueryNode *qn;
00402     double wt;
00403     ch->startIteration();
00404     while(ch->hasMore()) {
00405       qn =ch->nextNode();
00406       wt = (double) qn->eval(dRep);
00407       if(wt > mx) 
00408         mx= wt;
00409     }
00410     return mx;
00411   }
00412 protected:
00414   double dw;
00415 
00416 };
00417 
00421 class BandQnode : public QueryNode {
00422 public:
00423   BandQnode(double dbelief, double w) : dw(dbelief), QueryNode(w) {}
00424   virtual ~BandQnode() {}
00428   virtual double eval(DocumentRep *dRep) {
00429     double prod=1;
00430     QueryNode *qn;
00431     double wt;
00432     ch->startIteration();
00433     while(ch->hasMore()) {
00434       qn =ch->nextNode();
00435       wt = (double) qn->eval(dRep);
00436       if(wt > dw) 
00437         prod *= wt;
00438       else 
00439         return 0;
00440     }
00441     return prod;
00442   }
00443 protected:
00445   double dw;
00446 };
00447 
00452 class BandNotQnode : public QueryNode {
00453 public:
00454   BandNotQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00455   virtual ~BandNotQnode() {}
00459   virtual double eval(DocumentRep *dRep) {
00460     double child1Wt;
00461     double child2Wt;
00462     // boolean_and_not consider only two children
00463     QueryNode *qn;
00464     ch->startIteration();
00465     qn =ch->nextNode();
00466     child1Wt = (double) qn->eval(dRep);
00467     qn =ch->nextNode();
00468     child2Wt = (double) qn->eval(dRep);
00469     if(child2Wt > dw)
00470       return 0;
00471     else
00472       return (child1Wt * (1.0-child2Wt));
00473   }
00474 protected:
00475   double dw;
00476 
00477 };
00478 
00479 
00484 class FiltRejQnode : public QueryNode {
00485 public:
00486   FiltRejQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00487   virtual ~FiltRejQnode() {}
00492   virtual double eval(DocumentRep *dRep) {
00493     double child1Wt;
00494     double child2Wt;
00495     // filter_reject consider only two children
00496     QueryNode *qn;
00497     ch->startIteration();
00498     qn =ch->nextNode();
00499     child1Wt = (double) qn->eval(dRep);
00500     qn =ch->nextNode();
00501     child2Wt = (double) qn->eval(dRep);
00502     if(child1Wt > dw && child2Wt <= dw)
00503       return child1Wt;
00504     else
00505       return dw;
00506   }
00507 protected:
00509   double dw;
00510 
00511 };
00512 
00513 
00518 class FiltReqQnode : public QueryNode {
00519 public:
00520   FiltReqQnode(double dbelief, double wt) : dw(dbelief), QueryNode(wt) {}
00521   virtual ~FiltReqQnode() {}
00525   virtual double eval(DocumentRep *dRep) {
00526     double child1Wt;
00527     double child2Wt;
00528     // filter_require consider only two children
00529     QueryNode *qn;
00530     ch->startIteration();
00531     qn =ch->nextNode();
00532     child1Wt = (double) qn->eval(dRep);
00533     qn =ch->nextNode();
00534     child2Wt = (double) qn->eval(dRep);
00535     if(child1Wt > dw && child2Wt > dw)
00536       return child1Wt;
00537     else
00538       return dw;
00539   }
00540 protected:
00542   double dw;
00543 
00544 };
00545 
00547 class TermQnode : public QueryNode {
00548 public:
00549   TermQnode(int id, double weight) : QueryNode(id, weight) {}
00550   virtual ~TermQnode() {}
00552   virtual double eval(DocumentRep *dRep) { 
00553     return(dRep->termWeight(it, NULL));
00554   }
00555 };
00556 
00559 class OdnQNode : public QueryNode {
00560 public:
00561   OdnQNode(int size, double w, double d): winSize(size), dw(d), 
00562                                           QueryNode(w) {}
00563   virtual ~OdnQNode() {}
00565   virtual double eval(DocumentRep *dRep) {
00566     return proximityScore(dRep, dw);
00567   }
00569   int winSize;
00570 protected:
00572   double dw;
00573 };
00574 
00577 class UwnQNode : public QueryNode {
00578 public:
00579   UwnQNode(int size, double w, double d): winSize(size), dw(d), 
00580                                           QueryNode(w) {}
00581   virtual ~UwnQNode() {}
00583   virtual double eval(DocumentRep *dRep) {
00584     return proximityScore(dRep, dw);
00585   }
00587   int winSize;
00588 protected:
00590   double dw;
00591 };
00592 
00593 
00597 class PassageQNode : public QueryNode {
00598 public:
00599   PassageQNode(int size, double w) : winSize(size), QueryNode(w) {}
00600   virtual ~PassageQNode() {}
00605   virtual double eval(DocumentRep *dR) {
00606     StructQryDocRep *dRep= (StructQryDocRep *)dR;
00607     int dl = dRep->docEnd;
00608     dRep->increment = winSize/2;
00609     dRep->startPassageIteration();
00610     while(dRep->hasMorePassage()) {
00611       StructQryDocRep *psgRep= 
00612         new StructQryDocRep(dRep->did,dRep->ind,dRep->idf,
00613                             dRep->start,dRep->end);
00614       double score=0;
00615       QueryNode *child;
00616       ch->startIteration();
00617       while(ch->hasMore()) {
00618         child =ch->nextNode();
00619         score +=(double) child->eval(psgRep);
00620       }
00621       delete psgRep;
00622       if(score > dRep->maxScore) {
00623         dRep->maxScore=score;
00624         dRep->offset=dRep->start;
00625       }
00626       dRep->nextPassage();
00627     }
00628     return dRep->maxScore/(double)winSize;
00629   }
00630 protected:
00632   int winSize;
00633 };
00634 
00635 
00638 class SynQNode : public QueryNode {
00639 public:
00640   SynQNode(double w, double d) : dw(d), QueryNode(w) {}
00641   virtual ~SynQNode() {}
00643   virtual double eval(DocumentRep *dRep) {
00644     return proximityScore(dRep, dw);
00645   }
00646 protected:
00648   double dw;
00649 };
00650 
00651 #endif /* _STRUCTQUERYREP_HPP */
00652 

Generated on Mon Sep 30 14:13:23 2002 for LEMUR by doxygen1.2.18