Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

QueryNode.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002-2003 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 /*
00012   author: fff, dmf
00013   * 03/31/2003 -- Split QueryNode out of StructQueryRep, refactor some 
00014   * code out of rep into node classes (more OOP that way).
00015 */
00016 
00017 #ifndef _QUERYNODE_HPP
00018 #define _QUERYNODE_HPP
00019 #include "StructQryDocRep.hpp"
00020 #include "ProxInfo.hpp"
00021 
00022 // forward declaration
00023 class QueryNode;
00024 
00028 class QnList {
00029 public:
00030   QnList() {}
00031   ~QnList(); 
00033   void startIteration() {i = 0;}
00035   bool hasMore() {  return (i < qnList.size()); }
00037   QueryNode * nextNode(){QueryNode *qn = qnList[i++]; return qn; }
00039   QueryNode * getNode(int j){QueryNode *qn = qnList[j]; return qn; }
00041   int size(){return qnList.size();}
00043   void push_back(QueryNode *qn){qnList.push_back(qn);}
00045   QueryNode *popNode() { 
00046     QueryNode *qn = qnList[i]; 
00047     qnList[i++] = NULL;
00048     return qn;  
00049   }
00050 private:
00052   int i;
00054   vector<QueryNode *> qnList;
00055 };
00056 //------------------------------------------------------------
00057 //      Abstract Interface for Query Nodes 
00058 //------------------------------------------------------------
00059 
00060 
00065 class QueryNode {
00066 public:
00068   QueryNode(int id, double weight) : 
00069     w(weight),it(id),ch(NULL),entries(0),nextDoc(0),dCnt(0),
00070     dList(NULL), proxList(NULL), dw(0) {}
00072   QueryNode() : w(1),it(0),ch(NULL),entries(0),nextDoc(0),dCnt(0),
00073                 dList(NULL), proxList(NULL), dw(0){}
00075   QueryNode(double weight, double dWeight = 0) : w(weight), it(0), ch(NULL),
00076                                                  entries(0), nextDoc(0),
00077                                                  dCnt(0),proxList(NULL),
00078                                                  dList(NULL), 
00079                                                  dw(dWeight) {}
00081   virtual ~QueryNode() {
00082     delete[](dList);
00083     delete(proxList);
00084     delete(ch);
00085   }
00087   QnList *children() { return ch;}
00089   void setChildren(QnList *cl) {ch = cl;}
00091   int id() { return it;}
00093   double weight() { return w; }
00094   // update weight
00095   void setWeight(double wt) { w = wt; }
00096   // update entries count
00097   void setEntries(int cnt) { entries = cnt; }
00098 
00100   virtual void copyDocList(int len, int tf, DocInfoList *dl, int dc) {
00101   }
00103   virtual void updateDocList(int numDocs) = 0;
00105   virtual double eval(DocumentRep *dRep) = 0;
00106 
00108   bool isProxNode() { return proxList != NULL; }
00110   void startProxIteration() {if (proxList) proxList->startIteration();}
00112   bool hasMoreProx() {return proxList && proxList->hasMore();}
00114   bool nextProxItem() {return proxList && proxList->nextDoc();}
00117   bool nextProxItem(int did) {
00118     return proxList && proxList->nextDoc(did);
00119   }
00120 
00122   int dCnt;
00124   bool *dList;
00126   int nextDoc;
00128   ProxInfo * proxList;
00129 
00130 protected:  
00133   void transformPassageOps();
00135   QnList *ch;
00137   int entries;
00139   int it;
00141   double w; 
00143   double dw;
00144 
00146   void unionDocList(int numDocs);
00148   void intersectDocList(int numDocs);
00149 };
00150 
00152 class BeliefNode : public QueryNode {
00153 public:
00154   BeliefNode(double wt) : QueryNode(wt) { }
00155   BeliefNode(int id, double weight) : QueryNode(id, weight) { }  
00156   BeliefNode(double wt, double dbelief) : QueryNode(wt, dbelief) { }
00157   virtual ~BeliefNode() { }
00159   virtual void updateDocList(int numDocs) {
00160     unionDocList(numDocs);   
00161   }
00162 };
00163 
00164 
00166 class ProxNode : public QueryNode {
00167 public:
00168   ProxNode(double wt) : QueryNode(wt) {}
00169   
00170   ProxNode(int id, double weight) :  QueryNode(id, weight) { }  
00171   ProxNode(double w, double d) : QueryNode(w, d), winSize(0) {}
00172   ProxNode(int size, double w, double d) : QueryNode(w, d), winSize(size) {}
00173   virtual ~ProxNode() { }
00175   virtual double eval(DocumentRep *dRep) {
00176     return proximityScore(dRep, dw);
00177   }
00179   virtual void updateDocList(int numDocs) {
00180     intersectDocList(numDocs);   
00181   }
00182 
00183 protected:
00185   int winSize;
00186 private:
00190   double proximityScore(DocumentRep *dR, double defaultScore) {
00191     StructQryDocRep *dRep = (StructQryDocRep *)dR;
00192     int tf = 0;
00193     double score;
00194     if(dRep->did < nextDoc) {
00195       return defaultScore;
00196     }
00197     // Skip to next doc id that we can score (> or == to current).
00198     if (proxList->nextDoc(dRep->did)) {
00199       double idf = dRep->computeIdfScore(dCnt);
00200       // compute proximity score for whole doc
00201       tf = proxList->count();
00202       score = dRep->beliefScore(tf, idf);
00203       // advance the prox pointer.
00204       if (proxList->nextDoc()) {
00205         nextDoc = proxList->id();
00206       } else {
00207         // No more prox entries.
00208         nextDoc = INT_MAX;
00209       }
00210     } else {
00211       score = defaultScore;
00212     }
00213     return score;
00214   }
00215 };
00216 
00217 
00220 class SumQnode : public BeliefNode {
00221 public:
00222   SumQnode(double wt) : BeliefNode(wt){}
00223   virtual ~SumQnode() {}
00226   virtual double eval(DocumentRep *dRep) {
00227     double sum = 0;
00228     QueryNode *qn;
00229     ch->startIteration();
00230     while(ch->hasMore()) {
00231       qn = ch->nextNode();
00232       sum += qn->eval(dRep);
00233     }
00234     return sum/entries;
00235   }
00236 };
00237 
00240 class WsumQnode : public BeliefNode {
00241 public:
00242   WsumQnode(double w) : BeliefNode(w) {}
00243   virtual ~WsumQnode() {}
00246   virtual double eval(DocumentRep *dRep) {
00247     double sum = 0;
00248     double total_wt = 0;
00249     QueryNode *qn;
00250     double wt;
00251     ch->startIteration();
00252     while(ch->hasMore()) {
00253       qn = ch->nextNode();
00254       wt = qn->weight(); 
00255       total_wt += wt;
00256       sum += wt * qn->eval(dRep);
00257     }
00258     if(total_wt > 0) // normalized by the sum of the weights
00259       sum /= total_wt;
00260     // the belief is scaled by the weight 'w' of itself.
00261     return sum;
00262   }
00263 };
00264 
00265 
00268 class AndQnode : public BeliefNode {
00269 public:
00270   AndQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00271   virtual ~AndQnode() {}
00275   virtual double eval(DocumentRep *dRep) {
00276     double prod = 1;
00277     QueryNode *qn;
00278     double wt;
00279     ch->startIteration();
00280     while(ch->hasMore()) {
00281       qn = ch->nextNode();
00282       wt = qn->eval(dRep);
00283       if(wt > dw)
00284         prod *= wt;
00285       else
00286         prod *= dw;
00287     }
00288     return prod;
00289   }
00290 };
00291 
00295 class OrQnode : public BeliefNode {
00296 public:
00297   OrQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00298   virtual ~OrQnode() {}
00301   virtual double eval(DocumentRep *dRep) {
00302     double prod = 1.0;
00303     QueryNode *qn;
00304     double wt;
00305     ch->startIteration();
00306     while(ch->hasMore()) {
00307       qn = ch->nextNode();
00308       wt = qn->eval(dRep);
00309       if(wt > dw)
00310         prod *= (1.0 - wt);
00311     }
00312     return (1.0 - prod);
00313   }
00314 };
00315 
00318 class NotQnode : public BeliefNode {
00319 public:
00320   NotQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00321   virtual ~NotQnode() {}
00323   virtual double eval(DocumentRep *dRep) {
00324     // inverting the belief in the only child node
00325     QueryNode *qn;
00326     ch->startIteration();
00327     qn = ch->nextNode();
00328     return (1.0 - qn->eval(dRep));
00329   }
00330 };
00331 
00334 class MaxQnode : public BeliefNode {
00335 public:
00336   MaxQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00337   virtual ~MaxQnode() {}
00340   virtual double eval(DocumentRep *dRep) {
00341     double mx = dw;
00342     QueryNode *qn;
00343     double wt;
00344     ch->startIteration();
00345     while(ch->hasMore()) {
00346       qn = ch->nextNode();
00347       wt = qn->eval(dRep);
00348       if(wt > mx) 
00349         mx = wt;
00350     }
00351     return mx;
00352   }
00353 };
00354 
00358 class BandQnode : public BeliefNode {
00359 public:
00360   BandQnode(double dbelief, double w) : BeliefNode(w, dbelief) {}
00361   virtual ~BandQnode() {}
00365   virtual double eval(DocumentRep *dRep) {
00366     double prod = 1.0;
00367     QueryNode *qn;
00368     double wt;
00369     StructQryDocRep * myRep = (StructQryDocRep *)dRep;
00370     int did = myRep->did;
00371     ch->startIteration();
00372     while(ch->hasMore()) {
00373       qn = ch->nextNode();
00374       // advance child prox entry to this document
00375       if (qn->hasMoreProx() && qn->nextProxItem(did))
00376         qn->nextDoc = did; // update nextDoc for term node eval
00377       wt = qn->eval(dRep);
00378       if(wt > dw) 
00379         prod *= wt;
00380       else 
00381         return 0;
00382     }
00383     return prod;
00384   }
00385   virtual void updateDocList(int numDocs) {
00386     intersectDocList(numDocs); // different from superclass.
00387   }
00388 };
00389 
00394 class BandNotQnode : public BeliefNode {
00395 public:
00396   BandNotQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00397   virtual ~BandNotQnode() {}
00401   virtual double eval(DocumentRep *dRep) {
00402     double child1Wt;
00403     double child2Wt;
00404     // boolean_and_not consider only two children
00405     QueryNode *qn;
00406     ch->startIteration();
00407     qn = ch->nextNode();
00408     child1Wt = qn->eval(dRep);
00409     qn = ch->nextNode();
00410     child2Wt = qn->eval(dRep);
00411     if(child2Wt > dw)
00412       return 0;
00413     else
00414       return (child1Wt * (1.0 - child2Wt));
00415   }
00416 };
00417 
00418 
00423 class FiltRejQnode : public BeliefNode {
00424 public:
00425   FiltRejQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00426   virtual ~FiltRejQnode() {}
00431   virtual double eval(DocumentRep *dRep) {
00432     double child1Wt;
00433     double child2Wt;
00434     // filter_reject consider only two children
00435     QueryNode *qn;
00436     ch->startIteration();
00437     qn = ch->nextNode();
00438     child1Wt = qn->eval(dRep);
00439     qn = ch->nextNode();
00440     child2Wt = qn->eval(dRep);
00441     if(child1Wt > dw && child2Wt <= dw)
00442       return child1Wt;
00443     else
00444       return dw;
00445   }
00446 };
00447 
00452 class FiltReqQnode : public BeliefNode {
00453 public:
00454   FiltReqQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00455   virtual ~FiltReqQnode() {}
00459   virtual double eval(DocumentRep *dRep) {
00460     double child1Wt;
00461     double child2Wt;
00462     // filter_require consider only two children
00463     QueryNode *qn;
00464     ch->startIteration();
00465     qn = ch->nextNode();
00466     child1Wt = qn->eval(dRep);
00467     qn = ch->nextNode();
00468     child2Wt = qn->eval(dRep);
00469     if(child1Wt > dw && child2Wt > dw)
00470       return child1Wt;
00471     else
00472       return dw;
00473   }
00474   virtual void updateDocList(int numDocs) {
00475     intersectDocList(numDocs); // different from superclass.
00476   }
00477 };
00478 
00480 class TermQnode : public ProxNode {
00481 public:
00482   TermQnode(int id, double weight) : ProxNode(id, weight) { }
00483   virtual ~TermQnode() {}
00485   virtual double eval(DocumentRep *dRep) { 
00486     StructQryDocRep * myRep = (StructQryDocRep *)dRep;
00487     int did = myRep->did;
00488     double freq = 0.0;
00489     if (nextDoc == did) {
00490       freq = (double)proxList->count();
00491       if (hasMoreProx()) {
00492         // advance the prox pointer.
00493         nextProxItem();
00494         nextDoc = proxList->id();
00495       } else {
00496         // No more prox entries.
00497         nextDoc = INT_MAX;
00498       }
00499     }
00500     return(myRep->termWeight(it, freq, dCnt));
00501   }
00503   virtual void copyDocList(int len, int tf, DocInfoList *dl, int dc);
00504 };
00505 
00508 class OdnQNode : public ProxNode {
00509 public:
00510   OdnQNode(int size, double w, double d) : ProxNode(size, w, d){}
00511   virtual ~OdnQNode() {}
00512   virtual void updateDocList(int numDocs) {
00513     ProxNode::updateDocList(numDocs);   // use superclass method
00514     orderedProxList(numDocs);
00515   }
00516   
00517 private:
00519   void orderedProxList(int numDocs);
00521   bool foundOrderedProx(int bpos, int wsize, QnList *cl, int ith);
00522 };
00523 
00526 class UwnQNode : public ProxNode {
00527 public:
00528   UwnQNode(int size, double w, double d) : ProxNode(size, w, d) {}
00529   virtual ~UwnQNode() {}
00530   virtual void updateDocList(int numDocs) {
00531     ProxNode::updateDocList(numDocs);   // use superclass method
00532     unorderedProxList(numDocs);
00533   }
00534 private:
00536   void unorderedProxList(int numDocs);
00538   bool findUnorderedWin(QueryNode *cqn, QnList *cl, int winSize);
00539 };
00540 
00541 
00543 // fix to be WPARSUMN 
00544 // This is a prox operator with embedded belief operators spliced out.
00547 class PassageQNode : public ProxNode {
00548 public:
00549   PassageQNode(int size, double w) : ProxNode(w){ winSize = size; }
00550   virtual ~PassageQNode() {}
00551 
00556   virtual double eval(DocumentRep *dR) {
00557     StructQryDocRep *dRep = (StructQryDocRep *)dR;
00558     double maxScore = 0;
00559     double score;      
00560     dRep->startPassageIteration(winSize);
00561     while(dRep->hasMorePassage()) {
00562       score = passageScore(dRep);
00563       if(score > maxScore) {
00564         maxScore = score;
00565       }
00566       dRep->nextPassage();
00567     }
00568     return maxScore;
00569   }
00570 
00571   virtual void updateDocList(int numDocs) {
00572     unionDocList(numDocs); // different from superclass.
00573     transformPassageOps();
00574   }
00575 private:
00578   double passageScore(StructQryDocRep *dRep);
00579 };
00580 
00583 class SynQNode : public ProxNode {
00584 public:
00585   SynQNode(double w, double d) : ProxNode(w, d) {}
00586   virtual ~SynQNode() {}
00587   virtual void updateDocList(int numDocs) {
00588     unionDocList(numDocs); // different from superclass.
00589     synonymProxList();
00590   }
00591   
00592 private:
00594   void synonymProxList();
00595 };
00596 
00599 class PropQNode : public OdnQNode {
00600 public:
00601   PropQNode(double w, double d) : OdnQNode(0, w, d){}
00602   virtual ~PropQNode() {}
00603 };
00604 
00605 #endif

Generated on Tue Nov 25 11:26:45 2003 for Lemur Toolkit by doxygen1.2.18