Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

QuerySpec.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef INDRI_QUERYSPEC_HPP
00014 #define INDRI_QUERYSPEC_HPP
00015 
00016 #include <vector>
00017 #include <string>
00018 #include <sstream>
00019 #include <indri/greedy_vector>
00020 #include <algorithm>
00021 #include "lemur-platform.h"
00022 
00023 #include "indri/Walker.hpp"
00024 #include "indri/Copier.hpp"
00025 #include "indri/Packer.hpp"
00026 #include "indri/Unpacker.hpp"
00027 
00028 #include "Exception.hpp"
00029 
// Ordered comparison of two vectors whose elements are pointer-like.
//
// Returns true when both vectors have the same length and, position by
// position, the objects the elements point to compare equal with operator==.
// The elements themselves (the pointers) are never compared.
template<class T>
bool equal( const std::vector<T>& one, const std::vector<T>& two ) {
  typedef typename std::vector<T>::size_type size_type;
  const size_type count = one.size();

  if( count != two.size() )
    return false;

  size_type index = 0;

  while( index < count ) {
    // the first pair of pointees that differs decides the result
    if( !( *one[index] == *two[index] ) )
      return false;

    ++index;
  }

  return true;
}
00044 
// Order-insensitive comparison of two vectors whose elements are pointer-like.
//
// Returns true when the vectors have the same length and every element of
// <two> can be paired with a distinct element of <one> whose pointee compares
// equal with operator==.  Duplicates are honoured: each element of <one> can
// be used for one pairing only.
//
// Neither argument is modified; the matching is done on a private copy.
template<class T>
bool unordered_equal( const std::vector<T>& one, const std::vector<T>& two ) {
  if( one.size() != two.size() )
    return false;

  // elements of <one> that have not been paired with an element of <two> yet
  std::vector<T> unmatched( one );

  // this algorithm is n^2 as opposed to n log n if
  // we sorted things, but windows tend to be short
  for( typename std::vector<T>::size_type i=0; i<two.size(); i++ ) {
    typename std::vector<T>::iterator candidate = unmatched.begin();

    while( candidate != unmatched.end() && !( **candidate == *two[i] ) )
      ++candidate;

    // the sizes are equal, so one element without a partner already
    // proves the vectors differ--no need to look at the rest
    if( candidate == unmatched.end() )
      return false;

    // we remove each match--if they all match, the array will be empty
    unmatched.erase( candidate );
  }

  return unmatched.empty();
}
00069 
00070 namespace indri {
00071   namespace lang {
00072     /* abstract */ class Node {
00073     protected:
00074       std::string _name;
00075 
00076     public:
00077       Node() {
00078         std::stringstream s;
00079         s << PTR_TO_INT(this);
00080         _name = s.str();
00081       }
00082 
00083       virtual ~Node() {
00084       }
00085       
00086       void setNodeName( const std::string& name ) {
00087         _name = name;
00088       }
00089 
00090       const std::string& nodeName() const {
00091         return _name;
00092       }
00093 
00094       virtual std::string typeName() const {
00095         return "Node";
00096       }
00097 
00098       virtual std::string queryText() const = 0;
00099 
00100       virtual bool operator < ( Node& other ) {
00101         // TODO: make this faster
00102         if( typeName() != other.typeName() )
00103           return typeName() < other.typeName();
00104 
00105         return queryText() < other.queryText();
00106       }
00107      
00108       virtual bool operator== ( Node& other ) {
00109         return &other == this; 
00110       }
00111       
00112       virtual void pack( Packer& packer ) = 0;
00113       virtual void walk( Walker& walker ) = 0;
00114       virtual Node* copy( Copier& copier ) = 0;
00115     };
00116 
00117     /* abstract */ class RawExtentNode : public Node {};
00118     /* abstract */ class ScoredExtentNode : public Node {};
00119     /* abstract */ class AccumulatorNode : public Node {};
00120     
00121     class IndexTerm : public RawExtentNode {
00122     private:
00123       std::string _text;
00124       bool _stemmed;
00125 
00126     public:
00127       IndexTerm( const std::string& text, bool stemmed = false ) : _text(text), _stemmed(stemmed)
00128       {
00129       }
00130 
00131       IndexTerm( Unpacker& unpacker ) {
00132         _text = unpacker.getString( "termName" );
00133         _stemmed = unpacker.getBoolean( "stemmed" );
00134       }
00135 
00136       const std::string& getText() { return _text; }
00137 
00138       bool operator==( Node& node ) {
00139         IndexTerm* other = dynamic_cast<IndexTerm*>(&node);
00140 
00141         if( !other )
00142           return false;
00143 
00144         if( other == this )
00145           return true;
00146         
00147         return other->_text == _text;
00148       }
00149 
00150       std::string typeName() const {
00151         return "IndexTerm";
00152       }
00153 
00154       std::string queryText() const {
00155         std::stringstream qtext;
00156 
00157         if( _stemmed ) {
00158           qtext << '"' << _text << '"';
00159         } else {
00160           qtext << _text;
00161         }
00162 
00163         return qtext.str();
00164       }
00165 
00166       void setStemmed(bool stemmed) {
00167         _stemmed = stemmed;
00168       }
00169 
00170       bool getStemmed() const {
00171         return _stemmed;
00172       }
00173 
00174       void pack( Packer& packer ) {
00175         packer.before(this);
00176         packer.put( "termName", _text );
00177         packer.put( "stemmed", _stemmed );
00178         packer.after(this);
00179       }
00180 
00181       void walk( Walker& walker ) {
00182         walker.before(this);
00183         walker.after(this);
00184       }
00185 
00186       Node* copy( Copier& copier ) {
00187         copier.before(this);
00188         IndexTerm* termCopy = new IndexTerm(*this);
00189         return copier.after(this, termCopy);
00190       }
00191     };
00192 
00193     class Field : public RawExtentNode {
00194     private:
00195       std::string _fieldName;
00196 
00197     public:
00198       Field( const std::string& name ) : _fieldName(name)
00199       {
00200       }
00201 
00202       Field( Unpacker& unpacker ) {
00203         _fieldName = unpacker.getString( "fieldName" );
00204       }
00205 
00206       const std::string& getFieldName() const { return _fieldName; }
00207 
00208       std::string typeName() const {
00209         return "Field";
00210       }
00211 
00212       std::string queryText() const {
00213         return _fieldName;
00214       }
00215 
00216       void pack( Packer& packer ) {
00217         packer.before(this);
00218         packer.put( "fieldName", _fieldName );
00219         packer.after(this);
00220       }
00221       
00222       void walk( Walker& walker ) {
00223         walker.before(this);
00224         walker.after(this);
00225       }
00226 
00227       Node* copy( Copier& copier ) {
00228         copier.before(this);
00229         Field* newField = new Field(*this);
00230         return copier.after(this, newField);
00231       }
00232 
00233       bool operator== ( Node& other ) {
00234         Field* otherField = dynamic_cast<Field*>(&other);
00235 
00236         if( !otherField )
00237           return false;
00238 
00239         return otherField->getFieldName() == getFieldName();
00240       }
00241     };
00242 
00243     class ExtentInside : public RawExtentNode {
00244     private:
00245       RawExtentNode* _inner;
00246       RawExtentNode* _outer;
00247 
00248     public:
00249       ExtentInside( RawExtentNode* inner, RawExtentNode* outer ) :
00250         _inner(inner),
00251         _outer(outer)
00252       {
00253       }
00254 
00255       ExtentInside( Unpacker& unpacker ) {
00256         _inner = unpacker.getRawExtentNode( "inner" );
00257         _outer = unpacker.getRawExtentNode( "outer" );
00258       }
00259 
00260       bool operator== ( Node& o ) {
00261         ExtentInside* other = dynamic_cast<ExtentInside*>(&o);
00262   
00263         return other &&
00264                *_inner == *other->_inner &&
00265                *_outer == *other->_outer;
00266       }
00267       
00268       std::string typeName() const {
00269         return "ExtentInside";
00270       }
00271 
00272       std::string queryText() const {
00273         std::stringstream qtext;
00274         qtext << _inner->queryText()
00275               << "."
00276               << _outer->queryText();
00277 
00278         return qtext.str();
00279       }
00280 
00281       RawExtentNode* getInner() {
00282         return _inner;
00283       }
00284 
00285       RawExtentNode* getOuter() {
00286         return _outer;
00287       }
00288 
00289       void pack( Packer& packer ) {
00290         packer.before(this);
00291         packer.put( "inner", _inner );
00292         packer.put( "outer", _outer );
00293         packer.after(this);
00294       }
00295 
00296       void walk( Walker& walker ) {
00297         walker.before(this);
00298         _inner->walk(walker);
00299         _outer->walk(walker);
00300         walker.after(this);
00301       }
00302 
00303       Node* copy( Copier& copier ) {
00304         copier.before(this);
00305         
00306         RawExtentNode* newInner = dynamic_cast<RawExtentNode*>(_inner->copy(copier));
00307         RawExtentNode* newOuter = dynamic_cast<RawExtentNode*>(_outer->copy(copier));
00308         ExtentInside* extentInsideCopy = new ExtentInside( newInner, newOuter );
00309         extentInsideCopy->setNodeName( nodeName() );
00310 
00311         return copier.after(this, extentInsideCopy);
00312       }
00313     };
00314 
00315     class ExtentOr : public RawExtentNode {
00316     private:
00317       std::vector<RawExtentNode*> _children;
00318 
00319     public:
00320       ExtentOr() {}
00321       ExtentOr( const std::vector<RawExtentNode*>& children ) :
00322         _children(children)
00323       {
00324       }
00325 
00326       ExtentOr( Unpacker& unpacker ) {
00327         _children = unpacker.getRawExtentVector( "children" );
00328       } 
00329 
00330       std::string typeName() const {
00331         return "ExtentOr";
00332       }
00333 
00334       std::string queryText() const {
00335         std::stringstream qtext;
00336 
00337         for( unsigned int i=0; i<_children.size(); i++ ) {
00338           if(i>0) qtext << " ";
00339           qtext << _children[i]->queryText();
00340         }
00341 
00342         return qtext.str();
00343       }
00344 
00345       void addChild( RawExtentNode* node ) {
00346         _children.push_back(node);
00347       }
00348 
00349       std::vector<RawExtentNode*>& getChildren() {
00350         return _children;
00351       }
00352 
00353       bool operator== ( Node& node ) {
00354         ExtentOr* other = dynamic_cast<ExtentOr*>(&node);
00355 
00356         if( other == this )
00357           return true;
00358 
00359         if( !other )
00360           return false;
00361 
00362         return unordered_equal( other->_children, _children );
00363       }
00364 
00365       void pack( Packer& packer ) {
00366         packer.before(this);
00367         packer.put( "children", _children );
00368         packer.after(this);
00369       }
00370 
00371       void walk( Walker& walker ) {
00372         walker.before(this);
00373         for( unsigned int i=0; i<_children.size(); i++ ) {
00374           _children[i]->walk(walker);
00375         }
00376         walker.after(this);
00377       }
00378 
00379       Node* copy( Copier& copier ) {
00380         copier.before(this);
00381         
00382         ExtentOr* duplicate = new ExtentOr();
00383         duplicate->setNodeName( nodeName() );
00384         for( unsigned int i=0; i<_children.size(); i++ ) {
00385           indri::lang::Node* childNode = _children[i]->copy(copier);
00386           duplicate->addChild( dynamic_cast<RawExtentNode*>(childNode) );
00387         }
00388 
00389         return copier.after(this, duplicate);
00390       }
00391     };
00392 
00393     class ExtentAnd : public RawExtentNode {
00394     private:
00395       std::vector<RawExtentNode*> _children;
00396 
00397     public:
00398       ExtentAnd() {}
00399       ExtentAnd( const std::vector<RawExtentNode*>& children ) :
00400         _children(children)
00401       {
00402       }
00403 
00404       ExtentAnd( Unpacker& unpacker ) {
00405         _children = unpacker.getRawExtentVector( "children" );
00406       }
00407 
00408       std::string typeName() const {
00409         return "ExtentAnd";
00410       }
00411 
00412       std::string queryText() const {
00413         std::stringstream qtext;
00414 
00415         for( unsigned int i=0; i<_children.size(); i++ ) {
00416           if(i>0) qtext << ",";
00417           qtext << _children[i]->queryText();
00418         }
00419 
00420         return qtext.str();
00421       }
00422 
00423       void addChild( RawExtentNode* node ) {
00424         _children.push_back(node);
00425       }
00426 
00427       std::vector<RawExtentNode*>& getChildren() {
00428         return _children;
00429       }
00430 
00431       bool operator== ( Node& node ) {
00432         ExtentAnd* other = dynamic_cast<ExtentAnd*>(&node);
00433 
00434         if( other == this )
00435           return true;
00436 
00437         if( !other )
00438           return false;
00439 
00440         return unordered_equal( other->_children, _children );
00441       }
00442 
00443       void pack( Packer& packer ) {
00444         packer.before(this);
00445         packer.put( "children", _children );
00446         packer.after(this);
00447       }
00448 
00449       void walk( Walker& walker ) {
00450         walker.before(this);
00451         for( unsigned int i=0; i<_children.size(); i++ ) {
00452           _children[i]->walk(walker);
00453         }
00454         walker.after(this);
00455       }
00456 
00457       Node* copy( Copier& copier ) {
00458         copier.before(this);
00459 
00460         ExtentAnd* duplicate = new ExtentAnd();
00461         duplicate->setNodeName( nodeName() );
00462         for( unsigned int i=0; i<_children.size(); i++ ) {
00463           Node* child = _children[i]->copy(copier);
00464           duplicate->addChild( dynamic_cast<RawExtentNode*>(child) );
00465         }
00466 
00467         return copier.after(this, duplicate);
00468       }
00469     };
00470 
00471     class UWNode : public RawExtentNode {
00472     private:
00473       std::vector<RawExtentNode*> _children;
00474       int _windowSize;
00475 
00476     public:
00477       UWNode() :
00478         _windowSize(-1) // default is unlimited window size
00479       {
00480       }
00481 
00482       UWNode( int windowSize, std::vector<RawExtentNode*>& children ) :
00483         _windowSize(windowSize),
00484         _children(children)
00485       {
00486       }
00487 
00488       UWNode( Unpacker& unpacker ) {
00489         _windowSize = (int) unpacker.getInteger( "windowSize" );
00490         _children = unpacker.getRawExtentVector( "children" );
00491       }
00492 
00493       std::string typeName() const {
00494         return "UWNode";
00495       }
00496 
00497       std::string queryText() const {
00498         std::stringstream qtext;
00499         
00500         if( _windowSize >= 0 )
00501         qtext << "#uw" << _windowSize << "( ";
00502         else
00503           qtext << "#uw( ";
00504           
00505         for( unsigned int i=0; i<_children.size(); i++ ) {
00506           qtext << _children[i]->queryText() << " ";
00507         }
00508         qtext << ")";
00509         return qtext.str();
00510       }
00511 
00512       void setWindowSize( int windowSize ) {
00513         _windowSize = windowSize;
00514       }
00515 
00516       void setWindowSize( const std::string& windowSize ) {
00517         setWindowSize( atoi( windowSize.c_str() ) );
00518       }
00519 
00520       int getWindowSize() const {
00521         return _windowSize;
00522       }
00523 
00524       const std::vector<RawExtentNode*>& getChildren() const {
00525         return _children;
00526       }
00527 
00528       void addChild( RawExtentNode* node ) {
00529         _children.push_back( node );
00530       }
00531 
00532       bool operator== ( Node& node ) {
00533         UWNode* other = dynamic_cast<UWNode*>(&node);
00534 
00535         if( !other )
00536           return false;
00537 
00538         if( other == this )
00539           return true;
00540 
00541         if( other->_windowSize != _windowSize ) {
00542           return false;
00543         }
00544 
00545         return unordered_equal( _children, other->_children );
00546       }
00547 
00548       void pack( Packer& packer ) {
00549         packer.before(this);
00550         packer.put( "windowSize", _windowSize );
00551         packer.put( "children", _children );
00552         packer.after(this);
00553       }
00554 
00555       void walk( Walker& walker ) {
00556         walker.before(this);
00557         for(unsigned int i=0; i<_children.size(); i++) {
00558           _children[i]->walk(walker);
00559         }
00560         walker.after(this);
00561       }
00562 
00563       Node* copy( Copier& copier ) {
00564         copier.before(this);
00565         
00566         UWNode* duplicate = new UWNode();
00567         duplicate->setNodeName( nodeName() );
00568         duplicate->setWindowSize( _windowSize );
00569         for(unsigned int i=0; i<_children.size(); i++) {
00570           Node* child = _children[i]->copy(copier);
00571           duplicate->addChild( dynamic_cast<RawExtentNode*>(child) );
00572         }
00573 
00574         return copier.after(this, duplicate);
00575       }
00576     };
00577 
00578     class ODNode : public RawExtentNode {
00579     private:
00580       int _windowSize;
00581       std::vector<RawExtentNode*> _children;
00582 
00583     public:
00584       ODNode( int windowSize, std::vector<RawExtentNode*>& children ) :
00585         _windowSize(windowSize),
00586         _children(children)
00587       {
00588       }
00589 
00590       ODNode() :
00591         _windowSize(-1) // default is unlimited window size
00592       {
00593       }
00594 
00595       ODNode( Unpacker& unpacker ) {
00596         _windowSize = (int) unpacker.getInteger( "windowSize" );
00597         _children = unpacker.getRawExtentVector( "children" );
00598       }
00599 
00600       std::string typeName() const {
00601         return "ODNode";
00602       }
00603 
00604       std::string queryText() const {
00605         std::stringstream qtext;
00606         if( _windowSize >= 0 )
00607         qtext << "#" << _windowSize << "( ";
00608         else
00609           qtext << "#od( ";
00610 
00611         for( unsigned int i=0; i<_children.size(); i++ ) {
00612           qtext << _children[i]->queryText() << " ";
00613         }
00614         qtext << ")";
00615         return qtext.str();
00616       }
00617 
00618       const std::vector<RawExtentNode*>& getChildren() const {
00619         return _children;
00620       }
00621 
00622       void setWindowSize( int windowSize ) {
00623         _windowSize = windowSize;
00624       }
00625 
00626       void setWindowSize( const std::string& windowSize ) {
00627         setWindowSize( atoi( windowSize.c_str() ) );
00628       }
00629 
00630       int getWindowSize() const {
00631         return _windowSize;
00632       }
00633 
00634       void addChild( RawExtentNode* node ) {
00635         _children.push_back( node );
00636       }
00637 
00638       bool operator== ( Node& node ) {
00639         ODNode* other = dynamic_cast<ODNode*>(&node);
00640 
00641         if( ! other )
00642           return false;
00643 
00644         if( other == this )
00645           return true;
00646 
00647         if( other->_windowSize != _windowSize )
00648           return false;
00649 
00650         if( _children.size() != other->_children.size() )
00651           return false;
00652 
00653         return equal( _children, other->_children );
00654       }
00655 
00656       void pack( Packer& packer ) {
00657         packer.before(this);
00658         packer.put( "windowSize", _windowSize );
00659         packer.put( "children", _children );
00660         packer.after(this);
00661       }
00662 
00663       void walk( Walker& walker ) {
00664         walker.before(this);
00665         for(unsigned int i=0; i<_children.size(); i++) {
00666           _children[i]->walk(walker);
00667         }
00668         walker.after(this);
00669       }
00670 
00671       Node* copy( Copier& copier ) {
00672         copier.before(this);
00673         
00674         ODNode* duplicate = new ODNode();
00675         duplicate->setWindowSize( _windowSize );
00676         for(unsigned int i=0; i<_children.size(); i++) {
00677           Node* child = _children[i]->copy(copier);
00678           duplicate->addChild( dynamic_cast<RawExtentNode*>(child) );
00679         }
00680 
00681         return copier.after(this, duplicate);
00682       }
00683     };
00684 
00685     class FilReqNode : public RawExtentNode {
00686     private:
00687       RawExtentNode* _filtered;
00688       RawExtentNode* _required;
00689 
00690     public:
00691       FilReqNode( RawExtentNode* filtered, RawExtentNode* required ) {
00692         _filtered = filtered;
00693         _required = required;
00694       }
00695 
00696       FilReqNode( Unpacker& unpacker ) {
00697         _filtered = unpacker.getRawExtentNode( "filtered" );
00698         _required = unpacker.getRawExtentNode( "required" );
00699       }
00700 
00701       std::string typeName() const {
00702         return "FilReqNode";
00703       }
00704 
00705       std::string queryText() const {
00706         std::stringstream qtext;
00707 
00708         qtext << "#filreq("
00709               << _filtered->queryText()
00710               << " "
00711               << _required->queryText()
00712               << ")";
00713         return qtext.str();
00714       }
00715 
00716       RawExtentNode* getFiltered() {
00717         return _filtered;
00718       }
00719 
00720       RawExtentNode* getRequired() {
00721         return _required;
00722       }
00723 
00724       bool operator== ( Node& node ) {
00725         FilReqNode* other = dynamic_cast<FilReqNode*>(&node);
00726 
00727         if( !other )
00728           return false;
00729 
00730         return (*_filtered) == (*other->getFiltered()) &&
00731                (*_required) == (*other->getRequired());
00732       }
00733 
00734       void pack( Packer& packer ) {
00735         packer.before(this);
00736         packer.put("filtered", _filtered);
00737         packer.put("required", _required);
00738         packer.after(this);
00739       }
00740 
00741       void walk( Walker& walker ) {
00742         walker.before(this);
00743         _filtered->walk(walker);
00744         _required->walk(walker);
00745         walker.after(this);
00746       }
00747 
00748       Node* copy( Copier& copier ) {
00749         copier.before(this);
00750         RawExtentNode* filteredDuplicate = dynamic_cast<RawExtentNode*>(_filtered->copy(copier));
00751         RawExtentNode* requiredDuplicate = dynamic_cast<RawExtentNode*>(_required->copy(copier));
00752         FilReqNode* duplicate = new FilReqNode( filteredDuplicate, requiredDuplicate );
00753         return copier.after(this, duplicate);
00754       }
00755     };
00756 
00757     class FilRejNode : public RawExtentNode {
00758     private:
00759       RawExtentNode* _filtered;
00760       RawExtentNode* _disallowed;
00761 
00762     public:
00763       FilRejNode( RawExtentNode* filtered, RawExtentNode* disallowed ) {
00764         _filtered = filtered;
00765         _disallowed = disallowed;
00766       }
00767 
00768       FilRejNode( Unpacker& unpacker ) {
00769         _filtered = unpacker.getRawExtentNode( "filtered" );
00770         _disallowed = unpacker.getRawExtentNode( "disallowed" );
00771       }
00772 
00773       std::string typeName() const {
00774         return "FilRejNode";
00775       }
00776 
00777       std::string queryText() const {
00778         std::stringstream qtext;
00779 
00780         qtext << "#filrej("
00781               << _filtered->queryText()
00782               << " "
00783               << _disallowed->queryText()
00784               << ")";
00785 
00786         return qtext.str();
00787       }
00788 
00789       RawExtentNode* getFiltered() {
00790         return _filtered;
00791       }
00792 
00793       RawExtentNode* getDisallowed() {
00794         return _disallowed;
00795       }
00796 
00797       bool operator== ( Node& node ) {
00798         FilRejNode* other = dynamic_cast<FilRejNode*>(&node);
00799 
00800         if( !other )
00801           return false;
00802 
00803         return (*_filtered) == (*other->getFiltered()) &&
00804                (*_disallowed) == (*other->getDisallowed());
00805       }
00806 
00807       void pack( Packer& packer ) {
00808         packer.before(this);
00809         packer.put("filtered", _filtered);
00810         packer.put("disallowed", _disallowed);
00811         packer.after(this);
00812       }
00813 
00814       void walk( Walker& walker ) {
00815         walker.before(this);
00816         _filtered->walk(walker);
00817         _disallowed->walk(walker);
00818         walker.after(this);
00819       }
00820 
00821       Node* copy( Copier& copier ) {
00822         copier.before(this);
00823         RawExtentNode* filteredDuplicate = dynamic_cast<RawExtentNode*>(_filtered->copy(copier));
00824         RawExtentNode* disallowedDuplicate = dynamic_cast<RawExtentNode*>(_disallowed->copy(copier));
00825         FilRejNode* duplicate = new FilRejNode( filteredDuplicate, disallowedDuplicate );
00826         return copier.after(this, duplicate);
00827       }
00828     };
00829 
00830     class FieldLessNode : public RawExtentNode {
00831     private:
00832       RawExtentNode* _field;
00833       UINT64 _constant;
00834 
00835     public:
00836       FieldLessNode( RawExtentNode* field, UINT64 constant ) :
00837         _field(field),
00838         _constant(constant) {
00839       }
00840       
00841       FieldLessNode( Unpacker& unpacker ) {
00842         _field = unpacker.getRawExtentNode( "field" );
00843         _constant = unpacker.getInteger("constant");
00844       }
00845 
00846       std::string typeName() const {
00847         return "FieldLessNode";
00848       }
00849 
00850       std::string queryText() const {
00851         std::stringstream qtext;
00852         qtext << "#less(" << _field->queryText() << " " << _constant << ")";
00853         return qtext.str();
00854       }
00855 
00856       UINT64 getConstant() const {
00857         return _constant;
00858       }
00859 
00860       RawExtentNode* getField() {
00861         return _field;
00862       }
00863 
00864       bool operator== ( Node& node ) {
00865         FieldLessNode* other = dynamic_cast<FieldLessNode*>(&node);
00866 
00867         return other &&
00868                other->getConstant() == _constant &&
00869                *other->getField() == *_field;
00870       }
00871 
00872       Node* copy( Copier& copier ) {
00873         copier.before(this);
00874         RawExtentNode* fieldDuplicate = dynamic_cast<RawExtentNode*>(_field->copy(copier));
00875         FieldLessNode* duplicate = new FieldLessNode( fieldDuplicate, _constant );
00876         return copier.after(this, duplicate);
00877       }
00878 
00879       void walk( Walker& walker ) {
00880         walker.before(this);
00881         _field->walk(walker);
00882         walker.after(this);
00883       }
00884 
00885       void pack( Packer& packer ) {
00886         packer.before(this);
00887         packer.put("field", _field);
00888         packer.put("constant", _constant);
00889         packer.after(this);
00890       }
00891     };
00892 
00893     class FieldGreaterNode : public RawExtentNode {
00894     private:
00895       RawExtentNode* _field;
00896       UINT64 _constant;
00897 
00898     public:
00899       FieldGreaterNode( RawExtentNode* field, UINT64 constant ) :
00900         _field(field),
00901         _constant(constant) {
00902       }
00903       
00904       FieldGreaterNode( Unpacker& unpacker ) {
00905         _field = unpacker.getRawExtentNode( "field" );
00906         _constant = unpacker.getInteger("constant");
00907       }
00908 
00909       std::string typeName() const {
00910         return "FieldGreaterNode";
00911       }
00912 
00913       std::string queryText() const {
00914         std::stringstream qtext;
00915         qtext << "#greater(" << _field->queryText() << " " << _constant << ")";
00916         return qtext.str();
00917       }
00918 
00919       UINT64 getConstant() const {
00920         return _constant;
00921       }
00922 
00923       RawExtentNode* getField() {
00924         return _field;
00925       }
00926 
00927       bool operator== ( Node& node ) {
00928         FieldGreaterNode* other = dynamic_cast<FieldGreaterNode*>(&node);
00929 
00930         return other &&
00931                other->getConstant() == _constant &&
00932                *other->getField() == *_field;
00933       }
00934 
00935       Node* copy( Copier& copier ) {
00936         copier.before(this);
00937         RawExtentNode* fieldDuplicate = dynamic_cast<RawExtentNode*>(_field->copy(copier));
00938         FieldGreaterNode* duplicate = new FieldGreaterNode( fieldDuplicate, _constant );
00939         return copier.after(this, duplicate);
00940       }
00941 
00942       void walk( Walker& walker ) {
00943         walker.before(this);
00944         _field->walk(walker);
00945         walker.after(this);
00946       }
00947 
00948       void pack( Packer& packer ) {
00949         packer.before(this);
00950         packer.put("field", _field);
00951         packer.put("constant", _constant);
00952         packer.after(this);
00953       }
00954     };
00955 
00956     class FieldBetweenNode : public RawExtentNode {
00957     private:
00958       RawExtentNode* _field;
00959       UINT64 _low;
00960       UINT64 _high;
00961 
00962     public:
00963       FieldBetweenNode( RawExtentNode* field, UINT64 low, UINT64 high ) :
00964         _field(field),
00965         _low(low),
00966         _high(high) {
00967       }
00968       
00969       FieldBetweenNode( Unpacker& unpacker ) {
00970         _field = unpacker.getRawExtentNode( "field" );
00971         _low = unpacker.getInteger("low");
00972         _high = unpacker.getInteger("high");
00973       }
00974 
00975       std::string typeName() const {
00976         return "FieldBetweenNode";
00977       }
00978 
00979       std::string queryText() const {
00980         std::stringstream qtext;
00981         qtext << "#between(" << _field->queryText() << " " << _low << " " << _high << ")";
00982         return qtext.str();
00983       }
00984 
00985       UINT64 getLow() const {
00986         return _low;
00987       }
00988 
00989       UINT64 getHigh() const {
00990         return _high;
00991       }
00992 
00993       RawExtentNode* getField() {
00994         return _field;
00995       }
00996 
00997       bool operator== ( Node& node ) {
00998         FieldBetweenNode* other = dynamic_cast<FieldBetweenNode*>(&node);
00999 
01000         return other &&
01001                other->getLow() == _low &&
01002                other->getHigh() == _high &&
01003                *other->getField() == *_field;
01004       }
01005 
01006       Node* copy( Copier& copier ) {
01007         copier.before(this);
01008         RawExtentNode* fieldDuplicate = dynamic_cast<RawExtentNode*>(_field->copy(copier));
01009         FieldBetweenNode* duplicate = new FieldBetweenNode( fieldDuplicate, _low, _high );
01010         return copier.after(this, duplicate);
01011       }
01012 
01013       void walk( Walker& walker ) {
01014         walker.before(this);
01015         _field->walk(walker);
01016         walker.after(this);
01017       }
01018 
01019       void pack( Packer& packer ) {
01020         packer.before(this);
01021         packer.put("field", _field);
01022         packer.put("low", _low);
01023         packer.put("high", _high);
01024         packer.after(this);
01025       }
01026     };
01027 
01028     class FieldEqualsNode : public RawExtentNode {
01029     private:
01030       RawExtentNode* _field;
01031       UINT64 _constant;
01032 
01033     public:
01034       FieldEqualsNode( RawExtentNode* field, UINT64 constant ) :
01035         _field(field),
01036         _constant(constant) {
01037       }
01038       
01039       FieldEqualsNode( Unpacker& unpacker ) {
01040         _field = unpacker.getRawExtentNode("field");
01041         _constant = unpacker.getInteger("constant");
01042       }
01043 
01044       std::string typeName() const {
01045         return "FieldEqualsNode";
01046       }
01047 
01048       std::string queryText() const {
01049         std::stringstream qtext;
01050         qtext << "#equals(" << _field->queryText() << " " << _constant << ")";
01051         return qtext.str();
01052       }
01053 
01054       UINT64 getConstant() const {
01055         return _constant;
01056       }
01057 
01058       RawExtentNode* getField() {
01059         return _field;
01060       }
01061 
01062       bool operator== ( Node& node ) {
01063         FieldEqualsNode* other = dynamic_cast<FieldEqualsNode*>(&node);
01064 
01065         return other &&
01066                other->getConstant() == _constant &&
01067                *other->getField() == *_field;
01068       }
01069 
01070       Node* copy( Copier& copier ) {
01071         copier.before(this);
01072         RawExtentNode* fieldDuplicate = dynamic_cast<RawExtentNode*>(_field->copy(copier));
01073         FieldEqualsNode* duplicate = new FieldEqualsNode( fieldDuplicate, _constant );
01074         return copier.after(this, duplicate);
01075       }
01076 
01077       void walk( Walker& walker ) {
01078         walker.before(this);
01079         _field->walk(walker);
01080         walker.after(this);
01081       }
01082 
01083       void pack( Packer& packer ) {
01084         packer.before(this);
01085         packer.put("field", _field);
01086         packer.put("constant", _constant);
01087         packer.after(this);
01088       }
01089     };
01090 
01091     class RawScorerNode : public ScoredExtentNode {
01092     private:
01093       UINT64 _occurrences; // number of occurrences within this context
01094       UINT64 _contextSize; // number of terms that occur within this context
01095       UINT64 _maximumContextLength;
01096       UINT64 _minimumContextLength;
01097       UINT64 _maximumOccurrences;
01098       double _maximumContextFraction;
01099 
01100       RawExtentNode* _raw;
01101       RawExtentNode* _context;
01102       std::string _smoothing;
01103 
01104     public:
01105       RawScorerNode( RawExtentNode* raw, RawExtentNode* context, std::string smoothing = "method:dirichlet,mu:2500" ) {
01106         _raw = raw;
01107         _context = context;
01108 
01109         _occurrences = 0;
01110         _contextSize = 0;
01111         _maximumContextLength = MAX_INT32;
01112         _maximumOccurrences = MAX_INT32;
01113         _minimumContextLength = 1;
01114         _maximumContextFraction = 1;
01115         _smoothing = smoothing;
01116       }
01117 
01118       RawScorerNode( Unpacker& unpacker ) {
01119         _raw = unpacker.getRawExtentNode( "raw" );
01120         _context = unpacker.getRawExtentNode( "context" );
01121 
01122         _occurrences = unpacker.getInteger( "occurrences" );
01123         _contextSize = unpacker.getInteger( "contextSize" );
01124         _maximumContextLength = unpacker.getInteger( "maximumContextLength" );
01125         _minimumContextLength = unpacker.getInteger( "minimumContextLength" );
01126         _maximumOccurrences = unpacker.getInteger( "maximumOccurrences" );
01127         _maximumContextFraction = unpacker.getDouble( "maximumContextFraction" );
01128         _smoothing = unpacker.getString( "smoothing" );
01129       }
01130 
01131       std::string typeName() const {
01132         return "RawScorerNode";
01133       }
01134 
01135       std::string queryText() const {
01136         std::stringstream qtext;
01137         
01138         qtext << _raw->queryText();
01139         if( _context ) {
01140           // if we haven't added a period yet, put one in
01141           int dot = qtext.str().find('.');
01142           if( dot < 0 )
01143             qtext << '.';
01144 
01145           qtext << "(" << _context->queryText() << ")";
01146         }
01147 
01148         return qtext.str();
01149       }
01150 
01151       UINT64 getOccurrences() const {
01152         return _occurrences;
01153       }
01154 
01155       UINT64 getContextSize() const {
01156         return _contextSize;
01157       }
01158 
01159       const std::string& getSmoothing() const {
01160         return _smoothing;
01161       }
01162 
01163       UINT64 getMaxContextLength() const {
01164         return _maximumContextLength;
01165       }
01166 
01167       UINT64 getMinContextLength() const {
01168         return _minimumContextLength;
01169       }
01170 
01171       UINT64 getMaxOccurrences() const {
01172         return _maximumOccurrences;
01173       }
01174 
01175       double getMaxContextFraction() const {
01176         return _maximumContextFraction;
01177       }
01178 
01179       void setStatistics( UINT64 occurrences, UINT64 contextSize ) {
01180         _occurrences = occurrences;
01181         _contextSize = contextSize;
01182       }
01183 
01184       void setStatistics( UINT64 occurrences, UINT64 contextSize,
01185                           UINT64 maxOccurrences, UINT64 minContextLength, UINT64 maxContextLength,
01186                           double maxContextFraction ) {
01187         _occurrences = occurrences;
01188         _contextSize = contextSize;
01189 
01190         _maximumOccurrences = maxOccurrences;
01191         _minimumContextLength = minContextLength;
01192         _maximumContextLength = maxContextLength;
01193         _maximumContextFraction = maxContextFraction;
01194       }
01195 
01196       void setContext( RawExtentNode* context ) {
01197         _context = context;
01198       }
01199 
01200       void setRawExtent( RawExtentNode* rawExtent ) {
01201         _raw = rawExtent;
01202       }
01203 
01204       void setSmoothing( const std::string& smoothing ) {
01205         _smoothing = smoothing;
01206       }
01207 
01208       RawExtentNode* getContext() {
01209         return _context;
01210       }
01211 
01212       RawExtentNode* getRawExtent() {
01213         return _raw;
01214       }
01215 
01216       void pack( Packer& packer ) {
01217         packer.before(this);
01218         packer.put( "raw", _raw );
01219         packer.put( "context", _context );
01220 
01221         packer.put( "occurrences", _occurrences );
01222         packer.put( "contextSize", _contextSize );
01223         packer.put( "minimumContextLength", _minimumContextLength );
01224         packer.put( "maximumContextLength", _maximumContextLength );
01225         packer.put( "maximumOccurrences", _maximumOccurrences );
01226         packer.put( "maximumContextFraction", _maximumContextFraction );
01227         packer.put( "smoothing", _smoothing );
01228         packer.after(this);
01229       }
01230 
01231       void walk( Walker& walker ) {
01232         walker.before(this);
01233         if( _raw )
01234           _raw->walk(walker);
01235         if( _context )
01236           _context->walk(walker);
01237         walker.after(this);
01238       }
01239 
01240       Node* copy( Copier& copier ) {
01241         copier.before(this);
01242 
01243         RawExtentNode* duplicateContext = _context ? dynamic_cast<RawExtentNode*>(_context->copy(copier)) : 0;
01244         RawExtentNode* duplicateRaw = _raw ? dynamic_cast<RawExtentNode*>(_raw->copy(copier)) : 0;
01245         RawScorerNode* duplicate = new RawScorerNode(*this);
01246         duplicate->setRawExtent( duplicateRaw );
01247         duplicate->setContext( duplicateContext );
01248 
01249         return copier.after(this, duplicate);
01250       }
01251     };
01252 
01253     class TermFrequencyScorerNode : public ScoredExtentNode {
01254     private:
01255       UINT64 _occurrences; // number of occurrences within this context
01256       UINT64 _contextSize; // number of terms that occur within this context
01257       UINT64 _maximumContextLength;
01258       UINT64 _minimumContextLength;
01259       UINT64 _maximumOccurrences;
01260       double _maximumContextFraction;
01261 
01262       std::string _text;
01263       std::string _smoothing;
01264       bool _stemmed;
01265 
01266     public:
01267       TermFrequencyScorerNode( const std::string& text, bool stemmed ) {
01268         _occurrences = 0;
01269         _contextSize = 0;
01270         _maximumContextLength = MAX_INT32;
01271         _maximumOccurrences = MAX_INT32;
01272         _minimumContextLength = 1;
01273         _maximumContextFraction = 1;
01274         _smoothing = "";
01275         _text = text;
01276         _stemmed = stemmed;
01277       }
01278 
01279       TermFrequencyScorerNode( Unpacker& unpacker ) {
01280         _occurrences = unpacker.getInteger( "occurrences" );
01281         _contextSize = unpacker.getInteger( "contextSize" );
01282         _maximumContextLength = unpacker.getInteger( "maximumContextLength" );
01283         _minimumContextLength = unpacker.getInteger( "minimumContextLength" );
01284         _maximumOccurrences = unpacker.getInteger( "maximumOccurrences" );
01285         _maximumContextFraction = unpacker.getDouble( "maximumContextFraction" );
01286         _smoothing = unpacker.getString( "smoothing" );
01287         _text = unpacker.getString( "text" );
01288         _stemmed = unpacker.getBoolean( "stemmed" );
01289       }
01290       
01291       const std::string& getText() const {
01292         return _text;
01293       }
01294 
01295       bool getStemmed() const {
01296         return _stemmed;
01297       }
01298 
01299       std::string typeName() const {
01300         return "TermFrequencyScorerNode";
01301       }
01302 
01303       std::string queryText() const {
01304         std::stringstream qtext;
01305         
01306         if( !_stemmed )
01307           qtext << _text;
01308         else
01309           qtext << "\"" << _text << "\"";
01310 
01311         return qtext.str();
01312       }
01313 
01314       UINT64 getOccurrences() const {
01315         return _occurrences;
01316       }
01317 
01318       UINT64 getContextSize() const {
01319         return _contextSize;
01320       }
01321 
01322       const std::string& getSmoothing() const {
01323         return _smoothing;
01324       }
01325 
01326       UINT64 getMaxContextLength() const {
01327         return _maximumContextLength;
01328       }
01329 
01330       UINT64 getMinContextLength() const {
01331         return _minimumContextLength;
01332       }
01333 
01334       UINT64 getMaxOccurrences() const {
01335         return _maximumOccurrences;
01336       }
01337 
01338       double getMaxContextFraction() const {
01339         return _maximumContextFraction;
01340       }
01341 
01342       void setStatistics( UINT64 occurrences, UINT64 contextSize,
01343                           UINT64 maxOccurrences,
01344                           UINT64 minContextLength, UINT64 maxContextLength,
01345                           double maxContextFraction ) {
01346         _occurrences = occurrences;
01347         _contextSize = contextSize;
01348 
01349         _maximumOccurrences = maxOccurrences;
01350         _minimumContextLength = minContextLength;
01351         _maximumContextLength = maxContextLength;
01352         _maximumContextFraction = maxContextFraction;
01353       }
01354 
01355       void setSmoothing( const std::string& smoothing ) {
01356         _smoothing = smoothing;
01357       }
01358 
01359       void pack( Packer& packer ) {
01360         packer.before(this);
01361         packer.put( "occurrences", _occurrences );
01362         packer.put( "contextSize", _contextSize );
01363         packer.put( "minimumContextLength", _minimumContextLength );
01364         packer.put( "maximumContextLength", _maximumContextLength );
01365         packer.put( "maximumOccurrences", _maximumOccurrences );
01366         packer.put( "maximumContextFraction", _maximumContextFraction );
01367         packer.put( "text", _text );
01368         packer.put( "stemmed", _stemmed );
01369         packer.put( "smoothing", _smoothing );
01370         packer.after(this);
01371       }
01372 
01373       void walk( Walker& walker ) {
01374         walker.before(this);
01375         walker.after(this);
01376       }
01377 
01378       Node* copy( Copier& copier ) {
01379         copier.before(this);
01380         TermFrequencyScorerNode* duplicate = new TermFrequencyScorerNode(*this);
01381         return copier.after(this, duplicate);
01382       }
01383     };
01384 
01385     // The CachedFrequencyScorerNode should only be used on a local machine;
01386     // it should not be transferred across the network
01387     class CachedFrequencyScorerNode : public indri::lang::ScoredExtentNode {
01388     private:
01389       indri::lang::Node* _raw;
01390       indri::lang::Node* _context;
01391       std::string _smoothing;
01392       void* _list;
01393     
01394     public:
01395       CachedFrequencyScorerNode( indri::lang::Node* raw, indri::lang::Node* context )
01396         :
01397         _raw(raw),
01398         _context(context),
01399         _list(0)
01400       {
01401       }
01402 
01403       CachedFrequencyScorerNode( Unpacker& unpacker ) {
01404         LEMUR_THROW( LEMUR_RUNTIME_ERROR, "CachedFrequencyScorerNode should not be used on the network" );
01405 
01406         _raw = unpacker.getRawExtentNode( "raw" );
01407         _context = unpacker.getRawExtentNode( "context" );
01408         _smoothing = unpacker.getString( "smoothing" );
01409       }
01410 
01411       void setList( void* list ) {
01412         _list = list;
01413       }
01414 
01415       void* getList() {
01416         return _list;
01417       }
01418 
01419       std::string typeName() const {
01420         return "CachedFrequencyScorerNode";
01421       }
01422 
01423       std::string queryText() const {
01424         std::stringstream qtext;
01425         
01426         qtext << _raw->queryText();
01427         if( _context ) {
01428           // if we haven't added a period yet, put one in
01429           int dot = qtext.str().find('.');
01430           if( dot < 0 )
01431             qtext << '.';
01432 
01433           qtext << "(" << _context->queryText() << ")";
01434         }
01435 
01436         return qtext.str();
01437       }
01438 
01439       void setSmoothing( const std::string& smoothing ) {
01440         _smoothing = smoothing;
01441       }
01442 
01443       const std::string& getSmoothing() const {
01444         return _smoothing;
01445       }
01446 
01447       indri::lang::Node* getRaw() {
01448         return _raw;
01449       }
01450 
01451       indri::lang::Node* getContext() {
01452         return _context;
01453       }
01454       
01455       void pack( Packer& packer ) {
01456         packer.before(this);
01457         packer.put( "raw", _raw );
01458         packer.put( "context", _context );
01459         packer.put( "smoothing", _smoothing );
01460         packer.after(this);
01461       }
01462 
01463       void walk( Walker& walker ) {
01464         walker.before(this);
01465         _raw->walk( walker );
01466         if( _context )
01467           _context->walk( walker );
01468         walker.after(this);
01469       }
01470 
01471       indri::lang::Node* copy( Copier& copier ) {
01472         copier.before(this);
01473 
01474         indri::lang::RawExtentNode* duplicateRaw = dynamic_cast<indri::lang::RawExtentNode*>(_raw->copy(copier));
01475         indri::lang::RawExtentNode* duplicateContext = 0;
01476 
01477         if( _context ) 
01478           duplicateContext = dynamic_cast<indri::lang::RawExtentNode*>(_context->copy(copier));
01479 
01480         CachedFrequencyScorerNode* duplicate = new CachedFrequencyScorerNode( duplicateRaw,
01481                                                                               duplicateContext );
01482         duplicate->setNodeName( nodeName() );
01483         duplicate->setSmoothing( _smoothing );
01484         duplicate->setList( getList() );
01485 
01486         return copier.after( this, duplicate );
01487       }
01488     };
01489 
01490     class PriorNode : public ScoredExtentNode {
01491     public:
01492       struct tuple_type {
01493         int begin;
01494         int end;
01495         double score;
01496       };
01497 
01498     private:
01499       std::map<int,tuple_type> _table;
01500       Field* _field;
01501       std::string _fieldName;
01502 
01503     public:
01504       PriorNode() :
01505         _field(0)
01506       {
01507       }
01508 
01509       PriorNode( indri::lang::Field* field, const std::map<int, tuple_type>& table ) :
01510         _table(table),
01511         _field(field)
01512       {
01513         _fieldName = _field->getFieldName();  
01514       }
01515 
01516       PriorNode( std::string& fieldName, const std::map<int, tuple_type>& table ) :
01517         _fieldName(fieldName),
01518         _field(0),
01519         _table(table)
01520       {
01521       }
01522 
01523       std::string queryText() const {
01524         std::stringstream qtext;
01525         // with the definition of priors somewhat in flux, it's
01526         // hard to know what would be good to put here.
01527         qtext << "#prior(" << _fieldName << ")";
01528         return qtext.str();
01529       }
01530 
01531       PriorNode( Unpacker& unpacker ) {
01532         std::vector<int> beginList = unpacker.getIntVector( "begin" );
01533         std::vector<int> endList = unpacker.getIntVector( "end" );
01534         std::vector<double> scoreList = unpacker.getDoubleVector( "score" );
01535         assert( beginList.size() == endList.size() );
01536         assert( scoreList.size() == endList.size() );
01537 
01538         for( size_t i=0; i<beginList.size(); i++ ) {
01539           tuple_type t;
01540           t.begin = beginList[i];
01541           t.end = endList[i];
01542           t.score = scoreList[i];
01543 
01544           _table[ beginList[i] ] = t;
01545         }
01546  
01547         _field = dynamic_cast<Field*>(unpacker.getRawExtentNode( "field" ));
01548       }
01549 
01550       std::string nodeType() {
01551         return "PriorNode";
01552       }
01553       
01554       const std::map<int,tuple_type>& getTable() const {
01555         return _table;
01556       }
01557 
01558       const std::string& getFieldName() const {
01559         return _fieldName;
01560       }
01561 
01562       indri::lang::Field* getField() const {
01563         return _field;
01564       }
01565 
01566       void setField( indri::lang::Field* field ) {
01567         _field = field;
01568       }
01569 
01570       void walk( Walker& walker ) {
01571         walker.before(this);
01572         _field->walk(walker);
01573         walker.after(this);
01574       }
01575 
01576       indri::lang::Node* copy( Copier& copier ) {
01577         copier.before(this);
01578         Field* duplicateField = dynamic_cast<Field*>(_field->copy(copier));
01579         PriorNode* duplicate = new PriorNode( duplicateField, getTable() );
01580         return copier.after(this, duplicate);
01581       }
01582 
01583       void pack( Packer& packer ) {
01584         packer.before(this);
01585 
01586         std::vector<int> beginList;
01587         std::vector<int> endList;
01588         std::vector<double> scoreList;
01589 
01590         for( std::map<int,tuple_type>::iterator iter;
01591              iter != _table.end();
01592              iter++ )
01593         {
01594           beginList.push_back( (*iter).second.begin );
01595           endList.push_back( (*iter).second.end );
01596           scoreList.push_back( (*iter).second.score );
01597         }
01598 
01599         packer.put( "begin", beginList );
01600         packer.put( "end", endList );
01601         packer.put( "score", scoreList );
01602         packer.put( "field", _field );
01603         packer.after(this);
01604       }
01605     };
01606 
01607     /* abstract */ class UnweightedCombinationNode : public ScoredExtentNode {
01608     protected:
01609       std::vector<ScoredExtentNode*> _children;
01610 
01611       void _unpack( Unpacker& unpacker ) {
01612         _children = unpacker.getScoredExtentVector( "children" );
01613       }
01614 
01615       template<class _ThisType>
01616       void _walk( _ThisType* ptr, Walker& walker ) {
01617         walker.before(ptr);
01618 
01619         for( unsigned int i=0; i<_children.size(); i++ ) {
01620           _children[i]->walk(walker);
01621         }
01622         
01623         walker.after(ptr);
01624       }
01625 
01626       template<class _ThisType>
01627       Node* _copy( _ThisType* ptr, Copier& copier ) {
01628         copier.before(ptr);
01629         
01630         _ThisType* duplicate = new _ThisType();
01631         duplicate->setNodeName( nodeName() );
01632         for( unsigned int i=0; i<_children.size(); i++ ) {
01633           duplicate->addChild( dynamic_cast<ScoredExtentNode*>(_children[i]->copy(copier)) );
01634         } 
01635 
01636         return copier.after(ptr, duplicate);
01637       }
01638 
01639       void _childText( std::stringstream& qtext ) const {
01640         for( unsigned int i=0; i<_children.size(); i++ ) {
01641           if(i>0) qtext << " ";
01642           qtext << _children[i]->queryText();
01643         }
01644       }
01645 
01646     public:
01647       const std::vector<ScoredExtentNode*>& getChildren() {
01648         return _children;
01649       }
01650 
01651       void addChild( ScoredExtentNode* scoredNode ) {
01652         _children.push_back( scoredNode );
01653       }
01654 
01655       void pack( Packer& packer ) {
01656         packer.before(this);
01657         packer.put( "children", _children );
01658         packer.after(this);
01659       }
01660     };
01661 
01662     /* abstract */ class WeightedCombinationNode : public ScoredExtentNode {
01663     protected:
01664       std::vector< std::pair<double, ScoredExtentNode*> > _children;
01665 
01666       void _unpack( Unpacker& unpacker ) {
01667         std::vector<double> weights = unpacker.getDoubleVector( "weights" );
01668         std::vector<ScoredExtentNode*> nodes = unpacker.getScoredExtentVector( "children" );
01669 
01670         for( unsigned int i=0; i<weights.size(); i++ ) {
01671           _children.push_back( std::make_pair( weights[i], nodes[i] ) );
01672         }
01673       }
01674 
01675       template<class _ThisType>
01676       void _walk( _ThisType* ptr, Walker& walker ) {
01677         walker.before(ptr);
01678         for( unsigned int i=0; i<_children.size(); i++ ) {
01679           _children[i].second->walk(walker);
01680         }
01681         walker.after(ptr);
01682       }
01683 
01684       template<class _ThisType>
01685       Node* _copy( _ThisType* ptr, Copier& copier ) {
01686         copier.before(ptr);
01687 
01688         _ThisType* duplicate = new _ThisType;
01689         duplicate->setNodeName( nodeName() );
01690         for( unsigned int i=0; i<_children.size(); i++ ) {
01691           double childWeight = _children[i].first;
01692           Node* childCopy = _children[i].second->copy( copier );
01693 
01694           duplicate->addChild( childWeight, dynamic_cast<ScoredExtentNode*>(childCopy) );
01695         }
01696         return copier.after(ptr, duplicate);
01697       }
01698 
01699       void _childText( std::stringstream& qtext ) const {
01700         for( unsigned int i=0; i<_children.size(); i++ ) {
01701           if(i>0) qtext << " ";
01702           qtext << _children[i].first
01703                 << " "
01704                 << _children[i].second->queryText();
01705         }
01706       }
01707 
01708     public:
01709       const std::vector< std::pair<double, ScoredExtentNode*> >& getChildren() {
01710         return _children;
01711       }
01712 
01713       void addChild( double weight, ScoredExtentNode* scoredNode ) {
01714         _children.push_back( std::make_pair( weight, scoredNode) );
01715       }
01716 
01717       void addChild( const std::string& weight, ScoredExtentNode* scoredNode ) {
01718         addChild( atof( weight.c_str() ), scoredNode );
01719       }
01720 
01721       void pack( Packer& packer ) {
01722         packer.before(this);
01723         
01724         std::vector<double> weights;
01725         std::vector<ScoredExtentNode*> nodes;
01726 
01727         for( unsigned int i=0; i<_children.size(); i++ ) {
01728           weights.push_back( _children[i].first );
01729           nodes.push_back( _children[i].second );
01730         }
01731 
01732         packer.put( "weights", weights );
01733         packer.put( "children", nodes );
01734         packer.after(this);
01735       }
01736     };
01737 
01738     class OrNode : public UnweightedCombinationNode {
01739     public:
01740       OrNode() {}
01741       OrNode( Unpacker& unpacker ) {
01742         _unpack( unpacker );
01743       }
01744 
01745       std::string typeName() const {
01746         return "OrNode";
01747       }
01748 
01749       std::string queryText() const {
01750         std::stringstream qtext;
01751         qtext << "#or(";
01752         _childText(qtext);
01753         qtext << ")";
01754 
01755         return qtext.str();
01756       } 
01757 
01758       void walk( Walker& walker ) {
01759         _walk( this, walker );
01760       }
01761       
01762       Node* copy( Copier& copier ) {
01763         return _copy( this, copier );
01764       }
01765     };
01766 
01767     class NotNode : public ScoredExtentNode {
01768     private:
01769       ScoredExtentNode* _child;
01770 
01771     public:
01772       NotNode() {
01773         _child = 0;
01774       }
01775 
01776       NotNode( ScoredExtentNode* child ) {
01777         _child = child;
01778       }
01779 
01780       NotNode( Unpacker& unpacker ) {
01781         _child = unpacker.getScoredExtentNode( "child" );
01782       }
01783 
01784       std::string typeName() const {
01785         return "NotNode";
01786       }
01787 
01788       ScoredExtentNode* getChild() {
01789         return _child;
01790       }
01791 
01792       void setChild( ScoredExtentNode* child ) {
01793         _child = child;
01794       }
01795 
01796       std::string queryText() const {
01797         std::stringstream qtext;
01798         qtext << "#not(";
01799         qtext << _child->queryText();
01800         qtext << ")";
01801 
01802         return qtext.str();
01803       } 
01804 
01805       void walk( Walker& walker ) {
01806         walker.before(this);
01807         _child->walk(walker);
01808         walker.after(this);
01809       }
01810 
01811       void pack( Packer& packer ) {
01812         packer.before(this);
01813         packer.put( "child", _child );
01814         packer.after(this);
01815       }
01816 
01817       Node* copy( Copier& copier ) {
01818         copier.before( this );
01819         ScoredExtentNode* childDuplicate = dynamic_cast<ScoredExtentNode*>(_child->copy(copier));
01820         NotNode* duplicate = new NotNode( childDuplicate );
01821         duplicate->setNodeName( nodeName() );
01822         return copier.after( this, duplicate );
01823       }
01824     };
01825 
01826     class MaxNode : public UnweightedCombinationNode {
01827     public:
01828       MaxNode() {}
01829       MaxNode( Unpacker& unpacker ) {
01830         _unpack( unpacker );
01831       }
01832 
01833       std::string typeName() const {
01834         return "MaxNode";
01835       }
01836 
01837       std::string queryText() const {
01838         std::stringstream qtext;
01839         qtext << "#max(";
01840         _childText(qtext);
01841         qtext << ")";
01842 
01843         return qtext.str();
01844       } 
01845 
01846 
01847       void walk( Walker& walker ) {
01848         _walk(this, walker);
01849       }
01850 
01851       Node* copy( Copier& copier ) {
01852         return _copy( this, copier );
01853       }
01854     };
01855 
01856     class CombineNode : public UnweightedCombinationNode {
01857     public:
01858       CombineNode() {}
01859       CombineNode( Unpacker& unpacker ) {
01860         _unpack( unpacker );
01861       }
01862 
01863       std::string typeName() const {
01864         return "CombineNode";
01865       }
01866 
01867       std::string queryText() const {
01868         std::stringstream qtext;
01869         qtext << "#combine(";
01870         _childText(qtext);
01871         qtext << ")";
01872 
01873         return qtext.str();
01874       } 
01875 
01876       void walk( Walker& walker ) {
01877         _walk( this, walker );
01878       }
01879       
01880       Node* copy( Copier& copier ) {
01881         return _copy( this, copier );
01882       }
01883     };
01884 
01885     class WAndNode : public WeightedCombinationNode {
01886     public:
01887       WAndNode() {}
01888       WAndNode( Unpacker& unpacker ) {
01889         _unpack( unpacker );
01890       }
01891 
01892       std::string typeName() const {
01893         return "WAndNode";
01894       }
01895 
01896       std::string queryText() const {
01897         std::stringstream qtext;
01898         qtext << "#wand(";
01899         _childText(qtext);
01900         qtext << ")";
01901 
01902         return qtext.str();
01903       } 
01904 
01905       void walk( Walker& walker ) {
01906         _walk( this, walker );
01907       }
01908 
01909       Node* copy( Copier& copier ) {
01910         return _copy( this, copier );
01911       }
01912     };
01913 
01914     class WSumNode : public WeightedCombinationNode {
01915     public:
01916       WSumNode() {}
01917       WSumNode( Unpacker& unpacker ) {
01918         _unpack( unpacker );
01919       }
01920 
01921       std::string typeName() const {
01922         return "WSumNode";
01923       }
01924 
01925       std::string queryText() const {
01926         std::stringstream qtext;
01927         qtext << "#wsum(";
01928         _childText(qtext);
01929         qtext << ")";
01930 
01931         return qtext.str();
01932       } 
01933 
01934       void walk( Walker& walker ) {
01935         _walk( this, walker );
01936       }
01937 
01938       Node* copy( Copier& copier ) {
01939         return _copy( this, copier );
01940       }
01941     };
01942 
01943     class WeightNode : public WeightedCombinationNode {
01944     public:
01945       WeightNode() {}
01946       WeightNode( Unpacker& unpacker ) {
01947         _unpack( unpacker );
01948       }
01949 
01950       std::string typeName() const {
01951         return "WeightNode";
01952       }
01953 
01954       std::string queryText() const {
01955         std::stringstream qtext;
01956         qtext << "#weight(";
01957         _childText(qtext);
01958         qtext << ")";
01959 
01960         return qtext.str();
01961       } 
01962 
01963       void walk( Walker& walker ) {
01964         _walk( this, walker );
01965       }
01966 
01967       Node* copy( Copier& copier ) {
01968         return _copy( this, copier );
01969       }
01970     };
01971 
01972     // Scored node that attaches a field restriction to a scored child.  In query
01973     // text the field appears in brackets, e.g. "#combine(dog cat)" gains "[field]" before "(".
01974     class ExtentRestriction : public ScoredExtentNode {
01975     private:
01976       ScoredExtentNode* _child;  // node whose query text receives the brackets
01977       RawExtentNode* _field;     // node whose query text goes inside the brackets
01978 
01979     public:
01980       // Restores the "child" and "field" entries from the unpacker.
01981       ExtentRestriction( Unpacker& unpacker ) :
01982         _child( unpacker.getScoredExtentNode("child") ),
01983         _field( unpacker.getRawExtentNode("field") )
01984       {
01985       }
01986 
01987       // Stores the two node pointers exactly as passed in.
01988       ExtentRestriction( ScoredExtentNode* child, RawExtentNode* field ) :
01989         _child( child ),
01990         _field( field )
01991       {
01992       }
01993 
01994       // Returns the fixed type name "ExtentRestriction".
01995       std::string typeName() const { return "ExtentRestriction"; }
01996 
01997       // Returns the child's query text with "[<field text>]" spliced in.
01998       // The restriction is almost certainly on a #combine or #max style operator,
01999       // so the brackets go right before the child's first parenthesis; if the
02000       // child text has no parenthesis they are tacked onto the front instead.
02001       std::string queryText() const {
02002         std::string text = _child->queryText();
02003 
02004         std::string restriction( "[" );
02005         restriction += _field->queryText();
02006         restriction += "]";
02007 
02008         const std::string::size_type paren = text.find( '(' );
02009         const std::string::size_type where = ( paren == std::string::npos ) ? 0 : paren;
02010         text.insert( where, restriction );
02011 
02012         return text;
02013       }
02014 
02015       // Returns the restricted child node.
02016       ScoredExtentNode* getChild() { return _child; }
02017 
02018       // Returns the field node.
02019       RawExtentNode* getField() { return _field; }
02020 
02021       // Replaces the stored child pointer.
02022       void setChild( ScoredExtentNode* child ) { _child = child; }
02023 
02024       // Replaces the stored field pointer.
02025       void setField( RawExtentNode* field ) { _field = field; }
02026 
02027       // Writes both nodes to the packer under the same keys the unpacking
02028       // constructor reads ("child" and "field").
02029       void pack( Packer& packer ) {
02030         packer.before( this );
02031         packer.put( "child", _child );
02032         packer.put( "field", _field );
02033         packer.after( this );
02034       }
02035 
02036       // Walks the child and then the field, between before()/after() calls
02037       // made on the walker for this node.
02038       void walk( Walker& walker ) {
02039         walker.before( this );
02040         _child->walk( walker );
02041         _field->walk( walker );
02042         walker.after( this );
02043       }
02044 
02045       // Copies the child and then the field through the copier, wraps the two
02046       // copies in a new ExtentRestriction carrying this node's name, and returns
02047       // whatever copier.after() hands back.
02048       Node* copy( Copier& copier ) {
02049         copier.before( this );
02050 
02051         ScoredExtentNode* childCopy = dynamic_cast<ScoredExtentNode*>( _child->copy( copier ) );
02052         RawExtentNode* fieldCopy = dynamic_cast<RawExtentNode*>( _field->copy( copier ) );
02053 
02054         ExtentRestriction* result = new ExtentRestriction( childCopy, fieldCopy );
02055         result->setNodeName( nodeName() );
02056 
02057         return copier.after( this, result );
02058       }
02059     };
02060 
02061     // Scored node pairing a scored child with a list of document numbers.
02062     // The filter is anonymous in query text: only the child's text is shown.
02063     class FilterNode : public ScoredExtentNode {
02064     private:
02065       ScoredExtentNode* _child;     // node being filtered
02066       std::vector<int> _documents;  // document list, held by value
02067 
02068     public:
02069       // Keeps the child pointer and copies the document list (read-only, so taken by const reference).
02070       FilterNode( ScoredExtentNode* child, const std::vector<int>& documents ) :
02071         _child( child ),
02072         _documents( documents )
02073       {
02074       }
02075 
02076       // Restores the "child" and "documents" entries from the unpacker.
02077       FilterNode( Unpacker& unpacker ) {
02078         _child = unpacker.getScoredExtentNode( "child" );
02079         _documents = unpacker.getIntVector( "documents" );
02080       }
02081 
02082       // Returns the fixed type name "FilterNode".
02083       std::string typeName() const { return "FilterNode"; }
02084 
02085       // Returns the child node wrapped by this filter.
02086       ScoredExtentNode* getChild() { return _child; }
02087 
02088       // Returns a read-only reference to the stored document list.
02089       const std::vector<int>& getDocuments() const { return _documents; }
02090 
02091       // Writes the child and document list under the keys the unpacking constructor reads.
02092       void pack( Packer& packer ) {
02093         packer.before( this );
02094         packer.put( "child", _child );
02095         packer.put( "documents", _documents );
02096         packer.after( this );
02097       }
02098 
02099       // Walks the child between before()/after() calls made on the walker for this node.
02100       void walk( Walker& walker ) {
02101         walker.before( this );
02102         _child->walk( walker );
02103         walker.after( this );
02104       }
02105 
02106       // For now the filter stays anonymous, since it can never be typed by the
02107       // user; the child's query text is returned unchanged.
02108       std::string queryText() const { return _child->queryText(); }
02109 
02110       // Copies the child via the copier and wraps it in a new FilterNode with the same documents and name.
02111       Node* copy( Copier& copier ) {
02112         copier.before( this );
02113         ScoredExtentNode* childCopy = dynamic_cast<ScoredExtentNode*>( _child->copy( copier ) );
02114         FilterNode* result = new FilterNode( childCopy, _documents );
02115         result->setNodeName( nodeName() );
02116         return copier.after( this, result );
02117       }
02118     };
02119 
02120     // Accumulator node holding a raw extent expression, an optional context
02121     // expression, and the occurrence/context statistics recorded for them through
02122     // setContextSize() and the two setCounts() overloads.
02123     class ContextCounterNode : public AccumulatorNode {
02124     private:
02125       RawExtentNode* _raw;      // may be null; see queryText() and walk()
02126       RawExtentNode* _context;  // may be null; see queryText() and walk()
02127       bool _hasCounts;          // true once setCounts() has run
02128       bool _hasContextSize;     // true once setContextSize() has run
02129       bool _hasMaxScore;        // true once the six-argument setCounts() has run
02130       UINT64 _occurrences;
02131       UINT64 _contextSize;
02132 
02133       UINT64 _minimumContextLength;
02134       UINT64 _maximumContextLength;
02135       UINT64 _maximumOccurrences;
02136       double _maximumContextFraction;
02137 
02138     public:
02139       // Starts with nothing recorded: all three flags false, both counts zero,
02140       // minimum context length 1, maximum context length and occurrences
02141       // MAX_INT32, and maximum context fraction 1.0.
02142       ContextCounterNode( RawExtentNode* raw, RawExtentNode* context ) :
02143         _raw(raw),
02144         _context(context),
02145         _hasCounts(false),
02146         _hasContextSize(false),
02147         _hasMaxScore(false),
02148         _occurrences(0),
02149         _contextSize(0),
02150         _minimumContextLength(1),
02151         _maximumContextLength(MAX_INT32),
02152         _maximumOccurrences(MAX_INT32),
02153         _maximumContextFraction(1.0)
02154       {
02155       }
02156 
02157       // Restores every member from the unpacker, using the same keys that
02158       // pack() writes.
02159       ContextCounterNode( Unpacker& unpacker ) {
02160         _raw = unpacker.getRawExtentNode( "raw" );
02161         _context = unpacker.getRawExtentNode( "context" );
02162         _occurrences = unpacker.getInteger( "occurrences" );
02163         _contextSize = unpacker.getInteger( "contextSize" );
02164 
02165         _minimumContextLength = unpacker.getInteger( "minimumContextLength" );
02166         _maximumContextLength = unpacker.getInteger( "maximumContextLength" );
02167         _maximumOccurrences = unpacker.getInteger( "maximumOccurrences" );
02168         _maximumContextFraction = unpacker.getDouble( "maximumContextFraction" );
02169 
02170         _hasCounts = unpacker.getBoolean( "hasCounts" );
02171         _hasContextSize = unpacker.getBoolean( "hasContextSize" );
02172         _hasMaxScore = unpacker.getBoolean( "hasMaxScore" );
02173       }
02174 
02175       // Returns the fixed type name "ContextCounterNode".
02176       std::string typeName() const { return "ContextCounterNode"; }
02177 
02178       // Returns the raw expression's text (empty when _raw is null), followed,
02179       // when a context is set, by ".(<context text>)".  The '.' is only added
02180       // when the text so far contains none.  The search result is compared with
02181       // std::string::npos rather than narrowed to int, matching the way
02182       // ExtentRestriction::queryText() tests its find() result.
02183       std::string queryText() const {
02184         std::string text;
02185 
02186         if( _raw )
02187           text += _raw->queryText();
02188 
02189         if( _context ) {
02190           // if we haven't added a period yet, put one in before the
02191           // parenthesised context
02192           if( text.find( '.' ) == std::string::npos )
02193             text += '.';
02194 
02195           text += "(";
02196           text += _context->queryText();
02197           text += ")";
02198         }
02199 
02200         return text;
02201       }
02202 
02203       // Returns the context expression (may be null).
02204       RawExtentNode* getContext() { return _context; }
02205 
02206       // Returns the raw extent expression (may be null).
02207       RawExtentNode* getRawExtent() { return _raw; }
02208 
02209       // Replaces the stored raw extent pointer.
02210       void setRawExtent( RawExtentNode* rawExtent ) { _raw = rawExtent; }
02211 
02212       // Replaces the stored context pointer.
02213       void setContext( RawExtentNode* context ) { _context = context; }
02214 
02215       // Writes every member to the packer, under the same keys the unpacking
02216       // constructor reads.
02217       void pack( Packer& packer ) {
02218         packer.before(this);
02219         packer.put( "raw", _raw );
02220         packer.put( "context", _context );
02221         packer.put( "occurrences", _occurrences );
02222         packer.put( "contextSize", _contextSize );
02223 
02224         packer.put( "maximumOccurrences", _maximumOccurrences );
02225         packer.put( "maximumContextFraction", _maximumContextFraction );
02226         packer.put( "maximumContextLength", _maximumContextLength );
02227         packer.put( "minimumContextLength", _minimumContextLength );
02228 
02229         packer.put( "hasCounts", _hasCounts );
02230         packer.put( "hasContextSize", _hasContextSize );
02231         packer.put( "hasMaxScore", _hasMaxScore );
02232         packer.after(this);
02233       }
02234 
02235       // Walks the raw expression and then the context, skipping whichever is
02236       // null, between before()/after() calls made on the walker for this node.
02237       void walk( Walker& walker ) {
02238         walker.before(this);
02239         if( _raw ) _raw->walk(walker);
02240         if( _context ) _context->walk(walker);
02241         walker.after(this);
02242       }
02243 
02244       // Copy-constructs a duplicate of this node (so flags and statistics carry
02245       // over), then points it at copier-made copies of the raw and context
02246       // nodes; a null member stays null in the duplicate.
02247       Node* copy( Copier& copier ) {
02248         copier.before(this);
02249 
02250         RawExtentNode* rawCopy = _raw ? dynamic_cast<RawExtentNode*>(_raw->copy(copier)) : 0;
02251         RawExtentNode* contextCopy = _context ? dynamic_cast<RawExtentNode*>(_context->copy(copier)) : 0;
02252 
02253         ContextCounterNode* duplicate = new ContextCounterNode(*this);
02254         duplicate->setContext(contextCopy);
02255         duplicate->setRawExtent(rawCopy);
02256 
02257         return copier.after(this, duplicate);
02258       }
02259 
02260       // Flags reporting which of the setters below have been called.
02261       bool hasCounts() const { return _hasCounts; }
02262       bool hasContextSize() const { return _hasContextSize; }
02263       bool hasMaxScore() const { return _hasMaxScore; }
02264 
02265       // Recorded counts.
02266       UINT64 getOccurrences() const { return _occurrences; }
02267       UINT64 getContextSize() const { return _contextSize; }
02268 
02269       // Recorded bounds (constructor defaults until the six-argument setCounts() runs).
02270       UINT64 getMaximumOccurrences() const { return _maximumOccurrences; }
02271       UINT64 getMinimumContextLength() const { return _minimumContextLength; }
02272       UINT64 getMaximumContextLength() const { return _maximumContextLength; }
02273       double getMaximumContextFraction() const { return _maximumContextFraction; }
02274 
02275       // Records the context size and marks it as available.
02276       void setContextSize( UINT64 contextSize ) {
02277         _contextSize = contextSize;
02278         _hasContextSize = true;
02279       }
02280 
02281       // Records the occurrence count and, through setContextSize(), the context
02282       // size; marks both as available.
02283       void setCounts( UINT64 occurrences, UINT64 contextSize ) {
02284         _hasCounts = true;
02285         _occurrences = occurrences;
02286         setContextSize( contextSize );
02287       }
02288 
02289       // Records the counts via the two-argument overload, then stores the four
02290       // bounds and sets the hasMaxScore flag.
02291       void setCounts( UINT64 occurrences, UINT64 contextSize,
02292                       UINT64 maximumOccurrences, UINT64 minimumContextLength,
02293                       UINT64 maximumContextLength, double maximumContextFraction )
02294       {
02295         setCounts( occurrences, contextSize );
02296         _hasMaxScore = true;
02297         _maximumOccurrences = maximumOccurrences;
02298         _minimumContextLength = minimumContextLength;
02299         _maximumContextLength = maximumContextLength;
02300         _maximumContextFraction = maximumContextFraction;
02301       }
02302     };
02303 
02304     // Accumulator node wrapping one scored node.  It is anonymous in query text:
02305     // queryText() returns the wrapped node's text unchanged.
02306     class ScoreAccumulatorNode : public AccumulatorNode {
02307     private:
02308       ScoredExtentNode* _scoredNode;  // the wrapped scored node
02309 
02310     public:
02311       // Stores the given scored node pointer.
02312       ScoreAccumulatorNode( ScoredExtentNode* scoredNode ) : _scoredNode( scoredNode ) {}
02313 
02314       // Restores the "scoredNode" entry from the unpacker.
02315       ScoreAccumulatorNode( Unpacker& unpacker ) :
02316         _scoredNode( unpacker.getScoredExtentNode( "scoredNode" ) )
02317       {}
02318 
02319       // Returns the fixed type name "ScoreAccumulatorNode".
02320       std::string typeName() const { return "ScoreAccumulatorNode"; }
02321 
02322       // Anonymous: hands back the wrapped node's query text.
02323       std::string queryText() const { return _scoredNode->queryText(); }
02324 
02325       // Returns the wrapped scored node.
02326       ScoredExtentNode* getChild() { return _scoredNode; }
02327 
02328       // Writes the wrapped node under the key the unpacking constructor reads.
02329       void pack( Packer& packer ) {
02330         packer.before( this );
02331         packer.put( "scoredNode", _scoredNode );
02332         packer.after( this );
02333       }
02334 
02335       // Walks the wrapped node between before()/after() calls made on the walker for this node.
02336       void walk( Walker& walker ) {
02337         walker.before( this );
02338         _scoredNode->walk( walker );
02339         walker.after( this );
02340       }
02341 
02342       // Copies the wrapped node via the copier and wraps the copy in a new node with this node's name.
02343       Node* copy( Copier& copier ) {
02344         copier.before( this );
02345         ScoredExtentNode* childCopy = dynamic_cast<ScoredExtentNode*>( _scoredNode->copy( copier ) );
02346         ScoreAccumulatorNode* result = new ScoreAccumulatorNode( childCopy );
02347         result->setNodeName( nodeName() );
02348         return copier.after( this, result );
02349       }
02350     };
02351 
02352     // Accumulator node wrapping one scored node; its query text is the wrapped node's text.
02353     class AnnotatorNode : public AccumulatorNode {
02354     private:
02355       ScoredExtentNode* _scoredNode;  // the wrapped scored node
02356 
02357     public:
02358       // Stores the given scored node pointer.
02359       AnnotatorNode( ScoredExtentNode* scoredNode ) : _scoredNode( scoredNode ) {}
02360 
02361       // Restores the "scoredNode" entry from the unpacker.
02362       AnnotatorNode( Unpacker& unpacker ) :
02363         _scoredNode( unpacker.getScoredExtentNode( "scoredNode" ) )
02364       {}
02365 
02366       // Returns the fixed type name "AnnotatorNode".
02367       std::string typeName() const { return "AnnotatorNode"; }
02368 
02369       // Hands back the wrapped node's query text.
02370       std::string queryText() const { return _scoredNode->queryText(); }
02371 
02372       // Returns the wrapped scored node.
02373       ScoredExtentNode* getChild() { return _scoredNode; }
02374 
02375       // Writes the wrapped node under the key the unpacking constructor reads.
02376       void pack( Packer& packer ) {
02377         packer.before( this );
02378         packer.put( "scoredNode", _scoredNode );
02379         packer.after( this );
02380       }
02381 
02382       // Walks the wrapped node between before()/after() calls made on the walker for this node.
02383       void walk( Walker& walker ) {
02384         walker.before( this );
02385         _scoredNode->walk( walker );
02386         walker.after( this );
02387       }
02388 
02389       // Copies the wrapped node via the copier and wraps the copy in a new node with this node's name.
02390       Node* copy( Copier& copier ) {
02391         copier.before( this );
02392         ScoredExtentNode* childCopy = dynamic_cast<ScoredExtentNode*>( _scoredNode->copy( copier ) );
02393         AnnotatorNode* result = new AnnotatorNode( childCopy );
02394         result->setNodeName( nodeName() );
02395         return copier.after( this, result );
02396       }
02397     };
02398   }
02399 }
02400 
02401 #endif // INDRI_QUERYSPEC_HPP
02402 
02403 
02404 

Generated on Wed Nov 3 12:59:02 2004 for Lemur Toolkit by doxygen 1.2.18