Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

QryBasedSampler.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _QRYBASEDSAMPLER_HPP
00013 #define _QRYBASEDSAMPLER_HPP
00014 
00015 
00016 
00017 #include "FreqCounter.hpp"
00018 #include "DBManager.hpp"
00019 
00021 typedef stringset docidset;  /* type of seenDocs, which holds ids of already-retrieved documents; stringset is declared outside this header */
00022 
00024 #define T_NDOCS 1  /* termMode value under which numDocs is used */
00025 
00026 #define T_NWORDS 2  /* termMode value under which numWords is used */
00027 
00028 #define T_NQRYS 4  /* termMode value under which numQueries is used */
00029 
00033 class QryBasedSampler {  // Settings and state for probing a db through a DBManager; member definitions are not in this header.
00034 public:
00035   QryBasedSampler();  // defined elsewhere; initial member values are not visible here
00036   ~QryBasedSampler();  // NOTE(review): whether db / freqCounter / outputPrefix are released here is not visible -- confirm ownership
00037 
00039   bool probe(char * initQuery);  // presumably starts the probe from initQuery; meaning of the bool result is not visible here -- TODO confirm
00040   
00042   void setDBManager(DBManager * database);  // presumably stores the pointer in db (used for querying a db) -- TODO confirm
00043 
00045   DBManager * getDBManager();  // presumably returns db
00046 
00047 
00050   void setFreqCounter(FreqCounter * counter);  // presumably stores the pointer in freqCounter (used for building a description of a db) -- TODO confirm
00051 
00053   FreqCounter * getFreqCounter();  // presumably returns freqCounter
00054 
00055 
00059   void setOutputPrefix(char * prefix);  // presumably sets outputPrefix (prefix for output filenames); NOTE(review): copy vs. alias of the caller's buffer is not visible
00060   
00062   char * getOutputPrefix();  // presumably returns outputPrefix
00063 
00065   void setNumDocs(int n);  // presumably sets numDocs, which is only used if termMode == T_NDOCS
00066 
00068   int getNumDocs();  // presumably returns numDocs
00069 
00070 
00072   void setNumWords(int n);  // presumably sets numWords, which is only used if termMode == T_NWORDS
00073 
00075   int getNumWords();  // presumably returns numWords
00076 
00077 
00079   void setNumQueries(int n);  // presumably sets numQueries, which is only used if termMode == T_NQRYS
00080 
00082   int getNumQueries();  // presumably returns numQueries
00083 
00084 
00091   void setTermMode(int m);  // presumably sets termMode; the modes named in this header are T_NDOCS, T_NWORDS and T_NQRYS
00092 
00094   int getTermMode();  // presumably returns termMode
00095   
00096 
00098   void setDocsPerQuery(int n);  // presumably sets docsPerQuery (documents per query to use)
00099   
00101   int getDocsPerQuery();  // presumably returns docsPerQuery
00102 
00103 
00104 private:
00105 
00106   /* for querying a db */
00107   DBManager * db;
00108 
00109 
00110   /* for building a description of a db */
00111   FreqCounter * freqCounter;
00112 
00113 
00114   /* output prefix for filenames */
00115   char * outputPrefix;
00116 
00117 
00118   /* termination mode of the probe -
00119    * one of T_NDOCS, T_NWORDS, or T_NQRYS */
00120   int termMode;
00121   
00122   /* number of unique docs to retrieve - only used if
00123    * termMode == T_NDOCS */
00124   int numDocs;
00125 
00126   /* number of unique words to retrieve - only used if
00127    * termMode == T_NWORDS */
00128   int numWords;
00129 
00130   /* number of queries to run - only used if
00131    * termMode == T_NQRYS */
00132   int numQueries;
00133 
00134   /* documents per query to use */
00135   int docsPerQuery;
00136 
00137   /* stores the ids of the documents already retrieved
00138    * from the system.  used to prevent parsing
00139    * a document multiple times */
00140   docidset seenDocs;
00141 };
00142 
00143 #endif

Generated on Fri Feb 6 07:11:48 2004 for LEMUR by doxygen1.2.16