Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

InvPushIndex.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _INVPUSHINDEX_HPP
00014 #define _INVPUSHINDEX_HPP
00015 
00017 
00024 /*
00025  * NAME DATE - COMMENTS
00026  * tnt 01/02 - created
00027  ======================================================================*/
00028 #include "common_headers.hpp"
00029 #include "PushIndex.hpp"
00030 #include "MemCache.hpp"
00031 #include "InvFPTypes.hpp"
00032 #include "InvDocList.hpp"
00033 #include "InvFPTerm.hpp"
00034 #include "InvIndexMerge.hpp"
00035 
00036 
// TABLE_T: an ordered map from a string key to an InvDocList pointer, compared
// with less<string>.
// NOTE(review): presumably term text -> that term's in-memory doc list; who
// owns/frees the InvDocList objects is not visible in this header - confirm.
// NOTE(review): map/string/less are used unqualified, so a using-declaration
// must come from one of the included headers - confirm which.
00037 typedef map<string, InvDocList*, less<string> > TABLE_T;
00038 
// InvPushIndex: a PushIndex subclass. Its public interface takes a collection
// one document at a time (beginDoc / addTerm / endDoc) and is closed with
// endCollection; only declarations appear here, the definitions are elsewhere.
// NOTE(review): the descriptions below are inferred from names and signatures
// unless stated otherwise - confirm against the implementation file.
00039 class InvPushIndex : public PushIndex {
00040 public:
        // Default constructor with an empty body: it initializes no members, so
        // the raw pointers (cache, writetlookup, membuf) and the int/long
        // members are left uninitialized by this constructor.
        // NOTE(review): confirm the destructor is safe for an object built this way.
00041   InvPushIndex(){ };
        // Construct with a name prefix. Defaults: cachesize=128000000,
        // maxfilesize=2100000000, startdocid=1.
        // NOTE(review): cachesize/maxfilesize are presumably byte counts and
        // startdocid the first internal document id to assign - confirm.
00042   InvPushIndex(const string &prefix, int cachesize=128000000, long maxfilesize=2100000000, DOCID_T startdocid=1);
        // NOTE(review): not declared virtual here; it is virtual only if the
        // PushIndex destructor is - not visible in this file.
00043   ~InvPushIndex();
00044 
        // Presumably sets the index name/prefix (see member 'name') - confirm.
00046   void setName(const string &prefix);
00047 
        // Presumably starts a new document described by dp; meaning of the
        // bool result is not visible here - confirm.
00049   bool beginDoc(const DocumentProps* dp);
00050 
        // Presumably adds one term occurrence to the current document; meaning
        // of the bool result is not visible here - confirm.
00052   bool addTerm(const Term& t);
00053 
        // Presumably finishes the current document using the current doc
        // manager (see curdocmgr) - confirm.
00055   void endDoc(const DocumentProps* dp);
00056 
        // Overload of endDoc that also takes a doc manager identifier string.
00058   virtual void endDoc(const DocumentProps* dp, const string &mgr);
00059 
        // Presumably finalizes the whole collection once all documents have
        // been pushed - confirm.
00061   void endCollection(const CollectionProps* cp);
00062 
        // Presumably selects the doc manager used for subsequent documents
        // (see curdocmgr) - confirm.
00064   void setDocManager(const string &mgrID);
00065 
00066 protected:
        // Output helpers. NOTE(review): what each one writes, and to which
        // file, is not visible in this header; numinv is presumably the number
        // of inverted-list files - confirm.
00067   void writeTOC(int numinv);
00068   void writeDocIDs();
00069   void writeCache();
00070   void lastWriteCache();
00071   void writeDTIDs();
00072   void writeDocMgrIDs();
        // Returns an int id for the doc manager named mgr.
        // NOTE(review): presumably its position in docmgrs (that member's
        // comment says index = id); behaviour for an unknown mgr is not
        // visible here - confirm.
00075   int docMgrID(const string &mgr);
        // Virtual end-of-document hook taking a numeric doc manager id.
        // NOTE(review): presumably the shared implementation behind both
        // endDoc overloads - confirm.
00076   virtual void doendDoc(const DocumentProps* dp, int mgrid);
00077 
00078   long maxfile;  // presumably the constructor's maxfilesize - confirm
00079   MemCache* cache;  // raw pointer; ownership not visible in this header
00080   vector<EXDOCID_T> docIDs;  // external doc ids; presumably indexed by internal doc id - confirm
00081   vector<TERM_T> termIDs;  // term values; presumably indexed by term id - confirm
00082   vector<string> tempfiles;  // presumably names of temporary files written during indexing - confirm
00083   vector<string> dtfiles;  // presumably names of the doc-term ("dt") files - confirm
00084   vector<string> docmgrs;  // the list of doc managers we have (index = id)
00085 
00086   FILE* writetlookup;  // C stdio handle; presumably an output lookup file - confirm
00087   ofstream writetlist;  // output stream; presumably a list file - confirm
00088 
00089   COUNT_T tcount;     // counter; exact meaning not visible here - TODO confirm
00090   COUNT_T tidcount ;  // counter; presumably number of term ids assigned - confirm
00091   COUNT_T dtidcount;  // counter; presumably number of doc-term entries - confirm
00092   string name;     // presumably the index name/prefix (see setName) - confirm
00093   TABLE_T wordtable;  // string -> InvDocList* table (see TABLE_T typedef)
00094   map<TERMID_T, COUNT_T> termlist;  // term id -> count; presumably for the current document - confirm
00095   int* membuf;  // raw int buffer; ownership not visible in this header
00096   int membufsize;  // how much memory we have
00097   int curdocmgr; // the current docmanager to use
00098 };
00099 
00100 #endif

Generated on Wed Nov 3 12:58:58 2004 for Lemur Toolkit by doxygen 1.2.18