Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

InvPushIndex.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _INVPUSHINDEX_HPP
00014 #define _INVPUSHINDEX_HPP
00015 
00017 
00024 /*
00025  * NAME DATE - COMMENTS
00026  * tnt 01/02 - created
00027  ======================================================================*/
00028 #include "common_headers.hpp"
00029 #include "PushIndex.hpp"
00030 #include "MemCache.hpp"
00031 #include "InvFPTypes.hpp"
00032 #include "InvDocList.hpp"
00033 #include "InvFPTerm.hpp"
00034 #include "InvIndexMerge.hpp"
00035 
00036 
00037 typedef map<char*, InvDocList*, ltstr> TABLE_T;  // ordered map: char* key -> InvDocList* value, keys ordered by ltstr
      // NOTE(review): ltstr is not declared in this file (it comes from one of the
      // headers included above); presumably it compares the pointed-to C strings
      // rather than the pointer addresses -- TODO confirm. `map` is used unqualified,
      // so an included header is assumed to bring std::map into scope.
00038 
00039 class InvPushIndex : public PushIndex {
      /*
       * InvPushIndex: a PushIndex subclass (public inheritance) declaring a
       * document-at-a-time building interface: beginDoc / addTerm / endDoc per
       * document, then endCollection. Only declarations are visible here; the
       * definitions live in another file.
       *
       * NOTE(review): the gutter numbering of this listing skips a line before
       * each public method (00044, 00047, ...), which suggests the original
       * documentation comments were stripped when the listing was generated.
       * The per-member notes below are reviewer notes; anything not directly
       * shown by a declaration is marked as an assumption.
       */
00040 public:
        // Constructor. Every parameter has a default: prefix "DefaultIndex",
        // cachesize 128000000, maxfilesize 2100000000, startdocid 1.
        // Units of cachesize/maxfilesize are not stated here (presumably bytes -- TODO confirm).
        // NOTE(review): the default binds a string literal to a non-const char*;
        // that conversion is deprecated in C++98/03 and ill-formed since C++11.
        // Changing it to const char* requires the same change in the definition.
00041   InvPushIndex(char* prefix="DefaultIndex", int cachesize=128000000, long maxfilesize=2100000000, DOCID_T startdocid=1);
        // NOTE(review): not declared virtual here; whether it is virtual depends
        // on PushIndex's destructor, which is not visible in this file.
00042   ~InvPushIndex();
00043 
        // Sets the index name from `prefix` (presumably stored in `name`/`namelen` -- TODO confirm).
00045   void setName(char* prefix);
00046 
        // Starts a new document described by `dp`; returns a bool status
        // (meaning of false is not visible here).
00048   bool beginDoc(DocumentProps* dp);
00049 
        // Adds term `t`, presumably to the document opened by beginDoc; returns a bool status.
00051   bool addTerm(Term& t);
00052 
        // Ends the current document; presumably uses the manager selected via
        // setDocManager (see `curdocmgr`) -- TODO confirm.
00054   void endDoc(DocumentProps* dp);
00055 
        // Overload of endDoc that also takes a document manager name `mgr`.
        // Declared virtual, so subclasses may override it.
00057   virtual void endDoc(DocumentProps* dp, const char* mgr);
00058 
        // Signals the end of the collection described by `cp`.
00060   void endCollection(CollectionProps* cp);
00061 
        // Selects the document manager identified by `mgrID`, presumably for
        // subsequently indexed documents -- TODO confirm.
00063   void setDocManager(const char* mgrID);
00064 
00065 protected:
        // Output helpers; what each one writes is inferred from its name only.
00066   void writeTOC(int numinv);
00067   void writeDocIDs();
00068   void writeCache();
00069   void lastWriteCache();
00070   void writeDTIDs();
00071   void writeDocMgrIDs();
        // Maps a manager name to an int id; presumably an index into `docmgrs`
        // (whose comment says index = id). Whether unknown names are added is not visible here.
00074   int docMgrID(const char* mgr);
        // Virtual hook taking a numeric manager id; presumably the shared
        // implementation behind both endDoc overloads -- TODO confirm.
00075   virtual void doendDoc(DocumentProps* dp, int mgrid);
00076 
00077   long maxfile; // presumably the file-size limit taken from maxfilesize -- TODO confirm
00078   MemCache* cache; // raw pointer; ownership is not visible from this declaration
00079   vector<char*> docIDs; // raw C-string pointers; ownership not visible here
00080   vector<char*> termIDs; // raw C-string pointers; ownership not visible here
00081   vector<char*> tempfiles; // presumably names of temporary files written so far -- TODO confirm
00082   vector<char*> dtfiles; // presumably names of the "dt" files (cf. writeDTIDs) -- TODO confirm
00083   vector<char*> docmgrs;  // the list of doc managers we have (index = id)
00084 
00085   FILE* writetlookup; // C stdio stream; what is written to it is not visible here
00086   ofstream writetlist; // C++ output file stream; what is written to it is not visible here
00087 
00088   int tcount;    // counter; exact meaning not visible here (presumably total terms -- TODO confirm)
00089   int tidcount ; // counter; presumably number of term ids assigned -- TODO confirm
00090   int dtidcount; // counter; exact meaning not visible here -- TODO confirm
00091   char* name;    // presumably the index name/prefix set via setName -- TODO confirm
00092   int namelen;   // presumably the length of `name` -- TODO confirm
00093   TABLE_T wordtable; // char* -> InvDocList* map (see TABLE_T); presumably one entry per term
00094   map<int, int> termlist; // int -> int map; key/value meaning not visible here
00095   int* membuf; // raw int buffer; allocation and ownership are not visible here
00096   int membufsize;  // how much memory we have
00097   int curdocmgr; // the current docmanager to use
00098 };
00099 
00100 #endif

Generated at Fri Jul 26 18:26:24 2002 for LEMUR by doxygen 1.2.4 written by Dimitri van Heesch, © 1997-2000