Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

InvFPPushIndex.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _INVFPPUSHINDEX_HPP
00014 #define _INVFPPUSHINDEX_HPP
00015 
00017 
00024 /*
00025  * NAME DATE - COMMENTS
00026  * tnt 03/01 - created
00027  ======================================================================*/
00028 #include "common_headers.hpp"
00029 #include "PushIndex.hpp"
00030 #include "MemCache.hpp"
00031 #include "InvFPTypes.hpp"
00032 #include "InvFPDocList.hpp"
00033 #include "InvFPTerm.hpp"
00034 #include "InvFPIndexMerge.hpp"
00035 
00036 
00037 typedef map<char*, InvFPDocList*, ltstr> TABLE_T;  // ordered map: char* key -> InvFPDocList* value, ordered by ltstr (declared elsewhere; presumably a C-string less-than -- confirm)
00038 
00039 class InvFPPushIndex : public PushIndex {  // PushIndex subclass; member functions are only declared here, their definitions are not in this header
00040 public:
00041   InvFPPushIndex(char* prefix="DefaultIndex", int cachesize=128000000, long maxfilesize=2100000000, DOCID_T startdocid=1);  // every argument is defaulted, so this is also the default constructor; sizes presumably in bytes -- TODO confirm
00042   ~InvFPPushIndex();  // NOTE(review): class holds raw pointers (cache, name, membuf, char* vectors); what gets released is not visible here
00043 
00045   void setName(char* prefix);  // sets the index name from a prefix; presumably fills name/namelen -- confirm
00046 
00048   bool beginDoc(DocumentProps* dp);  // presumably called at the start of each document; meaning of the bool result is not visible here
00049 
00051   bool addTerm(Term& t);  // takes the Term by non-const reference; presumably adds it to the current document -- confirm
00052 
00054   void endDoc(DocumentProps* dp);  // presumably the counterpart of beginDoc for the same document -- confirm
00055 
00057   void endCollection(CollectionProps* cp);  // presumably called once after the last document to finish the index -- confirm
00058 
00059 
00060 private:
00061   void writeTOC(int numinv);  // presumably writes the index table of contents; numinv presumably counts inverted-list files -- confirm
00062   void writeDocIDs();  // presumably persists docIDs -- confirm
00063   void writeDTIDs();  // presumably persists the dtfiles list -- confirm
00064   void writeCache();  // presumably flushes wordtable/cache contents to a temp file -- confirm
00065   void lastWriteCache();  // presumably the final flush variant of writeCache -- confirm
00066 
00067   long maxfile; // presumably holds the constructor's maxfilesize -- confirm
00068   MemCache* cache; // raw pointer; ownership is not visible in this header
00069  // FILE* writetlist; /// filestream for writing the list of located terms for each document
00070   ofstream writetlist;  // ofstream replacement for the FILE* version commented out on the previous line
00071   FILE* writetlookup; // C stdio handle (unlike writetlist); presumably a lookup table into the term lists -- confirm
00072   vector<LocatedTerm> termlist; // LocatedTerm entries; presumably the terms of the document being indexed -- confirm
00073   vector<char*> docIDs; // presumably external document ID strings -- confirm; string ownership not visible here
00074   vector<char*> termIDs; // presumably term strings indexed by term ID -- confirm
00075   vector<char*> tempfiles; // presumably names of temporary files written so far -- confirm
00076   vector<char*> dtfiles; // presumably names of the document-term files -- confirm
00077   
00078   int tcount;    // a term counter; exact meaning not visible here -- TODO confirm
00079   int tidcount ; // presumably the term-ID counter -- confirm
00080   int dtidcount; // presumably counts document-term files -- confirm
00081   char* name;    // presumably the index name prefix set via setName -- confirm
00082   int namelen;   // presumably the length of name -- confirm
00083   TABLE_T wordtable; // char* -> InvFPDocList* map (TABLE_T typedef in this file)
00084 
00085   int* membuf; // raw int buffer; presumably the memory managed through cache -- confirm
00086   int membufsize;  // how much memory we have
00087 };
00088 
00089 #endif

Generated at Fri Jul 26 18:22:26 2002 for LEMUR by doxygen 1.2.4, written by Dimitri van Heesch, © 1997-2000