Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

FlattextDocMgr.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  *
00003  *  Original source copyright (c) 2001, Carnegie Mellon University.
00004  *  See copyright.cmu for details.
00005  *  Modifications copyright (c) 2002, University of Massachusetts.
00006  *  See copyright.umass for details.
00007  *
00008  *==========================================================================
00009 */
00010 
00011 #ifndef _FLATTEXTDOCMGR_HPP
00012 #define _FLATTEXTDOCMGR_HPP
00013 
00014 #include "common_headers.hpp"
00015 #include "TextHandlerManager.hpp"
00016 #include "Exception.hpp"
00017 #include "DocumentManager.hpp"
00018 
00019 #define FT_SUFFIX ".flat"    // string constant; presumably the file-name extension identifying this manager type - usage is in the .cpp, TODO confirm
00020 #define FT_LOOKUP ".lookup"  // string constant; presumably the extension of the document lookup-table file (cf. loadFTLookup) - TODO confirm
00021 #define FT_FID    ".fid"     // string constant; presumably the extension of the file-id list file (cf. loadFTFiles) - TODO confirm
00022 
00023 class FlattextDocMgr : public DocumentManager, public TextHandler {
      // A DocumentManager that is also a TextHandler: getTextHandler() hands out
      // this same object. Only declarations are visible in this header; notes
      // below that go beyond the signatures are marked as assumptions.
00024 public:
00025 
        // One record of the document lookup table (see `entries` / `table`).
        // NOTE(review): field meanings are inferred from their names - confirm
        // against the implementation file.
00026   struct lookup_e {
00027     int fid;      // presumably an index identifying the source file (cf. `fileid`)
00028     long offset;  // presumably the document's start position within that file
00029     long bytes;   // presumably the document's length in bytes
00030   };
00031 
        // Comparator used as the ordering of `table`: compares the pointed-to
        // C strings with strcmp (content order), not the pointer values.
        // Both arguments must be non-null, NUL-terminated strings.
00032   struct abc {
00033     bool operator() (char* s1, char* s2) const {
00034       return strcmp(s1, s2) < 0;  // true when s1 sorts strictly before s2
00035     }
00036   };
00037 
        // Three-string constructor; all arguments are taken by value.
        // NOTE(review): presumably `name` names this manager, `mode` selects the
        // parser type (cf. `parseMode`) and `source` refers to the source files
        // to manage (cf. `sources`) - confirm in the .cpp.
00042   FlattextDocMgr(string name, string mode, string source);  
00043 
        // Construct from a single C-string name. Not declared `explicit`, so a
        // const char* converts implicitly to FlattextDocMgr.
        // NOTE(review): presumably attaches to an already-built manager - confirm.
00046   FlattextDocMgr(const char* name);
00047 
        // Virtual destructor. What it releases (e.g. `entries`, the keys and
        // values of `table`) is not visible in this header.
00048   virtual ~FlattextDocMgr();
00049 
        // Open the manager identified by `manname`; returns a bool status
        // (presumably true on success - confirm).
00051   virtual bool open(const char*manname);
00052 
        // Returns this manager's identifier as a C string.
        // NOTE(review): presumably backed by IDname/IDnameext, so the pointer
        // would be valid only while this object lives - confirm.
00054   virtual const char* getMyID();
00055   
        // Returns a mutable char buffer for the document named `docID`.
        // NOTE(review): ownership of the returned buffer and the result for an
        // unknown docID are not visible here - confirm before freeing or caching.
00057   virtual char* getDoc(const char* docID);
00058 
        // Build the manager. NOTE(review): presumably processes every file in
        // `sources` and writes the lookup data - confirm.
00059   virtual void buildMgr();
00060 
        // Per-document callbacks taking the document number string.
        // NOTE(review): presumably these override TextHandler hooks; they are
        // not marked virtual here, so that depends on TextHandler's declaration.
00061   char* handleDoc(char * docno);
00062   void  handleEndDoc();
        // Returns this object viewed through its TextHandler base.
00064   virtual TextHandler* getTextHandler() { return this; }
00065 
00066 private:
        // Reads source information from the file `fn`; bool status.
        // NOTE(review): presumably fills `sources` - confirm.
00068   bool readinSources(const char* fn);
00069 
        // Writes out the table of contents (counterpart of loadTOC); format and
        // destination are defined in the .cpp.
00071   void writeTOC();
00072 
        // Loaders for previously written manager data; each returns a bool
        // status. NOTE(review): `num` in loadFTFiles is presumably the expected
        // number of file entries - confirm.
00073   bool loadTOC(const char* fn);
00074   bool loadFTLookup(const char* fn);
00075   bool loadFTFiles(const char* fn, int num);
00076 
00077   int numdocs;              // how many docs we have
00078   string parseMode;           // what type of parser we have
00079   long prevpos;              // pos of previous doc beginning
00080   string IDname;            // my name
00081   string IDnameext;         // my name with type extension
00082   vector<string> sources;   // list of all source files
00083   int fileid;                       // fileid of current file being processed
00084   ofstream writefpos;       // stream for writing out file positions
        // C-string key -> lookup entry, ordered by string content through `abc`.
        // Keys and values are raw pointers; the map itself does not own them.
        // NOTE(review): presumably keyed by document id with values pointing
        // into `entries` - confirm.
00085   map<char*, lookup_e*, abc> table; 
00086   lookup_e* entries;        // array of lookup entries
00087 };
00088 
00089 #endif

Generated on Fri Feb 6 07:11:46 2004 for LEMUR by doxygen 1.2.16