Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

Parser.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _PARSER_HPP
00013 #define _PARSER_HPP
00014 
00015 #include "TextHandler.hpp"
00016 #include "WordSet.hpp"
00017 
00025 
00026 class Parser : public TextHandler {
      // Abstract parser interface layered on TextHandler. parseFile(),
      // parseBuffer() and fileTell() are pure virtual, so only concrete
      // subclasses can be instantiated. The class also holds acronym-list
      // state plus a document position and a file name for subclasses to use.
      // NOTE(review): this listing omits the header's original doc comments
      // (see the gaps in the line numbers); the notes below are derived from
      // the declarations alone and hedge anything the declarations do not show.
00027 public:
      // Static string constants. No initializer appears here, so their
      // definitions (and values) live outside this header.
00028   static const string category;
00029   static const string identifier;
00030 
00031   Parser();
      // Virtual destructor: Parser is meant to be used polymorphically.
00032   virtual ~Parser();
00033 
      // Parse the file named by `filename`. Virtual but not pure; the body is
      // defined elsewhere.
      // NOTE(review): presumably records the name in `parsefile` and then
      // delegates to parseFile() -- confirm against the implementation.
00036   virtual void parse(const string &filename);
00037   
      // Pure virtual: subclasses implement the actual parsing of a named file.
00040   virtual void parseFile(const string &filename) = 0;
00041 
      // Pure virtual: subclasses implement parsing of an in-memory buffer.
      // `buf` is non-const, so implementations are allowed to modify it.
      // NOTE(review): `len` is presumably the number of bytes in `buf` -- confirm.
00043   virtual void parseBuffer(char * buf, int len) = 0;
00044 
      // Provide an acronym list through a pointer to const WordSet.
      // NOTE(review): the parameter type matches `borrowedacros`, which
      // suggests the parser does not take ownership of this list -- confirm.
00048   virtual void setAcroList(const WordSet * acronyms);
00049 
      // Provide an acronym list by file name (the string is taken by value).
      // NOTE(review): presumably builds a WordSet held in `myacros` -- confirm.
00051   virtual void setAcroList(string filename);
00052 
      // Pure virtual, const: subclasses report a position as a long.
      // NOTE(review): presumably the current byte offset in the file being
      // parsed -- confirm.
00054   virtual long fileTell() const = 0;
00055 
      // Returns the current value of `docpos`.
00057   virtual long getDocBytePos() const { return docpos; }
00058 
      // Returns a copy of `parsefile` (returned by value, not by reference).
00060   virtual const string getParseFile() const { return parsefile; }
00061 
00062 protected: 
      // Non-virtual helper available to subclasses.
      // NOTE(review): presumably tests `word` against the active acronym
      // list -- confirm.
00065   bool isAcronym(const char * word);
      // Non-virtual helper available to subclasses.
      // NOTE(review): presumably resets/releases the acronym-list state
      // (`myacros` / `borrowedacros`) -- confirm.
00067   void clearAcros();
00068 
      // Value reported by getDocBytePos(); protected, so subclasses may set it.
      // NOTE(review): presumably the byte offset of the current document -- confirm.
00069   long docpos; 
00070 
      // Value reported by getParseFile(); protected, so subclasses may set it.
00071   string parsefile;
00072 private:
      // Two acronym-list pointers: one to a mutable WordSet, one to a const WordSet.
      // NOTE(review): the names suggest `myacros` is owned by this object and
      // `borrowedacros` is caller-owned; if so, copying a Parser would need
      // care because no copy operations are declared here -- confirm.
00074   WordSet * myacros;
00075   const WordSet* borrowedacros;
00076 };
00077 
00078 #endif

Generated on Wed Nov 3 12:59:01 2004 for Lemur Toolkit by doxygen 1.2.18