00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.cs.cmu.edu/~lemur/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 #ifndef _PARSER_HPP 00013 #define _PARSER_HPP 00014 00015 #include "TextHandler.hpp" 00016 #include "WordSet.hpp" 00017 00025 00026 class Parser : public TextHandler { 00027 public: 00028 Parser(); 00029 virtual ~Parser(); 00030 00033 virtual void parse(char * filename) { parseFile(filename); } ; 00034 00036 virtual void parseFile(char* filename) = 0; 00037 00039 virtual void parseBuffer(char * buf, int len) = 0; 00040 00044 virtual void setAcroList(WordSet * acronyms); 00045 00047 virtual void setAcroList(string filename); 00048 00050 virtual long fileTell() = 0; 00051 00053 virtual long getDocBytePos() { return docpos; } 00054 00055 protected: 00058 bool isAcronym(char * word); 00059 long docpos; 00060 00061 private: 00063 WordSet * acros; 00065 bool mine; 00066 }; 00067 00068 #endif