00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.cs.cmu.edu/~lemur/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 #ifndef _PARSER_HPP 00013 #define _PARSER_HPP 00014 00015 #include "TextHandler.hpp" 00016 #include "WordSet.hpp" 00017 00025 00026 class Parser : public TextHandler { 00027 00028 public: 00029 00030 Parser() { 00031 acros = NULL; 00032 } 00033 00036 virtual void parse(char * filename) { parseFile(filename); } ; 00037 00039 virtual void parseFile(char* filename) = 0; 00040 00042 virtual void parseBuffer(char * buf, int len) = 0; 00043 00047 virtual void setAcroList(WordSet * acronyms) { 00048 acros = acronyms; 00049 } 00050 00052 virtual long fileTell() = 0; 00053 00055 virtual long getDocBytePos() { return docpos; } 00056 00057 protected: 00060 bool isAcronym(char * word) { 00061 if (acros != NULL) 00062 return acros->contains(word); 00063 return false; 00064 } 00065 00066 long docpos; 00067 00068 private: 00070 WordSet * acros; 00071 00072 }; 00073 00074 #endif