00001 /*========================================================================== 00002 * Copyright (c) 2003 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.cs.cmu.edu/~lemur/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 #include "Parser.hpp" 00013 #include "TextHandler.hpp" 00014 #include "LinkedPropertyList.hpp" 00015 00016 #ifndef _IDENTIPARSER_HPP 00017 #define _IDENTIPARSER_HPP 00018 00036 // Source code in IdentifinderParser.l 00037 00038 00039 #define BEGIN_PREFIX "B_" 00040 #define END_PREFIX "E_" 00041 // for simplicity, make both prefixes the same length 00042 #define PREFIX_LEN 2 00043 00044 class IdentifinderParser : public Parser { 00045 00046 public: 00047 IdentifinderParser(); 00048 00050 void parseFile(char * filename); 00051 00052 void parseBuffer(char * buf, int len); 00053 00054 long fileTell(); 00055 00056 private: 00058 void doParse(); 00059 00061 int state; 00062 00064 int poscount; 00065 00067 Property wordpos; 00068 Property tag; // entity tag 00069 Property btag; // the begin tag 00070 Property etag; // the end tag 00071 00072 00074 LinkedPropertyList proplist; 00075 }; 00076 00077 #endif