Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

IndriTextHandler.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2000-2004 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software (and below), and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _INDRITEXTHANDLER_HPP
00013 #define _INDRITEXTHANDLER_HPP
00014 
00015 #include "Parser.hpp"
00016 #include "indri/ParsedDocument.hpp"
00017 #include "indri/IndexEnvironment.hpp"
00018 
00024 #define DOCIDKEY "docno" /* NOTE(review): presumably the metadata key under which the document id is stored (see the MetadataPair member docid) -- confirm in the .cpp */
00025 
00026 class IndriTextHandler : public TextHandler { // TextHandler subclass that holds an Indri IndexEnvironment and a ParsedDocument by value
00027 
00028 public:
00031   IndriTextHandler(const string &name, int memory, const Parser* p); // NOTE(review): presumably name identifies the index to build, memory is a memory budget, and p is the parser feeding this handler -- confirm in the .cpp
00032   ~IndriTextHandler(); // NOTE(review): the class has raw char* members; whether they are released here is not visible in this header
00033 
00035   char * handleDoc(char * docno); // NOTE(review): name suggests this is invoked at the start of a document with its document number -- confirm
00037   void handleEndDoc(); // NOTE(review): presumably invoked when the current document ends -- confirm
00039   char * handleWord(char * word, const char* original, PropertyList* list); // receives a word, its original form, and a PropertyList; NOTE(review): presumably called once per parsed token -- confirm
00040   char * handleBeginTag(char* tag, const char* orig, PropertyList* props); // NOTE(review): presumably called when an opening tag is encountered -- confirm
00041   char * handleEndTag(char* tag, const char* orig, PropertyList* props); // NOTE(review): presumably called when a closing tag is encountered -- confirm
00042 
00043 protected:
00045   IndexEnvironment env; // Indri IndexEnvironment held by value
00047   ParsedDocument document; // ParsedDocument held by value; NOTE(review): presumably the document currently being assembled -- confirm
00049   MetadataPair docid; // NOTE(review): presumably the DOCIDKEY / document-number metadata entry -- confirm
00051   char* curdocno; // NOTE(review): presumably the current document number; ownership is not visible in this header
00053   char* docsource; // NOTE(review): presumably a buffer of document source text; ownership is not visible in this header
00055   int bufsize; // NOTE(review): presumably the size of the docsource buffer -- confirm
00057   int docbegin; // NOTE(review): presumably a position marking where the current document begins -- confirm
00059   const Parser* parser; // pointer to a const Parser; NOTE(review): presumably the p passed to the constructor -- confirm
00061 };
00062 
00063 #endif
00064 

Generated on Wed Nov 3 12:58:58 2004 for Lemur Toolkit by doxygen 1.2.18