Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

TextHandler.hpp

Go to the documentation of this file.
00001 
00002 /*==========================================================================
00003  *
00004  *  Original source copyright (c) 2001, Carnegie Mellon University.
00005  *  See copyright.cmu for details.
00006  *  Modifications copyright (c) 2002, University of Massachusetts.
00007  *  See copyright.umass for details.
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #include <cstring>
00013 #include "common_headers.hpp"
00014 
00015 #ifndef NULL
00016 #define NULL 0
00017 #endif
00018 
00019 #ifndef _TEXTHANDLER_HPP
00020 #define _TEXTHANDLER_HPP
00021 
00022 #include "PropertyList.hpp"
00023 
00024 
00025 #define MAXWORDSIZE 1024
00026 
00045 
00046 
00052 
00053 //  Might make more sense as TextSource and TextDestination with
00054 //  functions in the middle of the chain inheriting from both.
00055 #include <cstdio>
00056 
00057 class TextHandler {
00058 
00059 public:
00060   enum TokenType {BEGINDOC = 1, ENDDOC = 2, WORD = 3, 
00061                   BEGINTAG = 4, ENDTAG = 5, SYMBOL = 6};
00062 
00063   TextHandler() {
00064     textHandler = NULL;
00065     buffer[MAXWORDSIZE-1] = '\0';
00066   }
00067   virtual ~TextHandler() {}
00068   
00070   virtual void setTextHandler(TextHandler * th) {
00071     textHandler = th;
00072   }
00074   virtual TextHandler * getTextHandler() {
00075     return textHandler;
00076   }
00077 
00078   virtual void foundToken(int type, 
00079                           char * token = NULL, 
00080                           char * orig = NULL,
00081                           PropertyList * properties = NULL) {
00082     char * t = NULL;
00083 
00084     if (token != NULL) {
00085       strncpy(buffer, token, MAXWORDSIZE - 1);
00086       t = buffer;
00087     } 
00088 
00089     switch (type) {
00090 
00091     case BEGINDOC:
00092       t = handleBeginDoc(t, orig, properties);
00093       break;
00094     case ENDDOC:
00095       t = handleEndDoc(t, orig, properties);
00096       break;
00097     case WORD:
00098       t = handleWord(t, orig, properties);
00099       break;
00100     case BEGINTAG:
00101       t = handleBeginTag(t, orig, properties);
00102       break;
00103     case ENDTAG:
00104       t = handleEndTag(t, orig, properties);
00105       break;            
00106     case SYMBOL:
00107       t = handleSymbol(t, orig, properties);
00108       break;            
00109     }
00110 
00111     if (textHandler != NULL) {
00112       textHandler->foundToken(type, t, orig, properties);
00113     }
00114   }
00115 
00118   virtual char * handleBeginDoc(char * docno, char * original,
00119                                 PropertyList * list) {
00120     return handleDoc(docno);
00121   }
00124   virtual char * handleEndDoc(char * token, char * original,
00125                               PropertyList * list) {
00126     handleEndDoc();
00127     return token;
00128   }
00131   virtual char * handleWord(char * word, char * original,
00132                             PropertyList * list) {
00133     return handleWord(word);
00134   }
00136   virtual char * handleBeginTag(char * tag, char * original,
00137                                 PropertyList * list) {
00138     return tag;
00139   }
00141   virtual char * handleEndTag(char * tag, char * original,
00142                               PropertyList * list) {
00143     return tag;
00144   }
00145 
00148   virtual char * handleSymbol(char * symbol, char * original,
00149                               PropertyList * list) {
00150     return handleSymbol(symbol);
00151   }
00152 
00153 
00154 
00155   // For backwards compatability
00157   virtual void foundDoc(char * docno) {
00158     foundToken(BEGINDOC, docno, docno);
00159   }
00160   virtual void foundDoc(char * docno, char * original) {
00161     foundToken(BEGINDOC, docno, original);
00162   }
00164   virtual void foundWord(char * word) {
00165     foundToken(WORD, word, word);
00166   }
00167   virtual void foundWord(char * word, char * original) {
00168     foundToken(WORD, word, original);
00169   }
00171   virtual void foundEndDoc() {
00172     foundToken(ENDDOC);
00173   }
00175   virtual void foundSymbol(char * sym) {
00176     foundToken(SYMBOL, sym, sym);
00177   }  
00178   // Kept for backwords compatability
00180   virtual char * handleDoc(char * docno) { return docno; }
00182   virtual char * handleWord(char * word) { return word; }
00184   virtual void handleEndDoc() { }
00186   virtual char * handleSymbol(char * sym) { return sym; }
00187 
00188 protected:
00190   TextHandler * textHandler;
00191 
00192   char buffer[MAXWORDSIZE];
00193 };
00194 
00195 #endif
00196 

Generated on Tue Nov 25 11:26:46 2003 for Lemur Toolkit by doxygen 1.2.18