00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <cstring>
00013 #include "common_headers.hpp"
00014
00015 #ifndef NULL
00016 #define NULL 0
00017 #endif
00018
00019 #ifndef _TEXTHANDLER_HPP
00020 #define _TEXTHANDLER_HPP
00021
00022 #include "PropertyList.hpp"
00023
00024
00025 #define MAXWORDSIZE 1024
00026
00045
00046
00052
00053
00054
00055 #include <cstdio>
00056
00057 class TextHandler {
00058
00059 public:
/// Kinds of token a handler can be told about. foundToken() switches on
/// these values to pick the matching handle*() method.
enum TokenType {
  BEGINDOC = 1,
  ENDDOC   = 2,
  WORD     = 3,
  BEGINTAG = 4,
  ENDTAG   = 5,
  SYMBOL   = 6
};
00062
00063 TextHandler() {
00064 textHandler = NULL;
00065 buffer[MAXWORDSIZE-1] = '\0';
00066 }
00067 virtual ~TextHandler() {}
00068
00070 virtual void setTextHandler(TextHandler * th) {
00071 textHandler = th;
00072 }
00074 virtual TextHandler * getTextHandler() {
00075 return textHandler;
00076 }
00077
00078 virtual void foundToken(int type,
00079 char * token = NULL,
00080 char * orig = NULL,
00081 PropertyList * properties = NULL) {
00082 char * t = NULL;
00083
00084 if (token != NULL) {
00085 strncpy(buffer, token, MAXWORDSIZE - 1);
00086 t = buffer;
00087 }
00088
00089 switch (type) {
00090
00091 case BEGINDOC:
00092 t = handleBeginDoc(t, orig, properties);
00093 break;
00094 case ENDDOC:
00095 t = handleEndDoc(t, orig, properties);
00096 break;
00097 case WORD:
00098 t = handleWord(t, orig, properties);
00099 break;
00100 case BEGINTAG:
00101 t = handleBeginTag(t, orig, properties);
00102 break;
00103 case ENDTAG:
00104 t = handleEndTag(t, orig, properties);
00105 break;
00106 case SYMBOL:
00107 t = handleSymbol(t, orig, properties);
00108 break;
00109 }
00110
00111 if (textHandler != NULL) {
00112 textHandler->foundToken(type, t, orig, properties);
00113 }
00114 }
00115
00118 virtual char * handleBeginDoc(char * docno, char * original,
00119 PropertyList * list) {
00120 return handleDoc(docno);
00121 }
00124 virtual char * handleEndDoc(char * token, char * original,
00125 PropertyList * list) {
00126 handleEndDoc();
00127 return token;
00128 }
00131 virtual char * handleWord(char * word, char * original,
00132 PropertyList * list) {
00133 return handleWord(word);
00134 }
00136 virtual char * handleBeginTag(char * tag, char * original,
00137 PropertyList * list) {
00138 return tag;
00139 }
00141 virtual char * handleEndTag(char * tag, char * original,
00142 PropertyList * list) {
00143 return tag;
00144 }
00145
00148 virtual char * handleSymbol(char * symbol, char * original,
00149 PropertyList * list) {
00150 return handleSymbol(symbol);
00151 }
00152
00153
00154
00155
00157
00158 foundToken(BEGINDOC, docno, docno);
00159 }
00160 virtual void foundDoc(char * docno, char * original) {
00161 foundToken(BEGINDOC, docno, original);
00162 }
00164 virtual void foundWord(char * word) {
00165 foundToken(WORD, word, word);
00166 }
00167 virtual void foundWord(char * word, char * original) {
00168 foundToken(WORD, word, original);
00169 }
00171 virtual void foundEndDoc() {
00172 foundToken(ENDDOC);
00173 }
00175 virtual void foundSymbol(char * sym) {
00176 foundToken(SYMBOL, sym, sym);
00177 }
00178
00180
00182
00184
00186
00187
00188 protected:
00190 TextHandler * textHandler;
00191
00192 char buffer[MAXWORDSIZE];
00193 };
00194
00195 #endif
00196