#include <TextHandler.hpp>
Inheritance diagram for TextHandler:

Public Types | |
| enum | TokenType { BEGINDOC = 1, ENDDOC = 2, WORDTOK = 3, BEGINTAG = 4, ENDTAG = 5, SYMBOLTOK = 6 } |
Public Methods | |
| TextHandler () | |
| virtual | ~TextHandler () |
| virtual void | setTextHandler (TextHandler *th) |
| Set the TextHandler that this TextHandler will pass information on to. | |
| virtual TextHandler * | getTextHandler () |
| Get the TextHandler that this TextHandler will pass information on to. | |
| virtual void | foundToken (TokenType type, char *token=NULL, const char *orig=NULL, PropertyList *properties=NULL) |
| virtual char * | handleBeginDoc (char *docno, const char *original, PropertyList *list) |
| virtual char * | handleEndDoc (char *token, const char *original, PropertyList *list) |
| virtual char * | handleWord (char *word, const char *original, PropertyList *list) |
| virtual char * | handleBeginTag (char *tag, const char *original, PropertyList *list) |
| Handle a begin tag. | |
| virtual char * | handleEndTag (char *tag, const char *original, PropertyList *list) |
| Handle an end tag. | |
| virtual char * | handleSymbol (char *symbol, const char *original, PropertyList *list) |
| virtual void | foundDoc (char *docno) |
| Found a document with document number. | |
| virtual void | foundDoc (char *docno, const char *original) |
| virtual void | foundWord (char *word) |
| Found a word. | |
| virtual void | foundWord (char *word, const char *original) |
| virtual void | foundEndDoc () |
| Found end of doc. | |
| virtual void | foundSymbol (char *sym) |
| Found a symbol. | |
| virtual char * | handleDoc (char *docno) |
| Handle a doc. | |
| virtual char * | handleWord (char *word) |
| Handle a word, possibly transforming it. | |
| virtual void | handleEndDoc () |
| Handle the end of the doc. | |
| virtual char * | handleSymbol (char *sym) |
| Handle a symbol, possibly transforming it. | |
| virtual string | getCategory () |
| Return the category of this TextHandler. | |
| virtual string | getIdentifier () |
| Return a unique identifier for this TextHandler object. | |
Static Public Attributes | |
| const string | category = "TextHandler" |
| const string | identifier = "TextHandler" |
Protected Attributes | |
| TextHandler * | textHandler |
| The next textHandler in the chain. | |
| string | cat |
| string | iden |
| char | buffer [MAXWORDSIZE] |
|
|
|
|
|
|
|
|
|
|
||||||||||||
|
|
|
|
Found a document with document number.
|
|
|
Found end of doc.
|
|
|
Found a symbol.
|
|
||||||||||||||||||||
|
|
|
||||||||||||
|
|
|
|
Found a word.
|
|
|
Return the category of this TextHandler.
|
|
|
Return a unique identifier for this TextHandler object.
|
|
|
Get the TextHandler that this TextHandler will pass information on to.
|
|
||||||||||||||||
|
Handle a doc begin - default implementation calls handleDoc for backwards compatibility. |
|
||||||||||||||||
|
Handle a begin tag.
Reimplemented in IndriTextHandler, and ElemDocMgr. |
|
|
Handle a doc.
Reimplemented in DocFreqIndexer, FreqCounter, IndriTextHandler, InvFPTextHandler, KeyfileTextHandler, PropIndexTH, FlattextDocMgr, KeyfileDocMgr, WriterInQueryHandler, and WriterTextHandler. |
|
|
Handle the end of the doc.
Reimplemented in DocFreqIndexer, IndriTextHandler, FlattextDocMgr, and KeyfileDocMgr. |
|
||||||||||||||||
|
Handle a doc end - default implementation calls old handleEndDoc for backwards compatibility. |
|
||||||||||||||||
|
Handle an end tag.
Reimplemented in IndriTextHandler, and ElemDocMgr. |
|
|
Handle a symbol, possibly transforming it.
Reimplemented in WriterInQueryHandler, StringQuery, and QueryDocument. |
|
||||||||||||||||
|
Handle a symbol - default implementation calls old handleSymbol for backwards compatibility. |
|
|
Handle a word, possibly transforming it.
Reimplemented in CtfIndexer, DocFreqIndexer, FreqCounter, InvFPTextHandler, KeyfileTextHandler, QueryTextHandler, KeyfileDocMgr, Stemmer, Stopper, WriterInQueryHandler, WriterTextHandler, StringQuery, DocOffsetParser, and QueryDocument. |
|
||||||||||||||||
|
Handle a word - default implementation calls old handleWord for backwards compatibility. Reimplemented in IndriTextHandler, PropIndexTH, and BrillPOSTokenizer. |
|
|
Set the TextHandler that this TextHandler will pass information on to.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Reimplemented in ArabicParser, ArabicStemmer, BrillPOSParser, ChineseCharParser, ChineseParser, IdentifinderParser, InqArabicParser, InQueryOpParser, KStemmer, Parser, PorterStemmer, ReutersParser, Stemmer, Stopper, TrecParser, and WebParser. |
|
|
The next textHandler in the chain.
|
1.2.18