00001 00002 #include "TextHandler.hpp" 00003 #include "InvPushIndex.hpp" 00004 #include "Parser.hpp" 00005 #include "WordSet.hpp" 00006 00007 #include <stdio.h> 00008 00009 #ifndef _DOCFREQINDEXER_HPP 00010 #define _DOCFREQINDEXER_HPP 00011 00012 00013 class DocFreqIndexer : public TextHandler { 00014 00015 public: 00016 DocFreqIndexer(char * csName, char * cwName, 00017 char * ssName, int bufferSize, bool countStopWords = false); 00018 ~DocFreqIndexer(); 00019 00020 char * handleDoc(char * docno); 00021 char * handleWord(char * word); 00022 void handleEndDoc(); 00023 00024 void newDb(char * name); 00025 00026 00027 00028 private: 00029 00030 int cw; 00031 int dfCount; 00032 bool first; 00033 00034 InvPushIndex * collsel; 00035 00036 DocumentProps * csdp; 00037 InvFPTerm * term; 00038 00039 WordSet docWords; 00040 00041 FILE * collWords; 00042 FILE * serverSizes; 00043 int numDocs; 00044 00045 bool countStopWds; 00046 00047 }; 00048 00049 #endif