00001
00002 #include "TextHandler.hpp"
00003 #include "InvPushIndex.hpp"
00004 #include "Parser.hpp"
00005 #include "WordSet.hpp"
00006
00007 #include <stdio.h>
00008
00009 #ifndef _DOCFREQINDEXER_HPP
00010 #define _DOCFREQINDEXER_HPP
00011
00012
00013 class DocFreqIndexer : public TextHandler { // A TextHandler subclass; only the declaration is in this header.
00014 // NOTE(review): the name suggests it gathers document-frequency statistics per database - confirm against the implementation file.
00015 public:
00016 DocFreqIndexer(char * csName, char * cwName, // NOTE(review): csName/cwName presumably name the collsel index and the collWords file - confirm.
00017 char * ssName, int bufferSize, bool countStopWords = false); // NOTE(review): ssName presumably names the serverSizes file - confirm. countStopWords defaults to false.
00018 ~DocFreqIndexer(); // NOTE(review): cleanup of the pointer/FILE members is not visible from this header - check the definition.
00019 // Per-document callbacks. NOTE(review): presumably these override TextHandler hooks - verify against TextHandler.hpp.
00020 char * handleDoc(char * docno, char * orig); // Called with a document number and its original text; returns a char *.
00021 char * handleWord(char * word, char * orig); // Called with a word and its original text; returns a char *.
00022 void handleEndDoc(); // Takes no arguments and returns nothing; presumably marks the end of the current document - confirm.
00023 // NOTE(review): newDb presumably begins accumulating statistics for a new database called `name` - confirm.
00024 void newDb(char * name);
00025
00026
00027
00028 private:
00029 // NOTE(review): the roles of the counters below are inferred from their names only - confirm in the implementation.
00030 int cw; // Possibly a collection word count.
00031 int dfCount; // Possibly a document-frequency counter.
00032 bool first; // Possibly a "first call / first database" flag.
00033 // Index object held by pointer; ownership is not visible from this header.
00034 InvPushIndex * collsel;
00035 // Scratch objects held by pointer; ownership is not visible from this header.
00036 DocumentProps * csdp;
00037 InvFPTerm * term;
00038 // Word set held by value. NOTE(review): presumably the distinct words seen in the current document - confirm.
00039 WordSet docWords;
00040 // C stdio streams. NOTE(review): presumably opened from the cwName/ssName constructor arguments - confirm.
00041 FILE * collWords;
00042 FILE * serverSizes;
00043 int numDocs; // NOTE(review): presumably the number of documents seen so far - confirm.
00044 // NOTE(review): presumably stores the constructor's countStopWords argument - confirm.
00045 bool countStopWds;
00046
00047 };
00048
00049 #endif