Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

DocFreqIndexer.hpp

Go to the documentation of this file.
00001 
00002 #include "TextHandler.hpp"
00003 #include "InvPushIndex.hpp"
00004 #include "Parser.hpp"
00005 #include "WordSet.hpp"
00006 
00007 #include <stdio.h>
00008 
00009 #ifndef _DOCFREQINDEXER_HPP
00010 #define _DOCFREQINDEXER_HPP
00011 
00012 
00013 class DocFreqIndexer : public TextHandler {
00014 // TextHandler subclass exposing per-document / per-word callbacks; presumably gathers document-frequency data (see dfCount, docWords) -- confirm against the implementation.
00015 public:
00016   DocFreqIndexer(char * csName, char * cwName, 
00017                  char * ssName, int bufferSize, bool countStopWords = false); // countStopWords is optional (defaults to false); csName/cwName/ssName are presumably names for the collsel index and the collWords / serverSizes files -- TODO confirm
00018   ~DocFreqIndexer();
00019 // Handler callbacks: each receives a C string and returns a char *; presumably these override the TextHandler interface (base class not visible here) -- verify.
00020   char * handleDoc(char * docno);
00021   char * handleWord(char * word);
00022   void handleEndDoc();
00023 // newDb takes a name; what it opens or resets cannot be determined from this header.
00024   void newDb(char * name);
00025 
00026 
00027 
00028 private:
00029   // NOTE(review): the meanings below are inferred from member names only; none are established by this header.
00030   int cw;       // purpose not evident from the name (collection word count?) -- TODO confirm
00031   int dfCount;  // presumably a document-frequency counter -- confirm
00032   bool first;   // presumably a "first item seen" flag -- confirm
00033 // Raw pointer to an InvPushIndex; ownership and lifetime are not visible here.
00034   InvPushIndex * collsel;
00035 // Raw pointers to project types (DocumentProps, InvFPTerm); ownership is not visible here.
00036   DocumentProps * csdp;
00037   InvFPTerm * term;
00038 // Held by value; presumably the set of words seen in the current document -- confirm.
00039   WordSet docWords;
00040 // C stdio stream handles; presumably tied to the constructor's cwName / ssName arguments -- TODO confirm where they are opened and closed.
00041   FILE * collWords;
00042   FILE * serverSizes;
00043   int numDocs;  // presumably a running count of documents handled -- confirm
00044   // Presumably stores the constructor's countStopWords argument -- confirm.
00045   bool countStopWds;
00046 
00047 };
00048 
00049 #endif

Generated on Fri Feb 6 07:11:46 2004 for LEMUR by doxygen 1.2.16