Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

InvFPIndexMerge.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _INVFPINDEXMERGE_HPP
00014 #define _INVFPINDEXMERGE_HPP
00015 
00016 #include "common_headers.hpp"
00017 #include "InvFPDocList.hpp"
00018 #include "InvFPTypes.hpp"
00019 
00020 #define READBUFSIZE 2000000 // NOTE(review): not referenced in this header; presumably a read-buffer size in bytes -- confirm in the .cpp
00021 #define NUM_FH_OPEN 32 // NOTE(review): not referenced in this header; presumably a cap on simultaneously open file handles -- confirm in the .cpp
00022 
00023 struct IndexReader {
        // Plain aggregate of two raw pointers. InvFPIndexMerge::least() takes a
        // vector of IndexReader*, so this is the per-input record used by the
        // merge code. Ownership of the pointees is not expressed here --
        // presumably managed by the merge implementation; confirm in the .cpp.
00024   InvFPDocList* list; // doc list paired with `reader` (presumably the entry most recently read from it -- confirm)
00025   ifstream* reader; // input file stream (presumably one of the files being merged -- confirm)
00026 };
00027 
00028 // this class could actually be static
      // Judging by its name and its merge/hierMerge/mergeFiles/finalMerge
      // methods, this class merges inverted-index files. Only declarations are
      // visible in this header; all behavior lives in the implementation file,
      // so the notes below that describe semantics are marked as assumptions.
      // NOTE(review): `vector` and `ifstream` are used unqualified, so this
      // header relies on an earlier include bringing the std names into scope
      // (presumably common_headers.hpp -- confirm).
      // NOTE(review): a destructor is declared and the class holds raw char*
      // members, but no copy constructor or copy assignment is declared. If the
      // destructor releases any of those pointers, copying an instance would
      // be unsafe -- confirm in the .cpp.
00029 class InvFPIndexMerge {
00030 public:
        // Construct from a caller-supplied `buffer` of `size` (presumably a
        // byte count -- confirm). `maxfilesize` defaults to 2100000000.
00035   InvFPIndexMerge(char* buffer, long size, long maxfilesize=2100000000);
        // Construct from sizes only: `buffersize` defaults to 64000000 and
        // `maxfilesize` to 2100000000, so this also serves as the default
        // constructor. Presumably allocates its own buffer -- confirm.
        // NOTE(review): not `explicit`, so a single long converts implicitly
        // to an InvFPIndexMerge.
00036   InvFPIndexMerge(long buffersize=64000000, long maxfilesize=2100000000);
00037   ~InvFPIndexMerge();
00038 
        // Top-level merge entry point. Presumably `tf` lists the input file
        // names and `prefix` names the output; the meaning of the int return
        // value is not visible here -- confirm in the .cpp.
00041   int merge(vector<char*>* tf, char* prefix);
00042 
        // Presumably updates the `maxfile` limit -- confirm.
00043   void setMaxFileSize(long size);
        // Supplies a buffer and its size. What the returned char* refers to
        // (e.g. the previously installed buffer) is not visible here -- confirm.
00044   char* setBuffer(char* buffer, long size);
00045 
        // Merge step taking a file list and a `level`; by its name, presumably
        // a hierarchical (multi-pass) merge -- confirm.
00049   int hierMerge(vector<char*>* files, int level);
00050 
        // Merge step taking input `files`, a second list `intmed` (presumably
        // receives the intermediate files produced -- confirm) and a `level`.
00052   int mergeFiles(vector<char*>* files, vector<char*>* intmed, int level);
00053 
        // Merge step taking only a file list; by its name, presumably the last
        // pass that produces the final output -- confirm.
00055   int finalMerge(vector<char*>* files);
00056 
00057 private:
        // Presumably writes out identifiers for the files recorded in
        // `invfiles` -- confirm.
00059   void writeInvFIDs();
        // Takes the set of readers `r` and an int vector `ret`. By its name,
        // presumably fills `ret` with the indices of the readers holding the
        // smallest current entry -- confirm.
00061   void least(vector<IndexReader*>* r, vector<int>* ret);
        // Takes a stream, a char buffer and a byte count; presumably installs
        // `bp` as the stream's buffer -- confirm.
00063   void setbuf(ifstream* fs, char* bp, int bytes);
00064 
00065   char* name; // NOTE(review): purpose not visible here; possibly the `prefix` given to merge() -- confirm
00066   vector<char*> invfiles; // list of files that we've written to
00067   long maxfile; // maximum file size for each index
00068   long bufsize; // presumably the size of `readbuffer` -- confirm
00069   char* readbuffer; // buffer pointer; who owns and frees it is not visible here -- confirm
00070 };
00071 
00072 #endif

Generated at Fri Jul 26 18:22:26 2002 for LEMUR by doxygen 1.2.4, written by Dimitri van Heesch, © 1997-2000