Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

InvFPIndex.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _INVFPINDEX_HPP
00014 #define _INVFPINDEX_HPP
00015 
00017 #include "common_headers.hpp"
00018 #include "Index.hpp"
00019 #include "InvFPDocList.hpp"
00020 #include "InvFPTermList.hpp"
00021 #include "InvFPTypes.hpp"
00022 
00023 
// NOTE(review): these are unscoped preprocessor macros with very generic
// names (e.g. DOCS); every file that includes this header sees them.
00024 // Slot indices into the InvFPIndex::counts array (overall count statistics).
//   UNIQUE_TERMS - read by InvFPIndex::termCountUnique()
//   TOTAL_TERMS  - read by InvFPIndex::termCount()
//   DOCS         - read by InvFPIndex::docCount()
//   DT_FILES / INV_FILES - not referenced in this header; presumably the
//   number of dt-index and inverted-index files -- TODO confirm in the .cpp.
00025 #define UNIQUE_TERMS 0
00026 #define TOTAL_TERMS  1
00027 #define DOCS         2
00028 #define DT_FILES     3
00029 #define INV_FILES    4
00030 
00031 // Slot indices into the InvFPIndex::names array (file names needed for this db).
// None of these is referenced in this header; which file each slot names is
// presumably implied by the macro name -- TODO confirm in the .cpp.
00032 #define DOC_INDEX    0
00033 #define DOC_LOOKUP   1
00034 #define TERM_INDEX   2
00035 #define TERM_LOOKUP  3
00036 #define TERM_IDS     4
00037 #define DOC_IDS      5
00038 
/// InvFPIndex: an Index subclass that keeps overall count statistics, file-name
/// tables, lookup arrays indexed by term id / doc id, and string -> id maps
/// (see the private data members at the bottom of the class).
///
/// Only the declarations and three inline accessors are visible in this
/// header; notes marked "presumably" describe what the names and signatures
/// suggest and must be confirmed against the implementation file.
///
/// NOTE(review): the class declares a destructor and holds raw pointer members
/// but declares no copy constructor or copy assignment, so the compiler-generated
/// memberwise copies would duplicate the pointers. Whether instances are ever
/// copied, and who releases the arrays, is not visible from here.
00039 class InvFPIndex : public Index {
00040 public:
        /// Default constructor. Initial state is set in the .cpp (not visible here).
00041    InvFPIndex();
        /// Construct from an index name; presumably equivalent to constructing
        /// and then calling open(indexName) -- TODO confirm.
        /// NOTE(review): single-argument and not `explicit`, so a const char*
        /// converts implicitly to an InvFPIndex.
00042    InvFPIndex(const char* indexName);
        /// Destructor; what it releases is defined in the .cpp.
00043   ~InvFPIndex(); 
00044 
        // ---- Opening an index ------------------------------------------------
00046 
00047 
        /// Presumably opens the index identified by indexName and reports
        /// success through the bool result -- TODO confirm.
00049   bool open(const char* indexName);
00051 
        /// Presumably opens the index from the given file name; returns bool.
        /// NOTE(review): takes a non-const char*, unlike open(); whether the
        /// buffer is modified is not visible here.
00053   bool openName(char* filename);
00054 
        // ---- Spelling <-> id conversion ---------------------------------------
00056 
00057 
        /// Presumably maps a term spelling to its integer term id (cf. the
        /// termtable member) -- TODO confirm the result for unknown words.
00059   int term(const char* word);
00060 
        /// Presumably maps a term id back to its spelling (cf. the terms member).
00062   const char* term(int termID);
00063 
        /// Presumably maps an external document id string to the internal doc id
        /// (cf. the doctable member) -- TODO confirm the result for unknown ids.
00065   int document(const char* docIDStr);
00066 
        /// Presumably maps an internal doc id to its external id string
        /// (cf. the docnames member).
00068   const char* document(int docID); 
00069 
        // ---- Summary counts ---------------------------------------------------
        // NOTE(review): docCount() and termCountUnique() are non-const while
        // termCount() is const, although all three only read counts[]. The
        // constness may be dictated by the Index base class (not visible here).
00071 
00073 
00074 
        /// Returns counts[DOCS].
00076   int docCount() { return counts[DOCS]; };
00077 
        /// Returns counts[UNIQUE_TERMS].
00079   int termCountUnique() { return counts[UNIQUE_TERMS]; };
00080 
        /// Presumably the collection-wide occurrence count of the given term
        /// -- TODO confirm.
00082   int termCount(int termID) const;
00083 
        /// Returns counts[TOTAL_TERMS].
00085   int termCount() const { return counts[TOTAL_TERMS]; };
00086 
        /// Presumably the average document length (cf. the aveDocLen member).
00088   float docLengthAvg();
00089 
        /// Presumably the number of documents containing the given term
        /// -- TODO confirm.
00091   int docCount(int termID);
00092 
        /// Presumably the length of the given document -- TODO confirm units.
00094   int docLength(DOCID_T docID) const;
00095 
        /// Presumably a variant of docLength() with a different counting rule;
        /// the difference is not visible here -- TODO confirm.
        /// NOTE(review): takes int where docLength() takes DOCID_T.
00097   int docLengthCounted(int docID);
00098 
        // ---- Posting / term list access ---------------------------------------
        // NOTE(review): these return raw pointers; whether the caller must
        // delete the returned list is not visible from this header.
00100 
00102 
00103 
        /// Presumably the list of document entries for the given term.
00104   DocInfoList* docInfoList(int termID);
00105 
        /// Presumably the list of term entries for the given document.
00107   TermInfoList* termInfoList(int docID);
00108 
        /// Presumably like termInfoList() but in sequence order -- TODO confirm.
00110   TermInfoList* termInfoListSeq(int docID);
00111 
00113 private:
        // Loading helpers. Each returns bool; the names suggest each one reads one
        // part of the on-disk index into the members below -- TODO confirm what
        // each reads and what a false result means.
00115   bool fullToc(const char* fileName);
00117   bool mainToc(char* fileName);
00119   bool indexLookup();
00121   bool invFileIDs();
00123   bool dtLookup();
00125   bool dtFileIDs();
00127   bool termIDs();
00129   bool docIDs();
00130 
        // Data members. counts is indexed with the UNIQUE_TERMS / TOTAL_TERMS / DOCS
        // macros by the inline accessors above; allocation and release of the raw
        // arrays happen outside this header.
00131   int* counts;    // array to hold all the overall count stats of this db
00132   char** names;   // array to hold all the names for files we need for this db
00133   float aveDocLen; // the average document length in this index
00134   inv_entry* lookup;  // the array holding entries (index is termid)
00135   dt_entry* dtlookup; // the array holding entries to dt index (index of array is docid)
00136   TERM_T* terms;   // array of the term spellings (index is termid)
00137   EXDOCID_T* docnames; // array of the external docids (index is docid)
00138   char** dtfiles; // array of dt index filenames
00139   char** invfiles; // array of inv index filenames
00140   map<TERM_T, TERMID_T, ltstr> termtable; // table of terms to termid
00141   map<EXDOCID_T, DOCID_T, ltstr> doctable; // table of exdocids to docid
00142 };
00143 
00144 #endif

Generated at Fri Jul 26 18:22:26 2002 for LEMUR by doxygen 1.2.4 written by Dimitri van Heesch, © 1997-2000