00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.cs.cmu.edu/~lemur/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 #ifndef _INDEX_HPP 00014 #define _INDEX_HPP 00015 00017 00055 // C. Zhai 02/08/2001 00056 00057 #include "TermInfoList.hpp" 00058 #include "DocInfoList.hpp" 00059 #include "DocumentManager.hpp" 00060 00061 class Index { 00062 public: 00063 00064 virtual ~Index() {}; 00065 00067 00068 00070 virtual bool open(const string &indexName)=0; 00072 00074 00075 00077 virtual int term (const string &word) const=0; 00078 00080 virtual const string term (int termID) const=0; 00081 00083 virtual int document (const string &docIDStr) const=0; 00084 00086 virtual const string document (int docID) const=0; 00087 00090 // virtual const char* docManager(int docID) { return NULL;} 00091 virtual const DocumentManager* docManager(int docID) const {return NULL;} 00092 00093 00095 00098 virtual const string termLexiconID() const { return "";} 00099 00101 00103 00104 00106 virtual int docCount () const=0; 00107 00109 virtual int termCountUnique () const=0; 00110 00112 virtual int termCount (int termID) const=0; 00113 00115 virtual int termCount () const=0; 00116 00118 virtual float docLengthAvg() const=0; 00119 00121 virtual int docCount(int termID) const=0; 00122 00124 virtual int docLength (int docID) const=0; 00125 00127 00129 00130 00131 virtual DocInfoList *docInfoList(int termID) const=0; 00132 00134 virtual TermInfoList *termInfoList(int docID) const=0; 00135 00137 00138 // returns TermInfoList is sequential representation (not bag of words) 00139 // return NULL list when sequence is not available. 00140 virtual TermInfoList *termInfoListSeq(int docID) const { return NULL; } 00141 00142 }; 00143 00144 00145 #endif 00146 00147 00148