#include <Index.hpp>
Inheritance diagram for Index:
Public Methods | |
virtual | ~Index () |
virtual TermInfoList * | termInfoListSeq (DOCID_T docID) const |
Open index | |
virtual bool | open (const string &indexName)=0 |
Open previously created Index, return true if opened successfully, indexName should be the full name of the table-of-content file for the index. E.g., "index.bsc" for an index built with the basic indexer. | |
Spelling and index conversion | |
virtual TERMID_T | term (const TERM_T &word) const=0 |
Convert a term spelling to a termID, returns 0 if out of vocabulary. Valid index starts at 1. | |
virtual const TERM_T | term (TERMID_T termID) const=0 |
Convert a valid termID to its spelling. | |
virtual DOCID_T | document (const EXDOCID_T &docIDStr) const=0 |
Convert a spelling to docID, returns 0 if out of vocabulary. Valid index starts at 1. | |
virtual const EXDOCID_T | document (DOCID_T docID) const=0 |
Convert a valid docID to its spelling. | |
virtual const DocumentManager * | docManager (DOCID_T docID) const |
virtual const string | termLexiconID () const |
Return a string ID for the term lexicon (usually the file name of the lexicon). | |
Summary counts | |
virtual COUNT_T | docCount () const=0 |
Total count (i.e., number) of documents in collection. | |
virtual COUNT_T | termCountUnique () const=0 |
Total count of unique terms in collection, i.e., the term vocabulary size. | |
virtual COUNT_T | termCount (TERMID_T termID) const=0 |
Total counts of a term in collection. | |
virtual COUNT_T | termCount () const=0 |
Total counts of all terms in collection. | |
virtual float | docLengthAvg () const=0 |
Average document length. | |
virtual COUNT_T | docCount (TERMID_T termID) const=0 |
Total count of documents that contain a given term. | |
virtual COUNT_T | docLength (DOCID_T docID) const=0 |
Total counts of terms in a document. | |
Index entry access | |
virtual DocInfoList * | docInfoList (TERMID_T termID) const=0 |
returns a new instance of DocInfoList which represents the doc entries in a term index, you must delete the instance later.
| |
virtual TermInfoList * | termInfoList (DOCID_T docID) const=0 |
returns a new instance of TermInfoList which represents the word entries in a document index, you must delete the instance later.
|
This is an abstract class that provides a uniform interface for access to an indexed document collection. The following is an example of using it.
Index &myIndex;
myIndex.open("index-file");
int t1;
...
// now fetch doc info list for term t1
// this returns a dynamic instance, so you'll need to delete it
DocInfoList *docList = myIndex.docInfoList(t1);
docList->startIteration();
DocInfo *entry;
while (docList->hasMore()) {
  entry = docList->nextEntry();
  // this returns a pointer to *static* memory, so don't delete entry!
  cout << "entry doc id: "<< entry->docID() <<endl;
  cout << "entry term count: "<< entry->termCount() << endl;
}
delete docList;
|
|
|
Total count of documents that contain a given term.
Implemented in BasicIndexWithCat, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Total count (i.e., number) of documents in collection.
Implemented in BasicIndexWithCat, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
returns a new instance of DocInfoList which represents the doc entries in a term index, you must delete the instance later.
Implemented in BasicIndexWithCat, InvFPIndex, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Total counts of terms in a document.
Implemented in BasicIndexWithCat, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Average document length.
Implemented in BasicIndexWithCat, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
A String identifier for the document manager to get at the source of the document with this document id. Reimplemented in InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Convert a valid docID to its spelling.
Implemented in BasicIndexWithCat, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Convert a spelling to docID, returns 0 if out of vocabulary. Valid index starts at 1.
Implemented in InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
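Open previously created Index, return true if opened successfully. indexName should be the full name of the table-of-content file for the index, e.g., "index.bsc" for an index built with the basic indexer.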
Implemented in BasicIndexWithCat, InvIndex, and KeyfileIncIndex. |
|
Convert a valid termID to its spelling.
Implemented in BasicIndexWithCat, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Convert a term spelling to a termID, returns 0 if out of vocabulary. Valid index starts at 1.
Implemented in InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Total counts of all terms in collection.
Implemented in BasicIndexWithCat, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Total counts of a term in collection.
Implemented in BasicIndexWithCat, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Total count of unique terms in collection, i.e., the term vocabulary size.
Implemented in BasicIndexWithCat, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
returns a new instance of TermInfoList which represents the word entries in a document index, you must delete the instance later.
Implemented in BasicIndexWithCat, InvFPIndex, InvIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Reimplemented in InvFPIndex, KeyfileIncIndex, and LemurIndriIndex. |
|
Return a string ID for the term lexicon (usually the file name of the lexicon). This function should be pure virtual; the default implementation is just for convenience. Appropriate implementation to be done in the future. |