Public Methods |
| BasicIndex () |
| constructor (used when opening an index)
|
| BasicIndex (Compress *pc) |
| constructor (used when building an index)
|
virtual | ~BasicIndex () |
virtual bool | open (const char *indexName) |
| Open a previously created index; return true if it was opened successfully.
|
void | build (DocStream *collectionStream, const char *file, const char *outputPrefix, int totalDocs=0x1000000, int maxMemory=0x4000000, int minimumCount=1, int maxVocSize=2000000) |
|
virtual int | term (const char *word) |
| Convert a term spelling to a termID.
|
virtual const char * | term (int termID) |
| Convert a termID to its spelling.
|
virtual int | document (const char *docIDStr) |
| Convert a document ID spelling to its docID.
|
virtual const char * | document (int docID) |
| Convert a docID to its spelling.
|
virtual const char * | termLexiconID () |
| Return the term lexicon ID.
|
|
virtual int | docCount () |
| Total count (i.e., number) of documents in the collection.
|
virtual int | termCountUnique () |
| Total count of unique terms in collection.
|
virtual int | termCount (int termID) const |
| Total count of a term in the collection.
|
virtual int | termCount () const |
| Total count of all terms in the collection.
|
virtual float | docLengthAvg () |
| Average document length.
|
virtual int | docCount (int termID) |
| Total number of documents containing a given term.
|
virtual int | docLength (int docID) const |
| Total count of terms in a document.
|
|
virtual DocInfoList * | docInfoList (int termID) |
| Doc entries in a term index; the caller should release the memory.
See also: DocList
|
virtual TermInfoList * | termInfoList (int docID) |
| Word entries in a document index; the caller should release the memory.
See also: TermList
|
Private Methods |
void | buildVocabulary (int maxVocSize, int minimumCount) |
void | writeWordIndex (int indexNum, FastList< IndexCount > *dlw) |
int | indexCollection () |
int | headDocIndex () |
int | headWordIndex () |
void | createKeys () |
void | mergeIndexFiles () |
void | createKey (const char *inName, const char *outName, Terms &voc, int *byteOffset) |
int | mergePair (const char *fn1, const char *fn2, const char *fn3) |
void | writeIndexFile () |
Private Attributes |
ifstream | textStream |
String | prefix |
String | textFile |
String | wordVocabulary |
String | documentVocabulary |
String | wordIndexFile |
String | documentIndexFile |
String | wordKeyFile |
String | documentKeyFile |
Terms | terms |
Terms | docids |
int | numDocuments |
int | numWords |
int | numBytes |
int | maxDocumentLength |
float | avgDocumentLength |
int | totalDocuments |
int | memorySegment |
int | maxSegmentsPerIndex |
time_t | timeToIndex |
int | maximumMemory |
MemList * | pMemList |
Compress * | pCompressor |
bool | deleteCompressor |
DocStream * | pDocStream |
ifstream | wordIndexStream |
ifstream | documentIndexStream |
int * | woffset |
int * | doffset |
int * | tmpdarr |
int * | tmpwarr |
int * | countOfTerm |
int * | countOfDoc |