Public Methods |
| | BasicIndex () |
| | constructor (used when opening an index)
|
| | BasicIndex (Compress *pc) |
| | constructor (used when building an index)
|
| virtual | ~BasicIndex () |
| virtual bool | open (const char *indexName) |
| | Open a previously created index; return true if it was opened successfully.
|
| void | build (DocStream *collectionStream, const char *file, const char *outputPrefix, int totalDocs=0x1000000, int maxMemory=0x4000000, int minimumCount=1, int maxVocSize=2000000) |
|
| virtual int | term (const char *word) |
| | Convert a term spelling to a termID.
|
| virtual const char * | term (int termID) |
| | Convert a termID to its spelling.
|
| virtual int | document (const char *docIDStr) |
| | Convert a document ID spelling to a docID.
|
| virtual const char * | document (int docID) |
| | Convert a docID to its spelling.
|
| virtual const char * | termLexiconID () |
| | Return the term lexicon ID.
|
|
| virtual int | docCount () |
| | Total count (i.e., number) of documents in collection.
|
| virtual int | termCountUnique () |
| | Total count of unique terms in collection.
|
| virtual int | termCount (int termID) const |
| | Total count of a term in the collection.
|
| virtual int | termCount () const |
| | Total count of all terms in the collection.
|
| virtual float | docLengthAvg () |
| | Average document length.
|
| virtual int | docCount (int termID) |
| | Total count of documents containing a given term.
|
| virtual int | docLength (int docID) const |
| | Total count of terms in a document.
|
|
| virtual DocInfoList * | docInfoList (int termID) |
| | Doc entries in a term index; the caller should release the memory.
See also: DocList
|
| virtual TermInfoList * | termInfoList (int docID) |
| | Word entries in a document index; the caller should release the memory.
See also: TermList
|
Private Methods |
| void | buildVocabulary (int maxVocSize, int minimumCount) |
| void | writeWordIndex (int indexNum, FastList< IndexCount > *dlw) |
| int | indexCollection () |
| int | headDocIndex () |
| int | headWordIndex () |
| void | createKeys () |
| void | mergeIndexFiles () |
| void | createKey (const char *inName, const char *outName, Terms &voc, int *byteOffset) |
| int | mergePair (const char *fn1, const char *fn2, const char *fn3) |
| void | writeIndexFile () |
Private Attributes |
| ifstream | textStream |
| String | prefix |
| String | textFile |
| String | wordVocabulary |
| String | documentVocabulary |
| String | wordIndexFile |
| String | documentIndexFile |
| String | wordKeyFile |
| String | documentKeyFile |
| Terms | terms |
| Terms | docids |
| int | numDocuments |
| int | numWords |
| int | numBytes |
| int | maxDocumentLength |
| float | avgDocumentLength |
| int | totalDocuments |
| int | memorySegment |
| int | maxSegmentsPerIndex |
| time_t | timeToIndex |
| int | maximumMemory |
| MemList * | pMemList |
| Compress * | pCompressor |
| bool | deleteCompressor |
| DocStream * | pDocStream |
| ifstream | wordIndexStream |
| ifstream | documentIndexStream |
| int * | woffset |
| int * | doffset |
| int * | tmpdarr |
| int * | tmpwarr |
| int * | countOfTerm |
| int * | countOfDoc |