Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

IndriIndex.cpp File Reference

#include "indri/IndriIndex.hpp"
#include "indri/DocListDiskBuilder.hpp"
#include "indri/DocListMemoryBuilder.hpp"
#include "ReadBuffer.hpp"
#include "WriteBuffer.hpp"
#include "RVLCompress.hpp"
#include "indri/IndriTermInfoList.hpp"
#include <assert.h>
#include <functional>
#include <algorithm>
#include <sstream>
#include <fstream>
#include <iostream>
#include <time.h>
#include <indri/greedy_vector>
#include <indri/count_iterator>
#include "indri/XMLNode.hpp"
#include "indri/XMLWriter.hpp"
#include "indri/XMLReader.hpp"
#include "indri/delete_range.hpp"
#include "indri/RVLCompressStream.hpp"
#include "indri/RVLDecompressStream.hpp"
#include "indri/ParsedDocument.hpp"
#include <math.h>

Compounds

class  modified_sort

Defines

#define TERMDATA   ".termdata"
#define DOCINDEX   ".docs"
#define DOCSTATS   ".docstats"
#define DOCLENGTHS   ".doclengths"
#define DOCSTATSBATCH   ".docstatsbatch"
#define DOCINDEXBATCH   ".docsbatch"
#define TERMIDMAP   ".tid"
#define TERMIDSTRMAP   ".tidstr"
#define DOCIDMAP   ".did"
#define DOCIDSTRMAP   ".didstr"
#define IVLINDEX   ".ivl"
#define INVALID_STR   "[OOV]"
#define EXTENSION   ".ind"
#define INDRI_MAX_BATCH_MERGE_SEGMENTS   (384)
#define INDRI_MERGE_READBUFFER_SIZE   (1024*1024)
#define INDRI_MERGE_TERMLISTBUFFER_SIZE   (1024*1024)
#define INDRI_BASE_MEMORY_USAGE   (5*1024*1024)
#define INDRI_WRITEBUFFER_SIZE   (1024*1024)
#define INDRI_DOCLISTREADER_SIZE   (1024*1024)
#define INDRI_MINIMUM_LOOKUP_BUFFER_SIZE   (128*1024)
#define INDRI_MINIMUM_SCRATCH_SPACE   (512*1024)
#define INDRI_EPSILON_FLUSH_POSTINGS   (512*1024)
#define INDRI_FIELD_BUFFERSIZE   (1024*1024)
#define INDRI_TERMDATA_PROPORTION   (0.05)
#define INDRI_TERMCACHE_PROPORTION   (0.10)
#define INDRI_LISTS_PROPORTION   (0.85)

Functions

void indriindex_fetch_modified (std::vector< int > &modified, HashTable< int, indri::index::TermData * > &table, bool alphaSort=false)
void indriindex_merge_termdata (indri::index::TermData *master, indri::index::TermData *sub, int fields)
void indriindex_get_smallest (greedy_vector< int > &smallest, std::vector< char * > &terms)
void indriindex_close_merge_files (std::vector< File * > &listFiles, std::vector< File * > &mappingFiles, std::vector< File * > &statsFiles, std::vector< WriteBuffer * > &mappingBuffers, std::vector< ReadBuffer * > &statsBuffers, std::vector< indri::index::DocListFileIterator * > &listIterators)
void indriindex_read_mapping (const std::string &mappingName, greedy_vector< int > &mapping, bool del)

Define Documentation

#define DOCIDMAP   ".did"
 

#define DOCIDSTRMAP   ".didstr"
 

#define DOCINDEX   ".docs"
 

#define DOCINDEXBATCH   ".docsbatch"
 

#define DOCLENGTHS   ".doclengths"
 

#define DOCSTATS   ".docstats"
 

#define DOCSTATSBATCH   ".docstatsbatch"
 

#define EXTENSION   ".ind"
 

#define INDRI_BASE_MEMORY_USAGE   (5*1024*1024)
 

#define INDRI_DOCLISTREADER_SIZE   (1024*1024)
 

#define INDRI_EPSILON_FLUSH_POSTINGS   (512*1024)
 

#define INDRI_FIELD_BUFFERSIZE   (1024*1024)
 

#define INDRI_LISTS_PROPORTION   (0.85)
 

#define INDRI_MAX_BATCH_MERGE_SEGMENTS   (384)
 

#define INDRI_MERGE_READBUFFER_SIZE   (1024*1024)
 

#define INDRI_MERGE_TERMLISTBUFFER_SIZE   (1024*1024)
 

#define INDRI_MINIMUM_LOOKUP_BUFFER_SIZE   (128*1024)
 

#define INDRI_MINIMUM_SCRATCH_SPACE   (512*1024)
 

#define INDRI_TERMCACHE_PROPORTION   (0.10)
 

#define INDRI_TERMDATA_PROPORTION   (0.05)
 

#define INDRI_WRITEBUFFER_SIZE   (1024*1024)
 

#define INVALID_STR   "[OOV]"
 

#define IVLINDEX   ".ivl"
 

#define TERMDATA   ".termdata"
 

#define TERMIDMAP   ".tid"
 

#define TERMIDSTRMAP   ".tidstr"
 


Function Documentation

void indriindex_close_merge_files (std::vector< File * > &    listFiles,
                                   std::vector< File * > &    mappingFiles,
                                   std::vector< File * > &    statsFiles,
                                   std::vector< WriteBuffer * > &    mappingBuffers,
                                   std::vector< ReadBuffer * > &    statsBuffers,
                                   std::vector< indri::index::DocListFileIterator * > &    listIterators
                                  ) [static]
 

void indriindex_fetch_modified (std::vector< int > &    modified,
                                HashTable< int, indri::index::TermData * > &    table,
                                bool    alphaSort = false)
 

void indriindex_get_smallest (greedy_vector< int > &    smallest,
                              std::vector< char * > &    terms)
 

void indriindex_merge_termdata (indri::index::TermData *    master,
                                indri::index::TermData *    sub,
                                int    fields)
 

void indriindex_read_mapping (const std::string &    mappingName,
                              greedy_vector< int > &    mapping,
                              bool    del)
 


Generated on Wed Nov 3 12:59:13 2004 for Lemur Toolkit by doxygen 1.2.18