Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

InvFPTypes.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 /* type definitions for objects we will use */
00014 #ifndef _INVFPTYPES_H  // include guard; NOTE(review): a leading underscore followed by an uppercase letter is a reserved identifier form in C++
00015 #define _INVFPTYPES_H
00016 
00017 #define IND_VERSION "3.0"  // version string; presumably the index format version (cf. VERSION_PAR) - confirm
00018 
00019 #include "common_headers.hpp"  // project header; presumably supplies string, vector and strcmp used below - confirm
00020 
00021 typedef int   FILEID_T;  // type of the fileid members of dt_entry and inv_entry
00022 typedef int   TERMID_T;  // numeric term id (see LocatedTerm::term, LLTerm::term)
00023 typedef TERMID_T LOC_T;  // Location type. Kept identical to TERMID_T on purpose: it simplifies the implementation of memory management.
00024 typedef TERMID_T DOCID_T;  // document id; same underlying type as TERMID_T
00025 typedef float SCORE_T;  // score value, single precision
00026 typedef string TERM_T;  // term as a string; `string` is unqualified here, so it must already be visible (presumably via common_headers.hpp - confirm)
00027 typedef string EXDOCID_T;  // presumably the external (string) form of a document id - confirm
00028 typedef void* POS_T;  // Opaque position handle used by DocLists and TermLists
00029 
00030 // Filename suffixes, one per kind of file. The per-macro notes below are inferred from the macro names only - TODO confirm against the code that uses them.
00031 #define INVINDEX  ".invf"  // presumably inverted index file
00032 #define INVFPINDEX ".invfp"  // presumably inverted index file with positions
00033 #define INVLOOKUP  ".invlookup"  // presumably lookup table of inv_entry records
00034 #define DTINDEX  ".dt"  // presumably docterm index file
00035 #define DTLOOKUP  ".dtlookup"  // presumably lookup table of dt_entry records
00036 #define TERMIDMAP  ".tid"  // presumably term id map
00037 #define TERMIDSTRMAP ".tidstr"  // presumably term string map
00038 #define DOCIDMAP  ".did"  // presumably document id map
00039 #define DOCIDSTRMAP ".didstr"  // presumably document string map
00040 #define MAINTOC  ".inv"  // presumably main table of contents
00041 #define INVFPTOC ".ifp"  // presumably table of contents for the positional index
00042 #define DOCMGRMAP ".dm"  // presumably document manager map (cf. dt_entry::docmgr)
00043 
00044 // String used to name out-of-vocabulary ids.
00045 #define INVALID_STR "[OOV]"
00046 
00047 // Parameter names. Each macro expands to the literal key string; where the keys are read or written is not shown in this header.
00048 #define VERSION_PAR "VERSION"
00049 #define NUMDOCS_PAR "NUM_DOCS"
00050 #define NUMTERMS_PAR "NUM_TERMS"
00051 #define NUMUTERMS_PAR "NUM_UNIQUE_TERMS"
00052 #define AVEDOCLEN_PAR "AVE_DOCLEN"
00053 #define INVINDEX_PAR  "INV_INDEX"
00054 #define INVLOOKUP_PAR  "INV_LOOKUP"
00055 #define DTINDEX_PAR  "DT_INDEX"
00056 #define DTLOOKUP_PAR  "DT_LOOKUP"
00057 #define TERMIDMAP_PAR  "TERMIDS"
00058 #define TERMIDSTRMAP_PAR "TERMIDSTRS"
00059 #define DOCIDMAP_PAR  "DOCIDS"
00060 #define DOCIDSTRMAP_PAR "DOCIDSTRS"
00061 #define NUMDT_PAR  "NUM_DTFILES"
00062 #define NUMINV_PAR  "NUM_INVFILES"
00063 #define DOCMGR_PAR  "DOCMGR_IDS"
00064 
00065 struct LocatedTerm { // A term id paired with a single location of that term.
00066   TERMID_T term;  // id of the term
00067   LOC_T loc;  // location of the term; LOC_T is a typedef of TERMID_T
00068 };
00069 
00070 struct LLTerm { // A term id paired with a list of locations of that term.
00071   TERMID_T term;  // id of the term
00072   vector<LOC_T> loc;  // locations of the term; `vector` is unqualified, so it must already be visible (presumably via common_headers.hpp - confirm)
00073 };
00074 
00075 struct dt_entry {   // One entry in the lookup table for the docterm lists index.
00076   FILEID_T fileid;  // which file the entry's list is in (original comment says "word"; presumably the document here - confirm)
00077   long offset;        // offset into that file
00078   int length;         // length of the list (original comment says "inverted list"; presumably the docterm list - confirm; unit not shown here)
00079   int docmgr;         // docmgr id of the manager for this doc
00080 };
00081 
00082 struct inv_entry {   // One entry in a lookup table. NOTE(review): the original comment ("docterm lists index") is identical to dt_entry's; given ctf/df, this is presumably the inverted lists index - confirm
00083   FILEID_T fileid;  // which file the word is in
00084   long offset;        // offset into that file
00085   int ctf;            // collection term frequency
00086   int df;             // document frequency
00087 };
00088 
00089 struct ltstr  // "Less-than" comparison functor for C strings.
00090 {
00091   bool operator()(char* s1, char* s2) const{  // NOTE(review): parameters are non-const char*, so const char* arguments are rejected; const char* would accept both
00092     return strcmp(s1, s2) < 0;  // true when s1 orders before s2 according to strcmp; neither pointer is checked for null
00093   }
00094 };
00095 
00096 #endif  // _INVFPTYPES_H

Generated on Fri Jul 2 16:25:36 2004 for Lemur Toolkit by doxygen 1.2.18