00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.cs.cmu.edu/~lemur/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 /* 00013 10/18/2002 -- dmf Add binReadC, binWriteC, deltaDecode, and deltEncode 00014 for compression of TermInfoLists. 00015 */ 00016 00017 #ifndef _INVFPTERMLIST_HPP 00018 #define _INVFPTERMLIST_HPP 00019 00020 #include "common_headers.hpp" 00021 #include "InvFPTerm.hpp" 00022 #include "InvFPTypes.hpp" 00023 #include "RVLCompress.hpp" 00024 00032 class File; 00033 00034 class InvFPTermList : public TermInfoList { 00035 public: 00036 InvFPTermList(); 00037 InvFPTermList(int did, int len, vector<LocatedTerm> &tls); 00038 ~InvFPTermList(); 00039 00041 void startIteration(); 00042 00044 bool hasMore(); 00045 00047 TermInfo *nextEntry(); 00048 00050 int docLength(){ return length; } 00051 00053 int termCount() { return listlen; } 00054 00056 int docID() { return uid; } 00057 00060 bool binRead(ifstream& infile); 00062 bool binReadC(ifstream& infile); 00064 void binWriteC(ofstream& ofile); 00065 00066 bool binReadC( File& infile ); 00067 void binWriteC( File& outfile ); 00068 00071 virtual void deltaDecode(); 00074 virtual void deltaEncode(); 00075 00077 void countTerms(); 00078 00079 protected: 00080 DOCID_T uid; // this doc's id 00081 int length; // length of this document (terms + stopwords) 00082 LocatedTerm* list; // list of terms and locations 00083 LLTerm* listcounted; // list of terms and location lists 00084 int listlen; // number of items we have in list (same as number of terms) 00085 int index; // index for iterator 00086 int* counts; // keep track of counts of terms for bag of word 00087 InvFPTerm entry; 00088 vector<int> loclist; //list of locations to return 00089 00090 }; 00091 00092 00093 00094 00095 #endif