00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.cs.cmu.edu/~lemur/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 /* 00013 10/18/2002 -- dmf Add binReadC, binWriteC, deltaDecode, and deltEncode 00014 for compression of TermInfoLists. 00015 */ 00016 00017 #ifndef _INVFPTERMLIST_HPP 00018 #define _INVFPTERMLIST_HPP 00019 00020 #include "common_headers.hpp" 00021 #include "InvFPTerm.hpp" 00022 #include "InvFPTypes.hpp" 00023 #include "RVLCompress.hpp" 00024 00032 class InvFPTermList : public TermInfoList { 00033 public: 00034 InvFPTermList(); 00035 InvFPTermList(int did, int len, vector<LocatedTerm> &tls); 00036 ~InvFPTermList(); 00037 00039 void startIteration(); 00040 00042 bool hasMore(); 00043 00045 TermInfo *nextEntry(); 00046 00048 int docLength(){ return length; } 00049 00051 int termCount() { return listlen; } 00052 00054 int docID() { return uid; } 00055 00058 bool binRead(ifstream& infile); 00060 bool binReadC(ifstream& infile); 00062 void binWriteC(ofstream& ofile); 00063 00066 virtual void deltaDecode(); 00069 virtual void deltaEncode(); 00070 00072 void countTerms(); 00073 00074 protected: 00075 DOCID_T uid; // this doc's id 00076 int length; // length of this document (terms + stopwords) 00077 LocatedTerm* list; // list of terms and locations 00078 LLTerm* listcounted; // list of terms and location lists 00079 int listlen; // number of items we have in list (same as number of terms) 00080 int index; // index for iterator 00081 int* counts; // keep track of counts of terms for bag of word 00082 InvFPTerm entry; 00083 vector<int> loclist; //list of locations to return 00084 00085 }; 00086 00087 00088 00089 00090 #endif