00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _INVDOCLIST_HPP
00014 #define _INVDOCLIST_HPP
00015
00016
00017
00018
00019
00020
00021 #include <cmath>
00022 #include "common_headers.hpp"
00023 #include "DocInfoList.hpp"
00024 #include "InvDocInfo.hpp"
00025 #include "MemCache.hpp"
00026 #include "RVLCompress.hpp"
00027
00028 extern "C" {
00029 #include <cstdio>
00030 }
00031
// Integer constant 9. Nothing in this header references it, so its meaning is
// not visible here — presumably a default used by the implementation file
// (TODO confirm which quantity it sizes).
// NOTE(review): an unprefixed macro named DEFAULT is visible to every file that
// includes this header and can collide with unrelated identifiers.
00032 #define DEFAULT 9
00033
/// A DocInfoList whose data is reached through raw int pointers (begin, end,
/// lastid, freq, iter). Judging by the name and the accessors this is the
/// per-term document list of an inverted index; only declarations are visible
/// in this header, so remarks about out-of-line members are assumptions and
/// are marked as such.
///
/// NOTE(review): the class holds raw pointers and declares a destructor but no
/// copy constructor or copy assignment operator — confirm instances are never
/// copied, or that ownership is handled in the implementation file.
00034 class InvDocList: public DocInfoList {
00035 public:
/// Default constructor (defined out of line).
00036 InvDocList();
00037
/// Constructs a list from an id and a length. Presumably `id` is the term id
/// reported by termID() and `len` the length reported by termLen() — TODO confirm.
00040 InvDocList(int id, int len);
/// Same as the (id, len) constructor, additionally taking a MemCache.
/// Presumably the cache supplies the list's memory — TODO confirm.
00042 InvDocList(MemCache* mc, int id, int len);
/// As above, additionally taking a document id and a location; presumably
/// records a first occurrence — TODO confirm against the implementation.
00043 InvDocList(MemCache* mc, int id, int len, int docid, int location);
/// Constructs from an existing int array; parameters parallel setList().
00045 InvDocList(int id, int listlen, int* list, int fr, int* ldocid, int len);
/// Destructor (defined out of line).
/// NOTE(review): not declared virtual here; it is virtual only if
/// DocInfoList's destructor is — verify in DocInfoList.hpp.
00046 ~InvDocList();
00047
/// Installs an existing int array as this list's contents.
/// `ldocid` defaults to NULL and `len` to 0. Whether `list` is copied or
/// adopted is not visible in this header — see setListSafe() and confirm.
00053 void setList(int id, int listlen, int* list, int fr, int* ldocid=NULL, int len=0);
00054
/// Same parameters as setList() but with no defaults. The "Safe" suffix
/// suggests it copies the data rather than adopting the pointer — TODO confirm.
00058 void setListSafe(int id, int listlen, int* list, int fr, int* ldocid, int len);
00059
/// Resets the list (defined out of line). Presumably does not release
/// memory, in contrast to resetFree() — TODO confirm.
00063 void reset();
00064
/// Resets the list and, judging by the name, frees its memory — TODO confirm.
00067 void resetFree();
00068
/// Allocates memory for the list; the bool presumably reports success.
00069 bool allocMem();
/// Reports whether the list currently has no memory; presumably true when
/// begin is NULL — TODO confirm.
00070 bool hasNoMem();
00071
/// Records an occurrence of this term in document `docid`; the bool
/// presumably reports whether the entry could be stored. Overridable.
00073 virtual bool addTerm(int docid);
00074
/// Appends the contents of `tail` to this list; the bool presumably reports
/// success. Overridable.
00076 virtual bool append(InvDocList* tail);
00077
/// Iteration interface. Presumably startIteration() rewinds the `iter`
/// cursor, hasMore() tests it against the end of the data, and nextEntry()
/// returns the next entry and advances — TODO confirm.
00078 virtual void startIteration();
00079 virtual bool hasMore();
00080 virtual DocInfo* nextEntry();
/// Variant of nextEntry() that takes a caller-supplied InvDocInfo;
/// presumably fills `info` instead of returning a pointer.
00081 virtual void nextEntry(InvDocInfo* info);
00082
/// Returns the value pointed to by lastid, or -1 when lastid is NULL.
00083 DOCID_T curDocID() { if (lastid == NULL) return -1; return *lastid; };
/// Returns df.
00084 int docFreq() { return df; };
/// Returns end - begin, i.e. the number of ints between the two pointers.
00085 int length() { return end-begin; };
/// Returns uid.
00086 int termID() { return uid; };
/// Returns strlength.
00087 int termLen() { return strlength; };
/// Collection term frequency for this term, judging by the name (defined out
/// of line). Overridable.
00088 virtual int termCTF();
/// Returns the offset, in ints, of lastid from begin.
00089 int curDocIDdiff() { return lastid-begin; };
/// Returns the int stored immediately after *lastid, or 0 when lastid is NULL.
/// The NULL guard mirrors curDocID(): that accessor treats a NULL lastid as a
/// legal state, so reading *(lastid+1) unguarded dereferenced an invalid address.
00090 int curDocIDtf() { if (lastid == NULL) return 0; return *(lastid+1); };
/// Returns size.
00091 int memorySize() { return size; };
00092
/// Writes this list to `of`; presumably a raw binary dump — TODO confirm format.
00094 void binWrite(ofstream& of);
00095
/// Reads a list from `inf`; the bool presumably reports success. Presumably
/// the inverse of binWrite().
00097 bool binRead(ifstream& inf);
00098
/// Variant of binWrite(); the "C" presumably stands for compressed
/// (RVLCompress.hpp is included by this header) — TODO confirm.
00100 void binWriteC(ofstream& of);
00101
/// Variant of binRead(); presumably the inverse of binWriteC().
00103 bool binReadC(ifstream& inf);
00104
00105 protected:
/// Presumably enlarges the list's memory when it is full; the bool presumably
/// reports success.
00109 bool getMoreMem();
/// Presumably an integer base-2 logarithm of `num` — TODO confirm rounding.
00110 int logb2(int num);
00111
/// Presumably converts the stored document ids to successive differences.
/// Overridable.
00114 virtual void deltaEncode();
00115
/// Presumably the inverse of deltaEncode(). Overridable.
00118 virtual void deltaDecode();
00119
00120 int* begin; // start of the data; length() is measured from here
00121 int* lastid; // read by curDocID(); NULL is treated there as "no current id"
00122 int* freq; // presumably points at a frequency slot — TODO confirm
00123 int* end; // end of the data; length() is end - begin
00124 int* iter; // presumably the cursor used by the iteration interface
00125 int size; // value reported by memorySize()
00126 int intsize; // presumably sizeof(int) cached for I/O — TODO confirm
00127 int strlength; // value reported by termLen()
00128 TERMID_T uid; // value reported by termID()
00129 int df; // value reported by docFreq()
00130 MemCache* cache; // MemCache associated with this list, if any
00131 bool hascache; // presumably true when `cache` is in use
00132
00133 bool READ_ONLY; // presumably set when the list must not be modified
00134 private:
00135 InvDocInfo entry; // presumably the object handed out by nextEntry()
00136 };
00137
00138 #endif