00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _INVDOCLIST_HPP
00014 #define _INVDOCLIST_HPP
00015
00016
00017
00018
00019
00020
00021 #include <cmath>
00022 #include "common_headers.hpp"
00023 #include "DocInfoList.hpp"
00024 #include "InvDocInfo.hpp"
00025 #include "MemCache.hpp"
00026 #include "RVLCompress.hpp"
00027
00028 extern "C" {
00029 #include <cstdio>
00030 }
00031
// NOTE(review): DEFAULT is not referenced anywhere in this header, so the
// meaning of the value 9 cannot be determined from here — presumably it is
// consumed by the implementation file; confirm before changing or renaming.
00032 #define DEFAULT 9
00033
00034 class InvDocList: public DocInfoList {
00035 public:
00036 InvDocList();
00037
00040 InvDocList(int id, int len);
00042 InvDocList(MemCache* mc, int id, int len);
00043 InvDocList(MemCache* mc, int id, int len, int docid, int location);
00045 InvDocList(int id, int listlen, int* list, int fr, int* ldocid, int len);
00046 ~InvDocList();
00047
00053 void setList(int id, int listlen, int* list, int fr, int* ldocid=NULL, int len=0);
00054
00058 void setListSafe(int id, int listlen, int* list, int fr, int* ldocid, int len);
00059
00063 void reset();
00064
00067 void resetFree();
00068
00069 bool allocMem();
00070 bool hasNoMem();
00071
00073 virtual bool addTerm(int docid);
00074
00076 virtual bool append(InvDocList* tail);
00077
00078 virtual void startIteration();
00079 virtual bool hasMore();
00080 virtual DocInfo* nextEntry();
00081 virtual void nextEntry(InvDocInfo* info);
00082
00083 DOCID_T curDocID() { if (lastid == NULL) return -1; return *lastid; };
00084 int docFreq() { return df; };
00085 int length() { return end-begin; };
00086 int termID() { return uid; };
00087 int termLen() { return strlength; };
00088 virtual int termCTF();
00089 int curDocIDdiff() { return lastid-begin; };
00090 int curDocIDtf() { return *(lastid+1); };
00091
00093 void binWrite(ofstream& of);
00094
00096 bool binRead(ifstream& inf);
00097
00099 void binWriteC(ofstream& of);
00100
00102 bool binReadC(ifstream& inf);
00103
00104 protected:
00108 bool getMoreMem();
00109 int logb2(int num);
00110
00113 virtual void deltaEncode();
00114
00117 virtual void deltaDecode();
00118
00119 int* begin;
00120 int* lastid;
00121 int* freq;
00122 int* end;
00123 int* iter;
00124 int size;
00125 int intsize;
00126 int strlength;
00127 TERMID_T uid;
00128 int df;
00129 MemCache* cache;
00130 bool hascache;
00131
00132 bool READ_ONLY;
00133 private:
00134 InvDocInfo entry;
00135 };
00136
00137 #endif