00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _INVDOCLIST_HPP
00014 #define _INVDOCLIST_HPP
00015
00016
00017
00018
00019
00020
00021 #include <cmath>
00022 #include "common_headers.hpp"
00023 #include "DocInfoList.hpp"
00024 #include "InvDocInfo.hpp"
00025 #include "MemCache.hpp"
00026 #include "RVLCompress.hpp"
00027
00028 extern "C" {
00029 #include <cstdio>
00030 }
00031
// Macro constant with value 9. None of the declarations visible in this
// header refer to it.
// NOTE(review): presumably a default sizing parameter consumed by the
// implementation file -- confirm there before renaming or removing it.
// Being a macro with a very generic name, it is visible to (and can collide
// with identifiers in) every file that includes this header.
00032 #define DEFAULT 9
00033
00034 class InvDocList: public DocInfoList {
00035 public:
00036 InvDocList();
00037
00040 InvDocList(int id, int len);
00042 InvDocList(MemCache* mc, int id, int len);
00043 InvDocList(MemCache* mc, int id, int len, int docid, int location);
00045 InvDocList(int id, int listlen, int* list, int fr, int* ldocid, int len);
00046 ~InvDocList();
00047
00053 void setList(int id, int listlen, int* list, int fr, int* ldocid=NULL, int len=0);
00054
00058 void setListSafe(int id, int listlen, int* list, int fr, int* ldocid, int len);
00059
00063 void reset();
00064
00067 void resetFree();
00068
00069 bool allocMem();
00070 bool hasNoMem();
00071
00073 virtual bool addTerm(int docid);
00074
00076 virtual bool append(InvDocList* tail);
00077
00078 virtual void startIteration();
00079 virtual bool hasMore();
00080 virtual DocInfo* nextEntry();
00081 virtual void nextEntry(InvDocInfo* info);
00082
00083 DOCID_T curDocID() { if (lastid == NULL) return -1; return *lastid; };
00084 int docFreq() { return df; };
00085 int length() { return end-begin; };
00086 int termID() { return uid; };
00087 int termLen() { return strlength; };
00088 int curDocIDdiff() { return lastid-begin; };
00089 int curDocIDtf() { return *(lastid+1); };
00090
00092 void binWrite(ofstream& of);
00093
00095 bool binRead(ifstream& inf);
00096
00098 void binWriteC(ofstream& of);
00099
00101 bool binReadC(ifstream& inf);
00102
00103 protected:
00107 bool getMoreMem();
00108 int logb2(int num);
00109
00112 virtual void deltaEncode();
00113
00116 virtual void deltaDecode();
00117
00118 int* begin;
00119 int* lastid;
00120 int* freq;
00121 int* end;
00122 int* iter;
00123 int size;
00124 int intsize;
00125 int strlength;
00126 TERMID_T uid;
00127 int df;
00128 MemCache* cache;
00129 bool hascache;
00130
00131 bool READ_ONLY;
00132 };
00133
00134 #endif