00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _MMRSUMM_HPP
00013 #define _MMRSUMM_HPP
00014
00015 #include <iomanip>
00016 #include "Summarizer.hpp"
00017 #include "Passage.hpp"
00018 #include "MMRPassage.hpp"
00019 #include "InvFPIndex.hpp"
00020 #include <algorithm>
00021 #include <vector>
00022
00023 using std::vector;
00024
// Marker term string that MMRSumm::isEOS() compares index terms against.
00025 #define EOS "*eos"
// Marker term string that MMRSumm::isTITLE() compares index terms against;
// MMRSumm::autoMMRQuery() uses the entry following each such marker as a query term.
00026 #define TITLE "*title"
// Marker term string that MMRSumm::isPRONOUN() compares index terms against.
00027 #define PRONOUN "*pronoun"
// Not referenced anywhere in this header; presumably a passage length used by
// the implementation file -- TODO confirm.
00028 #define PSG_LEN 15
00029
00035 class MMRSumm : public Summarizer {
00036
00037 private:
00038 double lambda;
00039 InvFPIndex* idx;
00040 int summLen;
00041 vector<MMRPassage> doc;
00042 int iterCount;
00043 double maxSims;
00044 MMRPassage* queryPassage;
00045
00046 int autoMMRQuery(void) {
00047 TermInfo* tEntry;
00048 TermInfoList* tList = idx->termInfoListSeq(idx->document(queryPassage->docID));
00049 termCount* storage;
00050 if (hasTITLE(idx, tList)) {
00051
00052 tList->startIteration();
00053 cout << "title found" << endl;
00054 while (tList->hasMore()) {
00055 tEntry = tList->nextEntry();
00056 if ( isTITLE(idx->term(tEntry->id())) ) {
00057 tEntry = tList->nextEntry();
00058 storage = new termCount;
00059 storage->termID = tEntry->id();
00060 storage->tf = tEntry->count();
00061 storage->val = tEntry->count();
00062 queryPassage->addTerm(*storage);
00063 }
00064 }
00065 } else {
00066 tList->startIteration();
00067 for (int i=0; i<10; i++) {
00068 if (tList->hasMore()) {
00069 tEntry = tList->nextEntry();
00070 storage = new termCount;
00071 storage->termID = tEntry->id();
00072 storage->tf = tEntry->count();
00073 storage->val = tEntry->count();
00074 queryPassage->addTerm(*storage);
00075 }
00076 }
00077 }
00078 cout << "Autoquery: ";
00079 showPassage((*queryPassage).getAsVector(), idx);
00080 cout << endl;
00081
00082 return 1;
00083 }
00084
00085 int setMMRQuery(char* qInfo) {
00086 if (qInfo != "") {
00087 termCount* storage;
00088 storage = new termCount;
00089 storage->termID = idx->term(qInfo);
00090 storage->tf = 1;
00091 storage->val = 1;
00092 queryPassage->addTerm(*storage);
00093 return 1;
00094 }
00095 return autoMMRQuery();
00096 }
00097
00098 public:
00099
00100 MMRSumm(InvFPIndex* inIdx, int inSummLen = 5) {
00101 idx = inIdx;
00102 summLen = inSummLen;
00103 iterCount = 1;
00104 maxSims = -1.0;
00105 queryPassage = NULL;
00106 lambda = 1.0;
00107 };
00108
00109 virtual void markPassages(int optLen, char* qInfo);
00110
00111 virtual void addPassage(Passage &psg);
00112
00113 void addDocument(const char* docID);
00114
00115 virtual int fetchPassages(Passage* psgs, int optLen);
00116
00117 virtual void summDocument(const char* docID, const int optLen, const char* qInfo);
00118
00119 virtual void scorePassages(const char* qInfo);
00120
00121 virtual void clear(void);
00122
00123 virtual int nextPassage(Passage* psg);
00124
00125 virtual void iterClear(void);
00126
00127 virtual void outputSumm(void);
00128
00129 void findNextPassage(MMRPassage &psg, InvFPIndex* idx,
00130 TermInfoList* tList, int eos);
00131
00132 void showPassage(passageVec* psg, InvFPIndex* idx);
00133
00134 void showMarkedPassages();
00135
00136 int isEOS(const char* check) {
00137 return !strcmp(check, EOS);
00138 }
00139
00140 int hasEOS(InvFPIndex* idx, TermInfoList* tList) {
00141 tList->startIteration();
00142 TermInfo* tEntry;
00143 while (tList->hasMore()) {
00144 tEntry = tList->nextEntry();
00145 if ( isEOS(idx->term(tEntry->id())) ) return true;
00146 }
00147 return false;
00148 }
00149
00150 int isTITLE(const char* check) {
00151 return !strcmp(check, TITLE);
00152 }
00153
00154 int hasTITLE(InvFPIndex* idx, TermInfoList* tList) {
00155 tList->startIteration();
00156 TermInfo* tEntry;
00157 while (tList->hasMore()) {
00158 tEntry = tList->nextEntry();
00159 if ( isTITLE(idx->term(tEntry->id())) ) return true;
00160 }
00161 return false;
00162 }
00163
00164 int isPRONOUN(const char* check) {
00165 return !strcmp(check, PRONOUN);
00166 }
00167
00168 struct compareSW {
00169 double lambda;
00170 compareSW(double l) { lambda = l; }
00171 bool operator()(const MMRPassage p1, const MMRPassage p2) const {
00172 return p1.computeMMR(lambda) > p2.computeMMR(lambda);
00173 }
00174 };
00175
00176 };
00177
00178 #endif