Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

MMRSumm.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _MMRSUMM_HPP
00013 #define _MMRSUMM_HPP
00014 
#include <algorithm>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <vector>

#include "Summarizer.hpp"
#include "Passage.hpp"
#include "MMRPassage.hpp"
#include "InvFPIndex.hpp"
00022 
00023 using std::vector;
00024 
// Marker terms that MMRSumm compares against term strings from the index.
// EOS is the string tested by MMRSumm::isEOS (presumably "end of sentence" -- TODO confirm).
00025 #define EOS      "*eos"
// TITLE is the string tested by MMRSumm::isTITLE; autoMMRQuery treats the
// entry that follows a TITLE marker as the actual title word.
00026 #define TITLE    "*title"
// PRONOUN is the string tested by MMRSumm::isPRONOUN.
00027 #define PRONOUN  "*pronoun"
// PSG_LEN is not referenced in this header; presumably a default passage
// length used by the implementation file -- TODO confirm.
00028 #define PSG_LEN  15
00029 
00035 class MMRSumm : public Summarizer {
00036 
00037 private:
00038   double lambda;
00039   InvFPIndex* idx;
00040   int summLen;
00041   vector<MMRPassage> doc;
00042   int iterCount;
00043   double maxSims;
00044   MMRPassage* queryPassage;
00045 
00046   int autoMMRQuery(void) {
00047     TermInfo* tEntry;
00048     TermInfoList* tList = idx->termInfoListSeq(idx->document(queryPassage->docID));
00049     termCount* storage;
00050     if (hasTITLE(idx, tList)) {
00051       // use title words
00052       tList->startIteration();
00053       cout << "title found" << endl;
00054       while (tList->hasMore()) {
00055         tEntry = tList->nextEntry();
00056         if ( isTITLE(idx->term(tEntry->id())) ) {
00057           tEntry = tList->nextEntry(); // the actual word after title token
00058           storage = new termCount;
00059           storage->termID = tEntry->id();
00060           storage->tf = tEntry->count();
00061           storage->val = tEntry->count();
00062           queryPassage->addTerm(*storage);
00063         }
00064       }      
00065     } else {
00066       tList->startIteration();
00067       for (int i=0; i<10; i++) {
00068         if (tList->hasMore()) {
00069           tEntry = tList->nextEntry();
00070           storage = new termCount;
00071           storage->termID = tEntry->id();
00072           storage->tf = tEntry->count();
00073           storage->val = tEntry->count();
00074           queryPassage->addTerm(*storage);
00075         }
00076       } 
00077     }
00078     cout << "Autoquery: ";
00079     showPassage((*queryPassage).getAsVector(), idx);
00080     cout << endl;
00081 
00082     return 1;
00083   }
00084 
00085   int setMMRQuery(char* qInfo) {
00086     if (qInfo != "") {
00087       termCount* storage;
00088       storage = new termCount;
00089       storage->termID = idx->term(qInfo);
00090       storage->tf = 1;
00091       storage->val = 1;
00092       queryPassage->addTerm(*storage);
00093       return 1;
00094     }
00095     return autoMMRQuery();
00096   }
00097 
00098 public:
00099 
00100   MMRSumm(InvFPIndex* inIdx, int inSummLen = 5) {
00101     idx = inIdx;
00102     summLen = inSummLen;
00103     iterCount = 1;
00104     maxSims = -1.0;
00105     queryPassage = NULL;
00106     lambda = 1.0;
00107   };
00108   
00109   virtual void markPassages(int optLen, char* qInfo);
00110 
00111   virtual void addPassage(Passage &psg);
00112 
00113   void addDocument(const char* docID);
00114 
00115   virtual int fetchPassages(Passage* psgs, int optLen);
00116   
00117   virtual void summDocument(const char* docID, const int optLen, const char* qInfo);
00118 
00119   virtual void scorePassages(const char* qInfo);
00120 
00121   virtual void clear(void);
00122 
00123   virtual int nextPassage(Passage* psg);
00124 
00125   virtual void iterClear(void);
00126 
00127   virtual void outputSumm(void);
00128 
00129   void findNextPassage(MMRPassage &psg, InvFPIndex* idx, 
00130                        TermInfoList* tList, int eos);
00131 
00132   void showPassage(passageVec* psg, InvFPIndex* idx);
00133   
00134   void showMarkedPassages();
00135 
00136   int isEOS(const char* check) {
00137     return !strcmp(check, EOS);
00138   }
00139   
00140   int hasEOS(InvFPIndex* idx, TermInfoList* tList) {
00141     tList->startIteration();
00142     TermInfo* tEntry;
00143     while (tList->hasMore()) {
00144       tEntry = tList->nextEntry();
00145       if ( isEOS(idx->term(tEntry->id())) ) return true;
00146     }
00147     return false;
00148   }
00149   
00150   int isTITLE(const char* check) {
00151     return !strcmp(check, TITLE);
00152   }
00153   
00154   int hasTITLE(InvFPIndex* idx, TermInfoList* tList) {
00155     tList->startIteration();
00156     TermInfo* tEntry;
00157     while (tList->hasMore()) {
00158       tEntry = tList->nextEntry();
00159       if ( isTITLE(idx->term(tEntry->id())) ) return true;
00160     }
00161     return false;
00162   }
00163   
00164   int isPRONOUN(const char* check) {
00165     return !strcmp(check, PRONOUN);
00166   }
00167   
00168   struct compareSW {
00169     double lambda;
00170     compareSW(double l) { lambda = l; }
00171     bool operator()(const MMRPassage p1, const MMRPassage p2) const {
00172       return p1.computeMMR(lambda) > p2.computeMMR(lambda);
00173     }
00174   };
00175   
00176 }; // MMRSumm
00177 
00178 #endif

Generated on Fri Feb 6 07:11:48 2004 for LEMUR by doxygen 1.2.16