Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

MMRSumm.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _MMRSUMM_HPP
00013 #define _MMRSUMM_HPP
00014 
00015 #include <iomanip>
00016 #include "Summarizer.hpp"
00017 #include "Passage.hpp"
00018 #include "MMRPassage.hpp"
00019 #include "InvFPIndex.hpp"
00020 #include <algorithm>
00021 #include <vector>
00022 
00023 using std::vector;
00024 
00025 #define EOS      "*eos"      // term text that MMRSumm::isEOS() compares against
00026 #define TITLE    "*title"    // term text that MMRSumm::isTITLE() compares against; autoMMRQuery() takes the term following it as a query word
00027 #define PRONOUN  "*pronoun"  // term text that MMRSumm::isPRONOUN() compares against
00028 #define PSG_LEN  15          // NOTE(review): not referenced in this header; presumably a passage length -- confirm in the implementation file
00029 
00035 class MMRSumm : public Summarizer {
00036 
00037 private:
00038   double lambda;
00039   InvFPIndex* idx;
00040   int summLen;
00041   vector<MMRPassage> doc;
00042   int iterCount;
00043   double maxSims;
00044   MMRPassage* queryPassage;
00045 
00046   int autoMMRQuery(void) {
00047     TermInfo* tEntry;
00048     TermInfoList* tList = idx->termInfoListSeq(idx->document(queryPassage->docID));
00049     termCount* storage;
00050     if (hasTITLE(idx, tList)) {
00051       // use title words
00052       tList->startIteration();
00053       cout << "title found" << endl;
00054       while (tList->hasMore()) {
00055         tEntry = tList->nextEntry();
00056         if ( isTITLE(idx->term(tEntry->id())) ) {
00057           tEntry = tList->nextEntry(); // the actual word after title token
00058           storage = new termCount;
00059           storage->termID = tEntry->id();
00060           storage->tf = tEntry->count();
00061           storage->val = tEntry->count();
00062           queryPassage->addTerm(*storage);
00063         }
00064       }      
00065     } else {
00066       tList->startIteration();
00067       for (int i=0; i<10; i++) {
00068         if (tList->hasMore()) {
00069           tEntry = tList->nextEntry();
00070           storage = new termCount;
00071           storage->termID = tEntry->id();
00072           storage->tf = tEntry->count();
00073           storage->val = tEntry->count();
00074           queryPassage->addTerm(*storage);
00075         }
00076       } 
00077     }
00078     cout << "Autoquery: ";
00079     showPassage((*queryPassage).getAsVector(), idx);
00080     cout << endl;
00081 
00082     return 1;
00083   }
00084 
00085   int setMMRQuery(char* qInfo) {
00086     if (qInfo != "") {
00087       termCount* storage;
00088       storage = new termCount;
00089       storage->termID = idx->term(qInfo);
00090       storage->tf = 1;
00091       storage->val = 1;
00092       queryPassage->addTerm(*storage);
00093       return 1;
00094     }
00095     return autoMMRQuery();
00096   }
00097 
00098 public:
00099 
00100   MMRSumm(InvFPIndex* inIdx, int inSummLen = 5) {
00101     idx = inIdx;
00102     summLen = inSummLen;
00103     iterCount = 1;
00104     maxSims = -1.0;
00105     queryPassage = NULL;
00106     lambda = 1.0;
00107   };
00108   
00109   virtual void markPassages(int optLen, char* qInfo);
00110 
00111   virtual void addPassage(Passage &psg);
00112 
00113   virtual int fetchPassages(Passage psgs[], int optLen);
00114   
00115   virtual void summDocument(const char* docID, const int optLen, const char* qInfo);
00116 
00117   virtual void scorePassages(const char* qInfo);
00118 
00119   virtual void clear(void);
00120 
00121   virtual int nextPassage(Passage* psg);
00122 
00123   virtual void iterClear(void);
00124 
00125   virtual void outputSumm(void);
00126 
00127   void findNextPassage(MMRPassage &psg, InvFPIndex* idx, 
00128                        TermInfoList* tList, int eos);
00129 
00130   void showPassage(passageVec* psg, InvFPIndex* idx);
00131   
00132   void showMarkedPassages();
00133 
00134   int isEOS(const char* check) {
00135     return !strcmp(check, EOS);
00136   }
00137   
00138   int hasEOS(InvFPIndex* idx, TermInfoList* tList) {
00139     tList->startIteration();
00140     TermInfo* tEntry;
00141     while (tList->hasMore()) {
00142       tEntry = tList->nextEntry();
00143       if ( isEOS(idx->term(tEntry->id())) ) return true;
00144     }
00145     return false;
00146   }
00147   
00148   int isTITLE(const char* check) {
00149     return !strcmp(check, TITLE);
00150   }
00151   
00152   int hasTITLE(InvFPIndex* idx, TermInfoList* tList) {
00153     tList->startIteration();
00154     TermInfo* tEntry;
00155     while (tList->hasMore()) {
00156       tEntry = tList->nextEntry();
00157       if ( isTITLE(idx->term(tEntry->id())) ) return true;
00158     }
00159     return false;
00160   }
00161   
00162   int isPRONOUN(const char* check) {
00163     return !strcmp(check, PRONOUN);
00164   }
00165   
00166   struct compareSW {
00167     double lambda;
00168     compareSW(double l) { lambda = l; }
00169     bool operator()(const MMRPassage p1, const MMRPassage p2) const {
00170       return p1.computeMMR(lambda) > p2.computeMMR(lambda);
00171     }
00172   };
00173   
00174 }; // MMRSumm
00175 
00176 #endif

Generated at Fri Jul 26 18:26:24 2002 for LEMUR by doxygen 1.2.4, written by Dimitri van Heesch, © 1997-2000