Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

CORIRetMethod.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _CORIRETMETHOD_HPP
00014 #define _CORIRETMETHOD_HPP
00015 
00016 
00017 #include "UnigramLM.hpp"
00018 #include "SimpleKLDocModel.hpp"
00019 #include "TextQueryRetMethod.hpp"
00020 #include "Param.hpp"
00021 #include <math.h>
00022 
00023 
00024 
00025 class CORIQueryRep : public ArrayQueryRep {
00026 public:
00027   CORIQueryRep(TextQuery & qry, Index & dbIndex);
00028   virtual ~CORIQueryRep() {}
00029 
00030 protected:
00031   Index & ind;
00032 };
00033 
00034 class CORIDocRep : public DocumentRep {
00035 public:
00036   CORIDocRep(int docID, Index & dbIndex, double * cwRatio, 
00037              double TFfact = 150, double TFbase = 50, 
00038              SimpleKLDocModel * smoother = NULL,
00039              UnigramLM * collectLM = NULL);
00040   virtual ~CORIDocRep() { }
00041   virtual double termWeight(int termID, DocInfo * info);
00042 
00043   virtual double scoreConstant() { return 0; }
00044 
00045 private:
00046 
00047   Index & ind;
00048 
00049   int * cwCounts;
00050 
00051   SimpleKLDocModel * dfSmooth;
00052   UnigramLM * collLM;
00053 
00054   double c05;
00055   double idiv;
00056   double tnorm;
00057 };
00058 
00059 
00060 
00061 class CORIRetMethod : public TextQueryRetMethod {
00062 public:
00063 
00064   CORIRetMethod(Index & dbIndex, ScoreAccumulator &accumulator, 
00065                 String cwName, 
00066                 SimpleKLDocModel ** smoothers = NULL, 
00067                 UnigramLM * collectLM = NULL);
00068   ~CORIRetMethod() { delete scFunc; delete [] cwRatio; }
00069 
00070   virtual TextQueryRep * computeTextQueryRep(TextQuery & qry) {
00071     return new CORIQueryRep(qry, ind);
00072   }
00073   virtual DocumentRep * computeDocRep(int docID) { 
00074     if (dfSmooth != NULL) {
00075       return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline, dfSmooth[docID], collLM);
00076     }
00077     return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline);
00078   }
00079   virtual ScoreFunction * scoreFunc() {
00080     return scFunc;
00081   }
00082 
00083   virtual void scoreCollection(QueryRep &qry, IndexedRealVector &results);
00084 
00085   virtual void updateTextQuery(TextQueryRep &qryRep, DocIDSet &relDocs) { }
00086   
00087   void setTFFactor(double tf) { tffactor = tf; }
00088   void setTFBaseline(double tf) { tfbaseline = tf; }
00089 
00090 protected:
00091 
00092   ScoreFunction * scFunc;
00093   SimpleKLDocModel ** dfSmooth;
00094   UnigramLM * collLM;
00095 
00096   double * cwRatio;
00097   double tffactor;
00098   double tfbaseline;
00099   
00100 };
00101 
00102 class CORIScoreFunc : public ScoreFunction {
00103 public:
00104   CORIScoreFunc(Index & index) : ind(index) {
00105     double dc = ind.docCount();
00106     c05 = dc + 0.5;
00107     idiv = log(dc + 1) / 0.6;
00108    
00109   }
00110 
00111   virtual double adjustedScore(double origScore, TextQueryRep * qRep,
00112                                DocumentRep * dRep) {
00113     if (qr != qRep) {
00114       qr = qRep;
00115       
00116       qRep->startIteration();
00117       rmax = 0;
00118       double qw = 0;
00119       while (qRep->hasMore()) {
00120         int qtid = qRep->nextTerm()->id();
00121         
00122         rmax += (log(c05 / ind.docCount(qtid)) / idiv);
00123       }
00124     }
00125     return (origScore / rmax);
00126   }
00127 
00128 private:
00129   Index & ind;
00130 
00131   TextQueryRep * qr;
00132   double rmax;
00133 
00134   double c05;
00135   double idiv;
00136 };
00137 
00138 
00139 #endif /* _CORIRETMETHOD_HPP */

Generated at Fri Jul 26 18:26:22 2002 for LEMUR by doxygen 1.2.4, written by Dimitri van Heesch, © 1997-2000