Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

StructQryDocRep.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _STRUCTQRYDOCREP_HPP
00013 #define _STRUCTQRYDOCREP_HPP
00014 #include "DocumentRep.hpp"
00017 class StructQryDocRep : public DocumentRep {
00018 public:
00020   StructQryDocRep(int docID, double *idfValue, int docLength, int docCount,
00021                   double docLengthAverage, double db) : 
00022     DocumentRep(docID), did(docID), idf(idfValue), end(docLength),
00023     docEnd(docLength), size(docLength), start(0),
00024     dla(docLengthAverage), defaultBelief(db) {
00025     oneMinusDB = 1 - defaultBelief;
00026     denom = log(docCount + 1.0);
00027     numer = docCount + 0.5;
00028   }
00030   virtual ~StructQryDocRep() {}
00032   virtual double termWeight(int termID, DocInfo *info) { return 0;}
00034   virtual double termWeight(int termID, double dtf, int df) {
00035     if (idf)
00036       return beliefScore(dtf, idf[termID]);
00037     else
00038       return beliefScore(dtf, computeIdfScore(df));
00039   }
00041   virtual double scoreConstant() { return 0;}
00042 
00044   void startPassageIteration(int sz) {
00045     size = sz;
00046     increment = size/2;
00047     start = 0;
00048     end = size < docEnd ? size : docEnd;
00049   }
00051   bool hasMorePassage() {
00052     // still some terms in the list.
00053     return(start < docEnd);
00054   }
00055 
00057   void nextPassage() {
00058     if(start + increment < docEnd)
00059       start += increment;
00060     else
00061       start = docEnd;
00062     end = (start + size) < docEnd ? (start + size) : docEnd;
00063   }
00064 
00067   double computeIdfScore(double df) {
00068     return log(numer/df)/denom;
00069   }
00070 
00072   double beliefScore(double df, double idf) {
00073     return (defaultBelief + oneMinusDB
00074             * (df / (df + 0.5 + 1.5* ((end - start)/dla))) * idf);
00075   }
00076 
00078   int did;
00080   int start; 
00082   int end;
00083 
00084 private:
00086   double *idf;
00088   int size; 
00090   int increment; 
00092   int docEnd;  
00094   double dla;
00096   double numer, denom;
00098   double defaultBelief, oneMinusDB;
00099 };
00100 #endif

Generated on Tue Nov 25 11:26:46 2003 for Lemur Toolkit by doxygen 1.2.18