Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

TwoStageTermScoreFunction.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // TwoStageTermScoreFunction.hpp
00015 //
00016 // 16 April 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_TWOSTAGETERMSCOREFUNCTION_HPP
00020 #define INDRI_TWOSTAGETERMSCOREFUNCTION_HPP
00021 
00022 class TwoStageTermScoreFunction : public TermScoreFunction {
00023 private:
00024   double _mu;
00025   double _lambda;
00026   double _collectionFrequency;
00027 
00028 public:
00029   TwoStageTermScoreFunction( double mu, double lambda, double collectionFrequency ) :
00030     _mu(mu),
00031     _lambda(lambda),
00032     _collectionFrequency(collectionFrequency) {
00033   }
00034 
00035   double scoreOccurrence( int occurrences, int contextSize ) {
00036 
00037     //                    [  c(w;d) + \mu * p(w|C)   ]
00038     //    ( 1 - \lambda ) [ ------------------------ ] + \lambda * p(w|C)
00039     //                    [       |d| + \mu          ]
00040 
00041     double dirichlet = ((double(occurrences) + _mu*_collectionFrequency) / (double(contextSize) + _mu));
00042     double p = ( 1-_lambda ) * dirichlet + _lambda * _collectionFrequency;
00043     return log(p);
00044   }
00045 
00046   double scoreOccurrence( int occurrences, int contextSize, int documentOccurrences, int documentLength ) {
00047     double documentFrequency = double(documentOccurrences) / double(documentLength);
00048     double dirichlet = ((double(occurrences) + _mu*documentFrequency) / (double(contextSize) + _mu));
00049     double p = ( 1-_lambda ) * dirichlet + _lambda * _collectionFrequency;
00050     return log(p);
00051   }
00052 };
00053 
00054 #endif // INDRI_TWOSTAGETERMSCOREFUNCTION_HPP
00055 

Generated on Wed Nov 3 12:59:07 2004 for Lemur Toolkit by doxygen 1.2.18