00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef INDRI_TWOSTAGETERMSCOREFUNCTION_HPP
00020 #define INDRI_TWOSTAGETERMSCOREFUNCTION_HPP
00021
00022 class TwoStageTermScoreFunction : public TermScoreFunction {
00023 private:
00024 double _mu;
00025 double _lambda;
00026 double _collectionFrequency;
00027
00028 public:
00029 TwoStageTermScoreFunction( double mu, double lambda, double collectionFrequency ) :
00030 _mu(mu),
00031 _lambda(lambda),
00032 _collectionFrequency(collectionFrequency) {
00033 }
00034
00035 double scoreOccurrence( int occurrences, int contextSize ) {
00036
00037
00038
00039
00040
00041 double dirichlet = ((double(occurrences) + _mu*_collectionFrequency) / (double(contextSize) + _mu));
00042 double p = ( 1-_lambda ) * dirichlet + _lambda * _collectionFrequency;
00043 return log(p);
00044 }
00045
00046 double scoreOccurrence( int occurrences, int contextSize, int documentOccurrences, int documentLength ) {
00047 double documentFrequency = double(documentOccurrences) / double(documentLength);
00048 double dirichlet = ((double(occurrences) + _mu*documentFrequency) / (double(contextSize) + _mu));
00049 double p = ( 1-_lambda ) * dirichlet + _lambda * _collectionFrequency;
00050 return log(p);
00051 }
00052 };
00053
00054 #endif // INDRI_TWOSTAGETERMSCOREFUNCTION_HPP
00055