Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

RetParamManager.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _RETRIEVALPARAMETER_HPP
00013 #define _RETRIEVALPARAMETER_HPP
00014 
00016 #include "Param.hpp"
00017 
00018 #include "TFIDFRetMethod.hpp"
00019 #include "OkapiRetMethod.hpp"
00020 #include "SimpleKLRetMethod.hpp"
00021 #include "CORIRetMethod.hpp"
00022 #include "CosSimRetMethod.hpp"
00023 #include "InQueryRetMethod.hpp"
00024 
00026 namespace RetrievalParameter {
00027 
00029 
00030 
00031   static String databaseIndex;
00033   static String retModel;
00035   static String textQuerySet;
00037   static String resultFile;
00039   static bool TRECresultFileFormat;
00041   static int fbDocCount;
00043   static int resultCount;
00045   static bool cacheDocReps;
00047   static bool useWorkingSet;
00049   static String workSetFile;
00050 
00052 
00053   static string getLower(char *parm, char *def) {
00054     string tmpString = ParamGetString(parm, def);
00055     // make it all lowercase
00056     for (int i = 0; i < tmpString.length(); i++)
00057       tmpString[i] = tolower(tmpString[i]);
00058     return tmpString;
00059   }
00060 
00061   static void get() {
00062     databaseIndex = ParamGetString("index","");
00063     retModel = getLower("retModel","kl");
00064     // backwards compatibility.
00065     if (retModel == "0") retModel = "tfidf";
00066     if (retModel == "1") retModel = "okapi";
00067     if (retModel == "2") retModel = "kl";
00068     if (retModel == "3") retModel = "inquery";
00069     if (retModel == "4") retModel = "cori_cs";
00070     if (retModel == "5") retModel = "cos";
00071 
00072     string tmp = getLower("cacheDocReps", "true");
00073     cacheDocReps = (tmp == "true" || tmp == "1");
00074 
00075     tmp = getLower("useWorkingSet", "false"); 
00076     useWorkingSet = (tmp == "true" || tmp == "1");
00077     // working set file name
00078     workSetFile = ParamGetString("workingSetFile",""); 
00079     
00080     textQuerySet = ParamGetString("textQuery","");
00081     resultFile = ParamGetString("resultFile","");
00082 
00083     tmp = getLower("resultFileFormat","trec");
00084     TRECresultFileFormat = (tmp == "trec" || tmp == "1");
00085 
00086     // default being no feedback
00087     fbDocCount = ParamGetInt("feedbackDocCount",0); 
00088     resultCount = ParamGetInt("resultCount", 1000); 
00089     
00090   }
00091 };
00092 
00093 
00094 namespace TFIDFParameter {
00095 
00097 
00098   static WeightParam docTFPrm;
00099   static WeightParam qryTFPrm;
00100   static FeedbackParam fbPrm;
00102   
00103   static void get()
00104   {
00105     string tfmethod = RetrievalParameter::getLower("doc.tfMethod", "bm25");
00106     if (tfmethod == "rawtf") docTFPrm.tf = RAWTF;
00107     else if (tfmethod == "logf") docTFPrm.tf = LOGTF;    
00108     else if (tfmethod == "bm25") docTFPrm.tf = BM25;
00109 
00110     docTFPrm.bm25K1 = ParamGetDouble("doc.bm25K1",defaultDocK1);
00111     docTFPrm.bm25B = ParamGetDouble("doc.bm25B",defaultDocB);
00112 
00113     tfmethod = RetrievalParameter::getLower("query.tfMethod", "bm25");
00114     if (tfmethod == "rawtf") qryTFPrm.tf = RAWTF;
00115     else if (tfmethod == "logf") qryTFPrm.tf = LOGTF;    
00116     else if (tfmethod == "bm25") qryTFPrm.tf = BM25;
00117 
00118     qryTFPrm.bm25K1 = ParamGetDouble("query.bm25K1",defaultQryK1);
00119     qryTFPrm.bm25B = defaultQryB;
00120     
00121     fbPrm.howManyTerms = ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00122     fbPrm.posCoeff = ParamGetDouble("feedbackPosCoeff", defaultPosCoeff); 
00123   }
00124 };
00125 
00126 namespace OkapiParameter {
00127 
00129 
00130   static TFParam tfPrm;
00131   static FeedbackParam fbPrm;
00133 
00134 
00135   static void get()
00136   {
00137     tfPrm.k1 = ParamGetDouble("BM25K1",defaultK1);
00138     tfPrm.b =  ParamGetDouble("BM25B",defaultB);
00139     tfPrm.k3 = ParamGetDouble("BM25K3", defaultK3);
00140     fbPrm.expQTF = ParamGetDouble("BM25QTF", defaultExpQTF);
00141     fbPrm.howManyTerms = ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00142     
00143   }
00144 };
00145 
00146 namespace SimpleKLParameter {
00148 
00149   static SimpleKLParameter::DocSmoothParam docPrm;
00150   static SimpleKLParameter::QueryModelParam qryPrm;
00151   static String smoothSupportFile;
00153     
00154   static void get()
00155   {
00156     smoothSupportFile = ParamGetString("smoothSupportFile", "");
00157 
00158     string tmpString = RetrievalParameter::getLower("adjustedScoreMethod", 
00159                                                     "negativekld");
00160     if (tmpString == "querylikelihood" || tmpString == "ql") {
00161       qryPrm.adjScoreMethod = SimpleKLParameter::QUERYLIKELIHOOD;
00162     } else if (tmpString == "crossentropy" ||tmpString == "ce") {
00163       qryPrm.adjScoreMethod = SimpleKLParameter::CROSSENTROPY;
00164     } else if (tmpString == "negativekld" || tmpString == "-d") {
00165       qryPrm.adjScoreMethod = SimpleKLParameter::NEGATIVEKLD;
00166     } else {
00167       cerr << "Unknown scoreMethod " << tmpString << ". Using NEGATIVEKLD" 
00168            << endl;
00169       qryPrm.adjScoreMethod = SimpleKLParameter::NEGATIVEKLD;
00170     }
00171 
00172     tmpString = RetrievalParameter::getLower("smoothMethod", 
00173                                              "dirichletprior");
00174     if (tmpString == "jelinikmercer" || tmpString == "jm" || tmpString == "0")
00175       docPrm.smthMethod = SimpleKLParameter::JELINEKMERCER;
00176     else if (tmpString == "dirichletprior" || tmpString == "dir" || 
00177              tmpString == "1")
00178       docPrm.smthMethod = SimpleKLParameter::DIRICHLETPRIOR;
00179     else if (tmpString == "absolutediscount" || tmpString == "ad" || 
00180              tmpString == "2")
00181       docPrm.smthMethod = SimpleKLParameter::ABSOLUTEDISCOUNT;
00182     else if (tmpString == "twostage" || tmpString == "2s" || tmpString == "3")
00183       docPrm.smthMethod = SimpleKLParameter::TWOSTAGE;
00184     else {
00185       cerr << "Unknown smoothMethod " << tmpString << ". Using DIRICHLET" 
00186            << endl;
00187       docPrm.smthMethod = SimpleKLParameter::defaultSmoothMethod;
00188     }
00189     
00190 
00191     tmpString = RetrievalParameter::getLower("smoothStrategy", "interpolate");
00192     if (tmpString == "interpolate" || tmpString == "int" || tmpString == "0")
00193       docPrm.smthStrategy= SimpleKLParameter::INTERPOLATE;
00194     else if (tmpString == "backoff" || tmpString == "bo" || tmpString == "1")
00195       docPrm.smthStrategy= SimpleKLParameter::BACKOFF;
00196     else {
00197       cerr << "Unknown smoothStrategy " << tmpString << ". Using INTERPOLATE" 
00198            << endl;
00199       docPrm.smthStrategy= SimpleKLParameter::defaultSmoothStrategy;
00200     }
00201     
00202 
00203     docPrm.ADDelta = ParamGetDouble("discountDelta",defaultADDelta);
00204     docPrm.JMLambda = ParamGetDouble("JelinekMercerLambda",defaultJMLambda);
00205     docPrm.DirPrior = ParamGetDouble("DirichletPrior",defaultDirPrior);
00206     
00207     tmpString = RetrievalParameter::getLower("queryUpdateMethod", "mixture");
00208 
00209     if (tmpString == "mixture" || tmpString == "mix" || tmpString == "0")
00210       qryPrm.fbMethod = SimpleKLParameter::MIXTURE;
00211     else if (tmpString == "divmin" || tmpString == "div" || tmpString == "1")
00212       qryPrm.fbMethod = SimpleKLParameter::DIVMIN;
00213     else if (tmpString == "markovchain" || tmpString == "mc" || 
00214              tmpString == "2")
00215       qryPrm.fbMethod = SimpleKLParameter::MARKOVCHAIN;
00216     else if (tmpString == "relevancemodel1" || tmpString == "rm1" || 
00217              tmpString == "3")
00218       qryPrm.fbMethod = SimpleKLParameter::RM1;
00219     else if (tmpString == "relevancemodel2" || tmpString == "rm2" || 
00220              tmpString == "4")
00221       qryPrm.fbMethod = SimpleKLParameter::RM1;
00222     else {
00223       cerr << "Unknown queryUpdateMethod " << tmpString 
00224            << ". Using MIXTURE" 
00225            << endl;
00226       qryPrm.fbMethod = SimpleKLParameter::MIXTURE;
00227     }
00228     
00229 
00230     qryPrm.fbCoeff = ParamGetDouble("feedbackCoefficient", defaultFBCoeff);
00231     qryPrm.fbPrTh = ParamGetDouble("feedbackProbThresh", defaultFBPrTh);
00232     qryPrm.fbPrSumTh = ParamGetDouble("feedbackProbSumThresh",
00233                                       defaultFBPrSumTh);
00234     qryPrm.fbTermCount = ParamGetInt("feedbackTermCount", defaultFBTermCount);
00235     qryPrm.fbMixtureNoise = ParamGetDouble("feedbackMixtureNoise",
00236                                            defaultFBMixNoise);
00237     qryPrm.emIterations = ParamGetInt("emIterations", defaultEMIterations);
00238                                               
00239   }
00240 };
00241 
00242 namespace CORIParameter {
00243   static String collectionCounts;
00244   static double cstffactor;
00245   static double cstfbaseline;
00246   static double doctffactor;
00247   static double doctfbaseline;
00248   static void get() {
00249     collectionCounts = ParamGetString("collCounts", "USE_INDEX_COUNTS");
00250     cstffactor = ParamGetDouble("CSCTF_factor", 150);
00251     cstfbaseline = ParamGetDouble("CSCTF_baseline", 50);
00252     doctffactor = ParamGetDouble("DOCCTF_factor", 1.5);
00253     doctfbaseline = ParamGetDouble("DOCCTF_baseline", 0.5);
00254   }
00255 };
00256 
00257 namespace CosSimParameter {
00258 
00260 
00261   static FeedbackParam fbPrm;
00262   static String L2NormFile;
00264   
00265   static void get()
00266   {
00267     fbPrm.howManyTerms = ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00268     fbPrm.posCoeff = ParamGetDouble("feedbackPosCoeff", defaultPosCoeff); 
00269     L2NormFile = ParamGetString("L2File", defaultL2File);
00270   }
00271 };
00272 
00273 namespace InQueryParameter {
00276   static double fbCoeff = 0.5;
00278   static int fbTermCount = 50;
00280   static double defaultBelief = 0.4;
00282   static bool cacheIDF = false;
00283   static void get()
00284   {
00285     defaultBelief = ParamGetDouble("defaultBelief", defaultBelief);
00286     fbCoeff = ParamGetDouble("feedbackPosCoeff", fbCoeff);
00287     fbTermCount = ParamGetInt("feedbackTermCount", fbTermCount);
00288     string tmpString = RetrievalParameter::getLower("cacheIDF", "true");
00289     cacheIDF = (tmpString == "true" || tmpString == "1");
00290   }
00291 };
00292 
00293 #endif

Generated on Fri Feb 6 07:11:49 2004 for LEMUR by doxygen 1.2.16