00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _RETRIEVALPARAMETER_HPP
00013 #define _RETRIEVALPARAMETER_HPP
00014
00016 #include "Param.hpp"
00017
00018 #include "TFIDFRetMethod.hpp"
00019 #include "OkapiRetMethod.hpp"
00020 #include "SimpleKLRetMethod.hpp"
00021 #include "CORIRetMethod.hpp"
00022 #include "CosSimRetMethod.hpp"
00023 #include "InQueryRetMethod.hpp"
00024
00026 namespace RetrievalParameter {
00027
00029
00030
00031 static String databaseIndex;
00033 static String retModel;
00035 static String textQuerySet;
00037 static String resultFile;
00039 static bool TRECresultFileFormat;
00041 static int fbDocCount;
00043 static int resultCount;
00045 static bool cacheDocReps;
00047 static bool useWorkingSet;
00049 static String workSetFile;
00050
00052
00053 static string getLower(char *parm, char *def) {
00054 string tmpString = ParamGetString(parm, def);
00055
00056 for (int i = 0; i < tmpString.length(); i++)
00057 tmpString[i] = tolower(tmpString[i]);
00058 return tmpString;
00059 }
00060
00061 static void get() {
00062 databaseIndex = ParamGetString("index","");
00063 retModel = getLower("retModel","kl");
00064
00065 if (retModel == "0") retModel = "tfidf";
00066 if (retModel == "1") retModel = "okapi";
00067 if (retModel == "2") retModel = "kl";
00068 if (retModel == "3") retModel = "inquery";
00069 if (retModel == "4") retModel = "cori_cs";
00070 if (retModel == "5") retModel = "cos";
00071
00072 string tmp = getLower("cacheDocReps", "true");
00073 cacheDocReps = (tmp == "true" || tmp == "1");
00074
00075 tmp = getLower("useWorkingSet", "false");
00076 useWorkingSet = (tmp == "true" || tmp == "1");
00077
00078 workSetFile = ParamGetString("workingSetFile","");
00079
00080 textQuerySet = ParamGetString("textQuery","");
00081 resultFile = ParamGetString("resultFile","");
00082
00083 tmp = getLower("resultFileFormat","trec");
00084 TRECresultFileFormat = (tmp == "trec" || tmp == "1");
00085
00086
00087 fbDocCount = ParamGetInt("feedbackDocCount",0);
00088 resultCount = ParamGetInt("resultCount", 1000);
00089
00090 }
00091 };
00092
00093
00094 namespace TFIDFParameter {
00095
00097
00098 static WeightParam docTFPrm;
00099 static WeightParam qryTFPrm;
00100 static FeedbackParam fbPrm;
00102
00103 static void get()
00104 {
00105 string tfmethod = RetrievalParameter::getLower("doc.tfMethod", "bm25");
00106 if (tfmethod == "rawtf") docTFPrm.tf = RAWTF;
00107 else if (tfmethod == "logf") docTFPrm.tf = LOGTF;
00108 else if (tfmethod == "bm25") docTFPrm.tf = BM25;
00109
00110 docTFPrm.bm25K1 = ParamGetDouble("doc.bm25K1",defaultDocK1);
00111 docTFPrm.bm25B = ParamGetDouble("doc.bm25B",defaultDocB);
00112
00113 tfmethod = RetrievalParameter::getLower("query.tfMethod", "bm25");
00114 if (tfmethod == "rawtf") qryTFPrm.tf = RAWTF;
00115 else if (tfmethod == "logf") qryTFPrm.tf = LOGTF;
00116 else if (tfmethod == "bm25") qryTFPrm.tf = BM25;
00117
00118 qryTFPrm.bm25K1 = ParamGetDouble("query.bm25K1",defaultQryK1);
00119 qryTFPrm.bm25B = defaultQryB;
00120
00121 fbPrm.howManyTerms = ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00122 fbPrm.posCoeff = ParamGetDouble("feedbackPosCoeff", defaultPosCoeff);
00123 }
00124 };
00125
00126 namespace OkapiParameter {
00127
00129
00130 static TFParam tfPrm;
00131 static FeedbackParam fbPrm;
00133
00134
00135 static void get()
00136 {
00137 tfPrm.k1 = ParamGetDouble("BM25K1",defaultK1);
00138 tfPrm.b = ParamGetDouble("BM25B",defaultB);
00139 tfPrm.k3 = ParamGetDouble("BM25K3", defaultK3);
00140 fbPrm.expQTF = ParamGetDouble("BM25QTF", defaultExpQTF);
00141 fbPrm.howManyTerms = ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00142
00143 }
00144 };
00145
00146 namespace SimpleKLParameter {
00148
00149 static SimpleKLParameter::DocSmoothParam docPrm;
00150 static SimpleKLParameter::QueryModelParam qryPrm;
00151 static String smoothSupportFile;
00153
00154 static void get()
00155 {
00156 smoothSupportFile = ParamGetString("smoothSupportFile", "");
00157
00158 string tmpString = RetrievalParameter::getLower("adjustedScoreMethod",
00159 "negativekld");
00160 if (tmpString == "querylikelihood" || tmpString == "ql") {
00161 qryPrm.adjScoreMethod = SimpleKLParameter::QUERYLIKELIHOOD;
00162 } else if (tmpString == "crossentropy" ||tmpString == "ce") {
00163 qryPrm.adjScoreMethod = SimpleKLParameter::CROSSENTROPY;
00164 } else if (tmpString == "negativekld" || tmpString == "-d") {
00165 qryPrm.adjScoreMethod = SimpleKLParameter::NEGATIVEKLD;
00166 } else {
00167 cerr << "Unknown scoreMethod " << tmpString << ". Using NEGATIVEKLD"
00168 << endl;
00169 qryPrm.adjScoreMethod = SimpleKLParameter::NEGATIVEKLD;
00170 }
00171
00172 tmpString = RetrievalParameter::getLower("smoothMethod",
00173 "dirichletprior");
00174 if (tmpString == "jelinikmercer" || tmpString == "jm" || tmpString == "0")
00175 docPrm.smthMethod = SimpleKLParameter::JELINEKMERCER;
00176 else if (tmpString == "dirichletprior" || tmpString == "dir" ||
00177 tmpString == "1")
00178 docPrm.smthMethod = SimpleKLParameter::DIRICHLETPRIOR;
00179 else if (tmpString == "absolutediscount" || tmpString == "ad" ||
00180 tmpString == "2")
00181 docPrm.smthMethod = SimpleKLParameter::ABSOLUTEDISCOUNT;
00182 else if (tmpString == "twostage" || tmpString == "2s" || tmpString == "3")
00183 docPrm.smthMethod = SimpleKLParameter::TWOSTAGE;
00184 else {
00185 cerr << "Unknown smoothMethod " << tmpString << ". Using DIRICHLET"
00186 << endl;
00187 docPrm.smthMethod = SimpleKLParameter::defaultSmoothMethod;
00188 }
00189
00190
00191 tmpString = RetrievalParameter::getLower("smoothStrategy", "interpolate");
00192 if (tmpString == "interpolate" || tmpString == "int" || tmpString == "0")
00193 docPrm.smthStrategy= SimpleKLParameter::INTERPOLATE;
00194 else if (tmpString == "backoff" || tmpString == "bo" || tmpString == "1")
00195 docPrm.smthStrategy= SimpleKLParameter::BACKOFF;
00196 else {
00197 cerr << "Unknown smoothStrategy " << tmpString << ". Using INTERPOLATE"
00198 << endl;
00199 docPrm.smthStrategy= SimpleKLParameter::defaultSmoothStrategy;
00200 }
00201
00202
00203 docPrm.ADDelta = ParamGetDouble("discountDelta",defaultADDelta);
00204 docPrm.JMLambda = ParamGetDouble("JelinekMercerLambda",defaultJMLambda);
00205 docPrm.DirPrior = ParamGetDouble("DirichletPrior",defaultDirPrior);
00206
00207 tmpString = RetrievalParameter::getLower("queryUpdateMethod", "mixture");
00208
00209 if (tmpString == "mixture" || tmpString == "mix" || tmpString == "0")
00210 qryPrm.fbMethod = SimpleKLParameter::MIXTURE;
00211 else if (tmpString == "divmin" || tmpString == "div" || tmpString == "1")
00212 qryPrm.fbMethod = SimpleKLParameter::DIVMIN;
00213 else if (tmpString == "markovchain" || tmpString == "mc" ||
00214 tmpString == "2")
00215 qryPrm.fbMethod = SimpleKLParameter::MARKOVCHAIN;
00216 else if (tmpString == "relevancemodel1" || tmpString == "rm1" ||
00217 tmpString == "3")
00218 qryPrm.fbMethod = SimpleKLParameter::RM1;
00219 else if (tmpString == "relevancemodel2" || tmpString == "rm2" ||
00220 tmpString == "4")
00221 qryPrm.fbMethod = SimpleKLParameter::RM1;
00222 else {
00223 cerr << "Unknown queryUpdateMethod " << tmpString
00224 << ". Using MIXTURE"
00225 << endl;
00226 qryPrm.fbMethod = SimpleKLParameter::MIXTURE;
00227 }
00228
00229
00230 qryPrm.fbCoeff = ParamGetDouble("feedbackCoefficient", defaultFBCoeff);
00231 qryPrm.fbPrTh = ParamGetDouble("feedbackProbThresh", defaultFBPrTh);
00232 qryPrm.fbPrSumTh = ParamGetDouble("feedbackProbSumThresh",
00233 defaultFBPrSumTh);
00234 qryPrm.fbTermCount = ParamGetInt("feedbackTermCount", defaultFBTermCount);
00235 qryPrm.fbMixtureNoise = ParamGetDouble("feedbackMixtureNoise",
00236 defaultFBMixNoise);
00237 qryPrm.emIterations = ParamGetInt("emIterations", defaultEMIterations);
00238
00239 }
00240 };
00241
00242 namespace CORIParameter {
00243 static String collectionCounts;
00244 static double cstffactor;
00245 static double cstfbaseline;
00246 static double doctffactor;
00247 static double doctfbaseline;
00248 static void get() {
00249 collectionCounts = ParamGetString("collCounts", "USE_INDEX_COUNTS");
00250 cstffactor = ParamGetDouble("CSCTF_factor", 150);
00251 cstfbaseline = ParamGetDouble("CSCTF_baseline", 50);
00252 doctffactor = ParamGetDouble("DOCCTF_factor", 1.5);
00253 doctfbaseline = ParamGetDouble("DOCCTF_baseline", 0.5);
00254 }
00255 };
00256
00257 namespace CosSimParameter {
00258
00260
00261 static FeedbackParam fbPrm;
00262 static String L2NormFile;
00264
00265 static void get()
00266 {
00267 fbPrm.howManyTerms = ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00268 fbPrm.posCoeff = ParamGetDouble("feedbackPosCoeff", defaultPosCoeff);
00269 L2NormFile = ParamGetString("L2File", defaultL2File);
00270 }
00271 };
00272
00273 namespace InQueryParameter {
00276 static double fbCoeff = 0.5;
00278 static int fbTermCount = 50;
00280 static double defaultBelief = 0.4;
00282 static bool cacheIDF = false;
00283 static void get()
00284 {
00285 defaultBelief = ParamGetDouble("defaultBelief", defaultBelief);
00286 fbCoeff = ParamGetDouble("feedbackPosCoeff", fbCoeff);
00287 fbTermCount = ParamGetInt("feedbackTermCount", fbTermCount);
00288 string tmpString = RetrievalParameter::getLower("cacheIDF", "true");
00289 cacheIDF = (tmpString == "true" || tmpString == "1");
00290 }
00291 };
00292
00293 #endif