Package mekano :: Package ml :: Module utils
Source Code for Module mekano.ml.utils

 1  """Handy functions to produce scores and decisions for a list of documents. 
 2   
 3  For tuning thresholds, see L{Thresholder}. 
 4  """ 
 5   
 6 -def scoreAll(classifier, docs): 
 7      """Score all docs using a classifier. 
 8       
 9      @param classifier   : A binary classifier 
10      @param docs         : A list of docs of type L{AtomVector} 
11      @return             : A list of scores corresponding to the `docs` 
12      """ 
13      return [classifier.score(d) for d in docs] 
14   
def decideAll(classifier, docs, thresholds):
    """Turn multiclassifier scores into per-document label decisions.

    Each doc is scored with C{classifier.score(doc)} and the resulting
    scores are passed to L{applyThresholds} together with `thresholds`.

    @param classifier   : A L{MultiClassifier}
    @param docs         : An iterable of docs
    @param thresholds   : A label:float dictionary
    @return             : A list of sets of positive labels, one set per doc, in order
    """
    decisions = []
    for doc in docs:
        decisions.append(applyThresholds(classifier.score(doc), thresholds))
    return decisions
24   
25   
def applyThresholds(scores, thresholds):
    """Apply thresholds to a label:score dictionary corresponding to a single doc.

    A label is positive when its score is greater than or equal to the
    threshold registered for that label.

    @param scores       : A label:float dictionary of L{MultiClassifier}-produced scores
    @param thresholds   : A label:float dictionary of thresholds; it must have an
                          entry for every label present in `scores`
    @return             : A set of positive labels (empty when `scores` is empty)
    @raise KeyError     : If `thresholds` is a plain dict and lacks a label found in `scores`
    """
    # items() exists on both Python 2 and Python 3 dicts, whereas iteritems()
    # is Python 2 only and raises AttributeError on Python 3.
    return set(label for label, score in scores.items()
               if score >= thresholds[label])
35