1 from __future__ import with_statement
2 from ..atoms import AtomVector
5 """
6 mc = MultiClassifier()
7
8 Manages a set of classifiers for a multi-class/multi-label
9 classification problem.
10 There are two ways to create a MultiClassifier.
11 First, manually create the classifier for each label, then
12 add it to MultiClassifier:
13
14 >>> mc.add(label, classifier)
15
16 Each classifier should provide a score function.
17
18 scores = mc.score(av)
19 'scores' is a map of label:score
20
21 Second, provide a BaseClassifier class and a multi-labeled dataset
22 and optional params to be passed to BaseClassifier, in which case
23 MultiClassifier will binarize the dataset and do the training:
24
25 >>> mc = mekano.MultiClassifier.create(mekano.LogisticRegressionClassifier, trainset, LAMBDA=0.1, c=1.0)
26
27 """
28
30 self.classifiers = {}
31 self.labelset = set()
32
def add(self, label, classifier):
    """Register ``classifier`` as the classifier for ``label``.

    The label is recorded in ``self.labelset`` and the classifier is
    stored in ``self.classifiers`` under that label; a classifier
    previously stored under the same label is replaced.
    """
    self.labelset.add(label)
    self.classifiers[label] = classifier
36
38 return self.classifiers[key]
39
41 """Score a vector av
42
43 Returns a map of label:score
44 """
45 return dict([(l,c.score(av)) for l,c in self.classifiers.iteritems()])
46
48 return "<MultiClassfier: %d classifiers>" % (len(self.classifiers))
49
@staticmethod
def create(BaseClassifier, ds, **params):
    """Create a MultiClassifier from a base classifier class and multi-labeled dataset.

    The dataset is split with ``ds.binarize()``. For every (label, binary
    dataset) pair a fresh ``BaseClassifier(**params)`` is constructed,
    trained through its ``train`` method and registered under that label.

    'params' are optional parameters to pass to the BaseClassifier constructor.

    Returns the populated MultiClassifier.
    """
    import sys  # local import: only needed for the progress message below

    ret = MultiClassifier()
    # items() replaces the Python-2-only iteritems(); assumes binarize()
    # returns a dict-like {label: dataset} mapping -- TODO confirm.
    for label, bd in ds.binarize().items():
        # Emits the same text as the former `print "...", label` statement,
        # but is valid syntax on both Python 2 and Python 3.
        sys.stdout.write("MultiClassifier: Training for %s\n" % (label,))
        baseClassifier = BaseClassifier(**params)
        baseClassifier.train(bd)
        ret.add(label, baseClassifier)
    return ret
64