1 """Classes and functions for creating and managing unique atoms.
2
3 The main exposed class is L{AtomFactory}.
4
5 Useful functions: L{convertAtom} and L{convertAtomVector}.
6 """
7
8 from __future__ import with_statement
9 import cPickle
10 from atomvector import AtomVector
13 """
14 A single AtomFactory makes unique atoms for the given
15 objects. By atoms, we just mean numbers.
16 Objects just have to be hashable.
17
18 To get the Pipeline's (type, object) atoms, just use
19 tuples!
20
21 >>> af = AtomFactory("mytokens")
22 >>> a1 = af["apples"]
23 >>> a2 = af["oranges"]
24 >>> assert(a1 == 1)
25 >>> assert(a2 == 2)
26 >>> assert(af(1) == "apples")
>>> af.lock() # Do not allow changes.
28
29 Loading/saving:
30 >>> a = AtomFactory.fromfile(filename)
31 >>> a.save(filename)
32
33 @note: C{af(1)} is candy for C{af.get_object(1)}
34
35 """
36
38 self.name = name
39
40 self.obj_to_atom = {}
41 self.atom_to_obj = []
42 self.locked = False
43
45 return "<AtomFactory: %s %d atoms>" % (self.name, len(self.atom_to_obj))
46
48 try:
49 return self.obj_to_atom[obj]
50 except KeyError:
51 if self.locked:
52 raise
53 a = len(self.atom_to_obj) + 1
54 self.obj_to_atom[obj] = a
55 self.atom_to_obj.append(obj)
56 return a
57
59 return self.atom_to_obj[a-1]
60
62 return self.atom_to_obj[a-1]
63
65 return len(self.atom_to_obj)
66
68 return obj in self.obj_to_atom
69
71 """Lock the AtomFactory.
72
No new atoms can be added; only existing ones can be retrieved.
74 """
75 self.locked = True
76
78 """Returns a new AtomFactory with the given objects removed.
79 """
80 objects = set(objects)
81 new_af = AtomFactory(self.name)
82 for obj in self.atom_to_obj:
83 if obj not in objects:
84 new_af[obj]
85 return new_af
86
def save(self, filename):
    """Pickle this object to a file.

    The object is dumped with pickle protocol -1 (the highest protocol
    available), which is a binary format.

    @param filename : Path of the file to write; it is opened for
                      writing, so an existing file is overwritten
    """
    # Protocol -1 produces a binary pickle, so the file must be opened in
    # binary mode; text mode can corrupt the stream on platforms that
    # translate newlines.
    with open(filename, "wb") as fout:
        cPickle.dump(self, fout, -1)
90
92 """Save each object on a line.
93
94 This should be enough to reconstruct the AtomFactory,
95 and is also useful for things like LDA's vocabulary file.
96 """
97 with open(filename, "w") as fout:
98 for obj in self.atom_to_obj:
99 fout.write("%s\n" % obj)
100
101 @staticmethod
103 with open(filename, "r") as fin:
104 a = cPickle.load(fin)
105 return a
106
108 """Convert an atom from one AtomFactory to another.
109
110 @param oldAF : The old AtomFactory to which atom belongs
111 @param newAF : The new AtomFactory
112 @param atom : The atom to convert
113 @return : The converted atom
@raise Exception : If atom cannot be found in oldAF, or if its object is not in newAF
115 """
116 o = oldAF.get_object(atom)
117 if o not in newAF:
118 raise Exception, "%r not in newAF" % o
119 return newAF[o]
120
122 """Convert an L{AtomVector} from one AtomFactory to another.
123
124 @param oldAF : The old AtomFactory to which AtomVector av belongs
125 @param newAF : The new AtomFactory
126 @param av : The AtomVector to convert
127 @return : The converted AtomVector
128 """
129 new_av = AtomVector(av.name)
130 for a, v in av.iteritems():
131 try:
132 a = convertAtom(oldAF, newAF, a)
133 new_av[a] = v
134 except Exception:
135 pass
136 return new_av
137