Package mekano :: Package atoms :: Module atomfactory
[hide private]
[frames] | [no frames]

Source Code for Module mekano.atoms.atomfactory

  1  """Classes and functions for creating and managing unique atoms. 
  2   
  3  The main exposed class is L{AtomFactory}. 
  4   
  5  Useful functions: L{convertAtom} and L{convertAtomVector}. 
  6  """ 
  7   
  8  from __future__ import with_statement 
  9  import cPickle 
 10  from atomvector import AtomVector 
class AtomFactory:
    """Make unique atoms for arbitrary hashable objects.

    By atoms, we just mean numbers: the first new object gets atom 1, the
    next new object gets atom 2, and so on.  Objects just have to be
    hashable.

    To get the Pipeline's (type, object) atoms, just use tuples!

        >>> af = AtomFactory("mytokens")
        >>> a1 = af["apples"]
        >>> a2 = af["oranges"]
        >>> assert(a1 == 1)
        >>> assert(a2 == 2)
        >>> assert(af(1) == "apples")
        >>> af.lock()                   # Do not allow changes.

    Loading/saving:
        >>> af.save(filename)                       # doctest: +SKIP
        >>> af = AtomFactory.fromfile(filename)     # doctest: +SKIP

    @note: C{af(1)} is candy for C{af.get_object(1)}
    """

    def __init__(self, name="noname"):
        """Create an empty, unlocked factory.

        @param name: label used in C{repr()} and copied by L{remove}
        """
        self.name = name
        # Bi-map: obj_to_atom maps object -> atom, atom_to_obj[atom - 1]
        # maps back from atom -> object.
        self.obj_to_atom = {}
        self.atom_to_obj = []
        self.locked = False

    def __repr__(self):
        return "<AtomFactory: %s %d atoms>" % (self.name, len(self.atom_to_obj))

    def __getitem__(self, obj):
        """Return the atom for C{obj}, creating a new one if needed.

        @param obj: any hashable object
        @return: the atom (an int >= 1) assigned to C{obj}
        @raise KeyError: if C{obj} is unknown and the factory is locked
        """
        try:
            return self.obj_to_atom[obj]
        except KeyError:
            if self.locked:
                # A locked factory never grows; let the KeyError through.
                raise
            # Atoms are 1-based, so the next atom is the new list length.
            atom = len(self.atom_to_obj) + 1
            self.obj_to_atom[obj] = atom
            self.atom_to_obj.append(obj)
            return atom

    def get_object(self, a):
        """Return the object that atom C{a} stands for.

        @param a: an atom handed out by this factory
        @raise IndexError: if C{a} is not an atom of this factory
        """
        # Atoms start at 1.  Without this guard, 0 and negative values would
        # wrap around through negative list indexing and silently return an
        # unrelated object.
        if a < 1:
            raise IndexError("atom out of range: %r" % (a,))
        return self.atom_to_obj[a - 1]

    def __call__(self, a):
        """Candy for L{get_object}."""
        return self.get_object(a)

    def __len__(self):
        """Return the number of atoms handed out so far."""
        return len(self.atom_to_obj)

    def __contains__(self, obj):
        """Tell whether C{obj} already has an atom (never creates one)."""
        return obj in self.obj_to_atom

    def lock(self):
        """Lock the AtomFactory.

        No new atoms can be added; only old ones can be retrieved.
        """
        self.locked = True

    def remove(self, objects):
        """Return a new AtomFactory with the given objects removed.

        The new factory has the same name, starts out unlocked, and numbers
        the surviving objects afresh from 1 in their original order.

        @param objects: iterable of hashable objects to leave out
        @return: the new AtomFactory; C{self} is not modified
        """
        excluded = set(objects)
        new_af = AtomFactory(self.name)
        for obj in self.atom_to_obj:
            if obj not in excluded:
                # Indexing allocates the next atom as a side effect.
                new_af[obj]
        return new_af

    def save(self, filename):
        """Pickle this AtomFactory to C{filename}.

        @param filename: path of the file to (over)write
        """
        # Protocol -1 is a binary pickle format, so the file must be opened
        # in binary mode; text mode corrupts the data on platforms that
        # translate newlines.
        with open(filename, "wb") as fout:
            cPickle.dump(self, fout, -1)

    def savetxt(self, filename):
        """Save each object on a line.

        Objects are written in atom order using C{%s} formatting.  For
        plain strings without newlines this is enough to reconstruct the
        AtomFactory, and is also useful for things like LDA's vocabulary
        file.

        @param filename: path of the text file to (over)write
        """
        with open(filename, "w") as fout:
            for obj in self.atom_to_obj:
                # Wrap in a 1-tuple so that tuple objects are formatted as a
                # single value instead of being unpacked by the % operator.
                fout.write("%s\n" % (obj,))

    @staticmethod
    def fromfile(filename):
        """Load an AtomFactory previously written by L{save}.

        @param filename: path of the pickle file to read
        @return: the unpickled object
        @note: unpickling can execute arbitrary code; only load files that
            come from a trusted source.
        """
        with open(filename, "rb") as fin:
            return cPickle.load(fin)
106
def convertAtom(oldAF, newAF, atom):
    """Convert an atom from one AtomFactory to another.

    @param oldAF : The old AtomFactory to which atom belongs
    @param newAF : The new AtomFactory
    @param atom  : The atom to convert
    @return      : The converted atom
    @raise Exception : If the object behind atom is not in newAF.  Whatever
                       C{oldAF.get_object(atom)} raises for an unknown atom
                       is passed through unchanged.
    """
    o = oldAF.get_object(atom)
    # Test membership first: indexing newAF directly would mint a brand new
    # atom for an unknown object instead of reporting it as missing.
    if o not in newAF:
        # Wrap o in a 1-tuple so tuple objects are formatted as one value.
        raise Exception("%r not in newAF" % (o,))
    return newAF[o]
120
def convertAtomVector(oldAF, newAF, av):
    """Convert an L{AtomVector} from one AtomFactory to another.

    The conversion is best-effort: every atom for which L{convertAtom}
    raises (for instance because its object is not in newAF) is left out
    of the result instead of aborting the whole conversion.

    @param oldAF : The old AtomFactory to which AtomVector av belongs
    @param newAF : The new AtomFactory
    @param av    : The AtomVector to convert
    @return      : The converted AtomVector, created with av's name
    """
    new_av = AtomVector(av.name)
    for old_atom, value in av.iteritems():
        # Guard only the conversion itself, so that errors raised while
        # storing into the new AtomVector are not hidden.
        try:
            new_atom = convertAtom(oldAF, newAF, old_atom)
        except Exception:
            # convertAtom reports a missing object with a plain Exception,
            # so nothing narrower can be caught here; skip this atom.
            continue
        new_av[new_atom] = value
    return new_av
137