Package saui_pr4 :: Module text2words
[hide private]
[frames | no frames]

Source Code for Module saui_pr4.text2words

 1  #!/usr/bin/env python 
 2   
 3  # Copyright (c) 2007 Carnegie Mellon University. 
 4  # 
 5  # You may modify and redistribute this file under the same terms as 
 6  # the CMU Sphinx system.  See 
 7  # http://cmusphinx.sourceforge.net/html/LICENSE for more information. 
 8  # 
 9  # Briefly, don't remove the copyright.  Otherwise, do what you like. 
10   
11  """ 
12  Take a text file and extract words from it using Flite's text 
13  normalization, yielding a transcription suitable for language model 
14  training and a pronunciation dictionary. 
15   
16  This is part of project 4 in 05-631 Software Architecture for User 
17  Interfaces, Fall 2007. 
18  """ 
19   
20  __author__ = "David Huggins-Daines <dhuggins@cs.cmu.edu>" 
21   
22  import flite 
23  import getopt 
24  import sys 
25   
def usage():
    """Show usage of this module.

    Writes a one-line synopsis of the command line to standard error.
    """
    # End the message with a newline so that the shell prompt (or any
    # following diagnostic) is not glued onto the usage text.
    sys.stderr.write(
        "Usage: text2words.py [-o DICTIONARY] < TEXT > WORDS\n")
29
class TextNorm(object):
    """Collect words and pronunciations via a Flite voice.

    A voice is loaded and ``get_words`` is registered as its
    post-synthesis callback.  Every utterance handed to the callback is
    written to standard output as one ``<s> ... </s>`` line, and a
    pronunciation for each newly seen word is stored in ``self.dic``.
    """

    # Segment names that are spelled differently in the dictionary
    # output.  Any segment name not listed here is simply upper-cased.
    _PHONE_MAP = {'ax': 'AH', 'pau': 'SIL'}

    def __init__(self):
        """Load the voice and hook up the post-synthesis callback."""
        self.vox = flite.load_voice()
        self.vox.set_post_synth_callback(self.get_words)
        # Maps word -> space-separated phone string; the first
        # pronunciation seen for a word is the one that is kept.
        self.dic = {}

    def normalize(self, par):
        """Run the text ``par`` through the voice.

        Nothing is returned.  "none" is passed as the second argument of
        ``text_to_speech`` -- presumably to suppress audio output (TODO
        confirm against the flite bindings).  Results are expected to
        arrive through the ``get_words`` callback registered in
        ``__init__``.
        """
        self.vox.text_to_speech(par, "none")

    def get_words(self, utt):
        """Print the words of ``utt`` and record their pronunciations.

        ``utt['Word']`` is iterated; each item must provide ``name`` and
        ``as_rel('SylStructure')``, the latter yielding syllables that
        are themselves iterables of segments with a ``name``.

        Items that yield no segments at all are skipped entirely: they
        are neither printed nor added to the dictionary.
        """
        words = []
        for w in utt['Word']:
            segs = []
            for syl in w.as_rel('SylStructure'):
                for seg in syl:
                    segs.append(
                        self._PHONE_MAP.get(seg.name, seg.name.upper()))
            if segs:
                words.append(w.name)
                if w.name not in self.dic:
                    self.dic[w.name] = " ".join(segs)
        # sys.stdout.write instead of the Python-2-only print statement:
        # the output is byte-for-byte the same ("<s>", the joined words
        # and "</s>" separated by single spaces, then a newline) and the
        # code also parses on Python 3.
        sys.stdout.write("<s> %s </s>\n" % " ".join(words))
56
def main():
    """Command-line entry point.

    Reads text from standard input, where paragraphs are runs of
    non-blank lines separated by blank lines, and passes each paragraph
    (its stripped lines joined with single spaces) to
    ``TextNorm.normalize``.  If ``-o FILE`` / ``--output FILE`` is
    given, the collected dictionary is then written to FILE, one
    "word pronunciation" line per entry, sorted by word.

    On an invalid command line the usage message is shown and the
    process exits with status 2.
    """
    try:
        opts, _ = getopt.getopt(sys.argv[1:], 'o:', ['output='])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    outfile = None
    for o, a in opts:
        if o in ('-o', '--output'):
            outfile = a

    norm = TextNorm()
    par = []
    for line in sys.stdin:
        line = line.strip()
        if line:
            par.append(line)
        elif par:
            # A blank line ends the current paragraph.  Runs of blank
            # lines (or blank lines at the very start) carry no text,
            # so nothing is sent to the normalizer for them.
            norm.normalize(" ".join(par))
            par = []
    if par:
        # Input that does not end with a blank line still has a pending
        # paragraph here; without this flush it would be lost.
        norm.normalize(" ".join(par))

    if outfile is not None:
        # The with-block guarantees the dictionary file is flushed and
        # closed even if a write fails part-way through.
        with open(outfile, "w") as outfh:
            for w in sorted(norm.dic):
                outfh.write("%-30s %s\n" % (w, norm.dic[w]))
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()