The script contains two debugging procedures, cntallo and showallo, for counting and displaying all distinct word-initial allophones. These numbers can be compared to the (bug-free) output of the search module.
if 0 {
How to create a JANUS 3 tree out of a JANUS 2 tree
==================================================
1. Format conversion
--------------------
create a file which mirrors the original JANUS 2 tree, it has one
tree node per line, and the same nodes as the original tree.
Refer to the JANUS 3 tree file format specification for details.
2. If you are not using a clustering tree
-----------------------------------------
run fixSTree (below) to assign to every incomplete context
the same model as if the missing context was SIL; save the
resulting stree
3. If you are using a clustering tree
-------------------------------------
a) start up with the raw JANUS 3 tree, run mkheader to create
a tree which contains subtrees for biphones and monophones;
also, modify the root nodes of your triphone tree to use
the names any(any,any)-[bme].
b) Concatenate the biphones subtree and the triphones tree
c) start the system up with the concatenated tree and run
fixSTree; save the resulting stree
NOTE: don't mind if you get many warnings about undefined
model indices (these are the biphone models that have
yet to be defined).
NOTE: while fixing the tree you will probably get messages
about incorrigible triphones. You might want to check
if all of them contain special phonemes (like word-
boundaries or such). If one of the incorrigible
triphones consists of regular phones only, then
something's wrong, complain to Ivica.
}
# ===========================================================
# the procedure mkheader will write the biphone and monophone
# subtrees needed to run a system with incomplete contexts.
# The phones argument is a list of all the used monophones.
# The fileName argument is the name of the file to write.
# NOTE: The written tree will use "any" as name of a Phones
# object that contains all phones. Take a look at your
# PhonesSet and (if needed) add the "any" Phones.
# ===========================================================
proc mkheader { phones fileName } {
    # Write the biphone/monophone header subtrees to a tree file.
    #
    # Arguments:
    #   phones    list of monophone names
    #   fileName  path of the file to create (opened with mode "w",
    #             so an existing file is overwritten)
    #
    # One node is written per line. Each line is emitted for the three
    # root suffixes b, m and e.
    global errorInfo errorCode

    # "-" is appended as a sentinel: the last real phone's "ask" node
    # then names "-" as its successor.
    lappend phones "-"
    # Every element but the first, i.e. the successor of each entry.
    set phones1 [lrange $phones 1 end]
    # Every element but the trailing sentinel, i.e. the real phones.
    set phones0 [lrange $phones 0 [expr { [llength $phones] - 2 }]]
    set firstPhone [lindex $phones 0]

    set fp [open $fileName w]

    # All writes run under catch so the channel is closed on failure too.
    set failed [catch {
        # Fixed top-level nodes, grouped by node kind and then by suffix.
        foreach root { b m e } {
            puts $fp "ROOT-$root {-1=any} - DoKnowLeft-$root DontKnowLeft-$root -"
        }
        foreach root { b m e } {
            puts $fp "DontKnowLeft-$root {+1=any} - ask_${firstPhone}(unk,any)-$root ask_${firstPhone}(unk,unk)-$root -"
        }
        foreach root { b m e } {
            puts $fp "DoKnowLeft-$root {+1=any} - any(any,any)-$root ask_${firstPhone}(any,unk)-$root -"
        }

        foreach root { b m e } {
            # Chain of questions on the centre phone for the two
            # one-sided cases; each node points to the next phone's node.
            set thisPhone [lindex $phones 0]
            foreach nextPhone $phones1 {
                puts $fp "ask_${thisPhone}(any,unk)-$root \{0=$thisPhone\} ask_${nextPhone}(any,unk)-$root ${thisPhone}(ask_$firstPhone,any)-$root - -"
                puts $fp "ask_${thisPhone}(unk,any)-$root \{0=$thisPhone\} ask_${nextPhone}(unk,any)-$root ${thisPhone}(any,ask_$firstPhone)-$root - -"
                set thisPhone $nextPhone
            }

            # For every phone, a chain of questions on the one context
            # position that is asked about (+1 or -1).
            foreach phone $phones0 {
                set thisContext [lindex $phones 0]
                foreach nextContext $phones1 {
                    puts $fp "${phone}(any,ask_$thisContext)-$root \{+1=$thisContext\} ${phone}(any,ask_$nextContext)-$root ${phone}($thisContext,unk)-$root - -"
                    puts $fp "${phone}(ask_$thisContext,any)-$root \{-1=$thisContext\} ${phone}(ask_$nextContext,any)-$root ${phone}(unk,$thisContext)-$root - -"
                    set thisContext $nextContext
                }
            }

            # Leaf nodes for the one-sided cases; model name is "undefd".
            foreach phone $phones0 {
                foreach context $phones0 {
                    puts $fp "${phone}($context,unk)-$root \{\} - - - undefd"
                    puts $fp "${phone}(unk,$context)-$root \{\} - - - undefd"
                }
            }

            # Chain of questions on the centre phone with both sides "unk".
            set thisPhone [lindex $phones 0]
            foreach nextPhone $phones1 {
                puts $fp "ask_${thisPhone}(unk,unk)-$root \{0=$thisPhone\} ask_${nextPhone}(unk,unk)-$root ${thisPhone}(unk,unk)-$root - -"
                set thisPhone $nextPhone
            }

            # Leaf nodes with both sides "unk".
            foreach phone $phones0 {
                puts $fp "${phone}(unk,unk)-$root \{\} - - - undefd"
            }
        }
    } msg]

    if { $failed } {
        # Save the error state first; the nested catch would overwrite it.
        set savedInfo $errorInfo
        set savedCode $errorCode
        catch { close $fp }
        error $msg $savedInfo $savedCode
    }
    close $fp
}
# ===========================================================
# fixSTree will replace the given stree-node's models; every
# node which is the end-node for an incomplete triphone (one
# or both contexts unknown) will get the model that is used
# for the triphone where the missing contexts are SIL or WB
# ===========================================================
proc fixSTree { monophones tree senones {context WB} } {
    # Re-point the models of nodes reached with an incomplete context.
    #
    # Arguments:
    #   monophones  command that returns the list of phone names when
    #               invoked without arguments
    #   tree        tree command; used as "$tree get ..." and as the
    #               prefix of per-node commands "<tree>:<node>"
    #   senones     not used by this procedure; kept so existing
    #               callers keep working
    #   context     phone substituted for a missing context; defaults
    #               to WB (the original note suggests WB for non x-word
    #               and SIL for x-word systems)
    #
    # For each of the root suffixes b, m and e, three kinds of lookup
    # are patched:
    #   "x y" over -1..0   -> model of "x y context"
    #   "x y" over  0..1   -> model of "context x y"
    #   "x"   over  0..0   -> model of "context x context"
    # A negative replacement index is reported on stdout and that node
    # is left untouched.
    foreach p { b m e } {
        foreach x [$monophones] {
            foreach y [$monophones] {
                # Lookup with tags "x y" over positions -1..0.
                # Element 1 of the "-node 1" result is used as the node name.
                set node [lindex [$tree get ROOT-$p "$x $y" -1 0 -node 1] 1]
                set newIndex [$tree get ROOT-$p "$x $y $context" -1 1]
                if { $newIndex < 0 } {
                    puts "Can't correct $x $y $context"
                } else {
                    ${tree}:$node configure -model $newIndex
                }

                # Lookup with tags "x y" over positions 0..1.
                set node [lindex [$tree get ROOT-$p "$x $y" 0 1 -node 1] 1]
                set newIndex [$tree get ROOT-$p "$context $x $y" -1 1]
                if { $newIndex < 0 } {
                    puts "Can't correct $context $x $y"
                } else {
                    ${tree}:$node configure -model $newIndex
                }
            }

            # Lookup with the single tag "x" over position 0 only.
            set node [lindex [$tree get ROOT-$p "$x" 0 0 -node 1] 1]
            set newIndex [$tree get ROOT-$p "$context $x $context" -1 1]
            if { $newIndex < 0 } {
                puts "Can't correct $context $x $context"
            } else {
                ${tree}:$node configure -model $newIndex
            }
        }
    }
    return
}
# ===========================================================
# the following procedure counts the number of different
# allophones that can be at the beginning of a word, given
# a dictionary and a senone tree
# ===========================================================
proc cntallo { dict stree } {
    # Count the distinct (b, m, e) result triples that the tree yields
    # for the leading phones of every dictionary entry.
    #
    # Arguments:
    #   dict   dictionary command; invoked bare it returns one list
    #          element per entry, and "<dict>.item(i)" returns entry i
    #   stree  tree command queried with "get ROOT-b/-m/-e"
    #
    # Returns the number of distinct triples seen.
    set total [llength [$dict]]
    set idx 0
    while { $idx < $total } {
        # Element 1 of the entry is handed to "<dict>.phones name";
        # only the first two elements of that result are kept.
        set entry [${dict}.item($idx)]
        set head [lrange [${dict}.phones name [lindex $entry 1]] 0 1]

        # Upper position bound: 0 when exactly one phone was kept, else 1.
        set to [expr { [llength $head] != 1 }]

        set b [$stree get ROOT-b $head 0 $to]
        set m [$stree get ROOT-m $head 0 $to]
        set e [$stree get ROOT-e $head 0 $to]

        # The array serves as a set keyed by the triple.
        set seen($b,$m,$e) 1
        incr idx
    }
    return [array size seen]
}
# ===========================================================
# the following procedure prints out all words together with
# their initial allophone; can be used to compare to JANUSV2
# ===========================================================
proc showallo { dict stree file } {
    # Print one line per dictionary entry: "<element 0 of entry> = <b> <m> <e>".
    #
    # Arguments:
    #   dict   dictionary command; invoked bare it returns one list
    #          element per entry, and "<dict>.item(i)" returns entry i
    #   stree  tree command queried with "get ROOT-b/-m/-e"; indices
    #          are resolved through "<stree>.senoneSet.item(i)"
    #   file   open channel the lines are written to
    #
    # A negative index from the tree is printed as "(none)".
    set total [llength [$dict]]
    set idx 0
    while { $idx < $total } {
        # Keep only the first two elements of the phone-name result.
        set head [lrange [${dict}.phones name [lindex [${dict}.item($idx)] 1]] 0 1]

        # Upper position bound: 0 when exactly one phone was kept, else 1.
        set to [expr { [llength $head] != 1 }]

        # All three lookups are done before any index is resolved.
        set begX [$stree get ROOT-b $head 0 $to]
        set midX [$stree get ROOT-m $head 0 $to]
        set endX [$stree get ROOT-e $head 0 $to]

        if { $begX >= 0 } {
            set beg [lindex [${stree}.senoneSet.item($begX)] 0]
        } else {
            set beg "(none)"
        }
        if { $midX >= 0 } {
            set mid [lindex [${stree}.senoneSet.item($midX)] 0]
        } else {
            set mid "(none)"
        }
        if { $endX >= 0 } {
            set end [lindex [${stree}.senoneSet.item($endX)] 0]
        } else {
            set end "(none)"
        }

        set word [lindex [${dict}.item($idx)] 0]
        puts $file "$word = $beg $mid $end"
        incr idx
    }
}