#!/bin/csh
########################################################################
# Run the Lattice rescorer over a slice of a lattice control file and
# write the resulting n-best lists into $outdir.
#
# Usage: edit the settings below, then run this script with no arguments.
# Exit status: 1 if the Lattice binary cannot be located, otherwise the
# exit status of Lattice itself.
########################################################################

########################################################################
# do not change these
########################################################################

set sfchome = "/net/alf14/usr/sfc"

# string printed by hosttype.x; used below to pick the per-architecture
# directory under $sfchome/pub
set arch = `$sfchome/exec/hosttype.x`

# an empty host type would yield the path .../pub//bin/Lattice and an
# opaque "Command not found", so stop here with a clear message instead
if ("$arch" == "") then
    echo "error: $sfchome/exec/hosttype.x printed no host type"
    exit 1
endif

########################################################################
# here is a list of things you probably want to set
#
# defaults have been set to what Matt used in the TREC eval decode,
# when possible
########################################################################

# directory to write out nbest lists to
set outdir = "$sfchome/lattice/work/m007/m007c"

# ctloff: number of files to skip at the beginning of the control file
# ctlcnt: number of files to rescore from the control file
# (use these two to split the control file into sections, for
# parallelization)
set ctloff = 0
set ctlcnt = 3

# latdir:  directory where the lattices are located
# ctlfile: list of lattice files, without extension
#
# for training decode, use:
#   set latdir = "/net/db7/usr1/robust/spokes/TREC/corpora/h496/TREC6_train/Decode/Output/stories.s3new-51k.1164.latdir"
#   set ctlfile = "$sfchome/lattice/work/l007/l007c.ctl"
#
# for eval decode, use:
#   set latdir = "/net/db7/usr1/robust/spokes/TREC/STORES/Lattices/space-2/TREC6-Evaltest-51k"
#   set ctlfile = "$sfchome/error/work/a021/a021d4.ctl"
#   (this control file contains all 11372 lattices, except for
#   k960611.23-14, which is corrupted)
#
# the active values below are the training-decode ones
set latdir = "/net/db7/usr1/robust/spokes/TREC/corpora/h496/TREC6_train/Decode/Output/stories.s3new-51k.1164.latdir"
set ctlfile = "$sfchome/lattice/work/l007/l007c.ctl"

########################################################################
# here is a list of things you might want to set
#
# defaults have been set to what Matt used in the TREC eval decode,
# when possible
########################################################################

# size of nbest lists to generate
set nbest = 150

# langwgt:   language weight
# inspen:    insertion penalty
# noisepen:  noise penalty
# noisefile: noise file, containing penalties for particular tokens
set langwgt = 9.5
set inspen = 0.2
set noisepen = 0.05
set noisefile = "/net/db7/usr6/robust/TREC/corpora/Eval/Decode/extras/filler-dt96.pen"

# this is the LM used by Matt; it is actually
# /net/db7/usr6/robust/TREC/corpora/Eval/Decode/extras/bn92-96-51k+_phrases+xb-1-1.arpabo.Z.DMP
# converted to my format
set arpabo = "/net/alf9/usr2/sfc/tmp/m007a.dmp"

# MB of memory the rescorer will use (when the memory limit is reached
# for a lattice, search beams are narrowed)
set mem = 100

# log base in which scores in the nbest file are expressed.
# Original author note: this value sets it to the internal log base,
# which is 1.000333333333, for compatibility with version 0.1; the
# default is -10.0, which outputs scores in log 10.
# NOTE(review): that -1 is what selects the internal base is read from
# the note above, not from the Lattice documentation -- confirm.
set logbaseout = -1

########################################################################
# here is the actual command line
########################################################################

# full path of the rescorer binary for this host type
set lattice = "$sfchome/pub/$arch/bin/Lattice"

if (! -x "$lattice") then
    echo "error: $lattice is missing or not executable"
    exit 1
endif

# Every expansion is double-quoted so that an empty or space-containing
# setting cannot shift the option/value pairing.  The -hub4, -nbest.3 and
# -bbrelax switches are passed exactly as before; their meaning is not
# described in this script.
"$lattice" \
    -hub4 -nbest.3 \
    -langwgt "$langwgt" \
    -inspen "$inspen" \
    -noisepen "$noisepen" \
    -noisefile "$noisefile" \
    -latdir "$latdir" \
    -ctlfile "$ctlfile" \
    -ctloff "$ctloff" \
    -ctlcnt "$ctlcnt" \
    -outdir "$outdir" \
    -latext .lat.Z \
    -arpabo "$arpabo" \
    -outext .nbest.gz \
    -nbest "$nbest" \
    -mem "$mem" \
    -bbrelax -flLogBaseOut "$logbaseout"

# hand the exit status of Lattice back to the caller explicitly
exit $status

########################################################################
#
########################################################################