# INSTALLATION: Just change the values of the CDROM macros to point to your
# copy of the CDROMs, and type "make TIMIT" or "make RM".
#
# WARNING: You will need 150 Mbyte of free disk space, and "make TIMIT" will
# take about eight hours of CPU time.  Double this for RM.
#
# COPYRIGHT: (C) 1992,1993 Tony Robinson
# Permission is granted to use this software for non-commercial,
# non-military purposes.  It may be copied and distributed freely, provided
# that this notice is copied and distributed with it.  Modified versions may
# be distributed with the same permissions and restrictions, provided that
# clear notice of the alterations is given.  This software carries NO
# WARRANTY, expressed or implied.  The user assumes all risks, known or
# unknown, direct or indirect, which involve this software in any way.
# 
# Acknowledgement is requested if this software contributes significantly
# toward any research publication.
# 
# Dr Tony Robinson (ajr@eng.cam.ac.uk)
# Cambridge University Engineering Department
# Trumpington Street
# Cambridge, CB2 1PZ
# England

# Locations of your copies of the CDROMs -- these must be changed
TIMIT-CDROM = /db/cd-hope0/timit
RM-CDROM0   = /cd/joy/rm1
RM-CDROM1   = /cd/tiger/rm1

# Compiler, compiler flags and weights files -- these can be overridden
CC         = gcc
CFLAGS     = -s -O2
TM-WEIGHTS = timit.wei
RM-WEIGHTS = rm1.wei

# Base names of the programs; these are my latest versions.
# Each one is compiled from <name>.c by the rule for EXECS.
MKTRUE = mkrmtrue
MKLIST = mkwplist
PRE    = pre27
SCALE  = scale
FON    = fon84
SPLAT  = splat
PPARSE = pparse4
WPARSE = wparse27
MATCH  = dpmatch2
DURN   = duration11
DISPLY = display

# Evaluation sets, and the sentence list file made for each of them
EVALS    = feb89 oct89 feb91 sep92
EVALDBLS = feb89.dbl oct89.dbl feb91.dbl sep92.dbl

# Source files, the executables built from them, and what they link against
SOURCES = $(MKTRUE).c $(MKLIST).c $(PRE).c $(SCALE).c $(FON).c $(SPLAT).c \
          $(PPARSE).c $(WPARSE).c $(MATCH).c $(DURN).c $(DISPLY).c
EXECS   = $(MKTRUE) $(MKLIST) $(PRE) $(SCALE) $(FON) $(SPLAT) \
          $(PPARSE) $(WPARSE) $(MATCH) $(DURN) $(DISPLY)
LIBS    = tools.a -lm

# With no goal given on the command line, make builds the RM target.
default: RM

# None of these names is a file produced by this makefile; declaring them
# phony stops a stray file called e.g. "clean" or "RM" from making the
# target look up to date.
.PHONY: default TIMIT RM release tidy clean spotless nuke

# TIMIT: remove stale tmp-* files (tidy), then re-run make with the TIMIT
# settings (CDROM location, TARG=tm, NINP/NSTA/NOUT and the weights file)
# to build tm-results.res, and finally print that file.
# ${MAKE} is used rather than a literal "make" so that the sub-make is the
# same make program and options such as -n and -j are handled correctly.
TIMIT: tidy
	${MAKE}	CC="${CC}" \
		CFLAGS="${CFLAGS}" \
		DIR0=${TIMIT-CDROM} \
		TARG=tm \
		NINP=23 \
		NSTA=176 \
		NOUT=61 \
		WEIGHTS=${TM-WEIGHTS} \
		tm-results.res
	cat tm-results.res

# RM: remove stale tmp-* files (tidy), then re-run make with the RM
# settings (both CDROM locations, TARG=rm, NINP/NSTA/NOUT and the weights
# file) to build rm-results.res, and finally print that file.
# ${MAKE} is used rather than a literal "make" so that the sub-make is the
# same make program and options such as -n and -j are handled correctly.
RM: tidy
	${MAKE}	CC="${CC}" \
		CFLAGS="${CFLAGS}" \
		DIR0=${RM-CDROM0} \
		DIR1=${RM-CDROM1} \
		TARG=rm \
		NINP=23 \
		NSTA=256 \
		NOUT=68 \
		WEIGHTS=${RM-WEIGHTS} \
		rm-results.res
	cat rm-results.res
	
# Score the phone strings for the sentences listed in test.dbl.  The
# matcher is given cdrwm/ + .phn as its -r prefix/suffix and cdrwm/ + .fon
# as its -u prefix/suffix.  Output is collected in tmp-$@ and only renamed
# once the matcher has succeeded.
tm-results.res: $(MATCH) tm-parsed
	@ echo '#' ; \
	  echo '# scoring the phone strings:' $@ ; \
	  echo '#'
	./$(MATCH) -rprefix cdrwm/ -rsuffix .phn \
	  -uprefix cdrwm/ -usuffix .fon test.dbl > tmp-$@
	mv tmp-$@ $@

# Score the word strings for every evaluation set in ${EVALS}.
# For each set two result files are written: rm-<set>-nog.res (from the
# .wno files) and rm-<set>-wpg.res (from the .wwp files).  $@ is a summary
# holding the last line of each result file, prefixed by the set name and
# a tab: first all the -nog results, then a blank line, then all the -wpg
# results.
#
# The summary is written through a single redirection so that a tmp-$@
# left over from an interrupted run cannot leak stale lines into it, and
# printf is used instead of the non-portable "echo -n".
rm-results.res: ${MATCH} rm-parsed true
	@ echo '#'
	@ echo '# scoring the word strings:' $@
	@ echo '#'
	for NAME in ${EVALS} ; do \
	  ./${MATCH} -rprefix true/ -rsuffix .wrd -uprefix cdrwm/ \
	    -usuffix .wno $$NAME.dbl > rm-$$NAME-nog.res ; \
	  ./${MATCH} -rprefix true/ -rsuffix .wrd -uprefix cdrwm/ \
	    -usuffix .wwp $$NAME.dbl > rm-$$NAME-wpg.res ; \
	done
	( for NAME in ${EVALS} ; do \
	    printf '%s\t' $$NAME ; \
	    tail -n 1 rm-$$NAME-nog.res ; \
	  done ; \
	  echo ; \
	  for NAME in ${EVALS} ; do \
	    printf '%s\t' $$NAME ; \
	    tail -n 1 rm-$$NAME-wpg.res ; \
	  done ) > tmp-$@
	mv tmp-$@ $@

# Parse the phone probabilities of every sentence listed in test.dbl into
# a phone string: cdrwm/<sentence>.lna is read and cdrwm/<sentence>.fon is
# written.  tm-parsed itself is only a stamp file, touched at the end.
tm-parsed: $(PPARSE) test.spl test.dbl tm-test.dur
	@ echo '#' ; \
	  echo '# parsing the phone probabilities into phone strings:' $@ ; \
	  echo '#'
	for SENT in `cat test.dbl` ; do \
	  ./$(PPARSE) -phntab tm-phone.tab cdrwm/$$SENT.lna tm-test.dur \
	    > cdrwm/$$SENT.fon ; \
	done
	touch $@

# Parse the phone probabilities into word strings for every evaluation set
# in ${EVALS}.  The parser is run twice per set, without and with -wpgram,
# giving <set>-nog.hyp and <set>-wpg.hyp; each is written to a tmp- name
# first and then renamed.  rm-parsed itself is only a stamp file.
#
# ARGS is assembled in two steps with the line break OUTSIDE the quotes:
# POSIX make and GNU make >= 3.81 hand backslash-newline to the shell
# unchanged, and inside single quotes the shell keeps the backslash, which
# would then reach the parser as a stray "\" argument.
rm-parsed: ${WPARSE} test.spl ${EVALDBLS} rm-test.dur pcdsril.dct rm-phone.tab wplist.wpg
	@ echo '#'
	@ echo '# parsing the phone probabilities into word strings:' $@
	@ echo '#'
	ARGS='-wrd -slp rm-test.dur -dict pcdsril.dct -wplist wplist.wpg' ; \
	ARGS="$$ARGS -phntab rm-phone.tab -act_prefix cdrwm/ -wrd_prefix cdrwm/" ; \
	for NAME in ${EVALS} ; do \
	  ./${WPARSE}         -nist tmp-$$NAME-nog.hyp $$ARGS $$NAME.dbl ; \
	  mv tmp-$$NAME-nog.hyp $$NAME-nog.hyp; \
	  ./${WPARSE} -wpgram -nist tmp-$$NAME-wpg.hyp $$ARGS $$NAME.dbl ; \
	  mv tmp-$$NAME-wpg.hyp $$NAME-wpg.hyp; \
	done
	touch $@

# Put the phone probabilities in train.lna into one file per sentence,
# using the sentence list train.dbl and the prefix cdrwm/.  train.spl is
# only a stamp file recording that this has been done.
train.spl: $(SPLAT) cdrwm train.dbl train.lna
	@ echo '#' ; \
	  echo '# putting the phone probabilities in one file per sentence:' $@ ; \
	  echo '#'
	./$(SPLAT) -nout $(NOUT) -prefix cdrwm/ train.dbl train.lna
	touch $@

# Put the phone probabilities in test.lna into one file per sentence,
# using the sentence list test.dbl and the prefix cdrwm/.  test.spl is
# only a stamp file recording that this has been done.
test.spl: $(SPLAT) cdrwm test.dbl test.lna
	@ echo '#' ; \
	  echo '# putting the phone probabilities in one file per sentence:' $@ ; \
	  echo '#'
	./$(SPLAT) -nout $(NOUT) -prefix cdrwm/ test.dbl test.lna
	touch $@

# Create the file of state transition probabilities used for TIMIT,
# from train.pre and the phone table tm-phone.tab.  The result is written
# to tmp-$@ and renamed on success.
tm-test.dur: $(DURN) train.pre tm-phone.tab
	@ echo '#' ; \
	  echo '# creating a file of state transition probabilities:' $@ ; \
	  echo '#'
	./$(DURN) -apriori -lo_limit 0.0 -markov -ninp $(NINP) \
	  -phntab tm-phone.tab train.pre tmp-$@
	mv tmp-$@ $@

# Create the file of state transition probabilities used when aligning
# the RM training data, from train.pre and the phone table rm-phone.tab.
# The result is written to tmp-$@ and renamed on success.
rm-train.dur: $(DURN) train.pre rm-phone.tab
	@ echo '#' ; \
	  echo '# creating a file of state transition probabilities:' $@ ; \
	  echo '#'
	./$(DURN) -apriori -self_loop -lo_limit 0.0 -ninp $(NINP) \
	  -phntab rm-phone.tab train.pre tmp-$@
	mv tmp-$@ $@

# Create the file of state transition probabilities used when parsing the
# RM test data, from train.nil (with -ninp 0) and the phone table
# rm-phone.tab.  The result is written to tmp-$@ and renamed on success.
rm-test.dur: $(DURN) train.nil rm-phone.tab
	@ echo '#' ; \
	  echo '# creating a file of state transition probabilities:' $@ ; \
	  echo '#'
	./$(DURN) -apriori -self_loop -ninp 0 \
	  -phntab rm-phone.tab train.nil tmp-$@
	mv tmp-$@ $@

# Run the preprocessor over the training sentences to give frame label
# output.  The preprocessor writes the scratch file tmp-0, which is then
# passed through the scaling program (with -ninp 0) into tmp-$@; the
# scratch file is removed and tmp-$@ renamed to $@.
train.nil: $(PRE) rm-phone.tab train.dbl train.phn $(SCALE)
	@ echo '#' ; \
	  echo '# running the preprocessor to give frame label output:' $@ ; \
	  echo '#'
	./$(PRE) -npower 0 -nftchan 0 -phntab rm-phone.tab \
	  -adc_prefix cdrwm/ -phn_prefix cdrwm/ train.dbl tmp-0
	./$(SCALE) -ninp 0 tmp-0 tmp-0 tmp-$@
	rm -f tmp-0
	mv tmp-$@ $@

# Make forced alignments on the training data.  For every sentence listed
# in train.dbl the parser is run in -forced mode with that sentence's own
# dictionary true/<sentence>.dct; the sentence name is fed to it on
# standard input.  train.phn is only a stamp file, touched at the end.
train.phn: train.dbl $(WPARSE) rm-train.dur cdrwm true train.spl
	@ echo '#' ; \
	  echo '# making forced alignments on training data:' $@ ; \
	  echo '#'
	for SENT in `cat train.dbl` ; do \
	  echo $$SENT | \
	    ./$(WPARSE) -phn -wrd -forced -print_silence \
	      -slp rm-train.dur -dict true/$$SENT.dct -act_prefix cdrwm/ \
	      -phn_prefix cdrwm/ -phn_suffix .phn \
	      -wrd_prefix cdrwm/ -wrd_suffix .wrd cdrwm/$$SENT.lna - ; \
	done
	touch $@

# Run the recurrent net over train.pre with the weights file $(WEIGHTS)
# to give phone probabilities.  The net is told to dump to tmp-$@, which
# is renamed to $@ on success.
train.lna: $(FON) train.pre
	@ echo '#' ; \
	  echo '# running the recurrent net to give phone probabilities:' $@ ; \
	  echo '#'
	./$(FON) -lna -ninp $(NINP) -nsta $(NSTA) -nout $(NOUT) \
	  -dump tmp-$@ train.pre $(WEIGHTS) -
	mv tmp-$@ $@

# Run the recurrent net over test.pre with the weights file ${WEIGHTS} to
# give phone probabilities.  The net is told to dump to tmp-$@, which is
# renamed to $@ on success.
# (The banner is three lines, as for every other rule; a duplicated
# closing '#' line has been dropped.)
test.lna: ${FON} test.pre
	@ echo '#'
	@ echo '# running the recurrent net to give phone probabilities:' $@
	@ echo '#'
	./${FON} -lna -ninp ${NINP} -nsta ${NSTA} -nout ${NOUT} -dump tmp-$@ \
	  test.pre ${WEIGHTS} -
	mv tmp-$@ $@

# Make a byte-packed version of the preprocessor output.  The scaling
# program is always given train.flt as its first file and the .flt file
# matching the target (train.flt or test.flt) as its second; the result
# is written to tmp-$@ and renamed on success.
train.pre test.pre: $(SCALE) train.flt test.flt
	@ echo '#' ; \
	  echo '# making a byte-packed version of the preprocessor output:' $@ ; \
	  echo '#'
	./$(SCALE) train.flt $(@:.pre=.flt) tmp-$@
	mv tmp-$@ $@

# Run the preprocessor to give floating point output for the sentences in
# the .dbl list matching the target (train.dbl or test.dbl), using the
# phone table $(TARG)-phone.tab.  The result is written to tmp-$@ and
# renamed on success.
train.flt test.flt: $(PRE) $(TARG)-phone.tab cdrwm train.dbl test.dbl
	@ echo '#' ; \
	  echo '# running the preprocessor to give floating point output:' $@ ; \
	  echo '#'
########### go back to -f0now
	./$(PRE) -f0 -voice -phntab $(TARG)-phone.tab \
	  -adc_prefix cdrwm/ -phn_prefix cdrwm/ $(@:.flt=.dbl) tmp-$@
	mv tmp-$@ $@

# Compile a sorted list of all the sentences: every *.wav file found under
# cdrwm/train (or cdrwm/test), with the .wav suffix removed and with names
# ending in sa1, sa2 or sb<two digits> left out.
train.dbl test.dbl: cdrwm
	@ echo '#' ; \
	  echo '# compiling a list of all the sentences:' $@ ; \
	  echo '#'
	( cd cdrwm ; \
	  find $(@:.dbl=) -name '*.wav' -print | \
	    sed "s:.wav$$::" | \
	    egrep -v '(sa[12]$$|sb[0-9][0-9]$$)' ) | sort > tmp-$@
	mv tmp-$@ $@

# Compile the list of sentences in one evaluation set.  Each CDROM is
# searched for a directory doc/tests/?_<set>; when one is found, the
# non-comment lines (those not starting with ';') of its ?_indtst.ndx
# file are taken, the leading /rm1/ind/eval is rewritten to test, the
# .wav suffix is removed and the sorted result is stored in tmp-$@, which
# is finally renamed to $@.
$(EVALDBLS):
	@ echo '#' ; \
	  echo '# compiling a list of all the evaluation set sentences:' $@ ; \
	  echo '#'
	BASE=$(@:.dbl=) ; \
	for CD in $(DIR0) $(DIR1) ; do \
		if test -d $$CD/doc/tests/?_$$BASE ; then \
			egrep -v '^;' $$CD/doc/tests/?_$$BASE/?_indtst.ndx | \
			sed -e 's:^/rm1/ind/eval:test:' -e 's:.wav$$::' | \
			sort > tmp-$@ ; \
		fi ; \
	done ; \
	mv tmp-$@ $@

# this is only called if running RM
#
# Make the directory "true" of 'true' labels.  A skeleton is built in
# tmp-true first: for each of train and test, one symbolic link to ".."
# is made per entry found in cdrwm/train or cdrwm/test.  ${MKTRUE} is then
# run with the dictionary, the sentence file from the first CDROM and the
# prefix tmp-true/, and the finished directory replaces any old "true".
#
# Any tmp-true left by an interrupted run is removed first, since mkdir
# would otherwise fail.  ${DIR0} is expanded by make, as in the other
# rules, rather than being picked up from the environment as $$DIR0.
true: cdrwm ${MKTRUE} pcdsril.dct
	@ echo '#'
	@ echo "# making a directory of 'true' labels:" $@
	@ echo '#'
	rm -rf tmp-$@
	mkdir tmp-$@
	for DIR in train test ; do \
		mkdir tmp-$@/$$DIR ; \
		for NAME in `cd cdrwm ; ls $$DIR ` ; do \
			ln -s .. tmp-$@/$$DIR/$$NAME ; \
		done ; \
	done
	./${MKTRUE} -word pcdsril.dct -sent ${DIR0}/doc/al_sents.snr \
		-prefix	tmp-$@/
	rm -rf $@
	mv tmp-$@ $@

# Make a writable shadow of the CDROM directory tree, using ./mkdirln.
# For TARG=tm the whole of $(DIR0) is shadowed.  Otherwise each CDROM is
# inspected: ind_trn and ind/dev_aug, where present, are shadowed into
# cdrwm/train, and ind/eval into cdrwm/test.  The tree is built as
# tmp-cdrwm and then replaces any existing cdrwm.
cdrwm: mkdirln
	@ echo '#' ; \
	  echo '# making a shadow cdrom directory that is writable:' $@ ; \
	  echo '#'
	if test $(TARG) = tm ; then \
		./mkdirln $(DIR0) tmp-$@ ; \
	else \
		mkdir tmp-$@ ; \
		for CD in $(DIR0) $(DIR1) ; do \
			if test -d $$CD/ind_trn ; then \
				./mkdirln $$CD/ind_trn tmp-$@/train ; \
			fi ; \
			if test -d $$CD/ind/dev_aug ; then \
				./mkdirln $$CD/ind/dev_aug tmp-$@/train ; \
			fi ; \
			if test -d $$CD/ind/eval ; then \
				./mkdirln $$CD/ind/eval tmp-$@/test ; \
			fi ; \
		done ; \
	fi
	rm -rf $@
	mv tmp-$@ $@

# The TIMIT phone table is a copy of allophone.tab from $(HOME)/lib.
tm-phone.tab:
	cp $(HOME)/lib/allophone.tab $@

# Compile the word-pair grammar from the dictionary and the file
# doc/wp_gram.txt on the first CDROM.  The list is written to tmp-$@ and
# renamed on success.
wplist.wpg: $(MKLIST) pcdsril.dct
	@ echo '#' ; \
	  echo '# compiling the word-pair grammar:' $@ ; \
	  echo '#'
	./$(MKLIST) -word pcdsril.dct \
	  -gram $(DIR0)/doc/wp_gram.txt -wplist tmp-$@
	mv tmp-$@ $@

# Make the list of phones used by the dictionary.  The list starts with
# 'h#'; the rest is every distinct symbol found in the second and later
# space-separated fields of pcdsril.dct, starting from its third line
# (the first two lines are the @INIT and @QUIT entries).
#
# "tail -n +3" is the POSIX spelling; the historical "tail +3" is taken
# as a file name by current GNU tail.
rm-phone.tab: pcdsril.dct
	@ echo '#'
	@ echo '# making a list of phones:' $@
	@ echo '#'
	echo 'h#' > tmp-$@
	tail -n +3 pcdsril.dct | cut -f2- -d' ' | tr ' ' '\012' | sort -u >>tmp-$@
	mv tmp-$@ $@

# Create the pronunciation dictionary from score/src/rdev/pcdsril.txt,
# which is looked for one directory above each CDROM directory in turn;
# the build stops with a FATAL message if it is in neither place.
# Starting from line 20 of that file, the first space-separated field is
# converted to upper case (scratch file tmp-0) and the remaining fields are
# kept as they are (scratch file tmp-1).  The dictionary is the two lines
# '@INIT h#' and '@QUIT h#' followed by the two scratch files pasted side
# by side, with runs of spaces squeezed to one.
#
# "tail -n +20" is the POSIX spelling; the historical "tail +20" is taken
# as a file name by current GNU tail.  The tr sets are quoted so that the
# shell cannot expand them as file name patterns, and ${DIR0}/${DIR1} are
# expanded by make, as in the other rules, rather than being picked up
# from the environment.
pcdsril.dct:
	@ echo '#'
	@ echo '# creating the pronunciation dictionary:' $@
	@ echo '#'
	SUFFIX=score/src/rdev/pcdsril.txt ; \
	if   [ -f ${DIR0}/../$$SUFFIX ] ; then \
		FILE=${DIR0}/../$$SUFFIX ; \
	elif [ -f ${DIR1}/../$$SUFFIX ] ; then \
		FILE=${DIR1}/../$$SUFFIX ; \
	else \
		echo 'FATAL: could not find: '$$SUFFIX ; \
		exit 1 ; \
	fi ; \
	tail -n +20 $$FILE | cut -f1 -d' ' | tr '[a-z]' '[A-Z]' > tmp-0 ; \
	tail -n +20 $$FILE | cut -f2- -d' ' > tmp-1
	echo '@INIT h#' > tmp-$@
	echo '@QUIT h#' >> tmp-$@
	paste -d' ' tmp-0 tmp-1 | tr -s ' ' >> tmp-$@
	rm -f tmp-0 tmp-1
	mv tmp-$@ $@

# Build one of the programs from <name>.c.  If the source file is not in
# the current directory it is first copied from $(HOME)/y0.  Headers are
# searched for in tools/ and the program is linked against $(LIBS).
$(EXECS): tools.a
	test -f $@.c || cp $(HOME)/y0/$@.c .
	$(CC) -I tools $(CFLAGS) -o $@ $@.c $(LIBS)

# Shell fragment that sets the shell variable HOSTTYPE from uname output:
# sparc for SunOS 4.x, solaris for any other SunOS release, sgi for IRIX
# and unknown otherwise.  Used by both recipe lines of tools.a.
SET-HOSTTYPE = \
    if   [ `uname -s` = SunOS ] ; then \
        if uname  -r | egrep '^4' ; then \
            HOSTTYPE=sparc ; \
        else \
            HOSTTYPE=solaris ; \
        fi ; \
    elif [ `uname -s` = IRIX ] ; then \
        HOSTTYPE=sgi ; \
    else \
        HOSTTYPE=unknown ; \
    fi

# Create the library of useful tools: run make inside tools/ with
# HOSTTYPE set for this machine, then hard-link the resulting
# <HOSTTYPE>tools.a to ./tools.a.
#
# - "cd tools &&" stops the recipe if the directory cannot be entered,
#   instead of carrying on in the wrong directory.
# - ${MAKE} replaces a literal "make" so that the sub-make is the same
#   program and receives -n, -j etc. properly.
# - The link is made on a recipe line of its own: make runs any line that
#   mentions ${MAKE} even under "make -n", and a dry run must not create
#   the link.
# - "ln -f" replaces an existing tools.a; a plain ln fails with "File
#   exists" whenever this rule is run a second time.
tools.a: tools
	@ echo '#'
	@ echo '# creating a library of useful tools:' $@
	@ echo '#'
	cd tools && ${SET-HOSTTYPE} && ${MAKE} HOSTTYPE=$$HOSTTYPE
	cd tools && ${SET-HOSTTYPE} && ln -f $${HOSTTYPE}tools.a ../tools.a

# Create the tools directory from $(HOME)/tools: the .c and .h files
# whose names start with a capital letter, plus the Makefile, are copied
# and an nrutil.h is touched into existence.  The directory is assembled
# as tmp-tools and then replaces any existing tools.
tools:
	mkdir tmp-$@
	cp $(HOME)/tools/[A-Z]*.c tmp-$@
	cp $(HOME)/tools/[A-Z]*.h $(HOME)/tools/Makefile tmp-$@
	touch tmp-$@/nrutil.h
	rm -rf $@
	mv tmp-$@ $@

# Package the current directory for release: after "spotless" has cleaned
# it, the directory is archived from its parent with tar and compressed
# to ../<directory name>.tar.Z.
release: spotless
	DIR=`pwd | sed 's:^.*/::'` ; \
	cd .. ; \
	tar cvf - $$DIR | \
	  compress > $$DIR.tar.Z

# Remove every temporary file or directory (tmp-*) left by the rules.
tidy:
	rm -r -f tmp-*

# Remove temporary files, clean the tools directory and remove the tools
# libraries.
#
# The sub-make is only run when tools/ exists and could be entered.  With
# a bare "cd tools; make clean" a failed cd leaves the shell in the
# current directory, where "make clean" then invokes this same rule again
# without end.  ${MAKE} replaces a literal "make" so that the sub-make is
# the same program and receives -n, -j etc. properly.
clean: tidy
	if [ -d tools ] ; then cd tools && ${MAKE} clean ; fi
	rm -f tools.a tools/*tools.a

# Remove everything the build generates, on top of what "clean" removes:
# the executables, the per-set train/test files, the per-evaluation-set
# lists, hypotheses and results, the shadow and label directories, the
# generated tables and dictionary, the stamp and result files, and editor
# backup files (*~).
spotless: clean
	rm -rf $(EXECS) mkdirln tools/tools.a
	for SET in train test ; do \
	  rm -f $$SET.dbl $$SET.flt $$SET.pre $$SET.lna \
	    $$SET.nil $$SET.dur $$SET.spl $$SET.phn ; \
	done
	for EVAL in $(EVALS) ; do \
	  rm -f $$EVAL.dbl $$EVAL-nog.hyp $$EVAL-wpg.hyp \
	    rm-$$EVAL-nog.res rm-$$EVAL-wpg.res ; \
	done
	rm -rf cdrwm true rm-phone.tab pcdsril.dct [rt]m-parsed wplist.wpg
	rm -rf [rt]m-results.res [rt]m-test.dur [rt]m-train.dur *~

# As "spotless", and additionally remove the files that were copied in
# from elsewhere: the program sources, mkdirln.c, the tools directory and
# the TIMIT phone table.
nuke: spotless
	rm -rf tools tm-phone.tab mkdirln.c $(SOURCES)
