% Helper macros emitted ahead of the reference list.
%   \noopsort{x}      expands to nothing (its argument is discarded)
%   \printfirst{a}{b} expands to a
%   \singleletter{x}  expands to x
%   \switchargs{a}{b} expands to ba
% None of them is used by the entries visible in this file; they are kept
% because other entries or files may rely on them (presumably sort-key
% helpers coming from a .bib @preamble -- confirm against the .bib file).
% \providecommand is used instead of \newcommand so that reading this file
% more than once, or next to another bibliography carrying the same helper
% definitions, does not stop with a "Command ... already defined" error.
\providecommand{\noopsort}[1]{}
\providecommand{\printfirst}[2]{#1}
\providecommand{\singleletter}[1]{#1}
\providecommand{\switchargs}[2]{#2#1}

% The argument {1} is the widest-label template used to size the label column.
\begin{thebibliography}{1}

% NOTE(review): volume and issue are given but no page range -- confirm
% against the .bib entry and add the pages if they are known.
\bibitem{Bahl:89}
L.~Bahl, P.~Brown, P.~de~Souza, and R.~Mercer.
\newblock A tree-based statistical language model for natural language speech
  recognition.
\newblock \emph{IEEE Transactions on Acoustics, Speech, and Signal Processing},
  37(7), 1989.

\bibitem{Brown:59a}
D.~Brown.
\newblock A note on approximations to discrete probability distributions.
\newblock \emph{Information and Control}, 2:386--392, 1959.

% "\ " after the abbreviation periods forces a normal inter-word space;
% otherwise TeX applies end-of-sentence spacing after "Ann." and "Math.".
\bibitem{Darroch:72}
J.~Darroch and D.~Ratcliff.
\newblock Generalized iterative scaling for log-linear models.
\newblock \emph{Ann.\ Math.\ Statistics}, 43:1470--1480, 1972.

% The key is kept as Lafferty:95 although the first listed author is
% S. Della Pietra, since \cite commands elsewhere refer to this key.
% Ties keep the two-word surname "Della Pietra" on one line, consistent
% with "de~Souza" in the first entry.
% NOTE(review): "Technical report, ... Technical Report ..." reads as
% redundant; the wording is left unchanged here -- consider cleaning up the
% institution/number fields in the .bib entry.
\bibitem{Lafferty:95}
S.~Della~Pietra, V.~Della~Pietra, and J.~Lafferty.
\newblock Inducing features of random fields.
\newblock Technical report, Carnegie Mellon University Computer Science
  Technical Report CMU-CS-95-144, 1995.

\end{thebibliography}