;;; Geoff Gordon, 1995 ;;; ;;; EM example: mixture of two normals ;;; Fit means, std devs, and mixing proportion to a given sample ;;; ;;; (setf x (test-sample 30 20 1 1 3)) ;;; (em x) ;;; ;;; Only argument which should need explanation is :sigmas in em, ;;; which describes how to estimate variances, and takes these values: ;;; :fix -- set them to a random reasonable value and keep them there ;;; :pool -- pooled variance estimate: variances are assumed to be ;;; equal, and are estimated from mean squared error ;;; :indiv -- not recommended: this shows how one might try to estimate ;;; each variance individually, but since the likelihood can be ;;; infinite if one of the variances is 0, the max likelihood ;;; estimate is undefined in this case ;;; (a . b) -- same as :indiv, but use an inverse gamma prior with ;;; parameters a & b for each variance -- this removes the infinite ;;; likelihood problem by penalizing for near-zero variances ;;; ;;; Gets initial estimates by picking two random data points, setting ;;; the two means to these points, and setting the standard deviations ;;; to the difference between the two initial means. A smarter method ;;; could make convergence faster. ;;; Generate a normal deviate with mean mu and standard deviation sigma. (defun normal (&optional (mu 0) (sigma 1)) (let ((u (random 1.0)) (v (random 1.0))) (+ mu (* sigma (sqrt (* -2 (log (- 1.0 u)))) (cos (* 2 pi v)))))) ;;; Generate n iid samples from a normal distribution (defun normal-sample (n &rest norm-opts) (let (vec) (dotimes (i n) (push (apply #'normal norm-opts) vec)) vec)) ;;; Standard normal density (defun normal-density (x) (* 0.3989422804014327d0 (exp (* x x -.5d0)))) ;;; Sample from a mixture of two normals, means differ by mu (defun test-sample (n1 n2 s1 s2 mu) (let ((x1 (normal-sample n1 0 s1)) (x2 (normal-sample n2 mu s2))) (append x1 x2))) (defun em (xs &key (maxiter 30) (sigmas :pool)) (loop with n = (length xs) with mu1 = (elt xs (random n)) and mu2 = (elt xs (random n)) with sigma1 = (abs (- mu1 mu2)) with sigma2 = sigma1 with mix = .5 for iter from 1 to maxiter for ps = (loop for x in xs for p1 = (* (/ mix sigma1) (normal-density (/ (- x mu1) sigma1))) for p2 = (* (/ (- 1 mix) sigma2) (normal-density (/ (- x mu2) sigma2))) collect (/ p1 (max (+ p1 p2) 1e-10))) do (loop for x in xs for p in ps sum (* x p) into sum1 sum p into n1 sum (* x (- 1 p)) into sum2 sum (- 1 p) into n2 finally (setf mix (/ n1 n)) (setf mu1 (/ sum1 (max n1 1e-10))) (setf mu2 (/ sum2 (max n2 1e-10)))) (loop for x in xs for p in ps sum (* p (* (- x mu1) (- x mu1))) into ssq1 sum p into n1 sum (* (- 1 p) (* (- x mu2) (- x mu2))) into ssq2 sum (- 1 p) into n2 finally (cond ((eq sigmas :fix) ; don't estimate variances ) ((eq sigmas :pool) ; pooled variance estimate (setf sigma1 (setf sigma2 (sqrt (/ (+ ssq1 ssq2) n))))) ((eq sigmas :indiv) ; unpooled -- doesn't work well (setf sigma1 (sqrt (/ ssq1 (max n1 1e-10)))) (setf sigma2 (sqrt (/ ssq2 (max n2 1e-10))))) (t ; unpooled with inverse gamma prior (setf sigma1 (sqrt (/ (+ (car sigmas) ssq1) (+ (cdr sigmas) n1)))) (setf sigma2 (sqrt (/ (+ (car sigmas) ssq2) (+ (cdr sigmas) n2))))) )) (format t "Iter ~3D: mu1 ~9,3F mu2 ~9,3F s1 ~9,3F s2 ~9,3F p ~,4F~%" iter mu1 mu2 sigma1 sigma2 mix) finally (return (values mu1 mu2 sigma1 sigma2 mix))))