% Do a Poisson singular value decomposition with a constant column. % x: matrix to be analyzed % l: number of factors to find (incl the constant column) % iters: computation limit % startu, startv: starting point for optimization % Returns u (with l columns) and v (with l rows) such that (x - exp(u*v)) % is small. First column of u is constrained to be constant (which % implies that the first element of each column of v is a bias term which % will be used to make sure that exp(u*v) has the same column sums that % x does). function [u, v] = newp1svd(x, l, iters, startu, startv) % starting point for optimization (can be overridden by args) [n, d] = size(x); u = randn(n, l)*1e-5/l; v = randn(l, d)*1e-5/l; % process input parameters if (nargin < 3) iters = 50; end if (nargin >= 4) u = startu; end if (nargin >= 5) v = startv; end % first column of u is constant unif = 1/n; u(:,1) = unif * ones(n,1); % this loop can be terminated early if convergence check is passed converge = 1e-5; for iter = 1:iters % save parameters for convergence check lastu = u; lastv = v; % Newton step to improve v as a predictor of x given u for i = 1:d % residual for i-th column of x expectx = exp(u*v(:,i)); err = x(:,i) - expectx; % rescale residual by derivative of link (avoiding divide-by-zero) deriv = expectx; derivnz = deriv; derivnz(deriv == 0) = 1; adjx = u*v(:,i) + err ./ derivnz; % weighted regression (regularized in case v is underdetermined) wts = spdiags(deriv,0,n,n); covar = (u' * wts * u + 1e-8*speye(l)); v(:,i) = covar \ (u' * wts * adjx); end % Newton step to improve u as a predictor of x given v for i = 1:n % residual for i-th row of x expectx = exp(u(i,:)*v); err = x(i,:) - expectx; % rescale residual by derivative of link (avoiding divide-by-zero) deriv = expectx; derivnz = deriv; derivnz(deriv == 0) = 1; adjx = u(i,:)*v + err ./ derivnz; % Weighted regression (regularized in case u is underdetermined). Note % that we modify our update to leave u(:,1) unchanged: the update for u % would have been the solution to the (weighted) normal equations % [u(i,:) * covar == adjx * wts * v'] % but the constraint u(i,1)=const introduces a Lagrange multiplier that % lets us drop the first column of the normal equations. (And the % first row goes away too since we just plug in the current value for % u(i,1).) wts = spdiags(deriv',0,d,d); covar = (v * wts * v' + 1e-8*speye(l)); target = adjx * wts * v(2:end,:)' - u(i,1) * covar(1,2:end); u(i,2:end) = target / covar(2:end, 2:end); end % rescale so that u and v have comparable norm in corresponding % rows/columns -- this probably isn't necessary, but might help us from % getting accidental underflow on u or v, and makes the output look % pretty. usum = sum(abs(u),1); vsum = sum(abs(v),2); scale = sqrt(usum' ./ vsum); u = u .* repmat(1./scale', [n 1]); v = v .* repmat(scale, [1 d]); % check convergence curnorm = sum(sum(exp(u*v))) / numel(x); change = sum(sum(abs(exp(u*v)-exp(lastu*lastv)))) / numel(x); fprintf('%3d: %g %g\n', iter, curnorm, change); if (i > 1 && (change / (curnorm + converge) < converge)) break; end end