% Figure-generation script: spectral embedding, k-means and soft k-means.
%
% Each section draws into figure 1 and writes an EPS file into the current
% directory.  The k-means sections call pause after every iteration, so a
% key press is needed to advance.
%
% External dependency: distances(...) is not defined in this file.
% NOTE(review): it is assumed to return squared Euclidean distances between
% the COLUMNS of its argument(s) (n-by-n for one argument, n-by-k for two);
% confirm against the helper actually on the path.

%% load the dolphins edge list
% NOTE(review): this presumably leaves a variable named `dolphins` in the
% workspace (one edge per row, two node ids per row) -- confirm.  The pause
% gives the import a chance to finish before the data is used.
uiopen('/Users/ggordon/Documents/Teaching/2009-fall/10-601/dolphins.csv',1)
pause

%% spectral embedding of dolphins
% Shift ids by one (presumably they are 0-based in the file -- confirm) and
% give every edge weight 1.
dd = [dolphins+1 ones(length(dolphins),1)];
maxid = max(dolphins(:))+1;
% The extra [maxid maxid 0] triple forces the sparse matrix to be
% maxid-by-maxid even if the last node only appears in one column.
A = spconvert([dd; maxid maxid 0]);
A = A + A';                                % symmetrize the adjacency matrix
D = sum(A);                                % node degrees
T = diag(1./sqrt(D))*A*diag(1./sqrt(D));   % D^(-1/2) * A * D^(-1/2)
[u,s,v] = svds(T, 5);
% Skip the leading singular vector; use components 2 and 3, scaled by their
% singular values, as 2-D coordinates.
X1 = u(:,2:3)*s(2:3,2:3);

figure(1)
plot(X1(:,1), X1(:,2), 'o', 'MarkerSize', 8, 'LineWidth', 2);
axis equal;
marg = .05;
axis([min(X1(:,1))-marg max(X1(:,1))+marg min(X1(:,2))-marg max(X1(:,2))+marg])
print -depsc dolph.eps

%% "601" data
% One 2-D point per row.
X2 = [
   -0.7235    0.4654
   -0.8618    0.5115
   -1.0276    0.5207
   -1.1935    0.4286
   -1.2581    0.2258
   -1.2581   -0.0046
   -1.2949   -0.1982
   -1.1106   -0.4378
   -0.8894   -0.5760
   -0.7880   -0.4839
   -0.5945   -0.4654
   -0.4747   -0.2719
   -0.5300   -0.0046
   -0.7880    0.0599
   -0.9631    0.0691
   -1.1659    0.0138
    0.1889    0.6498
    0.0138    0.6682
   -0.1152    0.6313
   -0.2074    0.3917
   -0.2258   -0.0691
   -0.1982   -0.4101
   -0.1429   -0.5207
    0.1797   -0.5668
    0.2258   -0.3733
    0.4009   -0.1152
    0.3364    0.1152
    0.4009    0.3180
    0.2903    0.6406
    1.0553    0.6682
    1.0645    0.4194
    1.0645    0.2166
    1.0645   -0.0415
    1.0369   -0.2811
    1.0922   -0.4654
    0.9724   -0.6682
    0.9724   -0.4194
    0.9539    0.1152
    0.9724    0.4931
   -0.6325    0.4608
   -0.7224    0.5170
   -0.8468    0.5123
   -1.0749    0.4936
   -1.2062    0.4187
   -1.3099    0.3205
   -1.2684    0.2035
   -1.2961    0.1053
   -1.3306    0.0304
   -1.1786   -0.1708
   -1.2684   -0.2269
   -1.2684   -0.2971
   -1.0956   -0.4468
   -1.2131   -0.4842
   -1.0334   -0.5591
   -0.7293   -0.5591
   -0.6325   -0.3719
   -0.6878   -0.2175
   -0.6256   -0.2175
   -0.5841   -0.1427
   -0.6532   -0.0257
   -0.6325    0.0351
   -0.7707    0.0772
   -0.8537    0.0819
   -1.0956    0.0444
   -0.1348    0.3205
   -0.1832    0.1988
   -0.1417    0.0070
   -0.2316   -0.2316
   -0.1417   -0.3205
   -0.1279   -0.4515
    0.0449   -0.5544
    0.2800   -0.5450
    0.3975   -0.3813
    0.3422   -0.3064
    0.4528   -0.1240
    0.5012   -0.0164
    0.4942    0.0865
   -0.2316    0.2363
    0.4942    0.2830
    0.4113    0.4749
    0.3767    0.5825
    1.0196    0.3392
    0.9850    0.2596
    1.1094    0.1661
    1.0196    0.0585
    0.9159   -0.0444
    0.9505   -0.2550
    0.8813   -0.3205
    0.8744   -0.4468
    0.9643   -0.5825
   -0.8675    0.5965
   -1.0611    0.5591
   -1.3652    0.1988
   -1.2339   -0.3673
   -1.1855   -0.4281
   -0.2177    0.1053
    0.0449    0.5591
   -0.1417    0.5076
   -0.1901    0.4795
   -0.2108    0.5544
   -0.2247    0.0912
    0.3836   -0.2316
   -1.1094   -0.4889
   -0.9712   -0.4842
   -1.2546   -0.3860
   -1.3237   -0.0725
   -0.7984    0.6058
   -1.2477    0.3673
    0.4320   -0.2363
    0.2800   -0.4561
    0.1694   -0.4795
   -0.0588    0.5263
    0.0449    0.6012
    0.9712    0.3532
    1.1233    0.5076
    1.0196   -0.1520
];

plot(X2(:,1), X2(:,2), 'o', 'MarkerSize', 8, 'LineWidth', 2);
axis equal;
marg = .05;
axis([min(X2(:,1))-marg max(X2(:,1))+marg min(X2(:,2))-marg max(X2(:,2))+marg])
print -depsc 601.eps

%% spectral embedding of 601 data
d2 = distances(X2');
A = exp(-d2*50);                           % dense affinity matrix from pairwise distances
D = sum(A);
T = diag(1./sqrt(D))*A*diag(1./sqrt(D));   % same normalization as for the dolphins graph
[u,s,v] = svds(T, 5);
X3 = u(:,2:3)*s(2:3,2:3);

plot(X3(:,1), X3(:,2), 'o', 'MarkerSize', 8, 'LineWidth', 2);
axis equal;
marg = .05;
axis([min(X3(:,1))-marg max(X3(:,1))+marg min(X3(:,2))-marg max(X3(:,2))+marg])
print -depsc 601-spectral-embed.eps

%% adjacency graph for 601 data
% Overlay the affinity graph thresholded at several levels; the drawing
% order and line widths below are reproduced exactly as in the original.
clf; hold on;
[gx, gy] = gplot(A>.1, X2);
plot(gx, gy, 'o-', 'MarkerSize', 8, 'LineWidth', 2);
[gx, gy] = gplot(A>.01, X2);
plot(gx, gy, '-', 'LineWidth', 1);
[gx, gy] = gplot(A>.001, X2);
plot(gx, gy, '-', 'LineWidth', .2);
[gx, gy] = gplot(A>.25, X2);
plot(gx, gy, '-', 'LineWidth', 2.5);
[gx, gy] = gplot(A>.5, X2);
plot(gx, gy, '-', 'LineWidth', 3);
hold off; axis equal;
marg = .05;
axis([min(X2(:,1))-marg max(X2(:,1))+marg min(X2(:,2))-marg max(X2(:,2))+marg])
print -depsc adjacency.eps

%% two-normals data
% Two groups of n standard-normal points; the second is shifted by [1.5 .5].
n = 150;
X4 = [randn(n,2); randn(n,2)+repmat([1.5 .5], n, 1)];
plot(X4(:,1), X4(:,2), 'o', 'MarkerSize', 8, 'LineWidth', 2);
axis equal;
marg = .05;
axis([min(X4(:,1))-marg max(X4(:,1))+marg min(X4(:,2))-marg max(X4(:,2))+marg])
print -depsc 2normals.eps

%% k-means, memberships initialized uniformly at random
% Runs on the spectral embedding X3 of the "601" data.
X = X3;
k = 3;
colors = [.5 .1 .1; .1 .5 .1; .1 .1 .5; .5 .5 .1; .5 .1 .5; .1 .5 .5; .1 .1 .1; .5 .5 .5];

% Random initial assignment: each point goes to the column holding its
% largest uniform draw.
z = rand(size(X,1),k);
[val, idx] = max(z, [], 2);
% The explicit size keeps z at n-by-k even when the last cluster is empty.
z = accumarray([(1:size(X,1))' idx(:)], 1, [size(X,1) k]);
y = zeros(k,size(X,2));

for i = 1:20
  % Update step: move each center to the mean of its members.
  for j = 1:k
    mask = z(:,j) > .5;
    % An empty cluster keeps its previous center instead of becoming NaN.
    if any(mask)
      % The explicit dimension matters: with a single member, mean(row)
      % would average across coordinates and return a scalar.
      y(j,:) = mean(X(mask,:), 1);
    end
  end

  % Assignment step: each point joins its nearest center.
  d2 = distances(X', y');
  [ign, idx] = min(d2, [], 2);
  z = accumarray([(1:size(X,1))' idx(:)], 1, [size(X,1) k]);

  figure(1); clf; hold on;
  for j = 1:k
    mask = z(:,j) > .5;
    plot(X(mask,1), X(mask,2), '.', 'MarkerSize', 5, 'Color', colors(j,:));
    plot(y(j,1), y(j,2), 'o', 'MarkerSize', 15, 'LineWidth', 3, 'Color', colors(j,:) + .2);
  end
  hold off; axis equal;
  axis([min(X(:,1))-marg max(X(:,1))+marg min(X(:,2))-marg max(X(:,2))+marg])
  print('-depsc',sprintf('kmeans%02d.eps', i));
  pause;
end

% Keep the spectral-clustering result for the "original space" plot at the
% end of the script; the sections below overwrite z and colors.
z601 = z;
colors601 = colors;

%% k-means v2: initialize centers randomly instead of memberships
% NOTE(review): this section writes to the same kmeans%02d.eps file names as
% the section above and therefore overwrites those figures.
X = X4;
k = 2;
colors = [.5 .1 .1; .1 .5 .1; .1 .1 .5; .5 .5 .1; .5 .1 .5; .1 .5 .5; .1 .1 .1; .5 .5 .5];
% Initial centers are k randomly chosen data points (duplicates possible).
y = X(1+floor(rand(k,1)*size(X,1)),:);

for i = 1:20
  % Assignment step.
  d2 = distances(X', y');
  [ign, idx] = min(d2, [], 2);
  z = accumarray([(1:size(X,1))' idx(:)], 1, [size(X,1) k]);

  figure(1); clf; hold on;
  for j = 1:k
    mask = z(:,j) > .5;
    plot(X(mask,1), X(mask,2), '.', 'MarkerSize', 10, 'Color', colors(j,:));
    plot(y(j,1), y(j,2), 'o', 'MarkerSize', 15, 'LineWidth', 3, 'Color', colors(j,:) + .2);
  end
  hold off; axis equal;
  axis([min(X(:,1))-marg max(X(:,1))+marg min(X(:,2))-marg max(X(:,2))+marg])
  print('-depsc',sprintf('kmeans%02d.eps', i));
  pause;

  % Update step (after plotting, so each figure shows the centers that
  % produced the displayed assignment).
  for j = 1:k
    mask = z(:,j) > .5;
    if any(mask)
      y(j,:) = mean(X(mask,:), 1);
    end
  end
end

%% soft k-means
X = X4;
k = 2;
sigma = 1;
colors = [.5 .5 .1; .1 .7 .7];
y = X(1+floor(rand(k,1)*size(X,1)),:);

for iter = 1:20
  d2 = distances(X', y');
  [minval, idx] = min(d2, [], 2);
  % Hard assignment; not used below, left in the workspace for inspection.
  zhard = accumarray([(1:size(X,1))' idx(:)], 1, [size(X,1) k]);

  % Subtract each row's minimum before exponentiating so the largest term is
  % exp(0) = 1; the offset cancels in the row normalization.
  d2 = d2 - repmat(minval, 1, size(d2, 2));
  zsoft = exp(-d2/(2*sigma^2));
  zsoft = zsoft ./ repmat(sum(zsoft,2), 1, size(zsoft,2));

  figure(1); clf; hold on;
  % Each point is colored by the responsibility-weighted blend of the
  % cluster colors.
  for i = 1:size(X,1)
    color = zsoft(i,:) * colors(1:size(zsoft,2),:);
    plot(X(i,1), X(i,2), '.', 'MarkerSize', 10, 'Color', color);
  end
  % Loop over k here: the original bounded this loop by size(z,2), i.e. by a
  % matrix left over from the previous section.
  for j = 1:k
    plot(y(j,1), y(j,2), 'o', 'MarkerSize', 15, 'LineWidth', 3, 'Color', colors(j,:) + .2);
  end
  hold off; axis equal;
  axis([min(X(:,1))-marg max(X(:,1))+marg min(X(:,2))-marg max(X(:,2))+marg])
  print('-depsc',sprintf('skmeans%02d.eps', iter));
  pause;

  % Update step: responsibility-weighted mean of all points.
  for j = 1:k
    y(j,:) = sum(X .* repmat(zsoft(:,j), 1, size(X,2)), 1) / sum(zsoft(:,j));
  end
end

%% plot clusters from k-means in original space
% Uses the memberships saved after the k-means run on the spectral embedding
% (z601/colors601); z and colors now belong to the X4 experiments and do not
% match the rows of X2.
figure(1); clf; hold on;
for j = 1:size(z601,2)
  mask = z601(:,j) > .5;
  plot(X2(mask,1), X2(mask,2), 'o', 'MarkerSize', 8, 'LineWidth', 2, 'Color', colors601(j,:));
end
hold off; axis equal;
axis([min(X2(:,1))-marg max(X2(:,1))+marg min(X2(:,2))-marg max(X2(:,2))+marg])
print -depsc 601-clusters.eps;

%% log-sum-exp function
[gxs, gys] = meshgrid(-3:.333:3);
surfl(gxs,gys,log(exp(gxs)+exp(gys)));
set(gca, 'FontSize', 18)
print -depsc lse.eps