% A version of the Euclidean k-means algorithm in which several trials are
% run and the one with the least within-cluster sum of squares (WCSS) is
% chosen as the final output
%
% Input:
% data - row-instance data matrix
% k - number of clusters
% trials - number of trials to run before determining best clustering
% implementation - which specific k-means implementation to use
%
% implementation:
% 'm' - one that comes with matlab
% 'cl' - one by Chen & Lin (wychen@alumni.cs.ucsb.edu)
%
% Output:
% pred - predicted cluster labels
%
% Author: Frank Lin (frank@cs.cmu.edu)

function [pred]=kmeans_wcss(data,k,trials,implementation)
% KMEANS_WCSS  Run k-means several times and keep the lowest-WCSS labelling.
%
% data           - data matrix, passed unchanged to the selected k-means
% k              - number of clusters
% trials         - number of independent k-means runs to perform
% implementation - 'm' to call kmeans, 'cl' to call kmeans_cl
%
% pred - labels returned by the trial with the smallest WCSS; stays empty
%        when no trial is run (trials < 1)
%
% Raises kmeans_wcss:unknownImplementation when a trial is attempted with
% an implementation string other than 'm' or 'cl'.

fprintf('running %d k-means trials\n',trials);

% Best result so far. The minimum starts at +Inf (not 0) so that a trial
% whose WCSS is exactly 0 is kept instead of being mistaken for "unset"
% and overwritten by a later, worse trial.
minwcss=inf;
pred=zeros(0);

for trial=1:trials

    % run k-means
    if strcmp(implementation,'m')
        % MATLAB version: SUMD holds per-cluster sums, so total them
        [IDX,~,SUMD]=kmeans(data,k,'emptyaction','singleton');
        wcss=sum(SUMD);
    elseif strcmp(implementation,'cl')
        % Chen & Lin version
        % NOTE(review): assumes kmeans_cl returns SUMD as a (possibly
        % sparse) scalar total - confirm against kmeans_cl
        [IDX,SUMD]=kmeans_cl(data,'random',k);
        wcss=full(SUMD);
    else
        % Fail explicitly: falling through would hit undefined wcss/IDX
        % below and surface as an unrelated "undefined variable" error.
        error('kmeans_wcss:unknownImplementation', ...
            'implementation not recognized: %s',implementation);
    end

    % keep this trial if it beats the best WCSS seen so far
    if wcss<minwcss
        minwcss=wcss;
        pred=IDX;
    end

end

fprintf('k-means min wcss: %e\n',minwcss);

end