% sample usage: test_dfc(3, 1500, 200, 5, [.1 .2], .2, .25, 1, 1)
% k = number of subspaces (i.e., number of clusters)
% d = dimensionality of data points
% n_i = number of data points drawn from each subspace
% r_i = rank of each subspace
% outlier_vec = vector of outlier fractions to test; each entry is the
%               fraction of all points (inliers plus outliers) that are outliers
% lambda = regularizer in the LRR optimization
% samp_frac = fraction of columns sampled per DFC subproblem
% do_full = run LRR on the full problem?
% do_dfc = run DFC? (defaults to 1 when omitted)
function test_dfc(k, d, n_i, r_i, outlier_vec, lambda, samp_frac, do_full, do_dfc)
% TEST_DFC  Compare full LRR against its DFC variant on synthetic data.
%   For each of 3 trials and each outlier fraction in outlier_vec, data is
%   drawn with generate_data (seeded by the trial number), solved with
%   run_lrr and/or run_lrr_dfc, and scored with check_success. Per-run
%   results are printed; from the second trial on, running column means
%   over the completed trials are printed as well.
%
%   k, d, n_i, r_i - forwarded to generate_data (subspace count, ambient
%                    dimension, points per subspace, subspace rank)
%   outlier_vec    - outlier fractions to test, each in [0, 1)
%   lambda         - regularizer forwarded to run_lrr / run_lrr_dfc
%   samp_frac      - column sampling fraction forwarded to run_lrr_dfc
%   do_full        - nonzero to run full LRR (default 1)
%   do_dfc         - nonzero to run DFC (default 1)

% Both switches are optional; do_full previously had no default, so a
% 7-argument call failed with an undefined-variable error.
if nargin < 8
  do_full = 1;
end
if nargin < 9
  do_dfc = 1;
end

% "for x = v" iterates over columns, so force a row vector: a column input
% would otherwise produce a single iteration with a vector-valued fraction.
outlier_vec = outlier_vec(:).';
% A fraction of 1 divides by zero below and values outside [0, 1) give a
% negative outlier count; reject them up front (the negated test also
% catches NaN).
if any(~(outlier_vec >= 0 & outlier_vec < 1))
  error('test_dfc:badOutlierFrac', 'outlier fractions must lie in [0, 1)');
end

tolerance = 1e-4;
n_xo = n_i * k;          % number of inlier points
num_trials = 3;
num_fracs = numel(outlier_vec);

% Preallocate result tables: one row per trial, one column per fraction.
success     = zeros(num_trials, num_fracs);
err         = zeros(num_trials, num_fracs);
success_dfc = zeros(num_trials, num_fracs);
err_dfc     = zeros(num_trials, num_fracs);

disp(['perc outliers:  ' num2str(outlier_vec)]);
for trial = 1:num_trials
  for ind_out = 1:num_fracs
    out_frac = outlier_vec(ind_out);
    fprintf(1, 'outfrac=%.2f\n', out_frac);
    % Solve n_out / (n_xo + n_out) = out_frac for n_out.
    n_outliers = round((n_xo / (1 - out_frac)) - n_xo);
    [X, true_subspaces, true_outliers] = generate_data(k, d, n_i, r_i, n_outliers, trial);
    if do_full
      [Z,E] = run_lrr(X,X, lambda);
      [success(trial,ind_out), err(trial,ind_out)] = ...
                    check_success(Z,E,X,true_subspaces, true_outliers, tolerance);
      fprintf(1, '  base: success = %d, error =%.2e\n', success(trial,ind_out), err(trial,ind_out));
    end
    if do_dfc
      [Z,E_dfc] = run_lrr_dfc(X, lambda, samp_frac, k*r_i);
      [success_dfc(trial,ind_out), err_dfc(trial,ind_out)] = ...
                    check_success(Z,E_dfc,X,true_subspaces, true_outliers, tolerance);
      fprintf(1, '  dfc: success = %d, error =%.2e\n', success_dfc(trial,ind_out), err_dfc(trial,ind_out));
    end
  end
  if trial > 1
    fprintf(1, 'num trials = %d\n', trial);
    % Average only the completed trials (rows 1:trial), always down the
    % trial dimension. NOTE: check_success reports err = -1 when outlier
    % detection fails, and those sentinels are included in the mean.
    if do_full
      disp(mean(success(1:trial,:), 1));
      disp(mean(err(1:trial,:), 1));
    end
    if do_dfc
      disp(mean(success_dfc(1:trial,:), 1));
      disp(mean(err_dfc(1:trial,:), 1));
    end
  end
end

function [success, err] = check_success(Z,E,X,true_subspaces, true_outliers, tolerance)
% CHECK_SUCCESS  Score a solution on outlier detection and subspace recovery.
%   Z              - coefficient matrix whose leading left singular vectors
%                    define the estimated projector
%   E              - residual matrix; a column is flagged as an outlier when
%                    its 2-norm exceeds tolerance times the norm of the
%                    matching column of X
%   X              - data matrix (same number of columns as E)
%   true_subspaces - reference projector over the inlier points
%   true_outliers  - logical row vector marking the true outlier columns
%   tolerance      - relative threshold used for outlier flagging, rank
%                    truncation and the final error test
%
%   success is 1 only if the estimated outlier set equals the true one AND
%   the spectral-norm error between projectors is below tolerance.
%   err is that spectral-norm error, or -1 when outlier detection is inexact.

% outlier detection: compare column norms of E against those of X
norm_E = sqrt(sum(E.^2,1));
norm_X = sqrt(sum(X.^2,1));
estimated_outliers = norm_E>tolerance*norm_X;
% numerator: cardinality of intersection of true and estimated outliers
% denominator: cardinality of union of true and estimated outliers
n_union = sum(estimated_outliers | true_outliers);
if n_union == 0
  % No true and no estimated outliers: the sets agree exactly. Without this
  % guard the ratio is 0/0 = NaN and the run is wrongly scored as a failure.
  acc = 1;
else
  acc = sum(estimated_outliers & true_outliers) / n_union;
end
if acc >=1
  % Outliers identified exactly; now compare the recovered projector.
  [U,S,V] = svd(Z,'econ');
  S = diag(S);
  % numerical rank: singular values above tolerance relative to the largest
  r = sum(S>tolerance*S(1));
  U = U(:,1:r);
  estimated_subspaces = U*U';
  % restrict to inlier rows/columns so it is comparable to true_subspaces
  estimated_subspaces = estimated_subspaces(~estimated_outliers,~estimated_outliers);
  err = norm(estimated_subspaces-true_subspaces,2);
  if err<tolerance
    success = 1;
  else
    success = 0;
  end
else
  % Outlier detection was not exact; err = -1 is a sentinel, not a distance.
  success = 0;
  err = -1;
end

function[Z,E] = run_lrr_dfc(X, lambda, samp_frac, true_rank)
% RUN_LRR_DFC  Divide-and-conquer LRR: solve on column blocks, then combine.
%   X         - data matrix, one point per column
%   lambda    - base regularizer; it is multiplied by sqrt(n / block size)
%               for each block before calling run_lrr
%   samp_frac - column fraction per block; ceil(1/samp_frac) blocks are used
%   true_rank - accepted for interface compatibility; not used in this body
%
%   Z - n-by-n coefficient matrix assembled from the per-block solutions
%   E - residual X - X*Z
%
%   The first block's solution defines a projector (from its left singular
%   vectors); every later block's solution is projected with it.

n = size(X,2);
% partition columns
t = ceil(1/samp_frac);
partitions = partition(1:n, t);
% Preallocate; Z must be n-by-n for E = X - X*Z below to be well-defined.
Z = zeros(n, n);
% run lrr on each subproblem
for i = 1:t
  cols = partitions{i};
  C = X(:, cols);
  scaling_term = sqrt(n/size(C,2));
  % NOTE(review): the original author questioned whether lambda should be
  % rescaled here ("rescale lambda??"); the scaling is kept as written.
  C_hat = run_lrr(C, X, scaling_term * lambda);
  if i == 1
    % three-output form is required to obtain U (one output returns
    % singular values only); S and V are unused
    [U,S,V] = svd(C_hat,'econ');
    proj = U * U';
  else
    C_hat = proj * C_hat;
  end
  % Write each block at its own column indices so Z stays aligned with the
  % columns of X even if partition() does not return ordered contiguous
  % blocks; for ordered contiguous blocks this equals plain concatenation.
  Z(:, cols) = C_hat;
end
E = X - X*Z;

function[Z,E] = run_lrr(X, A, lambda)
% RUN_LRR  Call lrra on a reduced version of A and map the result back.
%   A is replaced by A*Q, where Q holds the right singular vectors of A whose
%   singular values exceed 1e-4 times the largest one. lrra is then called
%   with X, the reduced matrix and lambda, and its coefficient output is
%   mapped back through Q. E is returned from lrra unchanged.
%   (The trailing "false" passed to lrra is presumably a display/verbosity
%   switch - confirm against lrra.)

rel_tol = 1e-4;

% economy SVD of A; only the singular values and right vectors are needed
[left_vecs, sing_vals, right_vecs] = svd(A, 'econ');
sing_vals = diag(sing_vals);

% keep the directions whose singular value is significant relative to the top one
num_kept = nnz(sing_vals > rel_tol * sing_vals(1));
basis = right_vecs(:, 1:num_kept);

% solve against the reduced matrix, then lift the coefficients back
[coeffs, E] = lrra(X, A*basis, lambda, false);
Z = basis * coeffs;

function[X, true_subspaces, true_outliers] = generate_data(k, d, n_i, r_i, n_outliers, my_seed)
% GENERATE_DATA  Synthetic union-of-subspaces data with Gaussian outliers.
%   Draws n_i points from each of k subspaces of rank r_i in d dimensions,
%   appends n_outliers Gaussian columns, and shuffles the column order.
%
%   X              - d-by-(inliers + outliers) data matrix, columns shuffled
%   true_outliers  - logical row vector, true where a column of X is an outlier
%   true_subspaces - V0*V0', with V0 the significant right singular vectors
%                    of the inlier columns of X
%
%   Both legacy generators are seeded with my_seed, so output is repeatable.

% seed the uniform and normal generators (legacy 'state' interface)
rand('state', my_seed);
randn('state', my_seed);

% first basis: leading r_i left singular vectors of a random square matrix
[all_dirs, sv_unused, rv_unused] = svd(randn(d));
rotation = orth(randn(d));
basis = all_dirs(:, 1:r_i);

% inliers: uniform random coefficients in the current basis; each further
% subspace is obtained by applying the same rotation once more
X = basis * rand(r_i, n_i);
for subspace_idx = 2:k
    basis = rotation * basis;
    X = [X, basis * rand(r_i, n_i)];
end

% outliers: Gaussian columns scaled by the mean absolute inlier entry
n_inliers = size(X, 2);
noise = mean(mean(abs(X))) * randn(d, n_outliers);
X = [X, noise];
true_outliers = [false(1, n_inliers), true(1, n_outliers)];

% shuffle columns (so that dfc sampling is possible), keeping the mask aligned
shuffle = randperm(size(X, 2));
X = X(:, shuffle);
true_outliers = true_outliers(shuffle);

% reference projector from the right singular vectors of the inlier columns
inliers = X(:, ~true_outliers);
[lv_unused, inlier_sv, inlier_rv] = svd(inliers, 'econ');
inlier_sv = diag(inlier_sv);
num_sig = nnz(inlier_sv > 1e-4 * inlier_sv(1));
inlier_rv = inlier_rv(:, 1:num_sig);
true_subspaces = inlier_rv * inlier_rv';
