% This function is used to set up various datasets, and call
% the convex program.
% This program will not run by itself.
% I kept it here for your reference only.
% Note that the data directories have changed.
% I only included the code for kernel_type='flex'.

function runme(kernel_type)
% RUNME  Run the kernel experiment on every benchmark dataset in turn.
%
%   runme(kernel_type) calls innerfunction_trial once for each dataset in
%   the table below, passing kernel_type through unchanged.
%   When kernel_type is omitted or empty it defaults to 'flex'.
%
%   We need Y to be 0/1, not -1/1.  For multiclasses, each row is an
%   indicator vector e.g. [0 0 1 0].

if (nargin < 1 || isempty(kernel_type))
	kernel_type = 'flex';
end

% Settings shared by every dataset.  Semicolons are left off here and in
% the loop below on purpose, so the settings are echoed to the console as
% a run log.
force_unweighted = 1
m = 200         % number of eigenvectors
trials = 30     % number of random trials per size

% One row per dataset: {task, Wfilename, Yfilename, Lsizes (labeled set sizes)}
datasets = cell(0, 4);

% --------------------------------------------------------------
% ICML'03 1vs2 dataset, 10NN unweighted graph
datasets(end+1,:) = {'1vs2_10NNunweighted', ...
	'/afs/cs.cmu.edu/user/zhuxj/semi_supervised_datasets/one_vs_two/one_vs_two_10NN_unweighted_graph.mat', ...
	'/afs/cs.cmu.edu/user/zhuxj/semi_supervised_datasets/one_vs_two/Y', ...
	10:10:50};

% --------------------------------------------------------------
% ICML'03 pc vs. mac dataset, 10NN unweighted graph
%Dfilename = '/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/20newsgroups/pcmac/with_header/innerprod.txt'
datasets(end+1,:) = {'pcmac_10NNunweighted', ...
	'/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/20newsgroups/pcmac/with_header/graph_10NNw.mat', ...
	'/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/20newsgroups/pcmac/with_header/Y.txt', ...
	10:20:100};

% --------------------------------------------------------------
% ICML'03 religion vs. atheism dataset, 10NN unweighted graph
%Dfilename = '/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/20newsgroups/religionatheism/with_header/innerprod.txt'
datasets(end+1,:) = {'religionatheism_10NNunweighted', ...
	'/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/20newsgroups/religionatheism/with_header/graph_10NNw.mat', ...
	'/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/20newsgroups/religionatheism/with_header/Y.txt', ...
	10:20:100};

% --------------------------------------------------------------
% ICML'03 baseballhockey dataset, 10NN unweighted graph
%Dfilename = '/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/20newsgroups/baseballhockey/with_header/innerprod.txt'
datasets(end+1,:) = {'baseballhockey_10NNunweighted', ...
	'/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/20newsgroups/baseballhockey/with_header/graph_10NNw.mat', ...
	'/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/20newsgroups/baseballhockey/with_header/Y.txt', ...
	10:20:100};

% --------------------------------------------------------------
% ICML'03 odd vs. even dataset, 10NN unweighted graph
datasets(end+1,:) = {'oddeven_10NNunweighted', ...
	'/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/odd_vs_even/10NNunweighted.mat', ...
	'/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/odd_vs_even/Y.txt', ...
	10:20:100};

% --------------------------------------------------------------
% ICML'03 ten digits dataset (multi-class), 10NN unweighted graph
datasets(end+1,:) = {'tendigits_10NNunweighted', ...
	'/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/handwrittendigits/10NNunweighted.mat', ...
	'/afs/cs.cmu.edu/project/lemur-4/semi_supervised/semi_supervised_datasets/handwrittendigits/Y.mat', ...
	50:50:250};

% --------------------------------------------------------------
% ISOLET, 26 classes, 100NN unweighted graph
datasets(end+1,:) = {'isolet_100NNunweighted', ...
	'/afs/cs.cmu.edu/project/lemur-5/more_semisupervised_data/isolet/isolet_100NNunweighted.mat', ...
	'/afs/cs.cmu.edu/project/lemur-5/more_semisupervised_data/isolet/Y.mat', ...
	50:50:250};

% Run the datasets in the order listed above.
for dataset_index = 1:size(datasets, 1),
	task = datasets{dataset_index, 1}
	Wfilename = datasets{dataset_index, 2}
	Yfilename = datasets{dataset_index, 3}
	Lsizes = datasets{dataset_index, 4}   % labeled set sizes
	innerfunction_trial(Wfilename, Yfilename, force_unweighted, m, Lsizes, trials, task, kernel_type)
end



% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function innerfunction_trial(Wfilename, Yfilename, force_unweighted, m, Lsizes, trials, task, kernel_type)
% INNERFUNCTION_TRIAL  Run every labeled-set size and random trial for one task.
%
%   Inputs:
%     Wfilename        - file loaded to obtain the graph weight matrix W
%                        (only read when no cached spectrum file exists)
%     Yfilename        - file loaded to obtain the 0/1 label matrix Y
%     force_unweighted - if 1, W is replaced by (W~=0) before use
%     m                - number of Laplacian eigenvectors requested from eigs
%     Lsizes           - vector of labeled set sizes
%     trials           - number of random trials per labeled set size
%     task             - task name, used to build input/output file names
%     kernel_type      - 'flex', 'diffusion', 'gf', 'no_order' or 'flexCC';
%                        anything else raises an error
%
%   Files read:
%     xvalidate_svmC_output_<task>_<kernel_type>.mat  (variable 'output')
%     ./spectrum_dir/<task>.spectrum                  (if it exists)
%     ./spectrum_dir/<task>.L<size>.trial<t>.perm     (if it exists)
%   Files written:
%     the spectrum and perm files above when missing, tmpoutput.mat after
%     every trial, and output_<task>_<kernel_type>.mat at the end.
%
%   Columns of the saved 'output' table (one row per size/trial):
%     1 Lsize, 2 trial, 3 kernel alignment on the labeled data,
%     4 SVM accuracy on the unlabeled data,
%     5 cpu time ('flex', 'no_order', 'flexCC') or fitted hyperparameter
%       ('diffusion', 'gf').
%   'outputmu' holds the corresponding mu vector in each row.

% find the optimal SVM slack variable upper bound.
% The file is loaded into a struct because its table is also named
% 'output': loading it straight into the workspace would leave its rows
% underneath the results table that is built (and saved) below.
% NOTE(review): presumably column 4 is the cross-validation score and
% column 3 the candidate C value -- confirm against the script that
% writes this file.
svmCxvalidfilename = sprintf('xvalidate_svmC_output_%s_%s.mat', task, kernel_type)
xvalid = load(svmCxvalidfilename, 'output');
[best_value, best_row] = max(xvalid.output(:,4));
svmC = xvalid.output(best_row,3)
clear xvalid best_value best_row


savefilename = sprintf('output_%s_%s.mat', task, kernel_type)

% --------- common variables ------------------------------------

% load the true labels
load(Yfilename);
% Check to make sure Y is 0/1
if (min(min(Y))~=0 || max(max(Y))~=1)
	error('Y must be 0/1. Quit.');
end
% if binary problem, add a complementary column
if (size(Y,2)==1),
	Y = [Y 1-Y];
end

n = size(Y, 1)

original_Y = Y; clear Y;

% The spectrum and permutation caches both live in ./spectrum_dir; create
% it up front so a missing directory cannot make save() fail after the
% eigen-decomposition has already been computed.
if (~exist('./spectrum_dir', 'dir'))
	mkdir('./spectrum_dir');
end

% load spectrum file, if we've computed it before
spectrumfile = sprintf('./spectrum_dir/%s.spectrum', task);
if (exist(spectrumfile, 'file'))
	load(spectrumfile, '-mat');
else
	% load the graph, create graph Laplacian
	load(Wfilename);

	if (force_unweighted == 1)
		W = (W~=0); % make the weighted graph unweighted.
	end

	D=diag(sum(W));
	L=D-W;

	% compute the first m eigenvectors of the Laplacian with smallest eigenvalues.
	[vec, val, flag]=eigs(L, m, 'SM');
	if (flag ~= 0)
		% eigs reports a non-zero flag when not all eigenvalues converged
		warning('eigs did not converge for all requested eigenvalues (task %s).', task);
	end
	% sort the eigensystem by eigenvalues from small to large
	%       [original_eigenvalues tmpi] = sort(diag(val));
	% NOTE: for ten digits dataset, somehow the smallest eigenvalue is not zero
	% but a negative number.   I think it's a problem with eigs().  For convenience,
	% I'll just take the absolute value here.
	[original_eigenvalues, tmpi] = sort(diag(abs(val)));
	original_eigenvectors = vec(:,tmpi);
	clear W D L vec val

	save(spectrumfile, 'original_eigenvalues', 'original_eigenvectors');
end




%rand('state',sum(100*clock))

% Result tables start empty and grow by one row per (Lsize, trial).
output = [];
outputmu = [];
counter=1; % used to collect output
% labeled set size
for Lsize = Lsizes,
   for trial = 1:trials,

	output(counter,1)=Lsize;
	output(counter,2)=trial;

	permfile = sprintf('./spectrum_dir/%s.L%02d.trial%02d.perm', task, Lsize, trial);
	if (exist(permfile, 'file'))
		fprintf('loading existing permfile %s\n', permfile);
		load(permfile, '-mat');
	else
		fprintf('creating new permfile %s\n', permfile);
		% randomly sample labeled data by permutation
		% but if a training sample is missing some classes, we resample.
		perm = randperm(n);
		while (nnz(sum(original_Y(perm(1:Lsize),:))) ~= size(original_Y,2)),
			perm = randperm(n);
		end
		sum(original_Y(perm(1:Lsize),:))   % debug
		save(permfile, 'perm');
	end

	% permute everything
	% so that (1:Lsize) will be labeled data
	vec = original_eigenvectors(perm,:);
	Y = original_Y(perm,:);

	% create yy' submatrix on training data, convert it to +1/-1.
	YYtrans_tr = Y(1:Lsize,:)*Y(1:Lsize,:)';
	T = 2*YYtrans_tr - 1;

	% depending on the kernel type, call different functions
	if (strcmp(kernel_type, 'flex'))
		% find the constant eigenvector(s -- numerically there might be more than one)
		constant_eigenvector_indices = find(std(original_eigenvectors)<1e-10);
		[mu, mycputime] = flexible_kernel(m, vec(1:Lsize,:), T, constant_eigenvector_indices);
		outputmu(counter,:) = mu';
		output(counter,5) = mycputime;
	elseif (strcmp(kernel_type, 'diffusion'))
		% find the best hyperparameter that maximize the alignment
		% note: fminbnd() was unable to find the optimal for the isolet task
		% which is hyperparameter = 0.  Instead a nonsense answer (around 60) was returned.
		% for this reason, we force the optimal answer for the isolet task.
		if (strcmp(task, 'isolet_100NNunweighted'))
			hyperparameter = 0;
		else
			hyperparameter = fminbnd(@neg_align_diffusion, 0, 100, [], original_eigenvalues, vec(1:Lsize,:), T)
		end
		mu = exp( - hyperparameter/2 * original_eigenvalues);
		outputmu(counter,:) = mu';
		output(counter,5) = hyperparameter;
	elseif (strcmp(kernel_type, 'gf'))
		% find the best hyperparameter that maximize the alignment
		% notice: if we remove the constant eigenvector, then we might need to allow the lower bound
		% to be negative, so that we can bring the second eigenvalue close to zero.
		hyperparameter = fminbnd(@neg_align_gf, -min(original_eigenvalues)+1e-6, 10, [], original_eigenvalues, vec(1:Lsize,:), T)
		mu = 1./(original_eigenvalues + hyperparameter);
		outputmu(counter,:) = mu';
		output(counter,5) = hyperparameter;
	elseif (strcmp(kernel_type, 'no_order'))
		[mu, mycputime] = no_order(m, vec(1:Lsize,:), T);
		outputmu(counter,:) = mu';
		output(counter,5) = mycputime;
	elseif (strcmp(kernel_type, 'flexCC'))
		% like 'flex', but also constraint the constant eigenvector.
		[mu, mycputime] = flexibleCC_kernel(m, vec(1:Lsize,:), T);
		outputmu(counter,:) = mu';
		output(counter,5) = mycputime;
	else
		error('Unknown kernel type. Quit.');
	end


	% construct the kernel
	K=vec*diag(mu)*vec';

	% no longer normalize the kernel
	%%% K = normalizeK(K);

	% compute the alignment the hard way, for debug
	output(counter,3) = ...
	   sum(sum(K(1:Lsize,1:Lsize).* T))/...
	     sqrt(sum(sum(K(1:Lsize,1:Lsize).*K(1:Lsize,1:Lsize))) * sum(sum(T.^2)));

	% ---------------------------------------------
	% compute SVM performance with this kernel K
	cwd=cd;
	cd /afs/cs/usr/zhuxj/misc/svm

	% The try/catch guarantees we return to the original directory even
	% when the SVM code fails; the relative paths used elsewhere in this
	% function (./spectrum_dir, tmpoutput.mat) depend on it.
	try
		% if it's binary problem, only need to classify the first column
		if (size(Y,2)==2)
			% convert Y to +1/-1
			[nsv, alpha, bias] = svc(K,2*Y(1:Lsize,1)-1, svmC);
			predictedY = svcoutput(K,2*Y(1:Lsize,1)-1,alpha,bias);
			acc_svm = sum((2*Y(Lsize+1:n,1)-1) == predictedY)/(n-Lsize);
		% otherwise we do 1-against-all
		else
			clear one_against_all;
			for class_index = 1:size(Y,2),
				[nsv, alpha, bias] = svc(K,2*Y(1:Lsize,class_index)-1, svmC);
				one_against_all(:,class_index) = jerry_svcmargin(K,2*Y(1:Lsize,class_index)-1,alpha,bias);
			end
			% classification is correct, if the correct class has largest value.
			% Non-true classes are masked with -Inf rather than multiplied by
			% zero: a zero would beat a negative margin of the true class, so
			% a point whose margins are all negative would be scored as wrong
			% even when the true class has the largest margin.
			Ytest = Y(Lsize+1:n,:);
			true_class_margin = one_against_all;
			true_class_margin(Ytest == 0) = -Inf;
			acc_svm = sum(max(true_class_margin, [], 2) == max(one_against_all, [], 2))/(n-Lsize);
		end
	catch svm_error
		cd(cwd);
		rethrow(svm_error);
	end

	output(counter,4) = acc_svm;
	cd(cwd)
	% end SVM
	% ---------------------------------------------

	% debug
	format short g
	output

	counter = counter + 1;
	save tmpoutput.mat output outputmu
   end % for trial
end % for Lsize

save(savefilename, 'output', 'outputmu');
