% Provides various seeding methods for classification experiments.
%
% Input:
% truth - label vector, 0 is an unknown label and is not a potential seed
% feature - row-instance feature matrix
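% alg - seeding algorithm name of the form '<ordering>-<picking>', where
% <ordering> is one of rand, highdegree, highpagerank and <picking> is one
% of num, ratio, stratmin, stratnum, stratratio (e.g. 'rand-stratnum')
% p - seeding algorithm parameter; a seed count for num, stratmin and
% stratnum, or a fraction for ratio and stratratio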
%
% Output:
% seed - row-instance seed matrix; seed(i,j)=1 if class j is the seed label
% for instance i, otherwise 0
%
% Author: Frank Lin (frank@cs.cmu.edu)

function seed=x_seed(truth,feature,alg,p)
% X_SEED  Choose seed labels from the known labels of a data set.
%
% truth   - label vector; 0 marks an unlabeled instance, labeled instances
%           are expected to carry positive integer class ids
% feature - row-instance feature matrix (only used by the highdegree and
%           highpagerank orderings)
% alg     - '<ordering>-<picking>'; ordering is rand, highdegree or
%           highpagerank, picking is num, ratio, stratmin, stratnum or
%           stratratio
% p       - parameter of the picking rule (a count or a fraction)
%
% seed    - length(truth)-by-c matrix, c=max(truth); seed(i,j)=1 if class j
%           was picked as the seed label of instance i, otherwise 0
%
% An unrecognized ordering raises x_seed:unknownOrdering. An unrecognized
% picking rule prints a message and returns an all-zero seed matrix. If the
% labeled instances run out before the requested seeds are found, a
% x_seed:notEnoughLabels warning is issued and the seeds found so far are
% returned.

% indices of known (labeled) instances, forced to a column so that the
% orientation of ORDER does not depend on the orientation of TRUTH
known=find(truth);
known=known(:);
% number of known instances
n=length(known);
% number of classes (0 when nothing is labeled)
c=full(double(max([0;truth(:)])));
% number of known instances in each class, as a 1-by-c row. accumarray
% counts per integer label; hist(x,c) would instead spread c bins between
% min(x) and max(x), which miscounts whenever class 1 has no instance.
if n==0
    sizes=zeros(1,c);
else
    labels=full(double(truth(known)));
    sizes=accumarray(labels(:),1,[c 1])';
end

% parse the algorithm name; PICKING keeps its leading '-'
[ordering,picking]=strtok(alg,'-');

switch ordering
    case 'rand' % random ordering
        order=known(randperm(n));
    case 'highdegree' % high degree ordering
        % degree = row sum of the feature matrix
        degree=sum(feature,2);
        [~,index]=sort(degree(known),1,'descend');
        order=known(index);
    case 'highpagerank' % high pagerank ordering
        % score from the project helpers mrw/normcol (defined elsewhere)
        pr=mrw(normcol(feature),ones(length(truth),1),0,0.15,0.001/n,500,false);
        % force a column so the sort along dimension 1 is never a no-op
        scores=pr(known);
        [~,index]=sort(scores(:),1,'descend');
        order=known(index);
    otherwise
        % ORDER cannot be built, so nothing below could work
        error('x_seed:unknownOrdering', ...
            'ordering option not recognized: %s',ordering);
end

% initialize output
seed=zeros(length(truth),c);

% set when the labeled instances run out before the request is satisfied
exhausted=false;

switch picking
    case '-num' % pick the top p labels as seeds
        exhausted=floor(p)>n;
        for i=1:min(p,n)
            seed(order(i),truth(order(i)))=1;
        end
    case '-ratio' % pick the top p*n labels as seeds
        exhausted=floor(p*n)>n;
        for i=1:min(p*n,n)
            seed(order(i),truth(order(i)))=1;
        end
    case '-stratmin' % take from the top until every class has >= p seeds
        counts=zeros(1,c);
        i=1;
        while i<=n && min(counts)<p
            next=order(i);
            seed(next,truth(next))=1;
            counts(truth(next))=counts(truth(next))+1;
            i=i+1;
        end
        exhausted=min(counts)<p;
    case '-stratnum' % first p instances of each class from the top
        counts=zeros(1,c);
        i=1;
        while i<=n && min(counts)<p
            next=order(i);
            if counts(truth(next))<p
                seed(next,truth(next))=1;
                counts(truth(next))=counts(truth(next))+1;
            end
            i=i+1;
        end
        exhausted=min(counts)<p;
    case '-stratratio' % first p*sizes(class) instances of each class from the top
        counts=zeros(1,c);
        i=1;
        % classes without known instances give 0/0=NaN, which min ignores
        while i<=n && min(counts./sizes)<p
            next=order(i);
            if counts(truth(next))/sizes(truth(next))<p
                seed(next,truth(next))=1;
                counts(truth(next))=counts(truth(next))+1;
            end
            i=i+1;
        end
        exhausted=min(counts./sizes)<p;
    otherwise
        fprintf('picking option not recognized: %s\n',picking);
end

if exhausted
    warning('x_seed:notEnoughLabels', ...
        'not enough labeled instances to satisfy %s with p=%g',alg,p);
end

% report seeds picked / known instances for each class; the explicit
% dimension keeps sum from collapsing a single-row seed matrix to a scalar
fprintf('seeds:');
fprintf(' %d/%d',[sum(seed,1);sizes]);
fprintf('\n');

end