% Proactive-DIEL referral simulation.
%
% For every dataset instance the script replays a fixed sequence of
% (initial expert, question) pairs. When the initial expert fails to solve
% the question, it refers the question to the candidate colleague with the
% highest optimistic reward estimate (sample mean plus one standard error)
% and then updates its running statistics for that colleague.
%
% Inputs (loaded from disk; their contents are not visible in this file):
%   Referral/datasetLite<instance>.mat - presumably provides topicMean,
%       topicVariance, referralCandidates and questionTopic (TODO confirm).
%   sequence1.mat - presumably provides expertQuestionPair (TODO confirm).
% Output:
%   ReferralResults/ProactiveDIELAusAI2016<instance>seq<sequence>.mat
%       containing ADetailed and ANum.
clear all;

cumulativeAccuracy = 0;

% Configurable parameter of proactive-DIEL: the (negative) reward recorded
% when a failed referral is penalized. In all our experiments we set this
% to -0.35.
penaltyParam = -0.35;

% Create the output folder up front so that a long simulation does not
% fail at the very end when the results are saved.
if ~exist('ReferralResults', 'dir')
    mkdir('ReferralResults');
end

for instance = 1:200
sequence = 1;
% Deliberately left without a semicolon: prints the instance as progress.
instance


load(strcat('Referral/datasetLite',num2str(instance),'.mat'));
% NOTE(review): this file name is hard-coded and has to be kept in sync
% with the value of `sequence` that is used in the output file name.
load('sequence1.mat');

% ADetailed(e, n) is set to 1 when the n-th question handled by initial
% expert e ends up solved (by e itself or by the referred expert).
ADetailed = zeros(100, 5000);
% ANum(e) counts the questions for which e was the initial expert.
ANum = zeros(500,1);

% A(w) holds the number of solved questions in the w-th window of 100
% iterations.
A = zeros(5000,1);

% Per (referring expert, topic, referred expert) running statistics.
rewardsMean = zeros(100, 10, 100);
rewardsSqSum = zeros(100,10,100);
% Every triple starts with two observations; the prior mean and squared
% sum are filled in by initPriorBoundedAusAI below.
rewardsNumObs = 2*ones(100,10,100);
explicitBid = zeros(100,10,100);

% Counts of failed / successful referrals, used for the sample success rate.
rewardsZeros = zeros(100,10,100);
rewardsOnes = zeros(100,10,100);


% Initialise the priors of every expert. Column 101 of referralCandidates
% holds the number of candidates; columns 1..numCandidates hold their ids.
for i = 1:100
    numCandidates = referralCandidates(i,101);
    candidates = referralCandidates(i,1:numCandidates);
    budget = 2 * numCandidates;
    [rewardsMean,rewardsSqSum, explicitBid]  = initPriorBoundedAusAI(i, topicMean(i,:), candidates, budget, rewardsMean, rewardsSqSum, explicitBid);
end


% Keep the priors: they are used as the penalty probability for triples
% whose explicitBid flag is 1.
initialRewardsMean = rewardsMean;
iterLength = 500000;



tic
totalReward = 0;


for iter = 1: iterLength

    e1 = expertQuestionPair(iter,1);
    q = expertQuestionPair(iter,2);
    topic = questionTopic(q);


    ANum(e1) = ANum(e1) + 1;

    % Get the actual expertise of the initial expert on this topic.
    actualExpertise = getExpertiseShort(topicMean(e1,topic), topicVariance(e1,topic));

    % Draw whether the initial expert solves the question.
    r = rand(1,1);

    if(r <= actualExpertise)
        totalReward = totalReward + 1;
        ADetailed(e1,ANum(e1)) = 1;

    else

        % We need to refer the question to another expert, so we look for
        % the candidate with the best expected reward.

        % Get the candidates.
        numCandidates = referralCandidates(e1,101);
        candidates = referralCandidates(e1,1:numCandidates);

        % Start from -Inf rather than 0: because failed referrals may be
        % rewarded with penaltyParam < 0, every candidate can have a
        % negative score, and a threshold of 0 would then select nobody
        % and leave bestExpert at the invalid index 0.
        bestExpectedReward = -Inf;
        bestExpert = 0;

        for k=1:numCandidates

                numObservations = rewardsNumObs(e1,topic,candidates(k));
                meanReward = rewardsMean(e1,topic,candidates(k));

                % Sample standard deviation from the running squared sum.
                stdReward = rewardsSqSum(e1,topic,candidates(k));
                stdReward = stdReward / (numObservations - 1);
                stdReward = realsqrt(stdReward);

                factor = realsqrt(numObservations);

                % Optimistic estimate: mean plus one standard error.
                expectedReward = meanReward +  stdReward/factor;

                % ">=" means that ties go to the later candidate.
                if(expectedReward >= bestExpectedReward)
                    bestExpectedReward = expectedReward;
                    bestExpert = candidates(k);
                end
        end

        % bestExpert is still 0 only when no candidate could be selected
        % (for instance an empty candidate list). In that case there is
        % nobody to refer to and the question simply stays unsolved.
        if(bestExpert ~= 0)

            e2 = bestExpert;

            % Get the actual expertise of the referred expert.
            actualExpertise = getExpertiseShort(topicMean(e2,topic), topicVariance(e2,topic));

            % Draw whether the referred expert solves the question.
            r = rand(1,1);

            % Increase the number of observations.
            rewardsNumObs(e1,topic,e2) = rewardsNumObs(e1,topic,e2) + 1;

            reward  = 0;
            if(r <= actualExpertise)
                rewardsOnes(e1,topic,e2) = rewardsOnes(e1,topic,e2) + 1;
                totalReward = totalReward + 1;
                ADetailed(e1,ANum(e1)) = 1;

                reward = 1;
            else
                rewardsZeros(e1,topic,e2) = rewardsZeros(e1,topic,e2) + 1;
                % Decide whether the failure is penalized. The penalty
                % probability is the prior mean when explicitBid is 1 and
                % the observed success rate otherwise. The denominator is
                % at least 1 because rewardsZeros was just incremented.
                if(explicitBid(e1,topic,e2) == 1)
                    penaltyProb = initialRewardsMean(e1,topic,e2);
                else
                    penaltyProb = rewardsOnes(e1,topic,e2) / (rewardsZeros(e1,topic,e2) + rewardsOnes(e1,topic,e2));
                end

                r = rand(1,1);
                if(r <= penaltyProb)
                    reward = penaltyParam;
                end
            end

            % Incremental update of the mean and of the sum of squared
            % deviations with the new reward.
            oldMean = rewardsMean(e1,topic,e2);
            oldSqSum = rewardsSqSum(e1,topic,e2);

            delta = reward - oldMean;
            newMean = oldMean + delta / rewardsNumObs(e1,topic,e2);
            newSqSum = oldSqSum + delta * (reward - newMean);

            rewardsMean(e1,topic,e2) = newMean;
            rewardsSqSum(e1,topic,e2) = newSqSum;

        end

    end

    % Every 100 iterations store the number of solved questions of the
    % window and start a new window.
    if(mod(iter,100) == 0)
        A(iter/100) = totalReward;
        totalReward = 0;
    end


end


toc

% This gives a rough estimate of the overall performance: the mean number
% of solved questions per window over the last windows (4900 to 5000).
% The two statements without semicolon print the value for this instance
% and the running average over all instances processed so far.
cumulativeAccuracy = cumulativeAccuracy + mean(A(4900:5000));
mean(A(4900:5000))
cumulativeAccuracy/instance


save(strcat('ReferralResults/ProactiveDIELAusAI2016',num2str(instance),'seq',num2str(sequence),'.mat'),'ADetailed','ANum'); 
end
 
