clear all;

% Simulation of learned expert-to-expert referral.
%
% For each of 1000 dataset instances the script replays a sequence of
% (initial expert, question) pairs. The initial expert first tries to solve
% the question; on failure the question is referred to the candidate whose
% reward estimate has the highest upper confidence score, and the estimate
% for that (initial expert, topic, referred expert) triple is updated with
% the observed 0/1 reward.
%
% Inputs (loaded into the workspace inside the loop):
%   tinv90.mat                        - provides tinv90 (lookup indexed by n-1)
%   Referral/datasetLite<instance>.mat, sequence1.mat
%       - together expected to provide expertQuestionPair, questionTopic,
%         topicMean, topicVariance and referralCandidates
%         (NOTE(review): which file supplies which variable is not visible
%         here; the load order is kept exactly as before for that reason).
% Output:
%   ReferralResults/ECAI2016<instance>seq<sequence>.mat with ADetailed, ANum.

cumulativeAccuracy = 0;

% Fail early instead of after a full simulation run if the output folder
% is missing.
if ~exist('ReferralResults', 'dir')
    mkdir('ReferralResults');
end

for instance = 1:1000
instance
sequence = 1;

% ADetailed's i'th row stores the 0/1 reward sequence of the i'th initial
% expert; ANum(i) stores the length of that sequence.
% NOTE(review): ANum has 500 rows although ADetailed has 100; the shape is
% kept because ANum is saved to disk below.
ADetailed = zeros(100, 5000);
ANum = zeros(500,1);
AMean = zeros(5000,1);

iterLength = 500000;

% A(j) holds the number of solved questions in the j'th window of 100 steps.
A = zeros(iterLength / 100, 1);


load('tinv90.mat');
load(strcat('Referral/datasetLite',num2str(instance),'.mat'));
load('sequence1.mat');


% Per (initial expert, topic, referred expert) running statistics.
% The initial values (mean 0.5, squared-deviation sum 0.5, 2 observations)
% are exactly the statistics of one observed 0 and one observed 1.
rewardsMean   = 0.5 * ones(100, 10, 100);
rewardsSqSum  = 0.5 * ones(100, 10, 100);
rewardsNumObs = 2   * ones(100, 10, 100);

tic
totalReward = 0;

for iter = 1: iterLength

    e1 = expertQuestionPair(iter,1);
    %e1 = randi(100,1,1);

    q = expertQuestionPair(iter,2);
    topic = questionTopic(q);

    ANum(e1) = ANum(e1) + 1;

    % Get the actual expertise of the initial expert
    actualExpertise = getExpertiseShort(topicMean(e1,topic), topicVariance(e1,topic));

    % The initial expert solves the question with probability actualExpertise
    r = rand(1,1);

    if(r <= actualExpertise)
        totalReward = totalReward + 1;
        ADetailed(e1,ANum(e1)) = 1;

    else

        % Column 101 holds the number of candidates, columns 1..numCandidates
        % hold their indices.
        numCandidates = referralCandidates(e1,101);
        candidates = referralCandidates(e1,1:numCandidates);

        % Reset the selection on every step so that a choice made in an
        % earlier step can never leak into this one.
        bestExpectedReward = -Inf;
        bestExpert = [];

        for k=1:numCandidates

                numObservations = rewardsNumObs(e1,topic,candidates(k));
                meanReward = rewardsMean(e1,topic,candidates(k));

                % Sample standard deviation from the sum of squared deviations
                stdReward = rewardsSqSum(e1,topic,candidates(k));
                stdReward = stdReward / (numObservations - 1);
                stdReward = realsqrt(stdReward);

                factor = realsqrt(numObservations);

                % Upper confidence score: mean + t * std / sqrt(n).
                % NOTE(review): assumes tinv90 has at least
                % numObservations - 1 entries - confirm the table length.
                expectedReward = meanReward + stdReward/factor*(tinv90(numObservations - 1));

                % ">=" keeps the original tie-breaking: the last candidate
                % with the best score wins.
                if(expectedReward >= bestExpectedReward)
                    bestExpectedReward = expectedReward;
                    bestExpert = candidates(k);
                end
        end

        % With no selectable candidate there is nobody to refer to: the
        % question stays unsolved and no statistics are touched.
        if(~isempty(bestExpert))

            e2 = bestExpert;

            % Get the actual expertise of the referred expert
            actualExpertise = getExpertiseShort(topicMean(e2,topic), topicVariance(e2,topic));

            % Check if the referred expert solves the question
            r = rand(1,1);

            % Increment the number of observations
            rewardsNumObs(e1,topic,e2) = rewardsNumObs(e1,topic,e2) + 1;

            reward  = 0;
            if(r <= actualExpertise)
                totalReward = totalReward + 1;

                % A successful referral is credited to the initial expert
                ADetailed(e1,ANum(e1)) = 1;
                reward = 1;
            end

            % Incremental update of the mean and of the sum of squared
            % deviations with the newly observed reward.
            oldMean = rewardsMean(e1,topic,e2);
            oldSqSum = rewardsSqSum(e1,topic,e2);

            delta = reward - oldMean;
            newMean = oldMean + delta / rewardsNumObs(e1,topic,e2);
            newSqSum = oldSqSum + delta * (reward - newMean);

            rewardsMean(e1,topic,e2) = newMean;
            rewardsSqSum(e1,topic,e2) = newSqSum;

        end

    end

    % This update is for debugging purposes: store the number of solved
    % questions per window of 100 steps, then restart the counter.
    if(mod(iter,100) == 0)
        A(iter/100) = totalReward;
        totalReward = 0;
    end


end


toc

% A debug-purpose rough estimate of the overall performance: average of the
% last 101 windows (A(4900:5000) for iterLength = 500000).
cumulativeAccuracy = cumulativeAccuracy + mean(A(end-100:end));
mean(A(end-100:end))
cumulativeAccuracy/instance


save(strcat('ReferralResults/ECAI2016',num2str(instance),'seq',num2str(sequence),'.mat'),'ADetailed','ANum'); 
end
 
