% Proactive DIEL_t referral-learning simulation.
%
% For each of 1000 dataset instances the script
%   1. loads the instance data and the question sequence,
%   2. initializes per-(referrer, topic, candidate) reward statistics through
%      initPriorBoundedEUMAS,
%   3. replays iterLength (expert, question) pairs: the initial expert tries
%      to solve the question and, on failure, refers it to the candidate with
%      the highest optimistic score (mean + std / sqrt(n)),
%   4. saves ADetailed and ANum for the instance.
%
% referralCandidates, topicMean, topicVariance, expertQuestionPair and
% questionTopic are not defined here; presumably they come from the loaded
% .mat files -- TODO confirm.
clear all;

% Running sum over instances of the late-run mean reward per 100 iterations.
CumulativeResult = 0;

%The two configurable parameters of proactive DIEL_t
C1 = 50; % damping constant in the distrust weight n / (n + C1)
C2 = 1;  % multiplier of the distrust penalty subtracted from the reward

for instance = 1:1000
    sequence = 1;
    instance % intentionally unsuppressed: echoes progress to the console

    load(strcat('Referral/datasetLite',num2str(instance),'.mat'));
    % NOTE(review): file name is hard-coded although `sequence` exists.
    load('sequence1.mat');

    % ADetailed(e, j) is set to 1 when the j-th question handed to expert e
    % is solved (directly or after referral); ANum(e) counts those questions.
    % NOTE(review): ANum has 500 rows while the other arrays use 100 experts.
    ADetailed = zeros(100, 5000);
    ANum = zeros(500,1);
    AMean = zeros(5000,1);

    % A(k) holds the reward collected in the k-th window of 100 iterations.
    A = zeros(5000,1);

    % Per-(referrer, topic, candidate) running statistics of the reward.
    rewardsMean = zeros(100, 10, 100);
    rewardsSqSum = zeros(100,10,100);
    rewardsNumObs = zeros(100,10,100);
    explicitBid = zeros(100,10,100);

    % Counts of failed / successful referrals, used for the observed mean.
    rewardsZeros = zeros(100,10,100);
    rewardsOnes = zeros(100,10,100);

    % Every cell starts with two observations; the matching prior mean and
    % squared-deviation sum are filled in by initPriorBoundedEUMAS below.
    rewardsNumObs(:) = 2;

    for i = 1:100
        numCandidates = referralCandidates(i,101);
        candidates = referralCandidates(i,1:numCandidates);
        budget = 2 * numCandidates;
        [rewardsMean,rewardsSqSum, explicitBid]  = initPriorBoundedEUMAS(i, topicMean(i,:), candidates, budget, rewardsMean, rewardsSqSum, explicitBid);
    end

    % Snapshot of the means right after initialization; used below as the
    % advertised mean when measuring distrust.
    initialRewardsMean = rewardsMean;
    iterLength = 500000;
    referral = 0; % not read anywhere in this script

    tic

    totalReward = 0;

    for iter = 1:iterLength

        e1 = expertQuestionPair(iter,1);
        q = expertQuestionPair(iter,2);
        topic = questionTopic(q);

        ANum(e1) = ANum(e1) + 1;

        %Get the actual expertise.
        actualExpertise = getExpertiseShort(topicMean(e1,topic), topicVariance(e1,topic));

        %If the expert solves the question
        r = rand(1,1);

        if(r <= actualExpertise)
            totalReward = totalReward + 1;
            ADetailed(e1,ANum(e1)) = 1;
        else
            %We need to refer it to another expert. For which we have to find
            %the best expert according to the expected reward.

            %Get the candidates
            numCandidates = referralCandidates(e1,101);
            candidates = referralCandidates(e1,1:numCandidates);

            % Start from -Inf and no selection. The stored mean can become
            % negative because of the distrust penalty, so a threshold of 0
            % could reject every candidate and leave bestExpert holding the
            % choice of an earlier iteration.
            bestExpectedReward = -Inf;
            bestExpert = [];

            %Randomize the sequence of experts
            expertSequence = randperm(numCandidates);

            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            % DIEL Block
            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

            for k = 1:numCandidates
                candidate = candidates(expertSequence(k));

                numObservations = rewardsNumObs(e1,topic,candidate);
                meanReward = rewardsMean(e1,topic,candidate);

                % Sample standard deviation from the running sum of squared
                % deviations.
                stdReward = realsqrt(rewardsSqSum(e1,topic,candidate) / (numObservations - 1));

                % Optimistic score: mean plus the standard error of the mean.
                expectedReward = meanReward + stdReward / realsqrt(numObservations);

                % >= lets the last of several tied candidates win; the random
                % visiting order makes that tie-break random.
                if(expectedReward >= bestExpectedReward)
                    bestExpectedReward = expectedReward;
                    bestExpert = candidate;
                end
            end

            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

            % Skip the referral when nothing could be selected (no candidates,
            % or every score was NaN); the question then stays unsolved.
            if(~isempty(bestExpert))
                e2 = bestExpert;

                %Get the actual expertise.
                actualExpertise = getExpertiseShort(topicMean(e2,topic), topicVariance(e2,topic));

                %If the expert solves the question
                r = rand(1,1);

                %Increase number of observations
                rewardsNumObs(e1,topic,e2) = rewardsNumObs(e1,topic,e2) + 1;

                if(r <= actualExpertise)
                    rewardsOnes(e1,topic,e2) = rewardsOnes(e1,topic,e2) + 1;
                    totalReward = totalReward + 1;
                    ADetailed(e1,ANum(e1)) = 1;
                    reward = 1;
                else
                    rewardsZeros(e1,topic,e2) = rewardsZeros(e1,topic,e2) + 1;
                    reward = 0;
                end

                %%Trustfactors is a 2x3 matrix. For both explicit bids, a row
                % stores the absolute difference of the advertised and observed
                % means, the advertised mean (this is required to identify the
                % best skill), and the number of observed samples.
                trustFactors = zeros(2,3);
                skillIter = 0;
                totalSamples = 0;
                implicitSamples = 0;

                for topicIter = 1:10
                    numSamples = rewardsZeros(e1,topicIter,e2) + rewardsOnes(e1,topicIter,e2);
                    totalSamples = totalSamples + numSamples;

                    if(explicitBid(e1,topicIter,e2) == 1)
                        skillIter = skillIter + 1;

                        % Without any sample the observed mean falls back to the
                        % advertised one, i.e. zero discrepancy.
                        if(numSamples == 0)
                            estimatedMean = initialRewardsMean(e1,topicIter,e2);
                        else
                            estimatedMean = rewardsOnes(e1,topicIter,e2) / numSamples;
                        end

                        discrepancy = abs(initialRewardsMean(e1,topicIter,e2) - estimatedMean);

                        trustFactors(skillIter,1) = discrepancy; % |mu_advertised - mu_observed|
                        trustFactors(skillIter,2) = initialRewardsMean(e1,topicIter,e2); %|mu_advertised|
                        trustFactors(skillIter,3) = numSamples;% n_best(or secondBest)
                    end
                end

                % Distrust = sum over the two bid rows of discrepancy * n / (n + C1).
                % NOTE(review): the previous code branched on which row had the
                % larger advertised mean, but both branches added the same two
                % terms, so the result never depended on that comparison. If the
                % best skill was meant to be weighted differently, add it here.
                firstTerm = (trustFactors(1,1) * trustFactors(1,3)) / (trustFactors(1,3) + C1);
                secondTerm = (trustFactors(2,1) * trustFactors(2,3)) / (trustFactors(2,3) + C1);
                distrust = firstTerm + secondTerm;

                reward = reward - (C2 * distrust);

                %Update mean and variance (incremental update of the running
                %mean and of the sum of squared deviations)
                oldMean = rewardsMean(e1,topic,e2);
                oldSqSum = rewardsSqSum(e1,topic,e2);

                delta = reward - oldMean;
                newMean = oldMean + delta / rewardsNumObs(e1,topic,e2);
                newSqSum = oldSqSum + delta * (reward - newMean);

                rewardsMean(e1,topic,e2) = newMean;
                rewardsSqSum(e1,topic,e2) = newSqSum;
            end
        end

        %Update if necessary (this just gives a rough estimate of the result)
        if(mod(iter,100) == 0)
            A(iter/100) = totalReward;
            totalReward = 0;
        end

    end

    toc

    % NOTE(review): 4900:5000 covers 101 windows, not 100 -- confirm intent.
    CumulativeResult = CumulativeResult + mean(A(4900:5000));
    CumulativeResult/instance % intentionally unsuppressed: running average

    save(strcat('ReferralResults/ProactiveDIELEUMAS2017',num2str(instance),'seq',num2str(sequence),'.mat'),'ADetailed','ANum');
end
 
