%Minimum Error Rate Training For Statistical Machine Translation
%Copyright (2005) Ashish Venugopal

%This program is free software; you can redistribute it and/or
%modify it under the terms of the GNU General Public License
%as published by the Free Software Foundation; either version 2
%of the License, or (at your option) any later version.
%This program is distributed in the hope that it will be useful,
%but WITHOUT ANY WARRANTY; without even the implied warranty of 
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%  You should have received a copy of the GNU General Public
%  License along with this program; if not, write to the Free
%  Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA

ALLDATA = load(feats_opt);
CANDS = load(cands_opt);

%init opt is now going to have ranges for each parameter
INIT = load(init_opt);


disp('Loaded translation data');
startEndIndices = zeros(size(CANDS,1), 2);
startIndex=1;
count = 0;
for s=1:1:size(CANDS,1)
    endIndex = startIndex + CANDS(s, 2)-1;
    count = count + 1;
    startEndIndices(count, :) = [startIndex, endIndex];
    startIndex=endIndex+1;
end



[nlines, NF] = size(INIT);
ALLFEAT = ALLDATA(:, 1:NF);

ALLSCORES = ALLDATA(:, NF+1:size(ALLDATA,2));
clear ALLDATA;

%these variables are read in from the parameter restriction file
leftBound = INIT(1, :)
rightBound = INIT(2, :)
startingParams = INIT(3, :);

%when a lambda is within ExpansionMargin percentage ratio
%of either (symetric) bound, then multiply the bound by
%the ExpansionFactor
ExpansionMargin = 0.0;
ExpansionFactor = 1.2;

%number of times lambdas are randomly initialized for optimization
NumRandomTests = 5;
PermutationsEpsilon = 0.0001;

%generate initial sums
initialScore = zeros(1, size(ALLSCORES,2));
numSentences = size(startEndIndices,1);


for s=1:numSentences
  
  sentStart = startEndIndices(s, 1);
  initialScore = initialScore+ALLSCORES(sentStart, :);;
  
end

numgrams = (size(initialScore,2)-1)/2.0;
referenceLength = initialScore(1, length(initialScore));
ngramData = initialScore(1, 1:length(initialScore)-1);

ngramData = reshape(ngramData, 2, numgrams);
basePrecision = ngramData(1,:)./ngramData(2,:);

%ngramData(2,1) is the number of unigrams suggested
lratio = referenceLength/ngramData(2,1);
lpen = 1.0;
if lratio>1.0
  lpen = exp(1.0-lratio);
end 
bleuScore = (lpen*exp(mean(log(basePrecision))));
newError = 1.0-(lpen*exp(mean(log(basePrecision))));
str = 'InitialScore:';
str = sprintf('%s %f\n',str,bleuScore);

disp(str);


%saving the full optimization data after each complete iteration

FullResults = zeros(NumRandomTests, NF+1);

%top level randomization of start values
for (topLevel=1:NumRandomTests)
  
  %generate random values in the restricted ranges, but dont
  %initialize the sampled values right next to the bounds, since then
  %the Expansion will always kick in. This way if the optimization
  %finds better parameters in the margin, only then will be have to
  %expand the range
  % if there are three rows in INIT and this is the first
  % iteration, then use the value from the last full run
  if (topLevel==1 && nlines==3)
    LAMBDAS = startingParams;
  else
    LAMBDAS = (leftBound+ExpansionMargin) + ((rightBound-ExpansionMargin)-(leftBound+ExpansionMargin)).*rand(1, NF);
  end
  str = 'Start Values:';
  disp(str);
  str = '';
  for i=1:length(LAMBDAS)
    str = sprintf('%s %f',str,LAMBDAS(i));
  end
  disp(str);
  
  
  %lastValue hold initial degenerate values of lambda
  lastValue = ones(1,size(LAMBDAS,2))*-1.0;
  %holds the degerate value for the error
  savedError = 1.0;
  lastError = 1.0;
  
  numIter = 1;
  ConvergedLimit = 2;
  
  converged = 0;
  IterationLimit = 20;
  
  
  %generate a random permutation of the dimension indices to vary
  %the search order, acrosss random initializations
  %ARV 06/20
  oRange = randperm(NF); 
  while (numIter<IterationLimit && converged<=ConvergedLimit);
    numIter = numIter + 1;
    lastValue = LAMBDAS;
    %go thru each dimension
    
    for optiDim=oRange 
      %save the value of lambda in case the error increases
      %this error increase only happens due errors in precions
      %when differentiating between very close boundaries
      
      savedLambda = LAMBDAS(1, optiDim);
      %cancel out this dimension so that we can evaluate it at
      %different values
      LAMBDAS(1, optiDim) = 0;
      totalErrorDelta = [];
      
      %compute error boundaries for each sentence individually
      for s=1:numSentences
	
        sentStart = startEndIndices(s, 1);
        sentEnd = startEndIndices(s, 2);
        %s
    
        %prepare the sentence specific data
        FEAT = ALLFEAT(sentStart:sentEnd,:);
        SCORES = ALLSCORES(sentStart:sentEnd,:);
	
        [numCands, numFeats] = size(FEAT);

        %determine basic intercept and slope information for this sentence
        a = FEAT*LAMBDAS';
        slopes = FEAT(:, optiDim);

        [minLeftValue, minLeftCandidate] = min(a + slopes*leftBound(optiDim));
        [minRightValue, minRightCandidate] = min(a + slopes*rightBound(optiDim));

	originalMinLeftCandidate = minLeftCandidate;
	originalMinRightCandidate = minRightCandidate;
	

        slopeOfRightCandidate = slopes(minRightCandidate);
        slopeOfLeftCandidate = slopes(minLeftCandidate);
        relevantIndices = find (slopes < slopeOfLeftCandidate & slopes >= slopeOfRightCandidate);


        %initialize data that must be saved for this iteration
        %numIntersects starts at 1 since the first intersection point=leftBound
        numIntersects = 1;     
	intersectPoints = zeros(numCands+1, 2);
        intersectPoints(1, :) = [leftBound(optiDim) minLeftValue];

        %initialize the errorDelta that comes from crosssing the leftBound
        errorDeltas = zeros(numCands+1, size(SCORES,2)+1);
        errorDeltas(1, :) = [intersectPoints(1, 1) SCORES(minLeftCandidate, :)];
        %used to create error deltas across intersection boundaries
        workingError = SCORES(minLeftCandidate, :);
	


        %the relevant indices determine which candidate to consider for
        %evaluation. currently those sentences which have a slope lower
        %than the current sentence are chosen, becuase these
        %sentences have a chance of intersecting the current
        %sentence and becoming the lowest cost translation
        while (length(relevantIndices)>0)
    
            bestL = rightBound(optiDim);
            bestValue = minRightValue;
            bestSlope = slopeOfLeftCandidate;
    
    
            %to save the new index of the min candidate
            newMinCandidate = minLeftCandidate;
        
            for c=1:length(relevantIndices)
        
                x = relevantIndices(c);
        
                top = a(x,1)-a(minLeftCandidate,1);
                bottom = slopes(minLeftCandidate,1)-slopes(x,1);
        
                if (bottom~=0)
                    newL = top/bottom;
        
                    if (newL < bestL)
                        candidateSlope = slopes(x);
                        bestL = newL;
                        bestValue = a(x)+ (newL*candidateSlope);
                        newMinCandidate = c;
                        bestSlope = candidateSlope;               
                    end
            
                    %allows new intersections to be proposed that intersect at the
                    %same lambda. In this case consider only the one with the
                    %steeper slope since it will be lower in the next region
                    if (newL == bestL & (slopes(x)<bestSlope))
                        candidateSlope = slopes(x);
                        bestL = newL;
                        bestValue = a(x)+ (newL*candidateSlope);
                        newMinCandidate = c;
                        bestSlope = candidateSlope;
                    end
                end 
            end
    
            %we have found a valid intersection point and a the candidate index
            numIntersects = numIntersects + 1;
            intersectPoints(numIntersects,:) =  [bestL bestValue];
	    
            %reduce the data that is considered relevant for training
            slopes = slopes(relevantIndices);
            a = a(relevantIndices);
            SCORES = SCORES(relevantIndices, :);
    
            minLeftCandidate= newMinCandidate;
            slopeOfLeftCandidate = slopes(minLeftCandidate);

            errorD = SCORES(minLeftCandidate, :) - workingError; 
            errorDeltas(numIntersects, :) = [intersectPoints(numIntersects, 1) errorD];
            workingError = SCORES(minLeftCandidate, :);
    
            relevantIndices = find (slopes < slopeOfLeftCandidate & slopes >= slopeOfRightCandidate);
    
        end
	
        %ends the search for all relevant indices within this sentence
        %prunes the lists down to their actual lengths
        intersectPoints = intersectPoints(1:numIntersects, :);
        errorDeltas = errorDeltas(1:numIntersects, :);
    
        totalErrorDelta = [totalErrorDelta; errorDeltas];
    end %end of search thru each sentence

    totalErrorDelta = sortrows(totalErrorDelta);

    %generate bleu scores with length penalty for these boundaries
    lenTotalErrorDelta = length(totalErrorDelta);
    errorLineLen = size(totalErrorDelta, 2);
    numgrams = (size(ALLSCORES,2)-1)/2.0;

    % a 0 count line vector to compare against
    zeroLine = zeros(1, errorLineLen-1);

    totalErrorDelta = [totalErrorDelta; [rightBound(optiDim), zeroLine]];
    totalBaseError = zeroLine;
    
    %set the lowest error to the max possible error of this metric
    lowestError = 1.0;
    lowestLambda = leftBound(optiDim);
    
    for bIndex=1:lenTotalErrorDelta
        %increment the base count data
        incrementalError = totalErrorDelta(bIndex,2:errorLineLen);
    
        %no need to recalculate the error if there is no change in error count
    
        if (any(incrementalError))
                   
            totalBaseError = totalBaseError + incrementalError;
        
            %if there are several boundary values that are the same we want to
            %only consider a difference in score once they have all been
            %accounted for
 
            if (totalErrorDelta(bIndex, 1)~=totalErrorDelta(bIndex+1, 1))
            
            
                %calculate the new bleu score
                referenceLength = totalBaseError(1, length(totalBaseError));
                ngramData = totalBaseError(1, 1:length(totalBaseError)-1);

                ngramData = reshape(ngramData, 2, numgrams);
                basePrecision = ngramData(1,:)./ngramData(2,:);
            
                %ngramData(2,1) is the number of unigrams suggested
                lratio = referenceLength/ngramData(2,1);
                lpen = 1.0;
                if lratio>1.0
                        lpen = exp(1.0-lratio);
                end 
                newError = 1.0-(lpen*exp(mean(log(basePrecision))));

                %see if an improvement has been made
                if (newError < lowestError)
                    lowestError = newError;
                    lowestLambda = (totalErrorDelta(bIndex, 1)+totalErrorDelta(bIndex+1, 1))/2.0;

                end
		%use this to plot an error surface if you want
                %errorSurface = [errorSurface; [totalErrorDelta(bIndex, 1) totalErrorDelta(bIndex+1, 1)  ((totalErrorDelta(bIndex, 1)+totalErrorDelta(bIndex+1, 1))/2.0) newError]];
            
            end
        
        end
    end
    
    if lowestError<=savedError
        savedError=lowestError;
        LAMBDAS(1,optiDim)=lowestLambda;
    else
        disp('Rejecting optimized value ------------');
                %keyboard;
        LAMBDAS(1,optiDim) = savedLambda;
    end

    for i=1:length(LAMBDAS)
       if var(ALLFEAT(:,i))==0
           LAMBDAS(i)=0;
       end
    end
   
    str = '';
    for i=1:length(LAMBDAS)
        str = sprintf('%s %f',str,LAMBDAS(i));
    end
    str = sprintf('%s %f', str, savedError);
    disp(str);
    end
    disp('Iteration Completed');

    FullResults(topLevel,:) = [LAMBDAS, savedError]; 

    if (abs(savedError-lastError)<PermutationsEpsilon)
      converged = converged+1;
      disp('Increment Convergence');
    else
      converged = 0;
    end

    lastError = savedError;
end




%perform expansion
didExpansion = 0;
for optiDim=1:NF

  %this prevents expansion across 0
  if (rightBound(optiDim) ~= 0)
    if ((((rightBound(optiDim)-LAMBDAS(optiDim))/rightBound(optiDim))<ExpansionMargin) & (rightBound(optiDim)>0))
      rightBound(optiDim) = ExpansionFactor*rightBound(optiDim);
      expstr = sprintf('Right Expanding %d to %f', optiDim, ...
		       rightBound(optiDim));
      disp(expstr);
      didExpansion = 1;
    end
  end
  
  if (leftBound(optiDim) ~= 0)
    if (((abs(leftBound(optiDim)-LAMBDAS(optiDim))/abs(leftBound(optiDim))<ExpansionMargin) & (leftBound(optiDim)<0)))
      leftBound(optiDim) = ExpansionFactor*leftBound(optiDim);
      expstr = sprintf('Left Expanding %d to %f', optiDim, ...
		       leftBound(optiDim));
      disp(expstr);
      didExpansion = 1;
    end
  end
  %if expansion occurred, this implies that we need to expand our
  %search range, and therefore we should get one more random test to
  %do this.
  if (didExpansion==1)
    NumRandomTests = NumRandomTests + 1;
  end


end

str = '';
str = sprintf('%s %f',str,LAMBDAS(i));

str = sprintf('Inter %s %f', str, savedError);
disp(str);
str = sprintf('InterOpt: %f', 1.0-savedError);
disp(str);


%end of random initializations
end 


[mval, mRow] = min(FullResults(:, length(LAMBDAS)+1));
str='';
for i=1:length(LAMBDAS)
    str = sprintf('%s %f',str,FullResults(mRow, i));
end
str = sprintf('FinalParameters %s %f', str, mval);
disp(str);
str = sprintf('Final Score %f',1.0-mval);
disp(str);
exit;