%Minimum Error Rate Training For Statistical Machine Translation %Copyright (2005) Ashish Venugopal %This program is free software; you can redistribute it and/or %modify it under the terms of the GNU General Public License %as published by the Free Software Foundation; either version 2 %of the License, or (at your option) any later version. %This program is distributed in the hope that it will be useful, %but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % You should have received a copy of the GNU General Public % License along with this program; if not, write to the Free % Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA ALLDATA = load(feats_opt); CANDS = load(cands_opt); %init opt is now going to have ranges for each parameter INIT = load(init_opt); disp('Loaded translation data'); startEndIndices = zeros(size(CANDS,1), 2); startIndex=1; count = 0; for s=1:1:size(CANDS,1) endIndex = startIndex + CANDS(s, 2)-1; count = count + 1; startEndIndices(count, :) = [startIndex, endIndex]; startIndex=endIndex+1; end [nlines, NF] = size(INIT); ALLFEAT = ALLDATA(:, 1:NF); ALLSCORES = ALLDATA(:, NF+1:size(ALLDATA,2)); clear ALLDATA; %these variables are read in from the parameter restriction file leftBound = INIT(1, :) rightBound = INIT(2, :) startingParams = INIT(3, :); %when a lambda is within ExpansionMargin percentage ratio %of either (symetric) bound, then multiply the bound by %the ExpansionFactor ExpansionMargin = 0.0; ExpansionFactor = 1.2; %number of times lambdas are randomly initialized for optimization NumRandomTests = 5; PermutationsEpsilon = 0.0001; %generate initial sums initialScore = zeros(1, size(ALLSCORES,2)); numSentences = size(startEndIndices,1); for s=1:numSentences sentStart = startEndIndices(s, 1); initialScore = initialScore+ALLSCORES(sentStart, :);; end numgrams = (size(initialScore,2)-1)/2.0; referenceLength = initialScore(1, length(initialScore)); ngramData = initialScore(1, 1:length(initialScore)-1); ngramData = reshape(ngramData, 2, numgrams); basePrecision = ngramData(1,:)./ngramData(2,:); %ngramData(2,1) is the number of unigrams suggested lratio = referenceLength/ngramData(2,1); lpen = 1.0; if lratio>1.0 lpen = exp(1.0-lratio); end bleuScore = (lpen*exp(mean(log(basePrecision)))); newError = 1.0-(lpen*exp(mean(log(basePrecision)))); str = 'InitialScore:'; str = sprintf('%s %f\n',str,bleuScore); disp(str); %saving the full optimization data after each complete iteration FullResults = zeros(NumRandomTests, NF+1); %top level randomization of start values for (topLevel=1:NumRandomTests) %generate random values in the restricted ranges, but dont %initialize the sampled values right next to the bounds, since then %the Expansion will always kick in. This way if the optimization %finds better parameters in the margin, only then will be have to %expand the range % if there are three rows in INIT and this is the first % iteration, then use the value from the last full run if (topLevel==1 && nlines==3) LAMBDAS = startingParams; else LAMBDAS = (leftBound+ExpansionMargin) + ((rightBound-ExpansionMargin)-(leftBound+ExpansionMargin)).*rand(1, NF); end str = 'Start Values:'; disp(str); str = ''; for i=1:length(LAMBDAS) str = sprintf('%s %f',str,LAMBDAS(i)); end disp(str); %lastValue hold initial degenerate values of lambda lastValue = ones(1,size(LAMBDAS,2))*-1.0; %holds the degerate value for the error savedError = 1.0; lastError = 1.0; numIter = 1; ConvergedLimit = 2; converged = 0; IterationLimit = 20; %generate a random permutation of the dimension indices to vary %the search order, acrosss random initializations %ARV 06/20 oRange = randperm(NF); while (numIter= slopeOfRightCandidate); %initialize data that must be saved for this iteration %numIntersects starts at 1 since the first intersection point=leftBound numIntersects = 1; intersectPoints = zeros(numCands+1, 2); intersectPoints(1, :) = [leftBound(optiDim) minLeftValue]; %initialize the errorDelta that comes from crosssing the leftBound errorDeltas = zeros(numCands+1, size(SCORES,2)+1); errorDeltas(1, :) = [intersectPoints(1, 1) SCORES(minLeftCandidate, :)]; %used to create error deltas across intersection boundaries workingError = SCORES(minLeftCandidate, :); %the relevant indices determine which candidate to consider for %evaluation. currently those sentences which have a slope lower %than the current sentence are chosen, becuase these %sentences have a chance of intersecting the current %sentence and becoming the lowest cost translation while (length(relevantIndices)>0) bestL = rightBound(optiDim); bestValue = minRightValue; bestSlope = slopeOfLeftCandidate; %to save the new index of the min candidate newMinCandidate = minLeftCandidate; for c=1:length(relevantIndices) x = relevantIndices(c); top = a(x,1)-a(minLeftCandidate,1); bottom = slopes(minLeftCandidate,1)-slopes(x,1); if (bottom~=0) newL = top/bottom; if (newL < bestL) candidateSlope = slopes(x); bestL = newL; bestValue = a(x)+ (newL*candidateSlope); newMinCandidate = c; bestSlope = candidateSlope; end %allows new intersections to be proposed that intersect at the %same lambda. In this case consider only the one with the %steeper slope since it will be lower in the next region if (newL == bestL & (slopes(x)= slopeOfRightCandidate); end %ends the search for all relevant indices within this sentence %prunes the lists down to their actual lengths intersectPoints = intersectPoints(1:numIntersects, :); errorDeltas = errorDeltas(1:numIntersects, :); totalErrorDelta = [totalErrorDelta; errorDeltas]; end %end of search thru each sentence totalErrorDelta = sortrows(totalErrorDelta); %generate bleu scores with length penalty for these boundaries lenTotalErrorDelta = length(totalErrorDelta); errorLineLen = size(totalErrorDelta, 2); numgrams = (size(ALLSCORES,2)-1)/2.0; % a 0 count line vector to compare against zeroLine = zeros(1, errorLineLen-1); totalErrorDelta = [totalErrorDelta; [rightBound(optiDim), zeroLine]]; totalBaseError = zeroLine; %set the lowest error to the max possible error of this metric lowestError = 1.0; lowestLambda = leftBound(optiDim); for bIndex=1:lenTotalErrorDelta %increment the base count data incrementalError = totalErrorDelta(bIndex,2:errorLineLen); %no need to recalculate the error if there is no change in error count if (any(incrementalError)) totalBaseError = totalBaseError + incrementalError; %if there are several boundary values that are the same we want to %only consider a difference in score once they have all been %accounted for if (totalErrorDelta(bIndex, 1)~=totalErrorDelta(bIndex+1, 1)) %calculate the new bleu score referenceLength = totalBaseError(1, length(totalBaseError)); ngramData = totalBaseError(1, 1:length(totalBaseError)-1); ngramData = reshape(ngramData, 2, numgrams); basePrecision = ngramData(1,:)./ngramData(2,:); %ngramData(2,1) is the number of unigrams suggested lratio = referenceLength/ngramData(2,1); lpen = 1.0; if lratio>1.0 lpen = exp(1.0-lratio); end newError = 1.0-(lpen*exp(mean(log(basePrecision)))); %see if an improvement has been made if (newError < lowestError) lowestError = newError; lowestLambda = (totalErrorDelta(bIndex, 1)+totalErrorDelta(bIndex+1, 1))/2.0; end %use this to plot an error surface if you want %errorSurface = [errorSurface; [totalErrorDelta(bIndex, 1) totalErrorDelta(bIndex+1, 1) ((totalErrorDelta(bIndex, 1)+totalErrorDelta(bIndex+1, 1))/2.0) newError]]; end end end if lowestError<=savedError savedError=lowestError; LAMBDAS(1,optiDim)=lowestLambda; else disp('Rejecting optimized value ------------'); %keyboard; LAMBDAS(1,optiDim) = savedLambda; end for i=1:length(LAMBDAS) if var(ALLFEAT(:,i))==0 LAMBDAS(i)=0; end end str = ''; for i=1:length(LAMBDAS) str = sprintf('%s %f',str,LAMBDAS(i)); end str = sprintf('%s %f', str, savedError); disp(str); end disp('Iteration Completed'); FullResults(topLevel,:) = [LAMBDAS, savedError]; if (abs(savedError-lastError)0)) rightBound(optiDim) = ExpansionFactor*rightBound(optiDim); expstr = sprintf('Right Expanding %d to %f', optiDim, ... rightBound(optiDim)); disp(expstr); didExpansion = 1; end end if (leftBound(optiDim) ~= 0) if (((abs(leftBound(optiDim)-LAMBDAS(optiDim))/abs(leftBound(optiDim))