// optimizerBFGS.h

#ifndef _DLR_OPTIMIZERBFGS_H_
#define _DLR_OPTIMIZERBFGS_H_

#include <vector>
#include <dlrCommon/triple.h>
#include <dlrOptimization/optimizer.h>
#include <dlrOptimization/optimizerLineSearch.h>

namespace dlr {

  namespace optimization {

    template <class Functor>
    class OptimizerBFGS
      : public Optimizer<Functor>
    {
    public:
      // Typedefs for convenience
      typedef typename Functor::argument_type argument_type;
      typedef typename Functor::result_type result_type;

      OptimizerBFGS();

      explicit OptimizerBFGS(const Functor& functor);

      OptimizerBFGS(const OptimizerBFGS& source);

      virtual
      ~OptimizerBFGS();

      virtual std::vector<size_t>
      getNumberOfFunctionCalls() {return this->m_functionCallCount;}

      virtual std::vector<size_t>
      getNumberOfGradientCalls() {return this->m_gradientCallCount;}

      virtual std::vector<size_t>
      getNumberOfIterations() {return this->m_iterationCount;}
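
      // Bookkeeping note: run() records one entry per optimization pass,
      // so after minimizing with numberOfRestarts == N each of the three
      // vectors above holds N + 1 elements (the initial pass plus one per
      // restart).  Illustrative sketch ("bfgs" is a hypothetical,
      // already-run optimizer instance):
      //
      //   std::vector<size_t> calls = bfgs.getNumberOfFunctionCalls();
      //   size_t totalCalls = 0;
      //   for(size_t ii = 0; ii < calls.size(); ++ii) {
      //     totalCalls += calls[ii];  // Total objective evaluations.
      //   }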

      virtual void
      setParameters(size_t iterationLimit = 500,
                    size_t numberOfRestarts = 1,
                    double argumentTolerance = 1.2E-7, // Typically ~4 * numericEpsilon.
                    double gradientTolerance = 0.00001,
                    double lineSearchAlpha = 1.0E-4,
                    double lineSearchArgumentTolerance = 1.0e-7,
                    double numericEpsilon = 3.0E-8,
                    double maximumStepMagnitudeFactor = 100.0);
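
      // Illustrative example: tighten the gradient tolerance and allow
      // more iterations, keeping the remaining arguments at their
      // declared defaults ("optimizer" is a hypothetical OptimizerBFGS
      // instance).
      //
      //   optimizer.setParameters(1000,     // iterationLimit
      //                           1,        // numberOfRestarts
      //                           1.2E-7,   // argumentTolerance
      //                           1.0E-6);  // gradientTolerance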


      virtual void
      setIterationLimit(size_t iterationLimit) {
        this->m_iterationLimit = iterationLimit;
      }


      virtual void
      setNumberOfRestarts(size_t numberOfRestarts) {
        this->m_numberOfRestarts = numberOfRestarts;
      }


      virtual void
      setStartPoint(const typename Functor::argument_type& startPoint);


      virtual void
      setVerbosity(int verbosity) {}


      virtual OptimizerBFGS&
      operator=(const OptimizerBFGS& source);

    protected:

      double
      gradientConvergenceMetric(const argument_type& theta,
                                const result_type& value,
                                const argument_type& gradient);

      virtual
      std::pair<typename Functor::argument_type, typename Functor::result_type>
      run();

      Triple<typename Functor::argument_type, typename Functor::result_type,
             typename Functor::argument_type>
      doBfgs(const argument_type& theta,
             const result_type& startValue,
             const argument_type& startGradient,
             size_t& numberOfFunctionCalls,
             size_t& numberOfGradientCalls,
             size_t& numberOfIterations);
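
      // The returned Triple holds the optimal argument, the function value
      // at that argument, and the gradient at that argument, in that order.
      // Note that when doBfgs() converges via the argument-tolerance test
      // it returns an empty gradient, since the gradient at the final point
      // has not been computed (see the definition below).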

      // Data members.
      double m_argumentTolerance;
      size_t m_iterationLimit;
      double m_gradientTolerance;
      double m_lineSearchAlpha;
      double m_lineSearchArgumentTolerance;
      double m_maximumStepMagnitudeFactor;
      size_t m_numberOfRestarts;
      double m_numericEpsilon;
      OptimizerLineSearch<Functor> m_optimizerLineSearch;
      argument_type m_startPoint;

      // Data members used for bookkeeping.
      std::vector<size_t> m_functionCallCount;
      std::vector<size_t> m_gradientCallCount;
      std::vector<size_t> m_iterationCount;

    }; // class OptimizerBFGS

  } // namespace optimization

} // namespace dlr


/* ======= Declarations to maintain compatibility with legacy code. ======= */

namespace dlr {

  using optimization::OptimizerBFGS;

} // namespace dlr
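
/* Usage sketch (illustrative only).  "MyObjective" is a hypothetical
 * functor exposing argument_type, result_type, operator(), and
 * gradient(); retrieving the result through optimum() and optimalValue()
 * assumes the Optimizer base class provides those accessors, as their use
 * on m_optimizerLineSearch below suggests.
 *
 *   MyObjective objective;
 *   MyObjective::argument_type initialGuess = ...;  // Starting point.
 *   dlr::optimization::OptimizerBFGS<MyObjective> optimizer(objective);
 *   optimizer.setParameters(500, 1);        // Iteration limit, restarts.
 *   optimizer.setStartPoint(initialGuess);
 *   MyObjective::argument_type best = optimizer.optimum();
 *   MyObjective::result_type bestValue = optimizer.optimalValue();
 */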


/*******************************************************************
 * Member function definitions follow.  This would be a .cpp file
 * if it weren't templated.
 *******************************************************************/

#include <algorithm>
#include <cmath>
#include <sstream>
#include <dlrNumeric/array1D.h>
#include <dlrNumeric/array2D.h>
#include <dlrNumeric/utilities.h>
#include <dlrOptimization/optimizerCommon.h>

namespace dlr {

  namespace optimization {

    template <class Functor>
    OptimizerBFGS<Functor>::
    OptimizerBFGS()
      : Optimizer<Functor>(),
        m_argumentTolerance(),
        m_iterationLimit(),
        m_gradientTolerance(),
        m_lineSearchAlpha(),
        m_lineSearchArgumentTolerance(),
        m_maximumStepMagnitudeFactor(),
        m_numberOfRestarts(),
        m_numericEpsilon(),
        m_optimizerLineSearch(),
        m_startPoint(),
        m_functionCallCount(),
        m_gradientCallCount(),
        m_iterationCount()
    {
      this->setParameters();
    }

    template <class Functor>
    OptimizerBFGS<Functor>::
    OptimizerBFGS(const Functor& functor)
      : Optimizer<Functor>(functor),
        m_argumentTolerance(),
        m_iterationLimit(),
        m_gradientTolerance(),
        m_lineSearchAlpha(),
        m_lineSearchArgumentTolerance(),
        m_maximumStepMagnitudeFactor(),
        m_numberOfRestarts(),
        m_numericEpsilon(),
        m_optimizerLineSearch(functor),
        m_startPoint(),
        m_functionCallCount(),
        m_gradientCallCount(),
        m_iterationCount()
    {
      this->setParameters();
    }

    template<class Functor>
    OptimizerBFGS<Functor>::
    OptimizerBFGS(const OptimizerBFGS& source)
      : Optimizer<Functor>(source),
        m_argumentTolerance(source.m_argumentTolerance),
        m_iterationLimit(source.m_iterationLimit),
        m_gradientTolerance(source.m_gradientTolerance),
        m_lineSearchAlpha(source.m_lineSearchAlpha),
        m_lineSearchArgumentTolerance(source.m_lineSearchArgumentTolerance),
        m_maximumStepMagnitudeFactor(source.m_maximumStepMagnitudeFactor),
        m_numberOfRestarts(source.m_numberOfRestarts),
        m_numericEpsilon(source.m_numericEpsilon),
        m_optimizerLineSearch(source.m_optimizerLineSearch),
        m_startPoint(source.m_startPoint.size()),
        m_functionCallCount(source.m_functionCallCount),
        m_gradientCallCount(source.m_gradientCallCount),
        m_iterationCount(source.m_iterationCount)
    {
      copyArgumentType(source.m_startPoint, this->m_startPoint);
    }

    template <class Functor>
    OptimizerBFGS<Functor>::
    ~OptimizerBFGS()
    {
      // Empty
    }

    template<class Functor>
    void
    OptimizerBFGS<Functor>::
    setParameters(size_t iterationLimit,
                  size_t numberOfRestarts,
                  double argumentTolerance,
                  double gradientTolerance,
                  double lineSearchAlpha,
                  double lineSearchArgumentTolerance,
                  double numericEpsilon,
                  double maximumStepMagnitudeFactor)
    {
      // Copy input arguments.
      this->m_iterationLimit = iterationLimit;
      this->m_numberOfRestarts = numberOfRestarts;
      this->m_argumentTolerance = argumentTolerance;
      this->m_gradientTolerance = gradientTolerance;
      this->m_lineSearchAlpha = lineSearchAlpha;
      this->m_lineSearchArgumentTolerance = lineSearchArgumentTolerance;
      this->m_numericEpsilon = numericEpsilon;
      this->m_maximumStepMagnitudeFactor = maximumStepMagnitudeFactor;

      // Reset memory of previous minimization.
      this->m_functionCallCount.clear();
      this->m_gradientCallCount.clear();
      this->m_iterationCount.clear();

      // We've changed the parameters, so we'll have to rerun the
      // optimization.  Indicate this by setting the inherited member
      // m_needsOptimization.
      Optimizer<Functor>::m_needsOptimization = true;
    }

    template<class Functor>
    void
    OptimizerBFGS<Functor>::
    setStartPoint(const typename Functor::argument_type& startPoint)
    {
      copyArgumentType(startPoint, this->m_startPoint);

      // Reset memory of previous minimization.
      this->m_functionCallCount.clear();
      this->m_gradientCallCount.clear();
      this->m_iterationCount.clear();

      // We've changed the start point, so we'll have to rerun the
      // optimization.  Indicate this by setting the inherited member
      // m_needsOptimization.
      Optimizer<Functor>::m_needsOptimization = true;
    }

    template<class Functor>
    OptimizerBFGS<Functor>&
    OptimizerBFGS<Functor>::
    operator=(const OptimizerBFGS<Functor>& source)
    {
      Optimizer<Functor>::operator=(source);
      this->m_argumentTolerance = source.m_argumentTolerance;
      this->m_iterationLimit = source.m_iterationLimit;
      this->m_gradientTolerance = source.m_gradientTolerance;
      this->m_lineSearchAlpha = source.m_lineSearchAlpha;
      this->m_lineSearchArgumentTolerance = source.m_lineSearchArgumentTolerance;
      this->m_maximumStepMagnitudeFactor = source.m_maximumStepMagnitudeFactor;
      this->m_numberOfRestarts = source.m_numberOfRestarts;
      this->m_numericEpsilon = source.m_numericEpsilon;
      this->m_optimizerLineSearch = source.m_optimizerLineSearch;
      copyArgumentType(source.m_startPoint, this->m_startPoint);

      this->m_functionCallCount = source.m_functionCallCount;
      this->m_gradientCallCount = source.m_gradientCallCount;
      this->m_iterationCount = source.m_iterationCount;

      return *this;
    }


    // =============== Protected member functions below =============== //

    template <class Functor>
    double
    OptimizerBFGS<Functor>::
    gradientConvergenceMetric(const argument_type& theta,
                              const result_type& value,
                              const argument_type& gradient)
    {
      double returnValue = 0.0;
      double denominator = std::max(static_cast<double>(value), 1.0);
      for(size_t index = 0; index < theta.size(); ++index) {
        double thetaAbsValue = std::fabs(theta[index]);
        double gradientAbsValue = std::fabs(gradient[index]);
        double candidate =
          gradientAbsValue * std::max(thetaAbsValue, 1.0) / denominator;
        if(candidate > returnValue) {
          returnValue = candidate;
        }
      }
      return returnValue;
    }
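
    // In symbols, the metric computed above is
    //
    //   max over i of ( |gradient[i]| * max(|theta[i]|, 1) / max(value, 1) ),
    //
    // i.e. the largest component of the gradient after scaling by the
    // magnitudes of the corresponding argument component and of the
    // function value.  A made-up numeric example: with theta = (2.0, 0.5),
    // gradient = (0.01, 0.002), and value = 3.0, the metric is
    // max(0.01 * 2.0 / 3.0, 0.002 * 1.0 / 3.0), or roughly 0.0067, which
    // is still far above the default gradientTolerance of 1.0E-5.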

    template <class Functor>
    std::pair<typename Functor::argument_type, typename Functor::result_type>
    OptimizerBFGS<Functor>::
    run()
    {
      // Check that we have a valid startPoint.
      if(this->m_startPoint.size() == 0) {
        DLR_THROW3(StateException, "OptimizerBFGS<Functor>::run()",
                   "startPoint has not been initialized.");
      }

      // Initialize working location so that we start at the right place.
      argument_type theta(this->m_startPoint.size());
      copyArgumentType(this->m_startPoint, theta);

      // Compute initial values of function and its gradient.
      result_type startValue = this->m_functor(theta);
      argument_type startGradient = this->m_functor.gradient(theta);

      // Now run the optimization.
      this->m_functionCallCount.clear();
      this->m_gradientCallCount.clear();
      this->m_iterationCount.clear();
      size_t functionCallCount;
      size_t gradientCallCount;
      size_t iterationCount;
      Triple<argument_type, result_type, argument_type>
        optimum_optimalValue_gradient =
        this->doBfgs(theta, startValue, startGradient, functionCallCount,
                     gradientCallCount, iterationCount);
      // When counting function and gradient calls, remember to include
      // the initial evaluations performed above.
      this->m_functionCallCount.push_back(functionCallCount + 1);
      this->m_gradientCallCount.push_back(gradientCallCount + 1);
      this->m_iterationCount.push_back(iterationCount);

      // Restart as many times as requested.
      for(size_t index = 0; index < this->m_numberOfRestarts; ++index) {
        copyArgumentType(optimum_optimalValue_gradient.first, theta);
        startValue = optimum_optimalValue_gradient.second;
        copyArgumentType(optimum_optimalValue_gradient.third,
                         startGradient);
        optimum_optimalValue_gradient =
          this->doBfgs(theta, startValue, startGradient, functionCallCount,
                       gradientCallCount, iterationCount);
        this->m_functionCallCount.push_back(functionCallCount);
        this->m_gradientCallCount.push_back(gradientCallCount);
        this->m_iterationCount.push_back(iterationCount);
      }

      return std::make_pair(optimum_optimalValue_gradient.first,
                            optimum_optimalValue_gradient.second);
    }

    template <class Functor>
    Triple<typename Functor::argument_type, typename Functor::result_type,
           typename Functor::argument_type>
    OptimizerBFGS<Functor>::
    doBfgs(const argument_type& theta,
           const result_type& startValue,
           const argument_type& startGradient,
           size_t& numberOfFunctionCalls,
           size_t& numberOfGradientCalls,
           size_t& numberOfIterations)
    {
      // Basic initializations.
      size_t dimensionality = theta.size();
      numberOfFunctionCalls = 0;
      numberOfGradientCalls = 0;
      numberOfIterations = 0;

      // We'll need non-const versions of the input arguments.
      argument_type thetaLocal(theta.size());
      copyArgumentType(theta, thetaLocal);
      result_type currentValue = startValue;
      // Of course, we should only copy startGradient if it's actually
      // been initialized.
      argument_type currentGradient;
      if(startGradient.size() != 0) {
        copyArgumentType(startGradient, currentGradient);
      } else {
        currentGradient = this->m_functor.gradient(thetaLocal);
        ++numberOfGradientCalls;
      }

      // If gradient magnitude is zero, we're already at an extremum or
      // a saddle point.  In either case, we don't know which way to go.
      // Instead, just terminate.
      if(dotArgumentType(currentGradient, currentGradient) == 0) {
        return makeTriple(thetaLocal, currentValue, currentGradient);
      }

      // Check that gradient dimension is correct.
      if(currentGradient.size() != dimensionality) {
        std::ostringstream message;
        message << "startPoint has dimensionality " << dimensionality
                << " but objective function returns gradient with "
                << "dimensionality " << currentGradient.size() << ".";
        DLR_THROW3(RunTimeException, "OptimizerBFGS<Functor>::doBfgs()",
                   message.str().c_str());
      }

      // Set up inverse Hessian estimate.
      Array2D<double> inverseHessian(dimensionality, dimensionality);
      inverseHessian = 0.0;
      for(size_t index = 0; index < dimensionality; ++index) {
        // Initialize to identity.
        inverseHessian(index, index) = 1.0;
      }

      // Set up initial search step.
      argument_type searchStep(dimensionality);
      for(size_t index = 0; index < dimensionality; ++index) {
        searchStep[index] = -(currentGradient[index]);
      }

      // Compute maximum allowable step size, allowing for numerical issues.
      double thetaMagnitudeSquared = 0.0;
      for(size_t index = 0; index < dimensionality; ++index) {
        thetaMagnitudeSquared += thetaLocal[index] * thetaLocal[index];
      }
      double thetaMagnitude = std::sqrt(thetaMagnitudeSquared);
      double maximumStepMagnitude =
        (this->m_maximumStepMagnitudeFactor
         * std::max(thetaMagnitude, static_cast<double>(dimensionality)));

      // Make sure the line search optimizer has the right objective
      // function, since Optimizer::setObjectiveFunction() doesn't update
      // m_optimizerLineSearch.
      this->m_optimizerLineSearch.setObjectiveFunction(this->m_functor);

      // Set line search parameters.
      this->m_optimizerLineSearch.setParameters(
        this->m_lineSearchArgumentTolerance, this->m_lineSearchAlpha,
        maximumStepMagnitude);

      // Perform the BFGS iteration.
      while(1) {
        // Perform line search.
        this->m_optimizerLineSearch.setStartPoint(thetaLocal, currentValue,
                                                  currentGradient);
        this->m_optimizerLineSearch.setInitialStep(searchStep);
        argument_type thetaNew = this->m_optimizerLineSearch.optimum();
        currentValue = this->m_optimizerLineSearch.optimalValue();
        numberOfFunctionCalls +=
          this->m_optimizerLineSearch.getNumberOfFunctionCalls();

        // Revise our initial step to match the data, and update current
        // position in parameter space.
        for(size_t index = 0; index < dimensionality; ++index) {
          searchStep[index] = thetaNew[index] - thetaLocal[index];
          thetaLocal[index] = thetaNew[index];
        }

        // Test for "insufficient parameter change" convergence.
        if(contextSensitiveScale(searchStep, thetaLocal)
           < this->m_argumentTolerance) {
          // Gradient at thetaLocal has not been computed yet.  Return
          // empty gradient.
          return makeTriple(thetaLocal, currentValue, argument_type());
        }

        // Temporarily save gradient value.
        Array1D<double> deltaGradient(dimensionality);
        for(size_t index = 0; index < dimensionality; ++index) {
          deltaGradient[index] = -currentGradient[index];
        }

        // Update gradient.
        currentGradient = this->m_functor.gradient(thetaLocal);
        ++numberOfGradientCalls;

        // Sanity check.
        if(currentGradient.size() != dimensionality) {
          std::ostringstream message;
          message << "dimensionality of gradient changed mid-stream from "
                  << dimensionality << " to " << currentGradient.size() << ".";
          DLR_THROW3(RunTimeException, "OptimizerBFGS<Functor>::doBfgs()",
                     message.str().c_str());
        }

        // And compute change in gradient.  Remember that deltaGradient was
        // previously set to -currentGradient.
        for(size_t index = 0; index < dimensionality; ++index) {
          deltaGradient[index] += currentGradient[index];
        }

        // Test for "small gradient" convergence.
        if(this->gradientConvergenceMetric(thetaLocal, currentValue,
                                           currentGradient)
           < this->m_gradientTolerance) {
          return makeTriple(thetaLocal, currentValue, currentGradient);
        }

        // Now prepare to update estimate of the inverse Hessian.
        Array1D<double> inverseHessianTimesDeltaGradient =
          matrixMultiply(inverseHessian, deltaGradient);
        double fac = dotArgumentType(deltaGradient, searchStep);
        if(fac < 0.0) {
          fac = 0.0;
        }
        double fae = dotArgumentType(
          deltaGradient, inverseHessianTimesDeltaGradient);
        double mag2DeltaGradient = dotArgumentType(deltaGradient, deltaGradient);
        double mag2SearchStep = dotArgumentType(searchStep, searchStep);

        // Are gradient change and search direction sufficiently aligned?
        if((fac * fac)
           > (this->m_numericEpsilon * mag2DeltaGradient * mag2SearchStep)) {
          // Some useful scalars.
          double oneOverFac = 1.0 / fac;
          double fad = 1.0 / fae;

          // Use deltaGradient as temporary storage.
          for(size_t index = 0; index < dimensionality; ++index) {
            deltaGradient[index] =
              (oneOverFac * searchStep[index]
               - fad * inverseHessianTimesDeltaGradient[index]);
          }

          // Now update inverse Hessian estimate.
          for(size_t row = 0; row < inverseHessian.rows(); ++row) {
            for(size_t column = 0; column < inverseHessian.columns(); ++column) {
              // First DFP term.
              //
              // Note(xxx): redundant calculation?  this multiplication
              // is done immediately above.
              double increment0 = (oneOverFac * searchStep[row]
                                   * searchStep[column]);
              // Second DFP term.
              double decrement1 = (fad * inverseHessianTimesDeltaGradient[row]
                                   * inverseHessianTimesDeltaGradient[column]);
              // BFGS term.  Remember that deltaGradient has been preempted for
              // temporary storage at this point.
              double increment2 = (fae * deltaGradient[row]
                                   * deltaGradient[column]);
              // Now do the update.
              inverseHessian(row, column) += (increment0 - decrement1
                                              + increment2);
            }
          }

          // Array2D<double> increment0 =
          //   outerProduct(oneOverFac * searchStep, searchStep);

          // Second DFP term.
          // Array2D<double> decrement1 =
          //   outerProduct(fad * inverseHessianTimesDeltaGradient,
          //                inverseHessianTimesDeltaGradient);

          // BFGS term.  Remember that deltaGradient has been preempted for
          // temporary storage at this point.
          // Array2D<double> increment2 =
          //   outerProduct(fae * deltaGradient, deltaGradient);

          // Actually do the update.
          // inverseHessian += increment0;
          // inverseHessian -= decrement1;
          // inverseHessian += increment2;
        }
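
        // In conventional notation, with s = searchStep, y = deltaGradient
        // (as it was before being reused as scratch space), and
        // H = inverseHessian, the update applied above is the standard
        // BFGS inverse-Hessian update in its DFP-plus-correction form:
        //
        //   H += s s^T / (y^T s)
        //        - (H y)(H y)^T / (y^T H y)
        //        + (y^T H y) u u^T,   where u = s/(y^T s) - (H y)/(y^T H y).
        //
        // In the code, fac corresponds to y^T s and fae to y^T H y.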

        // Use Newton's method to choose the next update.
        matrixMultiplyArgumentType(inverseHessian, currentGradient, searchStep);
        for(size_t index = 0; index < dimensionality; ++index) {
          searchStep[index] = -(searchStep[index]);
        }

        // Check for convergence failure.
        ++numberOfIterations;
        if(numberOfIterations > this->m_iterationLimit) {
          // We're going to bail out, but save the result anyway, just
          // in case someone cares.
          this->setOptimum(thetaLocal, currentValue, false);

          // Now throw the exception.
          std::ostringstream message;
          message << "Iteration limit of " << this->m_iterationLimit
                  << " exceeded.";
          DLR_THROW3(RunTimeException, "OptimizerBFGS<Functor>::doBfgs()",
                     message.str().c_str());
        }
      }
    }

  } // namespace optimization

} // namespace dlr

#endif // #ifndef _DLR_OPTIMIZERBFGS_H_
