#ifndef _DLR_OPTIMIZERBFGS_H_
#define _DLR_OPTIMIZERBFGS_H_

#include <utility>
#include <vector>
#include <dlrCommon/triple.h>
#include <dlrOptimization/optimizer.h>
#include <dlrOptimization/optimizerLineSearch.h>

namespace dlr {

  namespace optimization {
    template <class Functor>
    class OptimizerBFGS
      : public Optimizer<Functor>
    {
    public:

      typedef typename Functor::argument_type argument_type;
      typedef typename Functor::result_type result_type;

      OptimizerBFGS();

      explicit OptimizerBFGS(const Functor& functor);

      OptimizerBFGS(const OptimizerBFGS& source);

      virtual
      ~OptimizerBFGS();

      virtual std::vector<size_t>
      getNumberOfFunctionCalls() {return this->m_functionCallCount;}

      virtual std::vector<size_t>
      getNumberOfGradientCalls() {return this->m_gradientCallCount;}

      virtual std::vector<size_t>
      getNumberOfIterations() {return this->m_iterationCount;}

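      // Summary of the tuning parameters, as implemented in this file:
      //   iterationLimit              - maximum BFGS iterations per run
      //                                 before a RunTimeException is thrown.
      //   numberOfRestarts            - number of additional runs started
      //                                 from the previously found optimum.
      //   argumentTolerance           - terminate when the scaled step size
      //                                 falls below this value.
      //   gradientTolerance           - terminate when the scaled gradient
      //                                 metric falls below this value.
      //   lineSearchAlpha,
      //   lineSearchArgumentTolerance - forwarded to the internal
      //                                 OptimizerLineSearch instance.
      //   numericEpsilon              - threshold below which the inverse
      //                                 Hessian update is skipped.
      //   maximumStepMagnitudeFactor  - scales the largest step the line
      //                                 search is allowed to take.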
      virtual void
      setParameters(size_t iterationLimit = 500,
                    size_t numberOfRestarts = 1,
                    double argumentTolerance = 1.2E-7,
                    double gradientTolerance = 0.00001,
                    double lineSearchAlpha = 1.0E-4,
                    double lineSearchArgumentTolerance = 1.0e-7,
                    double numericEpsilon = 3.0E-8,
                    double maximumStepMagnitudeFactor = 100.0);

      virtual void
      setIterationLimit(size_t iterationLimit) {
        this->m_iterationLimit = iterationLimit;
      }

      virtual void
      setNumberOfRestarts(size_t numberOfRestarts) {
        this->m_numberOfRestarts = numberOfRestarts;
      }

      virtual void
      setStartPoint(const typename Functor::argument_type& startPoint);

      // The verbosity argument is currently ignored by this optimizer.
      virtual void
      setVerbosity(int /* verbosity */) {}

      virtual OptimizerBFGS&
      operator=(const OptimizerBFGS& source);

    protected:

      // Computes the scaled gradient magnitude used for the convergence
      // test (see the definition below).
      double
      gradientConvergenceMetric(const argument_type& theta,
                                const result_type& value,
                                const argument_type& gradient);

      // Runs the optimization: one initial BFGS pass from m_startPoint,
      // followed by m_numberOfRestarts additional passes, each starting
      // from the previously found optimum.
      virtual
      std::pair<typename Functor::argument_type, typename Functor::result_type>
      run();

      // Core BFGS iteration.  Returns the argument, function value, and
      // gradient at the point where iteration stopped; the returned
      // gradient is empty if termination was triggered by the argument
      // tolerance before the gradient was recomputed.
      Triple<typename Functor::argument_type, typename Functor::result_type,
             typename Functor::argument_type>
      doBfgs(const argument_type& theta,
             const result_type& startValue,
             const argument_type& startGradient,
             size_t& numberOfFunctionCalls,
             size_t& numberOfGradientCalls,
             size_t& numberOfIterations);


      double m_argumentTolerance;
      size_t m_iterationLimit;
      double m_gradientTolerance;
      double m_lineSearchAlpha;
      double m_lineSearchArgumentTolerance;
      double m_maximumStepMagnitudeFactor;
      size_t m_numberOfRestarts;
      double m_numericEpsilon;
      OptimizerLineSearch<Functor> m_optimizerLineSearch;
      argument_type m_startPoint;

      // One entry per BFGS pass: the initial run plus each restart.
      std::vector<size_t> m_functionCallCount;
      std::vector<size_t> m_gradientCallCount;
      std::vector<size_t> m_iterationCount;

    };
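
    /* Example usage (an illustrative sketch, not part of the original
     * documentation).  "ParabolicCost" is a hypothetical functor type
     * providing argument_type, result_type, operator()(), and
     * gradient(); the optimum() and optimalValue() accessors are
     * assumed to be inherited from Optimizer<Functor>.
     *
     *   ParabolicCost costFunction;
     *   OptimizerBFGS<ParabolicCost> optimizer(costFunction);
     *
     *   ParabolicCost::argument_type startPoint(2);
     *   startPoint[0] = 1.0;
     *   startPoint[1] = -2.0;
     *   optimizer.setStartPoint(startPoint);
     *
     *   ParabolicCost::argument_type argMin = optimizer.optimum();
     *   ParabolicCost::result_type minValue = optimizer.optimalValue();
     */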

  } // namespace optimization

} // namespace dlr


namespace dlr {

  using optimization::OptimizerBFGS;

} // namespace dlr

#include <algorithm>
#include <cmath>
#include <sstream>
#include <dlrNumeric/array1D.h>
#include <dlrNumeric/array2D.h>
#include <dlrNumeric/utilities.h>
#include <dlrOptimization/optimizerCommon.h>

namespace dlr {

  namespace optimization {

    template <class Functor>
    OptimizerBFGS<Functor>::
    OptimizerBFGS()
      : Optimizer<Functor>(),
        m_argumentTolerance(),
        m_iterationLimit(),
        m_gradientTolerance(),
        m_lineSearchAlpha(),
        m_lineSearchArgumentTolerance(),
        m_maximumStepMagnitudeFactor(),
        m_numberOfRestarts(),
        m_numericEpsilon(),
        m_optimizerLineSearch(),
        m_startPoint(),
        m_functionCallCount(),
        m_gradientCallCount(),
        m_iterationCount()
    {
      this->setParameters();
    }

    template <class Functor>
    OptimizerBFGS<Functor>::
    OptimizerBFGS(const Functor& functor)
      : Optimizer<Functor>(functor),
        m_argumentTolerance(),
        m_iterationLimit(),
        m_gradientTolerance(),
        m_lineSearchAlpha(),
        m_lineSearchArgumentTolerance(),
        m_maximumStepMagnitudeFactor(),
        m_numberOfRestarts(),
        m_numericEpsilon(),
        m_optimizerLineSearch(functor),
        m_startPoint(),
        m_functionCallCount(),
        m_gradientCallCount(),
        m_iterationCount()
    {
      this->setParameters();
    }

    template <class Functor>
    OptimizerBFGS<Functor>::
    OptimizerBFGS(const OptimizerBFGS& source)
      : Optimizer<Functor>(source),
        m_argumentTolerance(source.m_argumentTolerance),
        m_iterationLimit(source.m_iterationLimit),
        m_gradientTolerance(source.m_gradientTolerance),
        m_lineSearchAlpha(source.m_lineSearchAlpha),
        m_lineSearchArgumentTolerance(source.m_lineSearchArgumentTolerance),
        m_maximumStepMagnitudeFactor(source.m_maximumStepMagnitudeFactor),
        m_numberOfRestarts(source.m_numberOfRestarts),
        m_numericEpsilon(source.m_numericEpsilon),
        m_optimizerLineSearch(source.m_optimizerLineSearch),
        m_startPoint(source.m_startPoint.size()),
        m_functionCallCount(source.m_functionCallCount),
        m_gradientCallCount(source.m_gradientCallCount),
        m_iterationCount(source.m_iterationCount)
    {
      copyArgumentType(source.m_startPoint, this->m_startPoint);
    }

    template <class Functor>
    OptimizerBFGS<Functor>::
    ~OptimizerBFGS()
    {
    }

    template <class Functor>
    void
    OptimizerBFGS<Functor>::
    setParameters(size_t iterationLimit,
                  size_t numberOfRestarts,
                  double argumentTolerance,
                  double gradientTolerance,
                  double lineSearchAlpha,
                  double lineSearchArgumentTolerance,
                  double numericEpsilon,
                  double maximumStepMagnitudeFactor)
    {
      this->m_iterationLimit = iterationLimit;
      this->m_numberOfRestarts = numberOfRestarts;
      this->m_argumentTolerance = argumentTolerance;
      this->m_gradientTolerance = gradientTolerance;
      this->m_lineSearchAlpha = lineSearchAlpha;
      this->m_lineSearchArgumentTolerance = lineSearchArgumentTolerance;
      this->m_numericEpsilon = numericEpsilon;
      this->m_maximumStepMagnitudeFactor = maximumStepMagnitudeFactor;

      // Discard bookkeeping from any previous runs.
      this->m_functionCallCount.clear();
      this->m_gradientCallCount.clear();
      this->m_iterationCount.clear();

      // Changing parameters invalidates any previously computed optimum.
      Optimizer<Functor>::m_needsOptimization = true;
    }

    template <class Functor>
    void
    OptimizerBFGS<Functor>::
    setStartPoint(const typename Functor::argument_type& startPoint)
    {
      copyArgumentType(startPoint, this->m_startPoint);

      this->m_functionCallCount.clear();
      this->m_gradientCallCount.clear();
      this->m_iterationCount.clear();

      Optimizer<Functor>::m_needsOptimization = true;
    }

    template <class Functor>
    OptimizerBFGS<Functor>&
    OptimizerBFGS<Functor>::
    operator=(const OptimizerBFGS<Functor>& source)
    {
      Optimizer<Functor>::operator=(source);
      this->m_argumentTolerance = source.m_argumentTolerance;
      this->m_iterationLimit = source.m_iterationLimit;
      this->m_gradientTolerance = source.m_gradientTolerance;
      this->m_lineSearchAlpha = source.m_lineSearchAlpha;
      this->m_lineSearchArgumentTolerance = source.m_lineSearchArgumentTolerance;
      this->m_maximumStepMagnitudeFactor = source.m_maximumStepMagnitudeFactor;
      this->m_numberOfRestarts = source.m_numberOfRestarts;
      this->m_numericEpsilon = source.m_numericEpsilon;
      this->m_optimizerLineSearch = source.m_optimizerLineSearch;
      copyArgumentType(source.m_startPoint, this->m_startPoint);

      this->m_functionCallCount = source.m_functionCallCount;
      this->m_gradientCallCount = source.m_gradientCallCount;
      this->m_iterationCount = source.m_iterationCount;

      return *this;
    }

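    // The following convergence metric, patterned after the gradient
    // test commonly used with BFGS implementations, evaluates to
    //
    //   max_i( |gradient[i]| * max(|theta[i]|, 1) / max(value, 1) ),
    //
    // i.e. the largest gradient component, scaled by the size of the
    // corresponding argument component and by the current function
    // value, so that the test is roughly insensitive to the scale of
    // the problem.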
    template <class Functor>
    double
    OptimizerBFGS<Functor>::
    gradientConvergenceMetric(const argument_type& theta,
                              const result_type& value,
                              const argument_type& gradient)
    {
      double returnValue = 0.0;
      double denominator = std::max(static_cast<double>(value), 1.0);
      for(size_t index = 0; index < theta.size(); ++index) {
        double thetaAbsValue = std::fabs(theta[index]);
        double gradientAbsValue = std::fabs(gradient[index]);
        double candidate =
          gradientAbsValue * std::max(thetaAbsValue, 1.0) / denominator;
        if(candidate > returnValue) {
          returnValue = candidate;
        }
      }
      return returnValue;
    }

    template <class Functor>
    std::pair<typename Functor::argument_type, typename Functor::result_type>
    OptimizerBFGS<Functor>::
    run()
    {
      if(this->m_startPoint.size() == 0) {
        DLR_THROW3(StateException, "OptimizerBFGS<Functor>::run()",
                   "startPoint has not been initialized.");
      }

      // Work on a local copy of the start point.
      argument_type theta(this->m_startPoint.size());
      copyArgumentType(this->m_startPoint, theta);

      // Evaluate the objective function and its gradient at the start point.
      result_type startValue = this->m_functor(theta);
      argument_type startGradient = this->m_functor.gradient(theta);

      // Run the first BFGS pass.
      this->m_functionCallCount.clear();
      this->m_gradientCallCount.clear();
      this->m_iterationCount.clear();
      size_t functionCallCount;
      size_t gradientCallCount;
      size_t iterationCount;
      Triple<argument_type, result_type, argument_type>
        optimum_optimalValue_gradient =
        this->doBfgs(theta, startValue, startGradient, functionCallCount,
                     gradientCallCount, iterationCount);

      // The "+ 1" accounts for the function and gradient evaluations made
      // above, before doBfgs() was called.
      this->m_functionCallCount.push_back(functionCallCount + 1);
      this->m_gradientCallCount.push_back(gradientCallCount + 1);
      this->m_iterationCount.push_back(iterationCount);

      // Restart the algorithm from the recovered optimum as many times as
      // requested.
      for(size_t index = 0; index < this->m_numberOfRestarts; ++index) {
        copyArgumentType(optimum_optimalValue_gradient.first, theta);
        startValue = optimum_optimalValue_gradient.second;
        copyArgumentType(optimum_optimalValue_gradient.third,
                         startGradient);
        optimum_optimalValue_gradient =
          this->doBfgs(theta, startValue, startGradient, functionCallCount,
                       gradientCallCount, iterationCount);
        this->m_functionCallCount.push_back(functionCallCount);
        this->m_gradientCallCount.push_back(gradientCallCount);
        this->m_iterationCount.push_back(iterationCount);
      }

      return std::make_pair(optimum_optimalValue_gradient.first,
                            optimum_optimalValue_gradient.second);
    }

    template <class Functor>
    Triple<typename Functor::argument_type, typename Functor::result_type,
           typename Functor::argument_type>
    OptimizerBFGS<Functor>::
    doBfgs(const argument_type& theta,
           const result_type& startValue,
           const argument_type& startGradient,
           size_t& numberOfFunctionCalls,
           size_t& numberOfGradientCalls,
           size_t& numberOfIterations)
    {
      size_t dimensionality = theta.size();
      numberOfFunctionCalls = 0;
      numberOfGradientCalls = 0;
      numberOfIterations = 0;

      // Work on a local copy of the starting point.
      argument_type thetaLocal(theta.size());
      copyArgumentType(theta, thetaLocal);
      result_type currentValue = startValue;

      // Use the supplied gradient if there is one, otherwise compute it.
      argument_type currentGradient;
      if(startGradient.size() != 0) {
        copyArgumentType(startGradient, currentGradient);
      } else {
        currentGradient = this->m_functor.gradient(thetaLocal);
        ++numberOfGradientCalls;
      }

      // If the gradient is exactly zero, the starting point is already a
      // stationary point, and there is nothing to do.
      if(dotArgumentType(currentGradient, currentGradient) == 0) {
        return makeTriple(thetaLocal, currentValue, currentGradient);
      }

      if(currentGradient.size() != dimensionality) {
        std::ostringstream message;
        message << "startPoint has dimensionality " << dimensionality
                << " but objective function returns gradient with "
                << "dimensionality " << currentGradient.size() << ".";
        DLR_THROW3(RunTimeException, "OptimizerBFGS<Functor>::doBfgs()",
                   message.str().c_str());
      }

      // Start with the identity as the approximation to the inverse
      // Hessian, so that the first step is in the steepest-descent
      // direction.
      Array2D<double> inverseHessian(dimensionality, dimensionality);
      inverseHessian = 0.0;
      for(size_t index = 0; index < dimensionality; ++index) {
        inverseHessian(index, index) = 1.0;
      }

      argument_type searchStep(dimensionality);
      for(size_t index = 0; index < dimensionality; ++index) {
        searchStep[index] = -(currentGradient[index]);
      }

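      // Limit how far a single line search may move: the cap computed
      // below is m_maximumStepMagnitudeFactor times the larger of the
      // starting point's Euclidean norm and the problem dimensionality.
      // This guards against unreasonably long steps when the inverse
      // Hessian estimate is poor.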
      double thetaMagnitudeSquared = 0.0;
      for(size_t index = 0; index < dimensionality; ++index) {
        thetaMagnitudeSquared += thetaLocal[index] * thetaLocal[index];
      }
      double thetaMagnitude = std::sqrt(thetaMagnitudeSquared);
      double maximumStepMagnitude =
        (this->m_maximumStepMagnitudeFactor
         * std::max(thetaMagnitude, static_cast<double>(dimensionality)));

      this->m_optimizerLineSearch.setObjectiveFunction(this->m_functor);
      this->m_optimizerLineSearch.setParameters(
        this->m_lineSearchArgumentTolerance, this->m_lineSearchAlpha,
        maximumStepMagnitude);

      // Main BFGS iteration loop.
      while(1) {

        // Line search along the current search direction.
        this->m_optimizerLineSearch.setStartPoint(thetaLocal, currentValue,
                                                  currentGradient);
        this->m_optimizerLineSearch.setInitialStep(searchStep);
        argument_type thetaNew = this->m_optimizerLineSearch.optimum();
        currentValue = this->m_optimizerLineSearch.optimalValue();
        numberOfFunctionCalls +=
          this->m_optimizerLineSearch.getNumberOfFunctionCalls();

        // Record the step actually taken, and move to the new point.
        for(size_t index = 0; index < dimensionality; ++index) {
          searchStep[index] = thetaNew[index] - thetaLocal[index];
          thetaLocal[index] = thetaNew[index];
        }

        // Done if the scaled change in the argument is negligible.  The
        // gradient at the new point has not been computed, so an empty
        // gradient is returned.
        if(contextSensitiveScale(searchStep, thetaLocal)
           < this->m_argumentTolerance) {
          return makeTriple(thetaLocal, currentValue, argument_type());
        }

        // Remember the negative of the previous gradient...
        Array1D<double> deltaGradient(dimensionality);
        for(size_t index = 0; index < dimensionality; ++index) {
          deltaGradient[index] = -currentGradient[index];
        }

        // ...and evaluate the gradient at the new point.
        currentGradient = this->m_functor.gradient(thetaLocal);
        ++numberOfGradientCalls;

        if(currentGradient.size() != dimensionality) {
          std::ostringstream message;
          message << "dimensionality of gradient changed mid-stream from "
                  << dimensionality << " to " << currentGradient.size() << ".";
          DLR_THROW3(RunTimeException, "OptimizerBFGS<Functor>::doBfgs()",
                     message.str().c_str());
        }

        // deltaGradient now becomes the change in gradient across this step.
        for(size_t index = 0; index < dimensionality; ++index) {
          deltaGradient[index] += currentGradient[index];
        }

        // Done if the scaled gradient is sufficiently small.
        if(this->gradientConvergenceMetric(thetaLocal, currentValue,
                                           currentGradient)
           < this->m_gradientTolerance) {
          return makeTriple(thetaLocal, currentValue, currentGradient);
        }

        // Quantities needed for the inverse Hessian update.
        Array1D<double> inverseHessianTimesDeltaGradient =
          matrixMultiply(inverseHessian, deltaGradient);
        double fac = dotArgumentType(deltaGradient, searchStep);
        if(fac < 0.0) {
          fac = 0.0;
        }
        double fae = dotArgumentType(
          deltaGradient, inverseHessianTimesDeltaGradient);
        double mag2DeltaGradient = dotArgumentType(deltaGradient, deltaGradient);
        double mag2SearchStep = dotArgumentType(searchStep, searchStep);

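        // BFGS update of the inverse Hessian approximation.  Writing
        // s = searchStep, y = deltaGradient, and
        //
        //   u = s / (y . s)  -  (H y) / (y . H y),
        //
        // the block below performs
        //
        //   H  <-  H  +  s s^T / (y . s)
        //              -  (H y)(H y)^T / (y . H y)
        //              +  (y . H y) u u^T .
        //
        // The update is skipped unless (y . s)^2 exceeds
        // m_numericEpsilon * |y|^2 * |s|^2; a nearly zero (or negative,
        // clamped above) curvature y . s would make the update
        // numerically unreliable.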
        if((fac * fac)
           > (this->m_numericEpsilon * mag2DeltaGradient * mag2SearchStep)) {

          double oneOverFac = 1.0 / fac;
          double fad = 1.0 / fae;

          // Reuse deltaGradient to hold the vector u defined above.
          for(size_t index = 0; index < dimensionality; ++index) {
            deltaGradient[index] =
              (oneOverFac * searchStep[index]
               - fad * inverseHessianTimesDeltaGradient[index]);
          }

          for(size_t row = 0; row < inverseHessian.rows(); ++row) {
            for(size_t column = 0; column < inverseHessian.columns(); ++column) {
              double increment0 = (oneOverFac * searchStep[row]
                                   * searchStep[column]);
              double decrement1 = (fad * inverseHessianTimesDeltaGradient[row]
                                   * inverseHessianTimesDeltaGradient[column]);
              double increment2 = (fae * deltaGradient[row]
                                   * deltaGradient[column]);
              inverseHessian(row, column) += (increment0 - decrement1
                                              + increment2);
            }
          }
        }

        // Compute the next search direction: -H * gradient.
        matrixMultiplyArgumentType(inverseHessian, currentGradient, searchStep);
        for(size_t index = 0; index < dimensionality; ++index) {
          searchStep[index] = -(searchStep[index]);
        }

        ++numberOfIterations;
        if(numberOfIterations > this->m_iterationLimit) {
          // Record the best point found so far before giving up.
          this->setOptimum(thetaLocal, currentValue, false);

          std::ostringstream message;
          message << "Iteration limit of " << this->m_iterationLimit
                  << " exceeded.";
          DLR_THROW3(RunTimeException, "OptimizerBFGS<Functor>::doBfgs()",
                     message.str().c_str());
        }
      }
    }

  } // namespace optimization

} // namespace dlr

#endif // #ifndef _DLR_OPTIMIZERBFGS_H_