
/**********************************************************************
 * $Id: raysearch.c,v 1.2 92/11/30 11:30:56 drew Exp $
 **********************************************************************/

/**********************************************************************
 *   Copyright 1990,1991,1992,1993 by The University of Toronto,
 *		      Toronto, Ontario, Canada.
 * 
 *			 All Rights Reserved
 * 
 * Permission to use, copy, modify, distribute, and sell this software
 * and its  documentation for  any purpose is  hereby granted  without
 * fee, provided that the above copyright notice appears in all copies
 * and  that both the  copyright notice  and   this  permission notice
 * appear in   supporting documentation,  and  that the  name  of  The
 * University  of Toronto  not  be  used in  advertising or  publicity
 * pertaining   to  distribution   of  the  software without specific,
 * written prior  permission.   The  University of   Toronto makes  no
 * representations  about  the  suitability of  this software  for any
 * purpose.  It  is    provided   "as is"  without express or  implied
 * warranty.
 *
 * THE UNIVERSITY OF TORONTO DISCLAIMS  ALL WARRANTIES WITH REGARD  TO
 * THIS SOFTWARE,  INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS, IN NO EVENT SHALL THE UNIVERSITY OF TORONTO  BE LIABLE
 * FOR ANY  SPECIAL, INDIRECT OR CONSEQUENTIAL  DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR  PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT  OF  OR  IN  CONNECTION WITH  THE  USE  OR PERFORMANCE  OF THIS
 * SOFTWARE.
 *
 **********************************************************************/

/**********************************************************************
 * Written by Tony Plate in June 1991, based on code written by
 * Ray Watrous
 *
 *	HISTORY
 *	-------
 *
 *		This is an auxiliary module for
 *	executing line search as a part of optimization.
 *
 *
 *	The source for this algorithm is R. Fletcher,
 *	Practical Methods for Optimization, John Wiley, 1980
 *	pp 25ff.
 *
 *
 *	In order to be fairly general, the following functions
 *	are required: the error function to be minimized, and
 *	gradient. Given is the initial function value, gradient
 *	vector and initial alpha. Returned is the minimizing
 *	value for alpha.
 *
 *
 *	9/23/87 - extensively modified based on further insight
 *	into Fletcher's descriptions, including errors.
 *
 *	A more useful reference being Dennis & Schnabel, Numerical
 *	Methods for Unconstrained Optimization and Nonlinear Equations,
 *	Prentice-Hall, 1983.
 *
 *
 *	12/5/87 - modified for vcc, function name args
 *
 *	12/15/87 - modified for VERBOSE mode switch
 *
 *	1/20/88 - modified for new gradient computation; leaves new
 *		gradient value at new_grad
 *
 *	4/4/89 - corrected convergence problem
 *
 *	4/6/89 - added modifications from Fletcher 2nd edition
 *
 *	3/14/90 - modified to use arrays of reals instead of vec structs
 *                  for compatibility with other code. Also, work space is now
 *                  passed in as an argument to find_min_along. - SJN
 *
 *        18 March 90
 *                - modified doubles to reals
 *                - extern declaration for find_min_along, fun and grad
 *		- F_MIN changed to p->fmm_min
 *		- VERBOSE defs changed to p->lsverbose
 *		- mfun added to find_min_along args
 *		- CKIW
 *	13 May 90
 *		- p->fmm_min changed to p->obj_fcn_min  RZ
 *	23 May 90
 *		- added comments  RZ
 *		- changed RHO, SIGMA, TAU to p->rho, p->sigma, p->tau
 *		- commented out dot_product (defined in matrix_prods)
 *		- added second level of verbosity (1 within find_min_along, 2 else)
 *
 *	20 Jul 90 
 *	       - added new way of estimating initial alpha
 *	       - need p->new_alpha
 *
 *	13 Jun 91
 *		- modified code style
 *		- changed internal variables back to doubles (vectors are
 *		  still Reals)
 *		- modified style of verbosity printouts
 *		- function declarations ansi style (to permit type-checking)
 *		- changed #ifdef MONOTONIC to mz->lsNoLocalMinima
 *		- changed #defined QUASINEWTON to mz->lsQuasiNewton
 *		- changed #defined WOLFE_TEST to !mz->lsNoWolfeTest
 *		- moved initial step size calculations into functions
 *		  raysInitalStep() and rudisInitialStep(), this removed
 *		  need for p->fprev, p->new_alpha, and p->obj_fcn_min
 *		- pass d as a parameter instead of g[]
 *		- changed rho in bound calculation to 1/mz->lsStepBound
 *		- changed #defined MAX_EXTRAPOL to mz->lsMaxExtrapol
 *		- changed #defined EXTENSOR to mz->lsExtensor
 *		- changed p->lsverbose & p->lsverbose2 to mz->lsVerbosity
 *		- changed p->rho to mz->minFuncReduction
 *		- changed p->sigma to mz->maxSlopeRatio
 *		- changed p->tau to mz->lsExtrapLimits
 *
 * 	Line Search Constraints:	(Ray's value)
 *
 *	RHO in (0, 1/2)			0.1	(minFuncReduction)
 *	SIGMA in (RHO, 1)		0.5	(maxSlopeRatio)
 *						0.1 ==> strict line search
 *						0.9 ==> weak line search
 *	TAU in (0, SIGMA)		0.05	(lsExtrapLimits)
 *
 **********************************************************************/

#include <stdio.h>
#include <math.h>
#include <errno.h>

#include <xerion/useful.h>
#include "minimize.h"
#include "linesearch.h"
#include "wobbly.h"
#include "nanstuff.h"

#include "raysearch.h"

#ifndef MIN
#define MIN(x,y)	((x) < (y) ? (x) : (y))
#endif

/* EPSILONS are used for sense-checking extrapolation & iterpolation bounds */

/*
#define EPSILON		0.000000000000000001
#define EPSILON10	0.000000000000000010
*/
#define EPSILON		0.00000000000001
#define EPSILON10	0.00000000000010

/*  QUADRATIC  */

/*
  quadratic match given values (a1, f1) and (a2, f2) and
  slope d1 at a1. Stores new point at res;
  returns flag=1 if minimum found.
  */

static int	quadratic (a1, f1, d1, a2, f2, res, vb)
  double	a1 ;
  double	f1 ;
  double	d1 ;
  double	a2 ;
  double	f2 ;
  double	*res ;
  int 		vb ;
{
  double delta, dx;
  
  delta = a2 - a1;
  dx = delta * d1;
  
  if ((f2 - f1) <= dx) {
    VB(3, vb, "Quadratic fails: use default");
    return(0);
  }
  *res = a1 + 0.5 * delta /(1 + (f1 - f2)/(dx));
  VB(3, vb, "Quadratic Match: %.6g\n", *res);
  return(1);
}


/*  CUBIC  */

/*
    Cubic match given points (a1, f1) and (a2, f2) and
    slopes d1 at a1, d2 at a2. New point written at res.
    Returns ok=1 for found minimum, 0 for no minimum.
    */

static int	cubic (a1, f1, d1, a2, f2, d2, res, vb)
  double	a1 ;
  double	f1 ;
  double	d1 ;
  double	a2 ;
  double	f2 ;
  double	d2 ;
  double	*res ;
  int		vb ;
{
  double delta, lambda, new, w, z, b;
  double rad;
  int ok;
  
  lambda = (a2 - a1);
  delta = (f2 - f1) / lambda;
  b = d1 + d2 - 2 * delta;
  
  ok = 1;
  if (b == 0) {
    /*	Quadratic form: if d1 == d2, (straight line) -
	Sign conditions for min/max dictate that for minimum,
	d2 > d1.
	*/
    VB(3, vb, "Degenerate Cubic: ");
    if (d2 <= d1) {
      ok = 0;
      VB(3, vb, "Quadratic match failed - use default\n");
    } else {
      new = (a1 * d2 - a2 * d1) / (d2 - d1);
      VB(3, vb, "Quadratic match - new alpha: %.6g\n", new);
    }
  } else {
    z = b - delta;
    rad = z * z - d1 * d2;
    /*
      If rad is 0, inflection is saddle point - extend by default action.
      If rad is negative, imaginary minima - also extend by default.
      */
    if (rad <=0) {
      ok = 0;
      VB(3, vb, "Saddle Point or Imaginary min: rad = %f\n", rad);
    } else {
      /*
	It turns out that the sign conditions for the inflection points
	guarantee that the negative choice for w always yields the
	minimum rather than the maximum. Why is this?
	*/
      w = sqrt(rad);
      new = a1 + lambda * (1.00 -(d2 - w + z)/(3 * b));
      VB(3, vb, "Cubic Match to new alpha: %.6g\n", new);
    }
  }
  *res = new;
  return(ok);
}


/*  EXTRAPOLATE  */
/*
  Extrapolation given points (a1, f1) and (a2, f2) and
  slopes d1 at a1, d2 at a2, and amax.
  */

static int	extrapolate(mz, a1, f1, d1, a2, f2, d2, amax, res, bracket, vb)
  Minimize	mz ;
  double	a1 ;
  double	f1 ;
  double	d1 ;
  double	a2 ;
  double	f2 ;
  double	d2 ;
  double	amax ;
  double	*res ;
  int		bracket ;
  int		vb ;
{
  double lambda, new;
  double low, high;
  int success;
  /*
    Check extrapolation preconditions
    */
  VB(3, vb, "Extrapolate from <%.6g, %.6g> less than %.6g\n", a1, a2, amax);
  
  lambda = (a2 - a1);
  if (lambda <= EPSILON) {
    if (bracket && (amax - a2) <= EPSILON10) {
      VB(3, vb, "Extrapolation failed - search space vanished\n");
      return(0);
    } else {
      if (bracket || (amax-a2) > (a2-a1))
	new = a2 + mz->extrapLimits * (amax - a2);
      else
	new = a2 + mz->extrapLimits * lambda;
      *res = new;
      VB(3,vb, "Extrapolation interval collapsed - try new guess %.6g\n",new);
      return(1);
    }
  }
  
  /* Get extrapolation value */
  
  success = cubic(a1, f1, d1, a2, f2, d2, &new, vb);
  if (new < 0 )
    success = 0;
  /*
   * How the extrapolation failure is treated turns out to have an
   * effect on average convergence counts for Rosenbrock's function.
   * The more conservative a2+1.3 * lambda works pretty well.
   */
  if (success == 0) {
    if (bracket)
      new = amax;
    else
      new = a2 +  mz->extensor * lambda;
    VB(3, vb, "Extrap failed: guess (brkt = %d) : %.6g\n", bracket,new);
  }

  /* check constraints on alpha new */

  if (bracket) {
    lambda = amax - a2;
    low = a2 + mz->extrapLimits * lambda;
    high = amax - mz->extrapLimits * lambda;
    VB(3, vb, "Bracket Extrap limits: low: %.6g, high: %.6g\n", low, high);
  } else {
    high = a2 + mz->maxExtrapol * lambda;
    if (amax < high)
      high = amax;
    low = a2 + mz->extrapLimits * lambda;
    if (high < low)
      low = high;
    VB(3, vb, "No Bracket Extrap limits: low: %.6g, high: %.6g\n", low, high);
  }
  if (new < low) {
    VB(3, vb, "Extrapolant (%.6g) too small: clamp to %.6g\n", new, low);
    new = low;
  } else if (new > high) {
    VB(3, vb, "Extrapolant (%.6g) too large: clamp to %.6g\n", new, high);
    new = high;
  } else {
    VB(3, vb, "Extrapolant (%.6g) within brackets\n", new);
  }
  *res = new;
  return(1);
}


/*  INTERPOLATE  */
/*
  Interpolation:
  inputs -
  (a1, f1) and slope d1 at a1.
  (a2, f2) and slope d2 at a2 if flag = 1
  */

static int	interpolate (mz, a1, f1, d1, a2, f2, d2, f2_flag, res, vb)
  Minimize	mz ;
  double	a1 ;
  double	f1 ;
  double	d1 ;
  double	a2 ;
  double	f2 ;
  double	d2 ;
  int		f2_flag ;
  double	*res ;
  int		vb ;
{
  double delta, new;
  double low, high, range;
  int success;
  
  /* Check Interpolation pre-conditions */
  
  delta = (a2 - a1);
  range = mz->interpLimits * delta;
  low = a1 + range;
  high = a2 - range;
  
  VB(3, vb, "Interpolate between %.6g and %.6g, limits (%g,%g)\n",
     a1, a2, low, high);
  
  /* Replaced EPSILON by fPrecision(mz, f1) - tap */
  if (-delta * d1 <= fPrecision(mz, f1)) {
    VB(1, vb, "Interpolate limits failed : D=%.6g f1=%g f2=%g\n",
       -delta * d1, f1, f2);
    return(0);
  }

  /* Now, interpolate point by quadratic or cubic methods based on f2_flag */
  
  if (f2_flag == 1)
    success = cubic(a1, f1, d1, a2, f2, d2, &new, vb);
  else
    success = quadratic(a1, f1, d1, a2, f2, &new, vb);
  
  /* Failed interpolation - go to center of interval */

  if (success == 0) {
    new = a1 + 0.5 * delta;
    VB(3, vb, "Failed interpolation: guess %.6g\n", new);
  }

  /* check interpolate post-conditions new value is
     within (a1, a2) and not with mz->lsInterpLimits*range of ends */

  if (new < low) {
    VB(3, vb, "Interpolant (%.6g) too small: clamp to %.6g\n", new, low);
    new = low;
  } else if (new > high) {
    VB(3, vb, "Interpolant (%.6g) too large: clamp to %.6g\n", new, high);
    new = high;
  } else {
    VB(3, vb, "Interpolant (%.6g) OK\n", new);
  }
  *res = new;
  return(1);
}

int	raysLineSearch(mz, n, s, w, x, g, 
		       fEval, gEval, fgEval, ap, fp, dp)
  Minimize		mz ;
  int			n ;
  Real			*s ;
  Real			*w ;
  Real			*x ;
  Real			*g ;
  RealVecFunc		fEval ;
  VecProc		gEval ;
  Real2VecFunc		fgEval ;
  double		*ap ;
  double		*fp ;
  double		*dp ;
{
  double d1 = *dp;
  double f = *fp;
  double a = *ap;

  double a1, a2;	/* lower and upper limits of search */
  double d;		/* slope at new x */
  double delta_f, a_hat;
  double lowf;
  int done, f2_ok, succeed, bracket, do_extrapolate;
  double f1, comp, d0, f0;
  double amin; /* lowest f value found so far */
  int i, ok;
  int vb = mz->lsVerbosity;
  int need_to_eval_g = 0;
  int low_delta_f = 0;
  int low_delta_f_count = 0;

  mz->lsnFuncEvals = 0;
  d0 = d1;

  if (d1 >= 0)
    return mz->lsResultCode = MZNOTDESCENT;
  if (a<=0)
    IErrorAbort("raysLineSearch: no initial step provided");
  
  lowf = MIN(mz->expectedFuncMin, mz->acceptableFuncMin);
  if (f<lowf) {
    VB(1, vb,
       "Returning because f<mz->expectedFuncMin and f<mz->acceptableFuncMin");
    return mz->lsResultCode = MZOKFMIN;
  }
  a1 = 0;
  a2 = (lowf - f) / d1 * mz->stepBound;

  if (mz->lsDebug) {
    fprintf(dout, "Search:\t");
    for (i=0; i<n; i++)
      fprintf(dout, "%.4f ", w[i]);
    fprintf(dout, "\n");
  }	    

  f1 = f;
  f0 = f1;

  initBest(mz, g);
  
  if (a > a2) {
    VB(1, vb, "Initial alpha out of bounds (%g) - setting to %g\n", a, a2);
    a = a2;
  }
  
  /*
    a should be 1.00 whenever possible, because the Newton
    step will result in faster convergence, near the solution,
    for BFGS methods
    */

  if (mz->quasiNewton && a > 1.0) {
    VB(1, vb, "Quasi newton - setting alpha to 1.0 from %g\n", a);
    a = 1.0;
  }

  VB(2, vb, "Begin Line Search - reduce %.6g with slope %.6g\n", f1, d1);
  VB(2, vb, "Stepsize: %.6g < alpha =  %.6g < %.6g\n", a1, a, a2);
  VB(1, vb, "a= %-12g f= %-12.8g f-f0= %-12g d= %-12g\n", 0.0, f1, f1-f0, d1);

  /*
    Loop conditions
    0 - still looking
    1 - successful
    2 - error, search failed to meet constraints
    */
  
  bracket = 0;
  done = 0;
  low_delta_f = 0;
  
  while (!done) {
    if (mz->stopFlag>1) {
      VB(2, vb, "Stopping line search: stopFlag set\n");
      ok = getBestByF(mz, n, s, w, x, g, fEval, gEval, fgEval, ap, fp,dp);
      return mz->lsResultCode = MZSTOPPED;
    }
    if (mz->lsnFuncEvals >= mz->lsMaxFuncEvals) {
      VB(2, vb, "Stopping line search: too many f evals in search\n");
      ok = getBestByF(mz, n, s, w, x, g, fEval, gEval, fgEval, ap, fp,dp);
      return mz->lsResultCode = (amin>0.0 ? MZMAXFLINE : MZMAXFLINEFAIL);
    }
    if (mz->maxFuncEvals>0
	&& (mz->lsnFuncEvals > (mz->maxFuncEvals + mz->lsFlexFuncEvals))) {
      VB(2, vb, "Stopping line search: too many f evals in total\n");
      ok = getBestByF(mz, n, s, w, x, g, fEval, gEval, fgEval, ap, fp,dp);
      return mz->lsResultCode = MZMAXF;
    }
    
    /* if the expected change is very small we want to evaluate the gradient */
    low_delta_f = 0;
    if (mz->wobbleTest || mz->wobbleWatch) {
      if (-a*d0 < fPrecision(mz, f0)) {
	VB(2, vb, "Expected low change in func value: requesting gradient\n");
	low_delta_f = 1;
      } else if (mz->lsnFuncEvals>0 && fabs(delta_f) < fPrecision(mz, f0)) {
	VB(2, vb, "Observed low change in func value: requesting gradient\n");
	low_delta_f = 1;
      }
    }
    low_delta_f_count += low_delta_f;

    if (mz->wobbleWatch) {
      int good, total;
      gradientsConsistent(mz, &good, &total);
      if (low_delta_f || good < total)
	fprintf(dout,
		"Wobbliness diagnostics: deltas: %s, %d slopes ok out of %d\n",
		low_delta_f ? "low" : "ok", good, total);
    }

    if (mz->wobbleTest && low_delta_f &&
	(   (mz->wobbleTest<2)
	 || (mz->wobbleTest<3 && mz->lsnFuncEvals>0)
	 || (mz->lsnFuncEvals>0 && !gradientsConsistent(mz, NULL, NULL)))) {
      VB(2, vb, "Stopping line search: f wobbly at level %d\n",mz->wobbleTest);
      ok = getBestByF(mz, n, s, w, x, g, fEval, gEval, fgEval, ap, fp,dp);
      return mz->lsResultCode = MZWOBBLY;
    }


    /* Step 1 : evaluate function at x + a * w */
    mz->nFuncEvals++;
    mz->lsnFuncEvals++;
    f2_ok = 0;

    moveInDirection(n, s, w, a, x);

    if (fEval && gEval) {
      f = fEval(mz, n, x);
      if (low_delta_f) {
	gEval(mz, n, g);
	d = dotProduct(n, g, w);
	need_to_eval_g = 0;
      } else {
	d = 0;
	need_to_eval_g = 1;
      }
    } else {
      f = fgEval(mz, n, x, g);
      d = dotProduct(n, g, w);
      need_to_eval_g = 0;
    }

    insertLSData(mz, mz->lsnFuncEvals, a, f, d, !need_to_eval_g, 0);

    if (isNaN(f) || (!need_to_eval_g && isNaN(d))) {
      a2 = a;
      a = a1 + (a-a1)/mz->backUpFactor;
      VB(2, vb, "Computed NaN f=%g d=%g - backing up by 1/%g to %g\n",
	 f, d, mz->backUpFactor, a);
      continue;
    }

    updateBest(mz, mz->lsnFuncEvals, g);

    if (need_to_eval_g)
      VB(1, vb, "a= %-12g f= %-12.8g f-f0= %-12g d= ?\n", a, f, f-f0);
    else
      VB(1, vb, "a= %-12g f= %-12.8g f-f0= %-12g d= %-12g\n", a, f, f-f0, d);

    delta_f = f - f0;
    comp = mz->minFuncReduction * a * d0;

    /*	
      step2: check actual delta_f against expected	
      (First Goldstein Criterion)  
      */
    
    if (   (!mz->localMinima && (delta_f <= comp) && (f < f1))
	|| (mz->localMinima && (delta_f <= comp))) {

      /* function value is meets criterion */
      
      if (need_to_eval_g) {
	gEval(mz, n, g);
	d = dotProduct(n, g, w);
	insertLSData(mz, mz->lsnFuncEvals, a, f, d, 1, 1);
	if (isNaN(d)) {
	  a2 = a;
	  a = a1 + (a-a1)/mz->backUpFactor;
	  VB(2, vb, "Computed NaN in d f= %g - backing up by 1/%g to %g\n",
	     f, mz->backUpFactor, a);
	  continue;
	}
	updateBest(mz, mz->lsnFuncEvals, g);
	need_to_eval_g = 0;
      }

      if (mz->lsDebug) {
	fprintf(dout, "Grad:\t");
	for (i=0; i<n; i++)
	  fprintf(dout, "%.4f ", g[i]);
	fprintf(dout, "\n");
      }	    

      VB(2, vb, "Function value accepted (reduction=%g) - new slope = %.6g\n",
	 delta_f/(a*d0), d);
      f2_ok = 1;
      /*
	Check termination conditions (Wolfe Test)
	*/
      do_extrapolate = 0;

      if (mz->wolfeTest) {
	/* do the Wolfe Test */
	if (fabs(d) <= mz->maxSlopeRatio * (-d0))
	  done = 1;
	else if (d > 0) {
	  VB(2, vb, "Positive slope rejected - try interpolation\n");
	  succeed = interpolate(mz, a1, f1, d1, a, f, d, f2_ok, &a_hat,vb);
	  if (succeed == 1) {
	    a2 = a;
	    a = a_hat;
	    bracket = 1;
	  } else {
	    done = 2;
	    VB(2, vb, "Extrapolation failed\n");
	  }
	} else
	  do_extrapolate = 1;
      } else if (d >= mz->maxSlopeRatio * d0)
	done = 1;
      else
	do_extrapolate = 1;

      if (do_extrapolate) {
	/*
	  Extrapolate to point within interval (a, a2)
	  */
	VB(2, vb, "Slope rejected (ratio=%g) - extrapolate\n", d/d0);
	VB(3, vb, "(%g/%g > %g)\n", d, d0, mz->maxSlopeRatio);
	succeed = extrapolate(mz, a1, f1, d1, a, f, d, a2, &a_hat, bracket,vb);
	if (succeed == 1) {
	  a1 = a;
	  d1 = d;
	  f1 = f;
	  a = a_hat;
	} else {
	  VB(2, vb, "Extrapolation failed\n");
	  done = 2;
	}
      }
    } else {
      /*
        Function value rejected
	Interpolate to new value within interval (a1, a)
	*/
      VB(2, vb, "Function value rejected (reduction=%g) - try interpolation\n",
	 delta_f/(a*d0));
      VB(3, vb, "(%g > %g) (%g * %g * %g)\n",
	 delta_f, comp, mz->minFuncReduction, a, d0);
      succeed = interpolate(mz, a1, f1, d1, a, f, d, f2_ok, &a_hat, vb);
      if (succeed == 1) {
	a2 = a;
	a = a_hat;
	bracket = 1;
      } else {
	VB(2, vb, "Interpolation failed\n");
	done = 2;
      }
    }
  }
  
  if (need_to_eval_g) {
    /* need the gradient at the end of the search for the direction method */
    gEval(mz, n, g);
    d = dotProduct(n, g, w);
    insertLSData(mz, mz->lsnFuncEvals, a, f, d, 1, 1);
    updateBest(mz, mz->lsnFuncEvals, g);
  }

  if (done == 1) {
    VB(2, vb, "Slope accepted (ratio=%g): line search successful.\n", d/d0);
    *fp = f; *ap = a; *dp = d;
    return mz->lsResultCode = MZSUCCEED;
  } else {
    VB(1, vb, "Line search failed.\n");
    return mz->lsResultCode = MZFAIL;
  }
}
