
/**********************************************************************
 * $Id: linesearch.c,v 1.2 92/11/30 11:30:48 drew Exp $
 **********************************************************************/

/**********************************************************************
 *   Copyright 1990,1991,1992,1993 by The University of Toronto,
 *		      Toronto, Ontario, Canada.
 * 
 *			 All Rights Reserved
 * 
 * Permission to use, copy, modify, distribute, and sell this software
 * and its  documentation for  any purpose is  hereby granted  without
 * fee, provided that the above copyright notice appears in all copies
 * and  that both the  copyright notice  and   this  permission notice
 * appear in   supporting documentation,  and  that the  name  of  The
 * University  of Toronto  not  be  used in  advertising or  publicity
 * pertaining   to  distribution   of  the  software without specific,
 * written prior  permission.   The  University of   Toronto makes  no
 * representations  about  the  suitability of  this software  for any
 * purpose.  It  is    provided   "as is"  without express or  implied
 * warranty.
 *
 * THE UNIVERSITY OF TORONTO DISCLAIMS  ALL WARRANTIES WITH REGARD  TO
 * THIS SOFTWARE,  INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS, IN NO EVENT SHALL THE UNIVERSITY OF TORONTO  BE LIABLE
 * FOR ANY  SPECIAL, INDIRECT OR CONSEQUENTIAL  DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR  PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT  OF  OR  IN  CONNECTION WITH  THE  USE  OR PERFORMANCE  OF THIS
 * SOFTWARE.
 *
 **********************************************************************/

#include <stdio.h>
#include <math.h>

#include <xerion/useful.h>

#include "minimize.h"
#include "linesearch.h"

#ifndef MIN
#define MIN(x, y)       ((x) < (y) ? (x) : (y))
#endif

/***********************************************************************
 * Name:		vectorLength
 * Description:
 *	Computes the euclidean length of a vector.  Overflow and
 *	underflow are avoided by a generalization of the technique
 *	sqrt(x^2 + y^2) = x * sqrt(1 + (y/x)^2), where fabs(x)>fabs(y)
 *
 * Parameters:	
 *	int	n	- the number of elements in the vectors
 *	Real	*x	- a vector
 *
 * Return Value:	
 *	Real	vectorLength
 *			- the length of x
 ***********************************************************************/

#define SQR(x)	((x)*(x))
Real	vectorLength(n, x)
  int	n ;
  Real	*x ;
{
  int		i ;
  double	sum, scale, value ;

  scale	= 0.0 ;
  sum	= 1.0 ;
  for (i = 0 ; i < n ; ++i) {
    value = fabs(x[i]) ;
    if (value > scale) {
      /* rescale the sum: sum' = sum*scale^2/x[i]^2 */
      sum   = sum*SQR(scale/value) + 1.0 ;
      scale = value ;
    } else if (scale > 0.0) {
      sum  += SQR(value/scale) ;
    }
  }
  return sqrt(sum)*scale ;
}
#undef SQR

/***********************************************************************
 * Name:		dotProduct
 * Description:
 *	Computes the dotProduct of two vectors
 *
 * Parameters:	
 *	int	n	- the number of elements in the vectors
 *	Real	*x	- a vector
 *	Real	*y	- a vector
 *
 * Return Value:	
 *	double	dotProduct
 *			- the dot product of x and y
 ***********************************************************************/

double dotProduct (n, x, y)
  int	n ;
  Real	*x ;
  Real	*y ;
{
  double result = 0.0;
  int i;
  if (x==y)
    for (i=0; i<n; i++)
      result += x[i] * x[i];
  else
    for (i=0; i<n; i++)
      result += x[i] * y[i];
  return result;
}

/***********************************************************************
 * Name:		sameVector
 * Description:
 *	compares two vectors
 *
 * Parameters:	
 *	int	n	- the number of elements in the vectors
 *	Real	*x	- a vector
 *	Real	*y	- a vector
 *
 * Return Value:	
 *	int	sameVector
 *			- 1 if the vectors are identical, 0 otherwise
 ***********************************************************************/

int sameVector(n, x, y)
  int n;
  Real *x;
  Real *y;
{
  int i;
  for (i=0; i<n; i++)
    if (x[i]!=y[i])
      return  0;
  return 1;
}

/***********************************************************************
 * Name:		copyVector
 * Description:
 *	Copies one vector into another
 *
 * Parameters:	
 *	int	n	- the number of elements in the vectors
 *	Real	*source	- a vector
 *	Real	*dest	- this vector gets overwritten with source
 *
 * Return Value:	none
 ***********************************************************************/

void copyVector(n, source, dest)
  int n;
  Real *source;
  Real *dest;
{
  int i;
  for (i=0; i<n; i++)
    dest[i] = source[i];
}

/***********************************************************************
 * Name:		moveInDirection
 * Description:
 *	Adds a scalar times a direction vector to a start vector
 *
 * Parameters:	
 *	int	n	- number of elements in the vectors
 *	Real	*start	- a vector
 *	Real	*direction
 *			- a vector
 *	double	distance- the scalar value
 *	Real	*result	- the result
 *
 * Return Value:	
 *	Real	*moveInDirection
 *			- the result vector
 ***********************************************************************/
Real *moveInDirection(n, start, direction, distance, result)
  int n ;
  Real *start ;
  Real *direction ;
  double distance ;
  Real *result ;
{
  int i;
  for (i=0; i<n; i++)
    result[i] = start[i] + distance*direction[i];
  return result;
}

/***********************************************************************
 * Name:		machineEpsilon
 * Description:
 *	Computes the value of machine epsilon, i.e., the smallest
 *	eps such that 1.0+eps != 1.0, where 1.0 is represented as a
 *	Real.
 *
 * Parameters:		none
 *
 * Return Value:	
 *	double	machineEpsilon
 *			- the value of machine epsilon
 ***********************************************************************/

double machineEpsilon()
{
  Real x;
  double eps = 1.0;
  for (;;) {
    x = 1 + eps/2.0;
    if (x>1.0)
      eps /= 2.0;
    else
      break;
  }
  return eps;
}

void initBest (mz, grad)
  Minimize	mz;
  Real		*grad;
{
  mz->lsBestByF = 0;
  mz->lsBestByD = 0;
  copyVector(mz->n, grad, mz->bestGradientF);
  copyVector(mz->n, grad, mz->bestGradientD);
}

void updateBest (mz, i,  grad)
  Minimize	mz;
  int		i;	/* the current step */
  Real		*grad;
{
  double a, f, d, d0, amin, fmin, dmin;
  a = mz->lsStep[i];
  amin = mz->lsStep[mz->lsBestByF];
  f = mz->lsFuncValue[i];
  fmin = mz->lsFuncValue[mz->lsBestByF];
  d0 = mz->lsSlope[0];
  d = mz->lsSlope[i];
  dmin = mz->lsSlope[mz->lsBestByF];

  if (f<fmin || (f==fmin && a>amin)) {
    mz->lsBestByF = i;
    copyVector(mz->n, grad, mz->bestGradientF);
  }
  
  if (mz->lsHaveSlope[i]) {
    /* find the best looking slope */
    f = mz->lsFuncValue[i];
    d0 = mz->lsSlope[0];
    d = mz->lsSlope[i];
    dmin = mz->lsSlope[mz->lsBestByD];
    
    /*
      point qualifies for best slope if the slope ratio is lower, and
      the function value is not too far above the start value, where
      the amount it can be above the start value is higher for very
      low d-ratios
      */
    if (fabs(d/d0) < fabs(dmin/d0) && funcValueOK(mz, f, d/d0)) {
      mz->lsBestByD = i;
      copyVector(mz->n, grad, mz->bestGradientD);
    }
  }
}

int funcValueOK(mz, f,  dratio)
  Minimize mz;
  double f;
  double dratio;
{
  double flex;
  flex = fPrecision(mz, mz->lsFuncValue[0]) *
    (dratio==0
     ? mz->wobbleFlex
     : MIN(mz->maxSlopeRatio/fabs(dratio),mz->wobbleFlex));
  return (   f <= flex+mz->lsFuncValue[0]
	  && f <= flex+mz->lsFuncValue[mz->lsBestByF]);
}

/***
 * getBestByD - get the best set of weights as judged by slope ratios
 * return:
 *   1 if a good set was found,
 *   0 if the original weights were restored
 */
int getBestByD (mz, n, start, search, x, grad, ap, fp, dp)
  Minimize		mz ;
  int			n ;
  Real			*start ;
  Real			*search ;
  Real			*x ;
  Real			*grad ;
  double		*ap ;
  double		*fp ;
  double		*dp ;
{
  *ap = mz->lsStep[mz->lsBestByD];
  *fp = mz->lsFuncValue[mz->lsBestByD];
  *dp = mz->lsSlope[mz->lsBestByD];
  copyVector(mz->n, mz->bestGradientD, grad);
  moveInDirection(mz->n, start, search, *ap, x);
  if (mz->lsBestByD==0) {
    return 0;
  } else if (!mz->lsHaveSlope[mz->lsBestByD]) {
    fprintf(dout, "Warning: slope and gradients for best point by D missing.\n");
    return 0;
  } else {
    return 1;
  }
}

/***
 * getBestByF - get the best set of weights as judged by function value
 * return:
 *   1 if a good set was found,
 *   0 if the original weights were restored
 */
int getBestByF (mz, n, start, search, x, grad, fEval, gEval, fgEval, 
		ap, fp, dp)
  Minimize		mz ;
  int			n ;
  Real			*start ;
  Real			*search ;
  Real			*x ;
  Real			*grad ;
  RealVecFunc		fEval ;
  VecProc		gEval ;
  Real2VecFunc		fgEval ;
  double		*ap ;
  double		*fp ;
  double		*dp ;
{
  *ap = mz->lsStep[mz->lsBestByF];
  *fp = mz->lsFuncValue[mz->lsBestByF];
  moveInDirection(mz->n, start, search, *ap, x);
  if (mz->lsHaveSlope[mz->lsBestByF]) {
    *dp = mz->lsSlope[mz->lsBestByF];
    copyVector(mz->n, mz->bestGradientF, grad);
  } else {
    /* if we don't have the slope we have to calculate the gradient */
    /* try to find out if the last x vector given to the network is */
    /* the same as the one we want */
    /* But, for the moment that's too much work, just reevaluate */
    if (fgEval) {
      fgEval(mz, n, x, grad);
    } else {
      fEval(mz, n, x);
      gEval(mz, n, grad);
    }
    *dp = dotProduct(n, grad, search);
    insertLSData(mz, mz->lsnFuncEvals, *ap, *fp, *dp, 1, 1);
    updateBest(mz, mz->lsnFuncEvals, grad);
  }
  if (mz->lsBestByF==0) {
    return 0;
  } else {
    return 1;
  }
}

double fPrecision(mz,  f)
  Minimize mz;
  double f;
{
  Real tol = mz->funcPrecision>0 ? mz->funcPrecision : 1.0;
  f = fabs(f);
  if (mz->funcValueScale>0 && f<mz->funcValueScale)
    return mz->machinePrecision * mz->funcValueScale * tol;
  else
    return mz->machinePrecision * f * tol;
}

void	plotLineSearch (mz, n, start, search, fEval, gEval, fgEval, mid_a)
  Minimize		mz ;
  int			n ;
  Real			*start ;
  Real			*search ;
  RealVecFunc		fEval ;
  VecProc		gEval ;
  Real2VecFunc		fgEval ;
  double		mid_a ;
{
  int n_points = mz->lsPlotPoints;
  double top_a = mz->lsStep[mz->lsOrder[mz->lsnFuncEvals]];
  double f0 = mz->lsFuncValue[0];
  double a, f, d, frac;
  Real *x = mz->bestGradientF;	/* temporary vectors */
  Real *g = mz->bestGradientD;	/* temporary vectors */
  int i, lo_points, hi_points;
  int vb = mz->lsVerbosity;
  /* mid_a is the chosen step, top_a the investigated point */

  if (n_points<4)
    n_points = 4;

  /* decide how many points above and below a */
  if (mid_a==top_a)
    lo_points = 4.0/5.0 * n_points;
  else
    lo_points = a/top_a * n_points;
  if (n_points-lo_points<=n_points*0.13 || lo_points==n_points)
    lo_points = n_points - 1;
  if (lo_points<(n_points-n_points/2))
    lo_points = (n_points-n_points/2);
  hi_points = n_points-lo_points;
  VB(2, vb, "lsPlot: (0,%g,%g) %d low points %d high points\n",
     a, top_a, lo_points, hi_points);

  frac = 1.0/sqrt(2.0);
  for (i=1; i<=n_points; i++) {
    if (i<=lo_points) {
      a = (1.0-frac)*mid_a;
      frac /= sqrt(2.0);
    } else {
      if (top_a>mid_a)
	a = frac * (top_a-mid_a) + mid_a;
      else
	a = (1.0+frac) * mid_a;
      frac *= sqrt(2.0);
    }

    moveInDirection(n, start, search, a, x);

    if (fEval && gEval) {
      f = fEval(mz, n, x);
      gEval(mz, n, g);
    } else {
      f = fgEval(mz, n, x, g);
    }
    d = dotProduct(n, g, search);

    VB(1, vb, "a= %-12g f= %-12.8g f-f0= %-12g d= %-12g (lsPlot)\n",
       a, f, f-f0, d);
  }
}

/***********************************************************************
 *  Name:		insertLSData
 *  Description:
 *	Records the line search data in the arrays in the minimize
 *	record.  The position in the order of stepsizes is also
 *	updated.  For step number 0 the arrays are also cleared.
 *			
 *  Parameters:	
 *IO	Minimize	mz,	- minimize record
 *I	int		k,	- number of the step.
 *I	double		a	- step size
 *I	double		f	- function value
 *I	double		d	- slope
 *I	int		have_slope
 *I	int		only_slope (filling in slope from before)
 *
 *  Return Value: none
 *
 ***********************************************************************/

void insertLSData (mz, k, a, f, d, have_slope, only_slope)
  Minimize	mz ;
  int		k ;
  double	a ;
  double	f ;
  double	d ;
  int		have_slope ;
  int		only_slope ;
{
  int i, j;
  if (k==0 && !only_slope) {
    for (i=0; i<mz->lsnPoints; i++) {
      mz->lsStep[i] = mz->lsFuncValue[i] = mz->lsSlope[i] = 0.0;
      mz->lsOrder[i] = 0;
    }
    mz->lsStep[0] = a;
    mz->lsFuncValue[0] = f;
    mz->lsSlope[0] = d;
    mz->lsHaveSlope[0] = have_slope;
  } else if (k<mz->lsnPoints) {
    if (!only_slope) {
      mz->lsStep[k] = a;
      mz->lsFuncValue[k] = f;
      mz->lsSlope[k] = d;
      mz->lsHaveSlope[k] = have_slope;
      /* find where this step should be in order */
      for (i=0; i<k; i++)
	if (mz->lsStep[mz->lsOrder[i]] > a)
	  break;
      for (j=k; j>i ; j--)
	mz->lsOrder[j] = mz->lsOrder[j-1];
      mz->lsOrder[i] = k;
    } else {
      mz->lsSlope[k] = d;
      mz->lsHaveSlope[k] = 1;
    }
  }
}

