
/**********************************************************************
 * $Id: direction.c,v 1.3 92/11/30 11:30:45 drew Exp $
 **********************************************************************/

/**********************************************************************
 *   Copyright 1990,1991,1992,1993 by The University of Toronto,
 *		      Toronto, Ontario, Canada.
 * 
 *			 All Rights Reserved
 * 
 * Permission to use, copy, modify, distribute, and sell this software
 * and its  documentation for  any purpose is  hereby granted  without
 * fee, provided that the above copyright notice appears in all copies
 * and  that both the  copyright notice  and   this  permission notice
 * appear in   supporting documentation,  and  that the  name  of  The
 * University  of Toronto  not  be  used in  advertising or  publicity
 * pertaining   to  distribution   of  the  software without specific,
 * written prior  permission.   The  University of   Toronto makes  no
 * representations  about  the  suitability of  this software  for any
 * purpose.  It  is    provided   "as is"  without express or  implied
 * warranty.
 *
 * THE UNIVERSITY OF TORONTO DISCLAIMS  ALL WARRANTIES WITH REGARD  TO
 * THIS SOFTWARE,  INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS, IN NO EVENT SHALL THE UNIVERSITY OF TORONTO  BE LIABLE
 * FOR ANY  SPECIAL, INDIRECT OR CONSEQUENTIAL  DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR  PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT  OF  OR  IN  CONNECTION WITH  THE  USE  OR PERFORMANCE  OF THIS
 * SOFTWARE.
 *
 **********************************************************************/

#include <stdio.h>
#include <math.h>

#include <xerion/useful.h>
#include "minimize.h"
#include "direction.h"

/***********************************************************************
 *  Name:		steepestDescent
 *  Description:
 *	Sets the search direction to the negated gradient and reports
 *	the slope along it, i.e. the dot product of the new direction
 *	with the gradient (minus the squared gradient length).
 *
 *  Parameters:
 *I	Minimize	mz,	- minimize record (not referenced here)
 *I	int		n,	- number of elements in each vector
 *O	Real		*search	- receives the new search direction
 *I	Real		*grad,	- gradient
 *O	double		*dp	- receives the slope of the new direction
 *
 *  Return Value: none
 *
 ***********************************************************************/

void steepestDescent (mz, n, search, grad, dp)
  Minimize	mz ;
  int		n ;
  Real		*search ;
  Real		*grad ;
  double	*dp ;
{
  double	slope = 0.0 ;
  int		k = 0 ;

  while (k < n) {
    /* direction first, then its contribution to the slope */
    search[k] = -grad[k] ;
    slope += search[k] * grad[k] ;
    ++k ;
  }

  *dp = slope ;
}

/***********************************************************************
 *  Name:		momentumDirection
 *  Description:
 *	Steepest descent with momentum: the new direction is the old
 *	direction scaled by mz->alpha, minus the gradient.  When
 *	mz->continueDirection is false the old direction is cleared
 *	first, so the result is plain steepest descent.
 *
 *  Parameters:
 *I	Minimize	mz,	- the minimize record (alpha,
 *				  continueDirection and mzVerbosity
 *				  are read)
 *I	int		n,	- the dimensionality of the vectors
 *IO	Real		*s	- current search direction,
 *				  returns new direction
 *I	Real		*grad,	- gradient
 *O	double		*dp	- returns slope of new search direction
 *				  (dot product of s and grad)
 *
 *  Return Value: none
 *
 ***********************************************************************/

void		momentumDirection (mz, n, s, grad, dp)
  Minimize	mz ;
  int		n ;
  Real		*s ;
  Real		*grad ;
  double	*dp ;
{
  Real		decay = mz->alpha ;
  double	slope = 0.0 ;
  int		k ;

  if (!mz->continueDirection) {
    VB(2, mz->mzVerbosity, "Starting new momentum direction\n");
    /* forget the accumulated direction */
    k = 0 ;
    while (k < n) {
      s[k] = 0.0 ;
      ++k ;
    }
  }

  k = 0 ;
  while (k < n) {
    s[k] = decay*s[k] - grad[k] ;
    slope += s[k] * grad[k] ;
    ++k ;
  }

  *dp = slope ;
}

/***********************************************************************
 *  Name:		quickPropDirection
 *  Description:
 *	Computes a quick prop direction.  This involves an independent
 *	quadratic fit for each parameter: the previous update is scaled
 *	by a factor f = grad/(pgrad - grad) * mz->epsilon, limited in
 *	magnitude by mz->maxGrowthFactor.  Whenever f is positive a
 *	steepest descent term (- mz->qpEpsilon * grad) is added as well.
 *
 *	When mz->continueDirection is false the update is simply
 *	- mz->qpEpsilon * grad.
 *
 *	If the gradient of a parameter is zero and identical to its
 *	previous gradient there is nothing to fit; the factor is then
 *	taken to be zero so that the parameter is not moved.
 *
 *  Parameters:
 *I	Minimize	mz,	- the minimize record (epsilon,
 *				  qpEpsilon, maxGrowthFactor,
 *				  continueDirection and mzVerbosity
 *				  are read)
 *I	int		n,	- the dimensionality of the vectors
 *IO	Real		*update	- last update, returns new update
 *I	Real		*grad,	- gradient
 *I	Real		*pgrad,	- previous gradient
 *O	double		*dp	- returns slope of new search direction
 *				  (dot product of update and grad)
 *
 *  Return Value: none
 *
 ***********************************************************************/

void		quickPropDirection(mz, n, update, grad, pgrad, dp)
  Minimize	mz ;
  int		n ;
  Real		*update ;
  Real		*grad ;
  Real		*pgrad ;
  double	*dp ;
{
  int 		i;
  double 	d = 0.0;
  double	f;
  Real		epsilon = mz->epsilon ;

  if (!mz->continueDirection) {
    VB(2, mz->mzVerbosity, "Starting new quickprop direction\n");
    for (i=0; i<n; i++) {
      update[i] = - mz->qpEpsilon * grad[i];
      d += update[i]*grad[i];
    }
  } else {
    for (i=0; i<n; i++) {
      if (grad[i]==pgrad[i]) {
        /* slope did not change, so the quadratic fit is degenerate */
        if (grad[i]!=0.0)
          f = mz->maxGrowthFactor;
        else
          f = 0.0;	/* zero slope both times: leave parameter alone */
      } else {
        f = grad[i]/(pgrad[i] - grad[i]) * epsilon;
        if (grad[i]*pgrad[i]>=0.0) {
          /* both slopes are in same direction */
          if (f<=0) {
            /* slope has increased */
            if (pgrad[i]==0.0)
              f = 1;
            else
              f = mz->maxGrowthFactor;
          } else {
            /* move in same direction as before, but not too far */
            if (f>mz->maxGrowthFactor)
              f = mz->maxGrowthFactor;
          }
        } else {
          /* slope is in opposite direction to previous */
          if (f< - mz->maxGrowthFactor)
            f = - mz->maxGrowthFactor;
        }
      }
      if (f>0.0)	/* add some steepest descent in */
        update[i] = f*update[i] - mz->qpEpsilon * grad[i];
      else
        update[i] = f*update[i];
      d += update[i]*grad[i];
    }
  }
  *dp = d;
}

/***********************************************************************
 *  Name:		deltaBarDeltaDirection
 *  Description:
 *	Computes a delta-bar-delta direction.  Each parameter has its
 *	own gain.  The gain is increased (mz->gainIncrement is added)
 *	when the negated gradient has the same sign as the stored
 *	momentum, and decreased (multiplied by mz->gainDecrement) when
 *	the signs differ; it is left alone when their product is zero.
 *	The momentum is then updated to alpha*momentum - grad and the
 *	search direction is gain*momentum.
 *
 *	When mz->continueDirection is false the momentum is cleared,
 *	all gains are set to one and the direction is the negated
 *	gradient.
 *
 *	Two state vectors are needed - the momentum vector and the gain
 *	vector.
 *
 *  Parameters:
 *I	Minimize	mz,	- the minimize record (alpha,
 *				  gainIncrement, gainDecrement,
 *				  continueDirection and mzVerbosity
 *				  are read)
 *I	int		n,	- the dimensionality of the vectors
 *O	Real		*s	- returns new search direction
 *I	Real		*grad	- gradient
 *IO	Real		*gain	- previous gain (returns new)
 *IO	Real		*momentum - previous momentum (returns new)
 *O	double		*dp	- returns slope of new search direction
 *				  (dot product of s and grad)
 *
 *  Return Value: none
 *
 ***********************************************************************/

void deltaBarDeltaDirection (mz, n, s, grad, gain, momentum, dp)
  Minimize	mz ;
  int		n ;
  Real		*s ;
  Real		*grad ;
  Real		*gain ;
  Real		*momentum ;
  double	*dp ;
{
  int 		i;
  double 	d = 0.0;
  double 	u;
  Real		alpha = mz->alpha ;

  if (!mz->continueDirection) {
    VB(2, mz->mzVerbosity, "Starting new delta-bar-delta direction\n");
    for (i=0; i<n; i++) {
      momentum[i] = 0.0;
      gain[i] = 1.0;
      s[i] = -grad[i];
      /* the slope has to be reported for a fresh direction as well */
      d += s[i]*grad[i];
    }
  } else {
    for (i=0; i<n; i++) {
      /* u > 0 when the descent direction agrees with the momentum */
      u = - grad[i] * momentum[i];
      if (u>0)
        gain[i] += mz->gainIncrement;
      else if (u<0)
        gain[i] *= mz->gainDecrement;
      momentum[i] = alpha*momentum[i] - grad[i];
      s[i] = gain[i] * momentum[i];
      d += s[i]*grad[i];
    }
  }
  *dp = d;
}

/***********************************************************************
 *  Name:		conjugateGradient
 *  Description:
 *	Computes a conjugate gradient direction (following "Numerical
 *	Recipes in C").  The previous direction is scaled by
 *
 *		u1 = ((g - h) . g) / (h . h)
 *
 *	and the gradient is subtracted from it.  If the previous
 *	gradient is identically zero (h . h == 0) the ratio is
 *	undefined; u1 is then taken as zero, which gives the steepest
 *	descent direction instead of a direction full of inf/NaN.
 *
 *	When mz->continueDirection is false the direction is the
 *	negated gradient.
 *
 *  Parameters:
 *I	Minimize	mz,	- the minimize record (continueDirection
 *				  and mzVerbosity are read)
 *I	int		n,	- the dimensionality of the vectors
 *IO	Real		*s	- current search direction, returns new
 *I	Real		*g	- gradient
 *I	Real		*h	- previous gradient
 *O	double		*dp	- returns slope of new search direction
 *				  (dot product of s and g)
 *
 *  Return Value: none
 *
 ***********************************************************************/

void conjugateGradient (mz, n, s, g, h, dp)
  Minimize	mz ;
  int		n ;
  Real		*s ;
  Real		*g ;
  Real		*h ;
  double	*dp ;
{
  double d, c1, c2, u1;
  int i;
  if (!mz->continueDirection) {
    VB(3, mz->mzVerbosity, "Starting new conjugate gradient direction\n");
    d = 0;
    for (i=0; i<n; i++) {
      s[i] = -g[i];
      d += s[i]*g[i];
    }
  } else {
    d = c1 = c2 = 0.0;
    for (i=0; i<n; i++) {
      c1 += (g[i]-h[i]) * g[i];
      c2 += h[i] * h[i];
    }
    /* guard the division: a zero previous gradient carries no
       conjugacy information, so restart from steepest descent */
    if (c2 != 0.0)
      u1 = c1/c2;
    else
      u1 = 0.0;
    for (i=0; i<n; i++) {
      s[i] = u1*s[i]-g[i];
      d += s[i]*g[i];
    }
    VB(3, mz->mzVerbosity, "c1=%g c2=%g u1=%g d=%g\n", c1, c2, u1, d);
  }
  *dp = d;
}
/***********************************************************************
 *  Name:		rudisConjugateGradient
 *  Description:
 *	Computes a conjugate gradient direction according to Rudi
 *	Mathon's method.  With s scaled by the previous step size a and
 *	y = g - h (the change in gradient, which overwrites h), the new
 *	direction is
 *
 *		(s.g / y.y) * y
 *		  + (y.g / y.y - 2 * s.g / y.s) * s
 *		  - (y.s / y.y) * g
 *
 *	When mz->continueDirection is false the direction is the
 *	negated gradient and a and h are not used.
 *
 *  Parameters:
 *I	Minimize	mz,	- the minimize record (continueDirection
 *				  and mzVerbosity are read)
 *I	int		n,	- the dimensionality of the vectors
 *I	double		a	- the previous step taken; a zero
 *				  value is reported via IErrorAbort
 *IO	Real		*s	- current search direction, returns new
 *I	Real		*g	- gradient
 *IO	Real		*h	- previous gradient (gets destroyed:
 *				  holds g - h on return)
 *O	double		*dp	- returns slope of new search direction
 *				  (dot product of s and g)
 *
 *  Return Value: none
 *
 ***********************************************************************/

void rudisConjugateGradient (mz, n, a, s, g, h, dp)
  Minimize	mz ;
  int		n ;
  double	a ;
  Real		*s ;
  Real		*g ;
  Real		*h ;
  double	*dp ;
{
  double	slope = 0.0 ;
  double	yy = 0.0, ys = 0.0 ;	/* y.y and y.s */
  double	sg = 0.0, yg = 0.0 ;	/* s.g and y.g */
  double	coefY, coefS, coefG ;
  double	diff ;
  int		k ;

  if (!mz->continueDirection) {
    VB(3, mz->mzVerbosity, "Starting new conjugate gradient direction\n");
    for (k = 0 ; k < n ; ++k) {
      s[k] = -g[k] ;
      slope += s[k] * g[k] ;
    }
  } else {
    if (a==0)
      IErrorAbort("rudisConjugateGradient: previous step size zero");

    /* scale the old direction by the step size that was taken */
    if (a!=1.0) {
      for (k = 0 ; k < n ; ++k)
        s[k] *= a ;
    }

    /* replace h by the gradient change and gather the dot products */
    for (k = 0 ; k < n ; ++k) {
      h[k] = g[k]-h[k] ;
      diff = h[k] ;
      yy += diff*diff ;
      ys += diff*s[k] ;
      sg += s[k]*g[k] ;
      yg += diff*g[k] ;
    }
    VB(3, mz->mzVerbosity, "c1=%g c2=%g u1=%g u2=%g\n", yy, ys, sg, yg);

    coefG = ys/yy ;
    coefS = yg/yy-(sg+sg)/ys ;
    coefY = sg/yy ;
    VB(3, mz->mzVerbosity, "u1=%g u2=%g u3=%g\n", coefY, coefS, coefG);

    for (k = 0 ; k < n ; ++k) {
      s[k] = coefY*h[k]+coefS*s[k]-coefG*g[k] ;
      slope += s[k] * g[k] ;
    }
  }

  *dp = slope ;
}

/***********************************************************************
 *  Name:		conjugateGradientRestart
 *  Description:
 *	Computes a conjugate gradient direction, with restarts.
 *
 *	When mz->continueDirection is false the direction is the
 *	negated gradient, the cached values mz->c, mz->c1, mz->c2 are
 *	zeroed and the restart counter mz->nrs is reset.
 *
 *	Otherwise s is first scaled by the previous step a.  A pair of
 *	"restart" vectors (rstd = scaled direction, rsty = change in
 *	gradient) is captured whenever mz->nrs is zero; the counter is
 *	forced to zero when |g.h| / g_len^2 exceeds 0.2 and it wraps
 *	modulo n.  The new direction is built from the restart vectors
 *	and, on calls where the counter was already positive, corrected
 *	with terms built from the current step and gradient change.
 *
 *  Parameters:	
 *IO	Minimize	mz,	- the minimize record; continueDirection
 *				  and mzVerbosity are read, nrs, c, c1
 *				  and c2 are read and written
 *I	int		n,	- the dimensionality of the vectors
 *				  (used as a modulus, so it must not be
 *				  zero on a continued direction)
 *I	double		a	- the previous step taken; a zero
 *				  value is reported via IErrorAbort
 *IO	Real		*s	- current search direction, returns new
 *I	Real		*g	- gradient
 *IO	Real		*h	- previous gradient (overwritten on
 *				  calls that apply the correction)
 *	Real		*t	- work vector (neither input nor output)
 *IO	Real		*rstd	- state vector (must be kept)
 *IO	Real		*rsty	- state vector (must be kept)
 *O	double		*dp	- returns slope of new search direction
 *				  (dot product of s and g)
 *I	double		g_len	- length of gradient vector
 *
 *  Return Value: none
 *
 ***********************************************************************/

void conjugateGradientRestart (mz, n, a, s, g, h, t, rstd, rsty, dp, g_len)
  Minimize	mz ;
  int		n ;
  double	a ;
  Real		*s ;
  Real		*g ;
  Real		*h ;
  Real		*t ;
  Real		*rstd ;
  Real		*rsty ;
  double	*dp ;
  double	g_len ;
{
  double d, z, u1, u2, u3, u4;
  /* c, c1, c2 persist between calls in the minimize record */
  double c = mz->c, c1 = mz->c1, c2 = mz->c2;
  int i;
  if (!mz->continueDirection) {
    VB(3, mz->mzVerbosity,
       "Starting new conjugate gradient with restarts direction\n");
    /* fresh start: steepest descent, and forget all cached state */
    d = 0;
    for (i=0; i<n; i++) {
      s[i] = -g[i];
      d += s[i]*g[i];
    }
    c = c1 = c2 = 0.0;
    mz->nrs = 0;
  } else {
    if (a==0)
      IErrorAbort("conjugateGradientRestart: previous step size zero");
    /* scale the old direction by the step size that was taken */
    if (a!=1.0)
      for (i=0; i<n; i++)
	s[i] *= a;
    /* z = g.h; if successive gradients are far from orthogonal
       (relative to g_len squared) force new restart vectors.
       NOTE(review): this looks like Powell's restart test - confirm */
    z = 0.0;
    for (i=0; i<n; i++)
      z += g[i]*h[i];
    if (fabs(z/(g_len*g_len)) > 0.2)
      mz->nrs = 0; 
    if (mz->nrs == 0) {
      /* capture restart vectors: rsty = g - h, rstd = scaled s;
         c1 = rsty.rsty, c2 = rsty.rstd, c = c2/c1 */
      c1 = c2 = 0.0;
      for (i=0; i<n; i++) {
	z = rsty[i] = g[i]-h[i];
	c1 += z*z;
	c2 += z*s[i];
	rstd[i] = s[i];
      }
      c = c2/c1;
    }
    /* direction from the restart vectors alone:
       t = (rstd.g/c1)*rsty + (rsty.g/c1 - 2*rstd.g/c2)*rstd - c*g
       NOTE(review): c1 and c2 are not checked for zero */
    u1 = u2 = 0.0; 
    for (i=0; i<n; i++) {
      u1 += rstd[i]*g[i];
      u2 += rsty[i]*g[i];
    }
    u2 = u2/c1-(u1+u1)/c2;
    u1 /= c1;
    for (i=0; i<n; i++)
      t[i]=u1*rsty[i]+u2*rstd[i]-c*g[i];
    /* the counter is tested before it is incremented, so the
       correction below is skipped on the call that captured the
       restart vectors */
    if ((mz->nrs)++ > 0) {
      /* dot products of the current gradient change z = g - h with
         the restart vectors (negated) and with the scaled step */
      u1 = u2 = u3 = u4 = 0.0;
      for (i=0; i<n; i++) {
	z = g[i] - h[i];
	u1 -= z*rstd[i];
	u2 -= z*rsty[i];
	u3 += z*s[i];
      }
      u2 = u2/c1-(u1+u1)/c2;
      u1 /=c1;
      /* h is overwritten here: z must be read from the old h[i]
         before the assignment, and u4 uses the new h[i] */
      for (i=0; i<n; i++) {
	z = g[i] - h[i];
	h[i] = c*z+u1*rsty[i]+u2*rstd[i];
	u4 += z*h[i];
      }
      u1 = u2 = 0.0;
      /* NOTE(review): u3 is not checked for zero */
      u4 = u4/u3+1.0;
      for (i=0; i<n; i++) {
	u1 -= s[i]*g[i];
	u2 += (u4*s[i]-h[i])*g[i];
      }
      u1 /= u3;
      u2 /= u3;
      /* subtract the correction terms from the restart direction */
      for (i=0; i<n; i++)
	t[i] -= u1*h[i]+u2*s[i];
    }
    d = 0.0;
    /* wrap the counter so that it returns to zero (and new restart
       vectors are captured) after n continued calls */
    mz->nrs %= n;
    /* copy the work vector out and accumulate the slope */
    for (i=0; i<n; i++) {
      s[i] = t[i];
      d += s[i]*g[i];
    }
  }
  *dp = d;
  /* save the cached scalars for the next call */
  mz->c = c;
  mz->c1 = c1;
  mz->c2 = c2;
}

