# cg 14-Feb-92 15:38
#
# optwts 14-Feb-92 15:38
#	Solve neural net least squares problem by scaled conjugate gradients.
#	Return weight vector, error, |g|/|w|.
#	Stop if any of the following is true (return value as ierr)
#		0) rmserr <= egoal
#		1) Used itermax iterations.
#		2) Size of gradient vector < GWRATIO * size of weight vector
#		3) Error hasn't gone down by EFACTOR in nfreq iterations
#		4) Number right hasn't improved in nfreq iterations
#
subroutine optwts(itermax, num, w, rmserr, gw, iter, ierr)
	real w(num)						# the weights
#
include sizcom.h
include netcom.h
include errcom.h
include filcom.h
#
real gm	 (MAXWSIZE)			# negative grad of error wrt w's
real wnew(MAXWSIZE)			# new weights
real p	 (MAXWSIZE)			# direction vector
real r	 (MAXWSIZE)			# remembered gm
real s	 (MAXWSIZE)			# second derivative info along p direction
integer icount				# number of steps since last restart
integer fcount				# number of consecutive failures
logical success

equivalence(ee, ii)
#
parameter (XLSTART = 0.01)		# starting value for xl
#

if (itermax > 0)
{	write(fpstd, 999) itermax, num
	write(fprun, 999) itermax, num
}

# get inital error and gradient
call func(itermax > 0, num, w, error, gm)
wsiz = snrm2(num, w, 1)
#
if (itermax <= 0) iter = -1
else			  iter = 0
call confuse((itermax <= 0), iter, ' ', error, fprun, fpstd, nright)
rmserr = sqrt(error * 2)
nrtprv = nright
#
if (itermax <= 0)
	return
#
ierr = 1
sigma = 1.e-4		# relative distance for numerical derivative
xl = XLSTART		# lambda_k
xlb = 0				# lambda_k bar
deltak = 0
success = .true.
#
rmsold = rmserr
do n = 1, num
{	p(n) = gm(n)
	r(n) = gm(n)
}
# 	how often to restart the algorithm
iover = num				
#
icount = 0			# number of iterations since last restart
fcount = 0			# number of failed iterations in a row
ncount = 0			# number of iterations since last convergence check
iter = itermax
k = 0
while (k < itermax)
{	
	icount = icount + 1
	psiz = snrm2(num, p, 1)
	psq = psiz**2

	if (success)		# get second derivative info
	{	sigmak = sigma * wsiz / psiz
		do n = 1, num
			wnew(n) = w(n) + sigmak * p(n)
		# get error and gradient 2nd derivative information
		call func(.true., num, wnew, enew, s)
		# dE/d(dist) along p is (enew-error)/sigmak
		do n = 1, num
			s(n) = (gm(n) - s(n)) / sigmak
		deltak = sdot(num, s, 1, p, 1)
	}
	c = xl - xlb
	if (c != 0.0)
	{	do n = 1, num
			s(n) = s(n) + c * p(n)
		deltak = deltak + c * psq
	}
	# maybe need to make "Hessian" positive definite
	if (deltak <= 0)
	{	c = xl - 2 * deltak / psq
		do n = 1, num
			s(n) = s(n) + c * p(n)
		xlb = 2 * (xl - deltak / psq)
		deltak = - deltak + xl * psq
		xl = xlb
	}

	# get the right step size
	xmu = sdot(num, p, 1, r, 1)
	alpha = xmu / deltak
#
	do n = 1, num
		wnew(n) = w(n) + alpha * p(n)
	# get new error and gradient
	call func(.true., num, wnew, enew, gm)

	delta = 2 * deltak * (error - enew) / xmu**2
#
	if (delta >= 0.0)			# successful step
	{	k = k + 1
		ncount = ncount + 1
			#may want additional reporting here
		# if (mod(k, nfreq)   !=   0)
		#	call confuse(.false., k, ' ', error, fprun, fpstd, nright)
		fcount = 0
		do n = 1, num
			w(n) = wnew(n)
		wsiz = snrm2(num, w, 1)
		error = enew
		xlb = 0
		success = .true.
		if (mod(icount, iover) == 0)		# restart
		{	do n = 1, num
				p(n) = gm(n)
		}
		else						# find conjugate direction
		{	beta = (sdot(num, gm, 1, gm, 1) - sdot(num, gm, 1, r, 1)) / xmu
			do n = 1, num
				p(n) = gm(n) + beta * p(n)
		}
		do n = 1, num
			r(n) = gm(n)
		if (delta >= 0.75)				# trustworthy
		{	xl = xl / 2					# maybe try something else
		}
	}						# end if (delta >= 0)
	else							# unsuccessful step
	{	xlb = xl
		success = .false.
		fcount = fcount + 1
		if (fcount > 2)
		{	if (icount > fcount)		# at least one good step since restart
			{	#Dwrite(fpstd, 773) k
				#Dwrite(fprun, 773) k
				#D773 format(' restart cg', i5)
				do n = 1, num
					p(n) = gm(n)
				xl = XLSTART		# lambda_k
				xlb = 0				# lambda_k bar
				success = .true.
				delta = 1.0
				aveprog = 0.0
				icount = 0
				fcount = 0
			}
			else 
			{	ierr = 3
				iter = k
				break
			}
		}
	}
	# If not nearly as good as predicted, increase xl
	if (delta < 0.25)
	{	xl = 4.0 * xl
		# maybe try xl = xl + deltak * (1 - delta) / psq
	}
	#
	rmserr = sqrt(error * 2)
	gsiz = snrm2(num, r, 1)

	if (success & ncount >= nfreq)
	{	ncount = 0
		call confuse(.false., k, ' ', error, fprun, fpstd, nright)
		# Terminate if convergence too slow
		if (rmserr > errdel * rmsold)
		{	ierr = 3
			iter = k
			break
		}
		rmsold = rmserr
			# Terminate if number OK not improving
		if (nright < nrtprv + nokdel)
		{	ierr = 4
			iter = k
			break
		}
		nrtprv = nright
	}
	# Terminate when error satisfactory
	if (rmserr < egoal)
	{	ierr = 0
		iter = k
		break
	}
	# Terminate when gradient is too small
	if (gsiz < gwgoal * max(1.0, wsiz))
	{	ierr = 2
		iter = k
		break
	}
}
gw = gsiz / wsiz
#
		# print matrices
call confuse(.true., iter, 'F', error, fprun, fpstd, nright)
#
return
999 format(/' Conjgrad: doing ', i6, ' iterations; ', i5, ' variables')
end
##
## end of cg.r
