# bp.r 14-Feb-92 15:38
#
# optwts 14-Feb-92 15:38
#	Solve neural net least squares problem by back propagation.
#	Return weight vector,  error, and |g| / |w|.
#	Stop if any of the following is true (return value as ierr)
#		0) rmserr <= goal
#		1) Used itermax iterations.
#		2) Size of gradient vector < GWRATIO * size of weight vector
#		3) Error hasn't gone down by EFACTOR in nfreq iterations
#		4) Number right hasn't improved in nfreq iterations
#
subroutine optwts(itermax, num, w, rmserr, gw, iter, ierr)
	real w(*)						# the weights
#
include sizcom.h
include netcom.h
include errcom.h
include filcom.h
#
real g (MAXWSIZE)			# negative grad of error wrt w's
real dw(MAXWSIZE)			# new last delta(w)

	# The next two parameters are so small that the actual stopping
	# criterion will be the number right.
parameter (GWRATIO = 1.e-12)		# for stopping with small gradient
parameter (EFACTOR = 1.e-06)		# for stopping with slow convergence
#
# NOTE: Our eta is the usual eta divided by the number of patterns and by
# the number of outputs.
# The current definition allows eta to remain the same even if the number
# of patterns changes. 
#
if (itermax > 0)
{	write(fpstd, 999) itermax, num, eta, alpha
	write(fprun, 999) itermax, num, eta, alpha
}
eta = eta * nout
#
do n = 1, MAXWSIZE
	dw(n) = 0.0
# get initial error and gradient
call func(itermax > 0, num, w, error, g)
#
if (itermax <= 0) iter = -1
else			  iter = 0
call confuse((itermax <= 0), iter, ' ', error, 
	fprun, fpstd, nright)	
rmserr = sqrt(error * 2)
nrtprv = nright
#
if (itermax <= 0)
	return
#
ierr = 1
rmsold = rmserr
#
for (k =  1; k <= itermax; k = k + 1)
{	# adjust the weights with back propagation.
	call backward(w, w(num1+1), error, gsq, dw, dw(num1+1), g, g(num1+1))
		# check error and do new gradient
	call func(.true., num, w, error, g)
#
	rmserr = sqrt(error * 2 )
#
	if (mod(k, nfreq)   ==   0)
	{	call confuse(.false., k, ' ', error, fprun, fpstd, nright)	
		# Terminate if convergence too slow
		rmserr = sqrt(2 * error)
		if (rmserr > (1.0 - EFACTOR) * rmsold)
		{	ierr = 3
			iter = k
			break
		}
		rmsold = rmserr
			# Terminate if number OK not improving
		if (nright <= nrtprv)
		{	ierr = 4
			iter = k
			break
		}
		nrtprv = nright
	}
	# Terminate when error satisfactory
	if (rmserr < goal)
	{	ierr = 0
		break
	}
	# Terminate when gradient is too small
	gsiz = snrm2(num, g, 1)
	wsiz = snrm2(num, w, 1)
	##print *, ' g2 ', gsiz, ' g2/w2 ', gsiz/wsiz
	if (gsiz < GWRATIO * max(1.0, wsiz))
	{	ierr = 2
		break
	}
}
gw = gsiz / wsiz
iter = min(k, itermax)
#
		# print matrices
call confuse(.true., iter, 'F', error, fprun, fpstd, nright)	
#
return
999 format(/' Backprop: doing ', i6, ' iterations; ', i5, ' variables; ',
			'eta ', f7.4, '; alpha ', f7.4)
end
#
# backward 14-Feb-92 15:38
#		back propagation to adjust weights
#
subroutine backward(w1, w2, error, gsq, dw1, dw2, gm1, gm2)
	real w1	(nhid, ninp + 1)
	real w2	(nout, nhid + 1)
	real dw1(nhid, ninp + 1)
	real dw2(nout, nhid + 1)
	real gm1(nhid, ninp + 1)
	real gm2(nout, nhid + 1)
include netcom.h
#
integer h, i, j
#
do j = 1, nout
{	do h = 1, nhid
	{	tmp = eta * gm2(j,h) + alpha * dw2(j,h)
		w2(j,h) = w2(j,h) + tmp
		dw2(j,h) = tmp
	}
}
#
do h = 1, nhid
{	do i = 1, ninp
	{	tmp = eta * gm1(h,i) + alpha * dw1(h,i)
		w1(h,i) = w1(h,i) + tmp
		dw1(h,i) = tmp
	}
}
return
end
##
## end of bp.r
