\documentclass[11pt]{article}
\usepackage{fullpage}

\title{A Derivation of the Scalar Discrete-Time LQR Controller by Induction}
\author{Andrew William Moore}

\begin{document} 
 
\maketitle 
 
\noindent Assume a scalar state and action, and assume that desired state and
action are zero ($x_{d} = u_{d} = 0$).  Assume linear dynamics:

\begin{equation}
x_{k+1} = a x_{k} + b u_{k}     \label{eq:dyn}
\end{equation}

Write $V_{k}^{\ast}(x) =$ the minimum possible sum of future costs
assuming we are at timestep $k$.  Assume the system stops at time $k =
N$, and the stopping cost is $q {x_{N}}^{2}$.  For all other times
(i.e. $k < N$) the cost is $q {x_{k}}^{2} + r {u_{k}}^{2}$.

Then
\begin{eqnarray}
V_{k}^{\ast}(x) & = & \sum_{j = k}^{N - 1} \left( q {x_{j}}^{2} + r {u_{j}}^{2} \right) + q {x_{N}}^{2}
\hspace{1cm} \mbox{assuming $u_{k}, u_{k + 1}, \ldots, u_{N - 1}$ chosen optimally.}
\label{eq:opt} \\
V_{N}^{\ast}(x) & = & q {x_{N}}^{2} \label{eq:base} \\
V_{k}^{\ast}(x)
  & = & \min_{u_{k}} \left(q {x_{k}}^{2} + r {u_{k}}^{2} + V_{k + 1}^{\ast}(x_{k + 1}) \right) 
\label{eq:ind}
\end{eqnarray}

\noindent by the principle of optimality, which says that your best bet for minimal costs is to minimize
over your first step for the cost of that step plus the minimum possible costs of future steps.

This hypothesis will be proved by induction: $V_{k}^{\ast}(x)$ is a quadratic in $x$, with the
quadratic coefficient dependent on $k$: $V_{k}^{\ast}(x) = p_{k} x^{2}$ for some constants
$p_{0}, p_{1}, \ldots, p_{N}$.

Base case: $p_{N} = q$ from Equation~\ref{eq:base}.

Inductive step: Assume $V_{k + 1}^{\ast}(x) = p_{k + 1} x^{2}$; we'll prove 
$V_{k}^{\ast}(x) = p_{k} x^{2}$ for some $p_{k}$.

From here on, all that remains is trivial algebra.

To begin with we use Equation~\ref{eq:dyn} in Equation~\ref{eq:ind}: we substitute in the next state
using our state transition model:

\begin{equation}
V_{k}^{\ast}(x) = \min_{u_{k}}
   \left(q {x_{k}}^{2} + r {u_{k}}^{2} + V_{k + 1}^{\ast}(a x_{k} + b u_{k}) \right) 
\label{eq:usedyn}
\end{equation}

Then we use the inductive assumption $V_{k + 1}^{\ast}(x) = p_{k + 1} x^{2}$

\begin{equation}
V_{k}^{\ast}(x) = \min_{u_{k}}
   \left(q {x_{k}}^{2} + r {u_{k}}^{2} + p_{k + 1}{(a x_{k} + b u_{k})}^2 \right) 
\label{eq:useind}
\end{equation}

Next we simplify with three new variables, $\alpha, \beta, \gamma$:
\begin{eqnarray}
V_{k}^{\ast}(x) & = & \min_{u_{k}}
   \left(\alpha {x_{k}}^{2} + 2 \beta x_{k}u_{k} + \gamma {u_{k}}^2 \right) 
   \hspace{1cm} \mbox{where} \label{eq:simp} \\
\alpha & = & q + p_{k + 1} a^{2} \label{eq:alpha} \\
\beta & = & p_{k + 1} a b \label{eq:beta} \\
\gamma & = & r + p_{k + 1} b^{2} \label{eq:gamma} 
\end{eqnarray}

To minimize Equation~\ref{eq:simp} with respect to $u_{k}$ we differentiate and set to zero the
bracketed expression giving:

\begin{equation}
2 \beta x_{k} + 2 \gamma u_{k}^{\ast} = 0 \label{eq:zero}
\end{equation}

\noindent where $u_{k}^{\ast}$ is the optimal action.  Thus

\begin{equation}
u_{k}^{\ast} = - (\beta / \gamma) x_{k} \label{eq:ukopt}
\end{equation}

Since $u_{k}^{\ast}$ minimizes Equation~\ref{eq:simp} we have

\begin{equation}
V_{k}^{\ast}(x)  =  \alpha x^{2} + 2 \beta x u_{k}^{\ast} + \gamma {\left( u_{k}^{\ast} \right)}^{2}
\label{eq:insert}
\end{equation}

So from Equation~\ref{eq:ukopt}

\begin{equation}
V_{k}^{\ast}(x)  =  \alpha x^{2} + 2 \beta x ( - \beta / \gamma) x +
  \gamma {( - \beta / \gamma) }^{2} x^{2} = 
  \left( \alpha - 2 \beta^{2} / \gamma + \beta^{2} / \gamma \right) x^{2}
\end{equation}

And thus we have

\begin{equation}
V_{k}^{\ast}(x)  =  \left( \alpha - \beta^{2} / \gamma \right) x^{2} 
\end{equation}

\noindent so

\begin{equation}
p_{k}  =  \left( \alpha - \beta^{2} / \gamma \right) \label{eq:done}
\end{equation}

Inserting back the substitutions of Equations~\ref{eq:alpha}, \ref{eq:beta}, \ref{eq:gamma}
into Equation~\ref{eq:ukopt} and Equation~\ref{eq:done}

\begin{equation}
u_{k}^{\ast} = \left( \frac{ - p_{k + 1} a b}{r + p_{k + 1} b^{2}} \right) x_{k}
\end{equation}

\begin{equation}
V_{k}^{\ast}(x)  =  p_{k} x^{2} \hspace{1cm} \mbox{where $p_{k} = q + a^{2} p_{k + 1} 
     \left( 1 - \frac{p_{k + 1} b^{2}}{r + p_{k + 1} b^{2}} \right)$}
\end{equation}


\end{document}

