\documentclass[12pt]{article}

\usepackage{amsmath}
\usepackage{epsfig, graphics}
\usepackage{latexsym}
\usepackage{fullpage}
\usepackage{subfigure}
\usepackage[parfill]{parskip}

\title{10708 Graphical Models: Homework 5\\
{\small Due November 24th, beginning of class\\
\textbf{Note: Due sooner than usual}}
}
\date{November 12, 2008}

\renewcommand{\labelenumi}{(\alph{enumi})}
\renewcommand{\theenumi}{(\alph{enumi})}
%\renewcommand{\labelenumii}{\alph{enumii}}

\begin{document}

\maketitle

{\bf Instructions}: There are three questions on this assignment. Each
question has the name of one of the TAs beside it, to whom you should
direct any inquiries regarding the question. The last problem involves
coding. Do {\it not} attach your code to the writeup. Instead, copy
your implementation to
\begin{center}
\begin{verbatim}
/afs/andrew.cmu.edu/course/10/708/your_andrew_id/HW5
\end{verbatim}
\end{center}
Refer to the web page for policies regarding collaboration, due dates,
and extensions.

{\bf Note}: Please put your name and Andrew ID on the first page of
your writeup.


\section{Sampling From a Markov Network {\small [30 pts][Amr]}}

In this question we will explore different techniques to sample from
a Markov Network.

\begin{enumerate}
\item[1.] [10 pts] Given a \emph{calibrated} clique tree $\mathcal{T}$ with clique
potentials $P(C_i)$ and sepset potentials $P(S_{ij})$:
\begin{enumerate}
\item[(a)] If clique $C_i$ is a neighbour of clique $C_j$ in the
tree with a sepset $S_{ij}$, show how to calculate $P(C_j|C_i=c_i)$,
that is, the probability distribution over $C_j$ given an assignment
to $C_i$. (\emph{Hint}: use Bayes' rule and the independence
assertion imposed by $S_{ij}$).
\item[(b)] Show how to use forward sampling to generate a sample
from the distribution $P(X)$ encoded by the calibrated tree
$\mathcal{T}$. (\emph{Hint}: start by directing the tree by picking an
arbitrary root).
\end{enumerate}
\item[2.][5 pts] Show how to use forward sampling to generate a sample
from a \emph{chordal} Markov network with maximal clique size $w$.
What is the cost of this operation? (\emph{Hint}: What is the
relationship between chordal MNs and clique trees?)
\item[3.][5 pts] Given a general MN (not necessarily chordal), show
how to use forward sampling to generate a sample from it. Is this
always efficient?
\item[4.][5 pts] It seems that forward sampling might be
prohibitive in a MN. Importance sampling is a technique used when
sampling from a distribution $\mathcal{P}$ is expensive but evaluating
$P(X=x)$ is not expensive. Why might this technique be expensive as
well for a MN? (\emph{Hint}: your answer should be just 1--2 sentences)
\item[5.][5 pts] Finally, in Gibbs sampling, the major task is to efficiently
compute $P(X_i|\mathrm{MB}(X_i))$. Briefly explain why this
operation can be computed efficiently in a MN. (\emph{Hint}: your
answer should be just 1--2 sentences with the high-level intuition)

\end{enumerate}



\section{Generalized Belief Propagation {\small [25 pts][Amr]}}

In Generalized Belief Propagation (GBP) we pass messages between
clusters of nodes, rather than individual nodes, which can lead to
better approximations. For this question, refer to K\&F,
Section~10.3.

\begin{figure}[!h]
\begin{center}
\includegraphics[bb=0 0 595 381,width=2in]{gbp.png}
\caption{Markov Network for Generalized Belief Propagation Question}
\label{fig:gbp}
\end{center}
\end{figure}

\begin{enumerate}
\item[(a)] [6 pts] Draw the region graph for the undirected model in
Figure~\ref{fig:gbp}, assuming overlapping clusters of four nodes.
(\emph{Hint}: see Figure 10.12)
\item[(b)] [9 pts]\label{sec:beq} Assume that this pairwise Markov Random
Field has node potentials $\phi_a$ for all $a \in \{A,B,\ldots,L\}$,
and edge potentials $\psi_{ab}$ for all $(a,b) \in E$, the edge set
of the model. Write down the belief equations for
$\beta[\mathrm{G}]$, $\beta[\mathrm{CG}]$, $\beta[\mathrm{BCFG}]$.
These equations should be in terms of node potentials, edge
potentials, and messages from regions to their subregions.
(\emph{Hint}: use Equation 10.36)
\item[(c)] [5 pts] Write down the message sent from region $CG$ to region $G$. (\emph{Hint:} use Equation 10.37)
\item[(d)] [5 pts] Use the belief equations you derived in part (b) as well as the
marginalization consistency condition for beliefs (if $r \rightarrow
r'$ then $\sum_{C_r-C_{r'}}{\beta_r[C_r]}=\beta_{r'}[C_{r'}]$), to
derive the message sent from region $CG$ to region $G$.
\end{enumerate}



\section{Iterative Proportional Fitting {\small [45 pts] [Dhruv]}}

We continue with the binary segmentation problem from homework 4.  In
the previous homework, you were given the parameters of the Markov
Random Field and asked to produce a segmentation. In this question,
you will learn the spatial prior $\Psi$. In a typical setting, we are given a set of training 
images, along with manually labelled ground-truth segmentations. We
use these training images to learn parameters of our model and then proceed to segment 
test images with these parameters. 
For simplicity, in this exercise, we will only learn parameters from a single training
image, shown in Figure~\ref{fig:ipf}.

Recall that we have a pairwise Markov Random Field where each node
corresponds to a superpixel. The observed image is denoted $y = \{y_i\}$,
and the segmentation is denoted $x = \{x_i\}$ with $x_i \in \{1,2\}$. The Gibbs
distribution of this model is
\begin{align*}
P(x,y) = \frac{1}{Z}\prod_{i \in V} \Phi(x_i,y_i) \prod_{(i,j) \in E} \Psi(x_i,x_j)
\label{eqn:mrf}
\end{align*}
where the potentials are defined as follows:
\begin{align*}
\Phi(X_i=\text{fg},y_i) &= P(y_i | \text{GMM}_{\text{fg}})\\
\Phi(X_i=\text{bg},y_i) &= P(y_i | \text{GMM}_{\text{bg}}) \\
\Psi(x_i,x_j) &= \left[ \begin{matrix}
            \theta_1 & \theta_2 \\
            \theta_3 & \theta_4
            \end{matrix} \right].
\end{align*}
For convenience we denote $\theta =
(\theta_1,\theta_2,\theta_3,\theta_4)$. This MRF is not decomposable,
and therefore we cannot estimate the potentials in closed form.

\begin{figure}[h]
\begin{center}
\subfigure[Input image]
    {\includegraphics[width=0.30 \columnwidth]{carlos-beach.jpg}\label{fig:carlos}}
\subfigure[Ground Truth Segmentation]
    {\includegraphics[width=0.30 \columnwidth]{carlos-beach-gt.png}\label{fig:gt}}
\end{center}
\caption{Training Data}
\label{fig:ipf}
\end{figure}

\begin{enumerate}
\item Assume that $\Phi$ is known (computed in a manner similar to HW 4, only now all pixels
are labelled).
Write down the IPF update equation
for $\Psi(x_i,x_j)$. What is the cost of computing
$\Psi^{(t+1)}(x_i,x_j)$? \label{pest:part1}

\item Using the equation from part~\ref{pest:part1}, implement IPF for
$\Psi$ using the images and data provided.
Report the final value of $\theta$. Use loopy belief propagation to
compute any required probabilities. You may use your implementation
or our solution from homework 4 (\texttt{lbp.m}). \emph{Hint}: In class, we
discussed how to compute pairwise distributions $P(x_i,x_j)$ from
the messages of loopy belief propagation.

Notes:
\begin{enumerate}
\item You will need to re-learn the foreground and background GMMs. Instead of using the pixel Luv vectors like last time (which will take a lot of time now), train the GMM on the mean Luv vectors for superpixels. These are stored in the variable \texttt{modes}.

\item In our setup above, all edges have the same edge potentials. This is known as 
\emph{parameter sharing}, and requires us to compute average edge probabilities.

\item You will experience first-hand an interesting phenomenon in graphical models, namely that
learning requires inference. For example, in the above problem, you will need 20--30 iterations of IPF,
and each iteration requires loopy BP, which is an iterative algorithm itself! This is why fast algorithms for inference result in fast algorithms for learning.

\end{enumerate}

\end{enumerate}


\end{document}
