\documentclass[12pt]{article}
\usepackage{amsmath}
\usepackage{epsfig, graphics}
\usepackage{latexsym}
\usepackage{fullpage}
\usepackage{subfigure}
\usepackage[parfill]{parskip}
\usepackage{mysymbols}
\title{10708 Graphical Models: Homework 4\\
{\small Due November 12th, beginning of class}
}
\date{October 29, 2008}
\begin{document}
\maketitle
{\bf Instructions}: There are six questions on this assignment. Each
question has the name of one of the TAs beside it, to whom you should
direct any inquiries regarding the question. Please submit
your homework in two parts, one for each TA. Also, please put the
TA's name on top of the homework.
The last problem involves
coding. Do {\it not} attach your code to the writeup. Instead, copy
your implementation to
\begin{center}
\begin{verbatim}
/afs/andrew.cmu.edu/course/10/708/your_andrew_id/HW4
\end{verbatim}
\end{center}
Refer to the web page for policies regarding collaboration, due dates,
and extensions.
{\bf Note}: Please remember to put your name and Andrew ID on the first page of
your writeup.
\section{Clique Tree Calibration Fixed-Point {\small Dhruv [5 pts]}}
Prove that the clique beliefs $\pi({\bf C}_i) = P({\bf C}_i)$ and
edge beliefs $\mu_{ij}({\bf S}_{ij}) = P({\bf S}_{ij})$ form a fixed
point for the belief propagation algorithm for a clique
tree, $\ie$, if we start BP with these beliefs, no messages
will change them.
\section{Markov Network Representations {\small Amr [5 pts]}}
Figure~\ref{fig:simplemrf} is a Markov Random Field where the
potentials are defined on all cliques of three variables.
\begin{figure}[!h]
\begin{center}
\includegraphics[bb=0 0 1197 552,width=3.5in]{simplemrf.png}
\caption{A chordal (triangulated) Markov network}
\label{fig:simplemrf}
\end{center}
\end{figure}
\begin{enumerate}
\item[(a)] Convert the triangle graph on $(A,B,C)$ with potential
$\Psi(A,B,C)$ into a pairwise Markov Random Field by introducing a new
variable $X$. Show the graph, as well as the node and edge potentials in
table form ({\it i.e.}, compute the values of the potentials in the
pairwise MRF).
\item[(b)] Convert the graph on $(A,B,C,D)$ with potentials
$\Psi(A,B,C)$ and $\Psi(B,C,D)$ into a pairwise Markov Random
Field. Is the graph chordal? ({\it Note}: You do not have to compute
the pairwise MRF potentials in your solution).
\end{enumerate}
\section{Hammersley-Clifford and Non-Positive Distributions {\small Amr [10 pts] }}
Complete the analysis of Example 4.3.5 (Koller \& Friedman),
showing that the distribution $P$ defined in the example does not
factorize over ${\cal H}$. ({\it Hint}: Use a proof by
contradiction).
\section{The Uniqueness of Minimal I-maps for MN {\small Amr [10 pts] }}
Unlike directed models, where the minimal I-map for a given
distribution is not unique, there exists a construction (Theorem
4.3.19 in Koller \& Friedman) that results in the minimal I-map
Markov Network (MN) for a positive distribution $\cal{P}$. The
construction is as follows:
\textbf{Theorem 4.3.19} Let $\mathcal{P}$ be a positive distribution.
For each node $X$, let $MB_P(X)$ be a \emph{minimal} set of nodes $U$
satisfying Eq.~\ref{eq:MB}. We define a graph $\mathcal{H}$ by
introducing an edge $\{X, Y\}$ for all $X$ and all $Y \in MB_P(X)$.
Then the Markov network $\mathcal{H}$ is the unique minimal I-map for
$\mathcal{P}$.
\begin{equation}\label{eq:MB}
X \perp \mathcal{X} - \{X\} - U \mid U
\end{equation}
Below we will prove the uniqueness and minimality of this
construction. Consider some specific node $X$, and let $\mathcal{U}$ be
the set of all subsets $U$ satisfying Eq.~\ref{eq:MB}. Define
$\mathbf{U}^*$ to be the intersection of all $U \in \mathcal{U}$.
\begin{enumerate}
\item[(a)] Prove that $\mathbf{U}^* \in \mathcal{U}$. Conclude that $MB_P(X)=\mathbf{U}^*$.
\item[(b)] Prove that if $P \models (X \perp Y | \mathcal{X} - \{X,Y\})$, then $Y \notin MB_P(X)$.
\item[(c)] Prove that if $Y \notin MB_P(X)$, then $P \models (X \perp Y | \mathcal{X} - \{X,Y\})$.
\item[(d)] Conclude that $MB_P(X)$ is precisely the set of neighbors of $X$ in the graph $\mathcal{H}$ defined above, showing that the
construction above also produces a minimal I-map.
\end{enumerate}
\section{Variational Inference {\small Amr [30 pts] }}
In this problem, you will investigate {\it mean field} approximate
inference algorithms (Koller \& Friedman 10.5). Consider the Markov
network in Figure \ref{fig:mfield}. Define edge potentials
$\phi_{ij}(x_i,x_j)$ for all edges $(x_i,x_j)$ in the graph. We can
write
\begin{align*}
P(x_1,\ldots,x_{12}) = \frac{1}{Z}\prod_{(i,j) \in E}\phi_{ij}(x_i,x_j)
\end{align*}
\begin{figure}[!h]
\begin{center}
\subfigure[Pairwise MRF]{\includegraphics[bb=0 0 1007 649, width=0.4\linewidth]{mfield.png}\label{fig:mfield}}\hspace{0.05\linewidth}
\subfigure[Fully Factored Mean Field]{\includegraphics[bb=0 0 1007 649,width=0.4\linewidth]{mfield-ff.png}\label{fig:mfield-ff}}
\hspace{0.05\linewidth} \subfigure[Structured Mean Field]{\includegraphics[bb=0 0 1007 649,width=0.4\linewidth]{mfield-smf.png}\label{fig:mfield-smf}}
\caption{A pairwise Markov Random Field and the structure of two
mean field approximations}\end{center}
\end{figure}
\begin{enumerate}
\item[(a)] Assume a fully factored mean field approximation $Q$ (Figure~\ref{fig:mfield-ff}),
parameterized by node potentials $Q_i$.
\begin{enumerate}
\item[(i)] Write down the update formula for $Q_1(x_1)$.
\item[(ii)] Write down the update formula for $Q_6(x_6)$.
\end{enumerate}
In both cases, please expand out any expectations in the formulas (your answer should
be in terms of $Q_i$ and $\phi_{ij}$).
\item[(b)] Now we consider a structured mean field approximation $Q$
(Figure~\ref{fig:mfield-smf}), parameterized by edge potentials $\psi_{ij}(x_i,x_j)$ for
each edge $(x_i,x_j)$ in Figure~\ref{fig:mfield-smf}.
Using Theorem 10.5.15 (Koller \& Friedman), the update equation for
potential $\psi_j$ is,
\begin{align*}
\psi_j(\mathbf{c}_j) \propto \exp \Big[ \sum_{\phi \in A_j} E_Q
\big[ \ln \phi | \mathbf{c}_j \big] - \sum_{ \psi_k \in B_j} E_Q
\big[ \ln \psi_k | \mathbf{c}_j \big] \Big],
\end{align*}
where $A_j = \{ \phi \in \mathcal{F} :$ scope($\phi$) is not
independent of $\mathbf{C}_j$ in $Q \}$ and $B_j = \{ \psi_k :$
$\mathbf{C}_k$ and $\mathbf{C}_j$ are not independent in $Q \}$.
\begin{enumerate}
\item[(i)] Write down the update formula for $\psi_{34}(x_3,x_4)$
\item[(ii)] Write down the update formula for $\psi_{67}(x_6,x_7)$
In both cases, write the required formula up to a proportionality
constant. You can write it in terms of expected values, but do not
include unnecessary terms.
\item[(iii)] Write out the formula for
$E_{Q}[\ln\phi_{12}(X_1,X_2)|x_3,x_4]$. Make sure to show how you
would calculate the distribution that this expectation is over.
\item[(iv)] Repeat for $E_{Q}[\ln\phi_{15}(X_1,X_5)|x_3,x_4]$.
\item[(v)] Repeat for $E_{Q}[\ln\phi_{37}(X_3,X_7)|x_3,x_4]$.
\end{enumerate}
\item[(c)] For an $n \times n$ grid with $k$-ary variables:
\begin{enumerate}
\item[(i)] What is the computational complexity of a single potential
update (like $Q_6(x_6)$ ) in the fully factored model?
\item[(ii)]What is the computational complexity of
a single potential update (like $\psi_{67}(x_6,x_7)$) in the
structured mean field model?
({\it Note}: Do not include the cost of computing the normalization
constant in your answer).
\item[(iii)] Assuming no caching, what is the total cost in each case for a full
iteration, i.e., computing the updates for all the
potentials?
\end{enumerate}
\item[(d)] We would like to use caching to speed up computations in
the structured mean field approach in Figure~\ref{fig:mfield-smf}.
\begin{enumerate}
\item[(i)] What are the (conditional) marginal distributions under Q
needed to calculate the update for $\psi_{34}(x_3,x_4)$.
\item[(ii)]Repeat for $\psi_{34}(x_3',x_4')$.
\item[(iii)]Repeat for $\psi_{12}(x_1,x_2)$.
\item[(iv)]Using the above intuition, sketch a scheme to schedule
the updates over all $\psi_{XY}(X,Y)$ for all possible assignments
to $X$ and $Y$. You may use any exact inference algorithm as a
subroutine.
\item[(v)] For an $n \times n$ grid with $k$-ary variables, what is
the computational complexity for a single full iteration in your new
scheme?
\end{enumerate}
\end{enumerate}
\section{Semi-Supervised Image Segmentation with Loopy BP {\small Dhruv [40 pts]}}
Given an image of $l \times w$ pixels, a $K$-ary segmentation is a
clustering that assigns each pixel to one of $K$ classes, typically
under the assumption that neighbouring pixels are more likely to
belong to the same class.
In this question, you will implement an application of Loopy BP to the
problem of interactive (or semi-supervised) image segmentation, where
the goal is to produce a binary segmentation given user-provided scribbles
or strokes on an image. Consider the image shown in figure~\ref{fig:carlos}.
The user (let's call him Carlos) wants to cut himself out of this picture
and paste it against a different background (he prefers mountains to beaches).
Unfortunately, Carlos is also lazy, and is only willing to use a coarse
paint-like interface and give us foreground/background labels for a few pixels.
In figure~\ref{fig:scribbles}, the green scribble corresponds to foreground
and the blue scribble corresponds to background.
\begin{figure}[h]
\begin{center}
\subfigure[Input image]
{\includegraphics[bb=0 0 413 243,width=0.45 \columnwidth]{./images/carlos-beach.jpg}\label{fig:carlos}}
\subfigure[Scribbles]
{\includegraphics[bb=0 0 624 370,width=0.45 \columnwidth]{./images/carlos-beach_scribble.png}\label{fig:scribbles}}
\caption{Interactive image segmentation}
\end{center}
\end{figure}
The most common graphical model approach for image segmentation
represents the image as a grid-graph pairwise Markov random field
(Figure~\ref{fig:mrf}) where each node corresponds to a pixel. Note
that the value of a node is the cluster it belongs to.
\begin{figure}[h]
\begin{center}
\includegraphics[bb= 0 0 514 431,width=2.5in]{mrf.png}
\caption{An example of a Markov Random Field for image segmentation}
\label{fig:mrf}
\end{center}
\end{figure}
Formally, the observed image is denoted $Y = \{Y_i\}$ and $X =
\{X_i\}$, $X_i \in \{1 \ldots K\}$ is the segmentation. The Markov
random field has distribution
\begin{align}
P(X,Y) = \frac{1}{Z}\prod_i \Phi(x_i,y_i) \prod_{(i,j) \in E} \Psi(x_i,x_j)
\label{eqn:mrf}
\end{align}
where $\Phi$ is the node potential\footnote{Also called the
observation model or likelihood.}, the effect that pixel $y_i$ has on
the label of $x_i$; $\Psi$ is the edge potential,
how the label of $x_i$ is influenced by the labels
of its neighbours.
One common problem with these models is that even a (relatively) small image
of size $300\times300$ pixels would contain $9\times10^{4}$ nodes in the MRF,
and the size of the adjacency matrix for this graph would be $81\times10^8$ (although
it would be sparse). A commonly used trick to get around this problem is to ``over-segment''
the image into small segments (called superpixels), and construct an MRF where the nodes
correspond to superpixels instead of pixels. All the pixels in
this superpixel image which have the same colour belong to the same superpixel, and will
have the same foreground/background label because our MRF will assign labels on
superpixels. Figure~\ref{fig:sp} shows a visualization of these
superpixels where all the superpixels have been filled with random colours. Figure~\ref{fig:sp_mrf}
shows what the MRF structure over superpixels would look like, where the nodes
correspond to superpixels, and neighbouring (adjoining) superpixels have been connected by an
edge. \textit{Note:} The figure is a simplified visualization, and your resultant graph will have a lot more
nodes and edges.
\begin{figure}[h]
\begin{center}
\subfigure[Input image]
{\includegraphics[bb=0 0 413 243,width=0.30 \columnwidth]{./images/carlos-beach.jpg}\label{fig:carlos-input}}
\subfigure[Superpixels]
{\includegraphics[bb=0 0 413 243,width=0.30 \columnwidth]{./images/carlos-beach_sp_colour.png}\label{fig:sp}}
\subfigure[Neighbourhood graph]
{\includegraphics[bb = 0 0 1387 817,width=0.30 \columnwidth]{./images/carlos-beach_sp_mrf.png}\label{fig:sp_mrf}}
\caption{MRF on superpixels}
\end{center}
\end{figure}
\subsection{Segmentation}
This section describes what you need to implement. Please read the readme file in the provided
code for more details about the code provided.
For this section we will redefine $X = \{X_i\}$ as the set of (binary) random variables representing
class labels for superpixels and $Y =
\{Y_i\}$ as the set of features extracted on superpixels ($i \in \{1,2,\ldots,S\}$, where $S$ is the number of superpixels in the image). The MRF distribution will stay the same (but the edge set $E$ will change):
\begin{align}
P(X,Y) = \frac{1}{Z}\prod_i \Phi(x_i,y_i) \prod_{(i,j) \in E} \Psi(x_i,x_j)
\label{eqn:mrf-sp}
\end{align}
\begin{enumerate}
\item \textbf{[Pixel GMMs]} We have provided you with a scribble
mask image (where a pixel has value 1 when it lies on the foreground scribble, 2
when it lies on the background scribble, and 0 otherwise). You will use the foreground and background
pixels to learn Gaussian Mixture Models for each class. Set the number of components in each GMM to be 5. Your features for pixels will be Luv colour vectors. You may use external implementations for GMMs. A matlab implementation is available here:
\begin{center}
\begin{verbatim}
http://www.it.lut.fi/project/gmmbayes/
\end{verbatim}
\end{center}
Consider looking at the function \texttt{gmmb\_create}.
\textbf{Report} the mean vectors (in the form of two $3\times5$ tables) for the foreground and background GMMs \textit{in your writeup}.
\item \textbf{[Superpixel MRF structure]} We have provided you with a superpixel map,
where each pixel holds the index of the superpixel it belongs to.
Write a function called \texttt{convert\_sp\_labels\_to\_adj\_mat.m} that takes in this matrix of superpixel
labels and returns the adjacency matrix for the superpixel neighbourhood graph. If the image
has been broken into $S$ superpixels, this adjacency matrix will be of size $S\times S$, and will
contain 1 whenever two superpixels are adjacent in the image (and 0 otherwise).
\textit{Hint:} Consider scanning rows and columns of the superpixel map to find transitions.
\textbf{Display} the adjacency matrix using the function \texttt{imagesc} and \textbf{include} a snapshot \textit{in your writeup}.
\item \textbf{[Superpixel MRF parameters]} Your features at superpixels ($Y_i$) will be the average colour (Luv vectors) of contained pixels. The node potential will be defined as the likelihood of these
feature vectors under foreground and background GMMs:
\begin{align*}
\Phi(X_i=\text{fg},y_i) &= P(y_i | \text{GMM}_{\text{fg}})\\
\Phi(X_i=\text{bg},y_i) &= P(y_i | \text{GMM}_{\text{bg}})
\end{align*}
Consider looking at the function \texttt{gmmb\_pdf}.
The edge potential\footnote{Also called the Potts Model.} will be defined as
\begin{align*}
\Psi(x_i,x_j) &= \exp\left\{-\beta \times I\left(x_i \neq x_j\right)\right\},
\end{align*}
where $I$ is an indicator function.
\item \textbf{[Loopy BP]} Write a function \texttt{lbp.m} that takes as input the graph structure (adjacency matrix), the node potentials, and the edge potentials, and returns the MAP estimates of the states of the nodes via (sum-product) Loopy Belief Propagation.
Initialize $m_{ij}(x_i) = 1$
for all $i \neq j$. Stop running loopy belief propagation once the
maximum absolute difference between an old message and a new message
is less than $10^{-5}$. Remember to compute the messages in log-space, for numerical stability.
Also remember to normalize messages as listed on the lecture slides.
You do not need to dampen messages to ensure convergence on this image.
\item \textbf{[Plotting Segmentations]} Vary $\beta$ in the range $[0, 10]$ (take steps of size 2)
and propagate superpixel
labels back to pixel-level foreground/background segmentations (where all pixels in the image
have been assigned to either foreground (1) or background (0)).
\textbf{Include} plots of these segmentations \textit{in your writeup}. Comment on the behaviour as $\beta$ increases. Why is $\beta=0$ special? What would happen as $\beta \rightarrow \infty$?
\end{enumerate}
\subsection{Extra Credit {\small [8 pts]}}
Show us your creative side. What can you do with a cut-out of Carlos? Paste Carlos against
interesting backgrounds (smiling next to an alien ship or standing next to important presidents), and submit the images.
\end{document}