\documentclass[10pt]{article}
\usepackage{epsfig}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{amstext}
\usepackage{amscd}
\usepackage{amsmath}
\usepackage{xspace}
\usepackage{theorem}
\usepackage{float}
%\usepackage{layout}% if you want to see the layout parameters
% and now use \layout command in the body
% This is the stuff for normal spacing
% ---- Page layout: 6.5in x 8.25in text block on US letter, no running head ----
\makeatletter
\setlength{\textwidth}{6.5in}
\setlength{\oddsidemargin}{0in}
\setlength{\evensidemargin}{0in}
\setlength{\topmargin}{0.25in}
\setlength{\textheight}{8.25in}
\setlength{\headheight}{0pt}% headers disabled entirely
\setlength{\headsep}{0pt}
\setlength{\marginparwidth}{59pt}
\setlength{\parindent}{0pt}% block-style paragraphs: no indent ...
\setlength{\parskip}{5pt plus 1pt}% ... separated by vertical space instead
\setlength{\theorempreskipamount}{5pt plus 1pt}% spacing around theorem envs (theorem pkg)
\setlength{\theorempostskipamount}{0pt}
\setlength{\abovedisplayskip}{8pt plus 3pt minus 6pt}
\setlength{\intextsep}{15pt plus 3pt minus 6pt}
% Compact sectioning via the kernel's
%   \@startsection{name}{level}{indent}{beforeskip}{afterskip}{style}.
% A negative component in beforeskip suppresses the paragraph indentation
% of the text following the heading; a negative afterskip (-1em) produces a
% run-in heading (used for \paragraph and \subparagraph below).
\renewcommand{\section}{\@startsection{section}{1}{0mm}%
{2ex plus -1ex minus -.2ex}%
{1.3ex plus .2ex}%
{\normalfont\Large\bfseries}}%
\renewcommand{\subsection}{\@startsection{subsection}{2}{0mm}%
{1ex plus -1ex minus -.2ex}%
{1ex plus .2ex}%
{\normalfont\large\bfseries}}%
\renewcommand{\subsubsection}{\@startsection{subsubsection}{3}{0mm}%
{1ex plus -1ex minus -.2ex}%
{1ex plus .2ex}%
{\normalfont\normalsize\bfseries}}
\renewcommand\paragraph{\@startsection{paragraph}{4}{0mm}%
{1ex \@plus1ex \@minus.2ex}%
{-1em}%
{\normalfont\normalsize\bfseries}}
\renewcommand\subparagraph{\@startsection{subparagraph}{5}{\parindent}%
{2.0ex \@plus1ex \@minus .2ex}%
{-1em}%
{\normalfont\normalsize\bfseries}}
\makeatother
% Counter for lecture numbering (declared here; not used in this handout).
\newcounter{thelecture}
% Proof-style environments: bold run-in label, closed by a 2mm black square.
\newenvironment{proof}{{\bf Proof: }}{\hfill\rule{2mm}{2mm}}
\newenvironment{proofof}[1]{{\bf Proof of #1: }}{\hfill\rule{2mm}{2mm}}
\newenvironment{proofofnobox}[1]{{\bf#1: }}{}% custom label, no closing box
\newenvironment{example}{{\bf Example: }}{\hfill\rule{2mm}{2mm}}
%\renewcommand{\theequation}{\thesection.\arabic{equation}}
%\renewcommand{\thefigure}{\thesection.\arabic{figure}}
% Theorem-like environments (theorem package); all share the `fact' counter,
% so results are numbered in a single sequence: Fact 1, Lemma 2, Theorem 3, ...
\newtheorem{fact}{Fact}
\newtheorem{lemma}[fact]{Lemma}
\newtheorem{theorem}[fact]{Theorem}
\newtheorem{definition}[fact]{Definition}
\newtheorem{corollary}[fact]{Corollary}
\newtheorem{proposition}[fact]{Proposition}
\newtheorem{claim}[fact]{Claim}
\newtheorem{exercise}[fact]{Exercise}
% math notation
% Number systems and common sets (blackboard bold / calligraphic letters).
% \ensuremath lets these be used both in text and in math mode.
\newcommand{\R}{\ensuremath{\mathbb R}}% reals
\newcommand{\Z}{\ensuremath{\mathbb Z}}% integers
\newcommand{\N}{\ensuremath{\mathbb N}}% naturals
\newcommand{\B}{\ensuremath{\mathbb B}}% blackboard-bold B
\newcommand{\F}{\ensuremath{\mathcal F}}% calligraphic F
\newcommand{\SymGrp}{\ensuremath{\mathfrak S}}% symmetric group (fraktur S)
% Probability / expectation with bold operator and auto-sized brackets:
% \prob{X > t} -> Pr[X > t],  \expct{X} -> E[X].
\newcommand{\prob}[1]{\ensuremath{\text{{\bf Pr}$\left[#1\right]$}}}
\newcommand{\expct}[1]{\ensuremath{\text{{\bf E}$\left[#1\right]$}}}
% Auto-sized delimiter pairs.
\newcommand{\size}[1]{\ensuremath{\left|#1\right|}}
\newcommand{\ceil}[1]{\ensuremath{\left\lceil#1\right\rceil}}
\newcommand{\floor}[1]{\ensuremath{\left\lfloor#1\right\rfloor}}
\newcommand{\ang}[1]{\ensuremath{\langle{#1}\rangle}}
% Upright operator names with correct operator spacing.
\newcommand{\poly}{\operatorname{poly}}
\newcommand{\polylog}{\operatorname{polylog}}
% anupam's abbreviations
\newcommand{\e}{\epsilon}
\newcommand{\eps}{\epsilon}% same as \e; both spellings kept for convenience
\newcommand{\half}{\ensuremath{\frac{1}{2}}}
\newcommand{\junk}[1]{}% swallows its argument: comment out a span of text
\newcommand{\sse}{\subseteq}
\newcommand{\union}{\cup}
\newcommand{\meet}{\wedge}
\newcommand{\dist}[1]{\|{#1}\|_{\text{dist}}}% norm with a "dist" subscript
% Hooked arrows for (metric) embeddings; \embeds puts its argument above the arrow.
\newcommand{\hooklongrightarrow}{\lhook\joinrel\longrightarrow}
\newcommand{\embeds}[1]{\;\lhook\joinrel\xrightarrow{#1}\;}
\newcommand{\mnote}[1]{\normalmarginpar \marginpar{\tiny #1}}% tiny margin note
\newcommand{\OPT}{\textsf{OPT}}% optimum value, upright sans
% Hatted symbols used in the problems.
\newcommand{\That}{\widehat{T}}
\newcommand{\Ghat}{\widehat{G}}
\newcommand{\chat}{\widehat{c}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Document begins here %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Homework header line: #1 = homework number, #2 = date handed out, #3 = due
% date.  Typesets course name / homework number on one line, date / due date
% on the next, followed by a full-width horizontal rule.
% The trailing %'s suppress the spurious space tokens that the line breaks
% inside the definition would otherwise inject into the output.
\newcommand{\hwheadings}[3]{%
{\bf Approximation Algorithms} \hfill {{\bf Homework #1}}\\
{{\bf Date:} #2} \hfill {{\bf Due:} #3} \\
\rule[0.1in]{\textwidth}{0.025in}%
%\thispagestyle{empty}
}
\begin{document}
\hwheadings{4}{Oct 12, 2005}{Oct 19, 2005}
\textbf{Note:} In all these problems: if we ask you to show an
approximation ratio of $\rho$, you should tell us about the best
approximation ratio you did prove, be it better or worse than $\rho$.
Also, please refrain from consulting any external sources (papers,
lecture notes, books, surveys) in attempting to solve these problems.
\begin{enumerate}
\item \textbf{(Directed Spanning Trees via the Local Ratio Method)} In
this problem, we are going to use the Local Ratio method to devise an
{\em exact} algorithm for the minimum-cost directed spanning tree
problem. In this problem, given a {\em directed} graph $G = (V, A)$
with nonnegative costs on the arcs $c : A \to \R_{\geq 0}$ and a root
node $r$, the objective is to find a \emph{directed spanning tree} $T
= (V, A')$ rooted at $r$; in such a tree, there is a directed path
\emph{from} every node to $r$, and the tree has exactly $|V| - 1$
arcs. (While we will not say ``directed'' repeatedly, all spanning
trees in this problem are directed.)
As always, the first step is to decide the cost function $\bar{c}$:
for this, we consider the subgraph $Z = (V, A_Z)$ consisting of all
the zero-cost edges in $G$. (I.e., $A_Z = \{e \in A \mid c_e = 0\}$.)
\begin{itemize}
\item If each vertex in $V$ can reach the root in $Z$, then there is a
zero cost solution, and we are done.
\item Suppose there is some strongly connected component $C$ of $Z$
that has no zero-cost edges coming out of it. (Note that $C$ may
consist of a single node.) In that case, we define $\alpha = \min_{e
\in \partial^+(C)} c_e$, and we set $\bar{c}_e = \alpha$ for $e
\in \partial^+(C)$ and zero otherwise. (Recall that $\partial^+(S)$
is the set of arcs going out of $S \sse V$.)
To get a reduced graph $G'$, we contract the strongly connected
component $C$ into a new supernode $v_C$. We now set $c'_e = c_e -
\bar{c}_e$ for all arcs, and find the optimal spanning tree on $(G',
c')$. While returning from the induction in this case, we have an
optimal spanning tree $T'$ of the graph $G'$: suppose the arc $(v_C,
u)$ in this tree out of the supernode $v_C$ corresponds to the arc
$(v, u)$ in the original graph $G$, with $v \in C$. We now
``unshrink'' the supernode $v_C$, and add to the tree $T'$ a
spanning tree $T_C$ of zero-cost edges within the strongly connected
component $C$ rooted at $v$ to obtain a spanning tree of the whole
unshrunk graph.
\end{itemize}
Your task is now to use the above idea to obtain an exact algorithm
with a proof.
\begin{enumerate}
\item Write out the above outline as a recursive procedure for finding
a spanning tree rooted at $r$. (You may want to try your algorithm
on small examples to make sure it works.)
\item Argue that the set of edges output is a feasible spanning tree
by induction. Show carefully the inductive step of obtaining a
spanning tree rooted at any node from a strongly connected subgraph.
\item Argue that the cost of \emph{any} minimum spanning tree on graph
$(G, \bar{c})$ is at least $\alpha$.
\item Using the previous part and induction, prove that the cost of
the tree output is the minimum possible.
\item Analyze the running time of the above algorithm and prove the
most efficient bound you can.
\end{enumerate}
\item \textbf{(Approximation via Randomized Simplification)} In this
part we study the application of probabilistic embeddings of a metric
into spanning trees to approximation algorithms.
Given a set $X$ of {\em points}, a {\em distance function} on $X$ is a
map $d: X \times X \to \R_+$ that is symmetric, and satisfies $d(i,i)
= 0$ for all $i \in X$. The distance is said to be a {\em metric} if
the {\em triangle inequality} holds, i.e.,
\[
d(i,j) \leq d(i,k) + d(k,j) \qquad\qquad \forall i,j,k \in X.
\]
We informally think of metrics as a complete graph on $X$ with the
edge $\{i,j\}$ having a length $d(i,j)$.
We say that the metric $(X,d)$ \emph{$\alpha$-probabilistically embeds
into a probability distribution $\mathcal{D}$ of trees} if
\begin{itemize}
\item Each tree $T = (V_T,E_T)$ in the support of the distribution
$\mathcal{D}$ has $V_T = X$.
\item The distances in each such tree $T$ {\em dominate}
those in $d$; i.e., $d_T(x,y) \geq d(x,y)$ for all $x,y \in X$.
\item Given a pair of vertices $x, y \in X$, the expected distance is
``not too much larger'' than $d(x,y)$: formally
\[ \expct{\, d_T(x,y) \,} \leq \alpha\, d(x,y). \]
\end{itemize}
The following result due to Fakcharoenphol, Rao and Talwar (2003)
gives the best-possible result of this type. (We will see a proof later in
class.)
\begin{theorem}
\label{thm:frt} Any $n$-point metric $\alpha$-probabilistically
embeds into a distribution of trees $\mathcal{D}_{\mathrm{FRT}}$ with $\alpha =
O(\log n)$; furthermore, samples from this distribution can be
generated in polynomial time.
\end{theorem}
Our goal is to use this result to design approximation algorithms for
problems defined on metrics by first designing exact or near-optimal
solutions to the same problem on trees, and then using the above
result to translate such an algorithm to arbitrary metrics with a loss
of $\alpha = O(\log n)$ in the performance ratio.
\begin{enumerate}
\item Consider the $K$-median problem that we discussed in class:
given an $n$-point metric $(X,d)$, and a number $K$, find a set $C$
of size $K$ that minimizes $\Phi(C) = \sum_{x \in X} d(x,C)$.
Suppose you are given a tree $T$ with nonnegative edge lengths as
your input to the $K$-median problem; show how to solve the
$K$-median problem in polynomial time for this class of input
graphs. (Hint: use dynamic programming.)
\item To solve the $K$-median problem on a general metric, we apply
Theorem~\ref{thm:frt} to the input metric to derive a probability
distribution over tree metrics. We then sample a tree according to
this distribution and apply your polynomial time algorithm from the
previous part to it. We then use the same set of $K$ nodes as the
solution to the $K$-median problem on the original metric.
\begin{enumerate}
\item Show that an optimal $K$-median solution $C^*$ in the original
metric gives a solution whose expected cost is at most $\alpha \cdot
\Phi(C^*)$. Thus the optimal solution in the sampled tree has
expected cost no more than this.
\item Show how \emph{any} $K$-median solution of cost $D$ in the
sampled tree $T$ has cost no more than $D$ in the original metric.
\end{enumerate}
Mention how these two claims imply an $\alpha$-approximation for the
$K$-median problem on any metric.
\item Look at your analysis in the previous part and outline for what
kinds of problems on metrics does this method apply. E.g., does it
work for the TSP on a metric? How about the $K$-center problem? Why
or why not?
\end{enumerate}
\end{enumerate}
\end{document}