\documentclass[10pt]{article}

\usepackage{epsfig}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{amstext}
\usepackage{amscd}
\usepackage{amsmath}
\usepackage{xspace}
\usepackage{theorem}
\usepackage{float}

%\usepackage{layout}% if you want to see the layout parameters
                     % and now use \layout command in the body

% This is the stuff for normal spacing
\makeatletter
 \setlength{\textwidth}{6.5in}
 \setlength{\oddsidemargin}{0in}
 \setlength{\evensidemargin}{0in}
 \setlength{\topmargin}{0.25in}
 \setlength{\textheight}{8.25in}
 \setlength{\headheight}{0pt}
 \setlength{\headsep}{0pt}
 \setlength{\marginparwidth}{59pt}

 \setlength{\parindent}{0pt}
 \setlength{\parskip}{5pt plus 1pt}
 \setlength{\theorempreskipamount}{5pt plus 1pt}
 \setlength{\theorempostskipamount}{0pt}
 \setlength{\abovedisplayskip}{8pt plus 3pt minus 6pt}
 \setlength{\intextsep}{15pt plus 3pt minus 6pt}

 \renewcommand{\section}{\@startsection{section}{1}{0mm}%
                                   {2ex plus -1ex minus -.2ex}%
                                   {1.3ex plus .2ex}%
                                   {\normalfont\Large\bfseries}}%
 \renewcommand{\subsection}{\@startsection{subsection}{2}{0mm}%
                                     {1ex plus -1ex minus -.2ex}%
                                     {1ex plus .2ex}%
                                     {\normalfont\large\bfseries}}%
 \renewcommand{\subsubsection}{\@startsection{subsubsection}{3}{0mm}%
                                     {1ex plus -1ex minus -.2ex}%
                                     {1ex plus .2ex}%
                                     {\normalfont\normalsize\bfseries}}
 \renewcommand\paragraph{\@startsection{paragraph}{4}{0mm}%
                                    {1ex \@plus1ex \@minus.2ex}%
                                    {-1em}%
                                    {\normalfont\normalsize\bfseries}}
 \renewcommand\subparagraph{\@startsection{subparagraph}{5}{\parindent}%
                                       {2.0ex \@plus1ex \@minus .2ex}%
                                       {-1em}%
                                      {\normalfont\normalsize\bfseries}}
\makeatother

\newcounter{thelecture}

\newenvironment{proof}{{\bf Proof:  }}{\hfill\rule{2mm}{2mm}}
\newenvironment{proofof}[1]{{\bf Proof of #1:  }}{\hfill\rule{2mm}{2mm}}
\newenvironment{proofofnobox}[1]{{\bf#1:  }}{}
\newenvironment{example}{{\bf Example:  }}{\hfill\rule{2mm}{2mm}}

%\renewcommand{\theequation}{\thesection.\arabic{equation}}
%\renewcommand{\thefigure}{\thesection.\arabic{figure}}

\newtheorem{fact}{Fact}
\newtheorem{lemma}[fact]{Lemma}
\newtheorem{theorem}[fact]{Theorem}
\newtheorem{definition}[fact]{Definition}
\newtheorem{corollary}[fact]{Corollary}
\newtheorem{proposition}[fact]{Proposition}
\newtheorem{claim}[fact]{Claim}
\newtheorem{exercise}[fact]{Exercise}

% math notation
\newcommand{\R}{\ensuremath{\mathbb R}}
\newcommand{\Z}{\ensuremath{\mathbb Z}}
\newcommand{\N}{\ensuremath{\mathbb N}}
\newcommand{\B}{\ensuremath{\mathbb B}}
\newcommand{\F}{\ensuremath{\mathcal F}}
\newcommand{\SymGrp}{\ensuremath{\mathfrak S}}
\newcommand{\prob}[1]{\ensuremath{\text{{\bf Pr}$\left[#1\right]$}}}
\newcommand{\expct}[1]{\ensuremath{\text{{\bf E}$\left[#1\right]$}}}
\newcommand{\size}[1]{\ensuremath{\left|#1\right|}}
\newcommand{\ceil}[1]{\ensuremath{\left\lceil#1\right\rceil}}
\newcommand{\floor}[1]{\ensuremath{\left\lfloor#1\right\rfloor}}
\newcommand{\ang}[1]{\ensuremath{\langle{#1}\rangle}}
\newcommand{\poly}{\operatorname{poly}}
\newcommand{\polylog}{\operatorname{polylog}}

% anupam's abbreviations
\newcommand{\e}{\epsilon}
\newcommand{\eps}{\epsilon}
\newcommand{\half}{\ensuremath{\frac{1}{2}}}
\newcommand{\junk}[1]{}
\newcommand{\sse}{\subseteq}
\newcommand{\union}{\cup}
\newcommand{\meet}{\wedge}
\newcommand{\dist}[1]{\|{#1}\|_{\text{dist}}}
\newcommand{\hooklongrightarrow}{\lhook\joinrel\longrightarrow}
\newcommand{\embeds}[1]{\;\lhook\joinrel\xrightarrow{#1}\;}
\newcommand{\mnote}[1]{\normalmarginpar \marginpar{\tiny #1}}

\newcommand{\OPT}{\textsf{OPT}}
\newcommand{\That}{\widehat{T}}
\newcommand{\Ghat}{\widehat{G}}
\newcommand{\chat}{\widehat{c}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Document begins here %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\hwheadings}[3]{
{\bf Approximation Algorithms} \hfill {{\bf Homework #1}}\\
{{\bf Date:} #2} \hfill {{\bf Due:} #3} \\
\rule[0.1in]{\textwidth}{0.025in}
%\thispagestyle{empty}
}

\begin{document}

\hwheadings{4}{Oct 12, 2005}{Oct 19, 2005}

\textbf{Note:} In all these problems: if we ask you to show an
approximation ratio of $\rho$, you should tell us about the best
approximation ratio you did prove, be it better or worse than $\rho$.
Also, please refrain from consulting any external sources (papers,
lecture notes, books, surveys) in attempting to solve these problems.

\begin{enumerate}
  
\item \textbf{(Directed Spanning Trees via the Local Ratio Method)} In
  this problem, we are going to use the Local Ratio method to devise an
  {\em exact} algorithm for the minimum-cost directed spanning tree
  problem. In this problem, given a {\em directed} graph $G = (V, A)$
  with nonnegative costs on the arcs $c : A \to \R_{\geq 0}$ and a root
  node $r$, the objective is to find a \emph{directed spanning tree} $T
  = (V, A')$ rooted at $r$; in such a tree, there is a directed path
  \emph{from} every node to $r$, and the tree has exactly $|V| - 1$
  arcs. (While we will not say ``directed'' repeatedly, all spanning
  trees in this problem are directed.)
  
  As always, the first step is to decide the cost function $\bar{c}$:
  for this, we consider the subgraph $Z = (V, A_Z)$ consisting of all
  the zero-cost edges in $G$. (I.e., $A_Z = \{e \in A \mid c_e = 0\}$.)
  \begin{itemize}
  \item If each vertex in $V$ can reach the root in $Z$, then there is a
    zero cost solution, and we are done.
    
  \item Suppose there is some strongly connected component $C$ of $Z$
    that has no zero-cost edges coming out of it. (Note that $C$ may
    consist of a single node.) In that case, we define $\alpha = \min_{e
      \in \partial^+(C)} c_e$, and we set $\bar{c}_e = \alpha$ for $e
    \in \partial^+(C)$ and zero otherwise. (Recall that $\partial^+(S)$
    is the set of arcs going out of $S \sse V$.)
    
    To get a reduced graph $G'$, we contract the strongly connected
    component $C$ into a new supernode $v_C$. We now set $c'_e = c_e -
    \bar{c}_e$ for all arcs, and find the optimal spanning tree on $(G',
    c')$. While returning from the induction in this case, we have an
    optimal spanning tree $T'$ of the graph $G'$: suppose the arc $(v_C,
    u)$ in this tree out of the supernode $v_C$ corresponds to the arc
    $(v, u)$ in the original graph $G$, with $v \in C$. We now
    ``unshrink'' the supernode $v_C$, and add to the tree $T'$ a
    spanning tree $T_C$ of zero-cost edges within the strongly connected
    component $C$ rooted at $v$ to obtain a spanning tree of the whole
    unshrunk graph.
  \end{itemize}  
  
  Your task is now to use the above idea to obtain an exact algorithm
  with a proof.
  \begin{enumerate}
  \item Write out the above outline as a recursive procedure for finding
    a spanning tree rooted at $r$. (You may want to try your algorithm
    on small examples to make sure it works.)
    
  \item Argue that the set of edges output is a feasible spanning tree
    by induction. Show carefully the inductive step of obtaining a
    spanning tree rooted at any node from a strongly connected subgraph.
    
  \item Argue that the cost of \emph{any} minimum spanning tree on graph
    $(G, \bar{c})$ is at least $\alpha$.
    
  \item Using the previous part and induction, prove that the cost of
    the tree output is the minimum possible.
    
  \item Analyze the running time of the above algorithm and prove the
    most efficient bound you can.
  \end{enumerate}

  \item \textbf{(Approximation via Randomized Simplification)} In this
  part we study the application of probabilistic embeddings of a metric
  into spanning trees to approximation algorithms.

  Given a set $X$ of {\em points}, a {\em distance function} on $X$ is a
  map $d: X \times X \to \R_+$ that is symmetric, and satisfies $d(i,i)
  = 0$ for all $i \in X$. The distance is said to be a {\em metric} if
  the {\em triangle inequality} holds, i.e.,
  \[
  d(i,j) \leq d(i,k) + d(k,j) \qquad\qquad \forall i,j,k \in X.
  \]
  We informally think of metrics as a complete graph on $X$ with the
  edge $\{i,j\}$ having a length $d(i,j)$.
  
  We say that the metric $(X,d)$ \emph{$\alpha$-probabilistically embeds
    into a probability distribution $\cal D$ of trees} if
  \begin{itemize}
  \item Each tree $T = (V_T,E_T)$ in the support of the distribution
    $\cal D$ has $V_T = X$.
  \item The distances in each such tree $T$ {\em dominate}
    those in $d$; i.e., $d_T(x,y) \geq d(x,y)$ for all $x,y \in X$.
  \item Given a pair of vertices $x, y \in X$, the expected distance is
    ``not too much larger'' than $d(x,y)$: formally
    \[ \expct{\, d_T(x,y) \,} \leq \alpha\, d(x,y). \]
  \end{itemize}
  The following result due to Fakcharoenphol, Rao and Talwar (2003)
  gives the best-possible results of this type. (We will see a proof later in
  class.)
  \begin{theorem}
    \label{thm:frt} Any $n$-point metric $\alpha$-probabilistically
    embeds into a distribution of trees ${\cal D}_{FRT}$ with $\alpha =
    O(\log n)$; furthermore, samples from this distribution can be
    generated in polynomial time.
  \end{theorem}
  Our goal is to use this result to design approximation algorithms for
  problems defined on metrics by first designing exact or near-optimal
  solutions to the same problem on trees, and then using the above
  result to translate such an algorithm to arbitrary metrics with a loss
  of $\alpha = O(\log n)$ in the performance ratio.
  \begin{enumerate}
  \item Consider the $K$-median problem that we discussed in class:
    given an $n$-point metric $(X,d)$, and a number $K$, find a set $C$
    of size $K$ that minimizes $\Phi(C) = \sum_{x \in X} d(x,C)$.
    Suppose you are given a tree $T$ with nonnegative edge lengths as
    your input to the $K$-median problem, show how to solve the
    $K$-median problem in polynomial time for this class of input
    graphs. (Hint: Use Dynamic Programming).
    
  \item To solve the $K$-median problem on a general metric, we apply
    Theorem~\ref{thm:frt} to the input metric to derive a probability
    distribution over tree metrics. We then sample a tree according to
    this distribution and apply your polynomial time algorithm from the
    previous part to it. We then use the same set of $K$ nodes as the
    solution to the $K$-median problem on the original metric. 
    \begin{enumerate}
    \item Show that an optimal $K$-median solution $C^*$ in the original
      metric gives a solution whose expected cost is at most $\alpha \cdot
      \Phi(C^*)$.  Thus the optimal solution in the sampled tree has
      expected cost no more than this.
      
    \item Show how \emph{any} $K$-median solution of cost $D$ in the
      sampled tree $T$ has cost no more than $D$ in the original metric.
    \end{enumerate}
    Mention how these two claims imply an $\alpha$-approximation for the
    $K$-median problem on any metric.
    
  \item Look at your analysis in the previous part and outline for what
    kinds of problems on metrics does this method apply. E.g., does it
    work for the TSP on a metric? How about the $K$-center problem?  Why
    or why not?
  \end{enumerate}
\end{enumerate}

\end{document}
