\documentclass[11pt]{article}
\usepackage[latin1]{inputenc}
\usepackage{amsmath}
%\usepackage{epsfig} % obsolete (l2tabu); superseded by graphicx, loaded below
%\usepackage{pgf}
\usepackage{graphicx}
\usepackage{amssymb}
\usepackage{url}
\usepackage{xyling}
\usepackage[all]{xy}
%\usepackage{prof}
%\usepackage{pgf}
%\usepackage{tikz}
%\usepackage{tkz-berge}
\oddsidemargin 0.0in
\evensidemargin 0.0in
\textwidth 6.5in
\headheight 0.0in
\headsep 0.0in
\topmargin 0.0in
\textheight=9.0in
\title{10-601 Machine Learning: Assignment 4}
% by andrew and tom
\date{}
\begin{document}
\maketitle
\vspace{-.4in}
\begin{itemize}
\item The assignment is due at 3:00pm (beginning of class) on \textbf{Monday, February 25, 2008}.
\item Since you only have five days to work on the assignment, it will be worth \textbf{30 points}.
\item Write your name at the top right-hand corner of each page submitted.
\item Each student must hand in a hard-copy writeup. See the course webpage for collaboration policies.
\end{itemize}
\section*{Q1: Probably approximately correct (PAC) learning [15 pts]}
Consider a variant of a decision tree learning algorithm that
considers only examples described by boolean features $\langle X_1,
\ldots X_n \rangle$, learns only boolean-valued functions ($Y \in \left\{+, -\right\}$), and outputs
only `regular, depth-2 decision trees.' A `regular, depth-2 decision
tree' is a depth two decision tree (a tree with four leaves) in which
the left and right child of the root are {\em required to test the
same attribute}. For instance, the following tree is a `regular,
depth-2 decision tree':
% X7
% / \
% X3 X3
% / \ / \
% + - - +
\begin{figure}[htp]
\begin{center}
\Tree[1]{&&&\K{X7}\B{dl}_{+}\B{dr}^{-} \\
&&\K{X3}\B{dl}_{+}\B{d}^{-} && \K{X3}\B{d}_{-}\B{dr}^{+} \\
&\K{Y = +} & \K{Y = -} && \K{Y = -} & \K{Y = +}\\
}
\caption{Example of regular boolean depth-2 decision tree.}
\label{fig:decision_tree}
\end{center}
\end{figure}
\begin{enumerate}
\item Suppose you have noise-free training data for target concept $c$ which you
know can be described by a regular, depth-2 decision tree. How many
training examples must you provide the learning algorithm in order to
assure that with probability .99 the learner will output a tree whose
true accuracy is at least .97? Assume you have data with 20
attributes in total (though of course you believe only two of these
twenty will be needed to describe the correct tree).
\item {[}\textbf{Entirely optional, extra credit}] Now suppose you modify the algorithm to
allow for instances to contain real-valued attributes instead of
boolean attributes, and you allow each decision tree node to perform a
boolean threshold test of the form $X_i > a$ where $a$ is allowed to
take on any real value. The tree is further constrained such that the second level nodes,
which share the same attribute, must also use the same threshold. In this case, re-answer the above question:
How many training examples must you provide the learning algorithm in
order to assure that with probability .99 the learner will output a
tree whose true accuracy is at least .97? In this case, assume each
example has only two attributes, so the tree will end up using both.
\end{enumerate}
%%%%%%%%%%%%%%%%
\section*{Q2: Bayes nets [15 pts]}
For the following questions, assume $M_{indp}$ and $M_{dep}$ are as defined in \textbf{Question 3} of \textbf{Homework 2}, but with \textbf{$n = 4$} instead of $10$:
\begin{enumerate}
\item Draw a Bayes net representing the conditionally \textit{independent} na\"{\i}ve Bayes model $M_{indp}$. How many total rows are in all of the fully specified conditional probability tables (cpt's) represented by this graph?
\item Draw a Bayes net representing the conditionally \textit{dependent} model $M_{dep}$. How many total rows are in all of the fully specified cpt's represented by this graph?
\item Compare the number of cpt rows you found in each graph you drew above to the number of parameters you previously estimated for $M_{indp}$ and $M_{dep}$ in \textbf{Questions 3.1 and 3.5} of \textbf{Homework 2}. Did the number of cpt rows in your graph grow at the same rate as the number of parameters in your models? Why might this be? What does this imply about a Bayesian network's ability to succinctly represent a model's conditional probability distributions?
\end{enumerate}
\end{document}