\documentclass[11pt]{article}
\input{math_macros}
\usepackage{amssymb,amsmath,amsthm}
\usepackage[margin=1in]{geometry}
\begin{document}
\begin{flushleft}
\fbox{
\begin{minipage}{\textwidth}
{\bf CS 174: Combinatorics and Discrete Probability} \hfill Fall 2012 \bigskip \\
\centering{\Large Homework 10} \medskip \\
\centering{Due: Thursday, November 29, 2012 by {\bf 9:30am}}
\end{minipage}
} \bigskip \\
\end{flushleft}
\noindent{\it \textbf{Instructions}}: {\it You should upload your homework
solutions on bspace. You are strongly encouraged to type out your solutions
using \LaTeX. You may also want to consider using the equation editor in an
office suite if you are not familiar with \LaTeX. If you must handwrite
your homeworks, please write clearly and legibly. We will not grade homeworks
that are unreadable. You are encouraged to work in groups of 2-4, but you {\bf
must} write solutions on your own. Please review the homework policy carefully
on the class homepage.} \medskip \\
\noindent {\bf Note}: You \emph{must} justify all your answers. In particular, you will get
no credit if you simply write the final answer without any explanation. \medskip
\\
\noindent {\bf Problem 1}. {\it (Exercise 10.6 from MU -- 8 points)} The problem
of counting the number of solutions to a knapsack instance can be defined as
follows: Given items with sizes $a_1, \ldots, a_n > 0$ and an integer $b > 0$,
find the number of vectors $(x_1, x_2, \ldots, x_n) \in \{0, 1\}^n$, such that
$\sum_{i=1}^n a_i x_i \leq b$. The number $b$ can be thought of as the size of a
knapsack, and the $x_i$ denote whether or not each item is put into the
knapsack. Counting solutions corresponds to counting the number of different
sets of items that can be placed in the knapsack without exceeding its capacity.
\begin{enumerate}
\item[(a)] A na\"{i}ve way of counting the number of solutions to this problem
is to repeatedly choose $(x_1, \ldots, x_n) \in \{0, 1\}^n$ uniformly at random.
If $f$ is the fraction of valid solutions, then return $f\cdot 2^n$. Argue why this
is not a good strategy in general; in particular, argue that it will work poorly
when each $a_i$ is $1$ and $b = \sqrt{n}$.
%%
\item[(b)] Consider a Markov chain, $X_0, X_1, \ldots, $ on vectors $(x_1,
\ldots, x_n) \in \{0, 1\}^n$. Suppose that $X_j$ is $(x_1, \ldots, x_n)$. At
each time step, the Markov chain chooses $i \in \{1, \ldots, n\}$ uniformly at
random. If $x_i = 1$, then $X_{j+1}$ is obtained from $X_j$ by setting $x_i$ to
$0$. If $x_i = 0$, then $X_{j+1}$ is obtained from $X_j$ by setting $x_i$ to $1$
if doing so maintains the restriction $\sum_{i=1}^n a_i x_i \leq b$. Otherwise,
$X_{j+1} = X_j$.
Argue that this Markov chain has a uniform stationary distribution whenever
$\sum_{i=1}^n a_i > b$. Be sure to argue that the chain is irreducible and
aperiodic.
%%
\item[(c)] Argue that, if we have an FPAUS for the knapsack problem, then we can
derive an FPRAS for the problem. To set up the problem properly, assume without
loss of generality that $a_1 \leq a_2 \leq \cdots \leq a_n$. Let $b_0 = 0$ and $b_i =
\sum_{j=1}^i a_j$. Let $\Omega(b_i)$ be the set of vectors $(x_1, \ldots, x_n)
\in \{0, 1\}^n$ that satisfy $\sum_{j=1}^n a_j x_j \leq b_i$. Let $k$ be the
smallest integer such that $b_k \geq b$. Consider the equation
%
\[ |\Omega(b)| = \frac{|\Omega(b)|}{|\Omega(b_{k-1})|} \times
\frac{|\Omega(b_{k-1})|}{|\Omega(b_{k-2})|} \times \cdots \times
\frac{|\Omega(b_1)|}{|\Omega(b_0)|} \times |\Omega(b_0)|. \]
%%
You will need to argue that $|\Omega(b_{i-1})|/|\Omega(b_{i})|$ is not too
small. Specifically, argue that $|\Omega(b_i)| \leq (n+1) |\Omega(b_{i-1})|$.
\end{enumerate} \medskip
\noindent {\bf Problem 2}. {\it (Exercise 10.7 from MU -- 6 points)} An
alternative definition of an $\epsilon$-uniform sample of $\Omega$ is as
follows: A sampling algorithm generates an $\epsilon$-uniform sample $w$ if,
for all $x \in \Omega$,
%
\[ \frac{\left|\Pr(w = x) - 1/|\Omega|\right|}{1/|\Omega|} \leq \epsilon. \]
%
Show that an $\epsilon$-uniform sample under this definition yields an
$\epsilon$-uniform sample as given in Definition 10.3. \medskip \\
\noindent {\bf Problem 3}. {\it (Exercise 10.12 from MU -- 6 points)} The
following generalization of the Metropolis algorithm is due to Hastings.
Suppose that we have a Markov chain on a state space $\Omega$ given by the
transition matrix ${\bf Q}$ and that we want to construct a Markov chain on this
state space with a stationary distribution $\pi_x = b(x)/ B$, where for all $x
\in \Omega$, $b(x) > 0$, and $B = \sum_{x \in \Omega} b(x)$ is finite. Define a
new Markov chain as follows: When $X_n = x$, generate a random variable $Y$ with
$\Pr(Y = y) = Q_{x, y}$. Notice that $Y$ can be generated by simulating one step
of the original Markov chain. Set $X_{n+1}$ to $Y$ with probability
%
\[ \min\left(\frac{\pi_y Q_{y,x}}{\pi_x Q_{x,y}}, 1 \right), \]
%
and otherwise set $X_{n+1}$ to $X_n$. Argue that, if this chain is aperiodic and
irreducible, then it is also time reversible and has a stationary distribution
given by the $\pi_x$. \medskip \\
%% You will find the solution to this in fa01 (hw7)
\noindent {\bf Problem 4}. {\it (10 points)} In this problem we will use a
different fingerprinting technique to solve the pattern matching problem. The
idea is to map any bit string $s$ into a $2 \times 2$ matrix ${\bf M}(s)$ as
follows:
\begin{itemize}
%
\item For the empty string $\epsilon$, ${\bf M}(\epsilon) = \left[
\begin{array}{cc} 1 & 0 \\ 0 & 1 \end{array}\right]$.
%
\item ${\bf M}(0) = \left[ \begin{array}{cc} 1 & 0 \\ 1 & 1 \end{array}\right]$.
%
\item ${\bf M}(1) = \left[ \begin{array}{cc} 1 & 1 \\ 0 & 1 \end{array}\right]$.
%
\item For non-empty strings $x$ and $y$, ${\bf M}(xy) = {\bf M}(x) \times {\bf
M}(y)$.
%
\end{itemize}
Show that this fingerprint function has the following properties.
\begin{enumerate}
\item ${\bf M}(x)$ is well-defined for all $x \in \{0, 1\}^*$.
\item ${\bf M}(x) = {\bf M}(y) \Rightarrow x = y$.
\item For $x \in \{0, 1\}^n$, the entries in ${\bf M}(x)$ are bounded by the
Fibonacci number $F_n$, where the Fibonacci numbers are defined by the
recurrence $F_0 = F_1 = 1$ and $F_{n} = F_{n-1} + F_{n-2}$. (You may have to
use a slightly clever induction to prove this.)
\end{enumerate}
By considering the matrices ${\bf M}(x)$ modulo a suitable prime $p$, show how
you would perform efficient randomized pattern matching.
\end{document}