\documentclass[11pt]{article}
\input{math_macros}
\usepackage{amssymb,amsmath,amsthm}
\usepackage[margin=1in]{geometry}
\begin{document}
\begin{flushleft}
\fbox{
\begin{minipage}{\textwidth}
{\bf CS 174: Combinatorics and Discrete Probability} \hfill Fall 2012 \bigskip \\
\begin{center}
{\Large Homework 8} \medskip \\
Due: Thursday, November 1, 2012 by {\bf 9:30am}
\end{center}
\end{minipage}
} \bigskip \\
\end{flushleft}
\noindent{\it \textbf{Instructions}}: {\it You should upload your homework
solutions on bspace. You are strongly encouraged to type out your solutions
using \LaTeX. You may also want to consider using the mathematical typesetting
features of an office suite if you are not familiar with \LaTeX. If you must handwrite
your homeworks, please write clearly and legibly. We will not grade homeworks
that are unreadable. You are encouraged to work in groups of 2--4, but you
{\bf must} write solutions on your own. Please review the homework policy carefully
on the class homepage.} \medskip \\
\noindent {\bf Note}: You \emph{must} justify all your answers. In particular, you will get
no credit if you simply write the final answer without any explanation. \medskip
\\
\noindent {\bf Problem 1}. {\it (8 points)} We consider a problem motivated by
recommendation systems used by online merchants such as Amazon and Netflix.
Given two sets of integers $A$, $B$ of size $n$, we would like to quickly
determine if $A = B$, or if $|A \cap B|$ is very small, say $|A \cap B| <
0.01n$. (In the intermediate case, where $A \cap B$ is of moderate size, we do
not care what the output is.) In the case of Amazon's recommendation system, $A$
and $B$ could be the list of books purchased by different consumers, and $n$
could be very large.
%
\begin{enumerate}
\item[(a)] Sketch a simple deterministic algorithm that computes $|A \cap B|$
exactly using $O(n \log n)$ comparisons.
\end{enumerate}
%
Our aim is to beat this algorithm, using randomization and exploiting the fact
that we only want to distinguish the case where $A = B$ from the case where they
are very different. Specifically, we seek an algorithm with the following
properties:
%
\begin{itemize}
\item[-] if $A = B$, then the algorithm should output \emph{yes} with
probability at least $3/4$.
\item[-] if $|A \cap B| \leq 0.01n$, then the algorithm should output \emph{no}
with probability at least $3/4$.
\item[-] the algorithm uses $O(\sqrt{n} \log n)$ comparisons.
\end{itemize}
%
(The value $3/4$ here is for convenience only; it can easily be boosted to value
$1 - \delta$ for any desired $\delta$ using only $O(\log(1/\delta))$ repeated
trials.)
Here is the proposed algorithm, where the constant $c$ is to be determined:
\begin{enumerate}
\item[(1)] choose a subset $X$ of $A$ by picking each element of $A$
independently with probability $c/\sqrt{n}$.
\item[(2)] choose a subset $Y$ of $B$ by picking each element of $B$
independently with probability $c/\sqrt{n}$.
\item[(3)] if $|X| > 2 c \sqrt{n}$ or $|Y| > 2 c \sqrt{n}$, output \emph{yes}.
\item[(4)] compute $|X \cap Y|$; if $|X \cap Y| \geq 0.1c^2$, output \emph{yes},
else output \emph{no}.
\end{enumerate}
In the rest of this problem, we will show that the algorithm achieves the
required properties for a sufficiently large constant $c$.
\begin{enumerate}
\item[(b)] Show that the algorithm does indeed use only $O(\sqrt{n}\log n)$
comparisons, assuming that $c$ is constant.
\item[(c)] Suppose $A = B$. Show that the algorithm outputs \emph{yes} with
probability at least $1 - e^{-0.81c^2/2}$.
\item[(d)] Suppose $|A \cap B| \leq 0.01n$. Show that the algorithm outputs
\emph{yes} with probability at most $e^{-0.81c^2/11} + 2
e^{-\Omega(\sqrt{n})}$.
\item[(e)] Indicate briefly how to choose the constant $c$ so as to achieve the
$1/4$ error probabilities specified earlier. (You do not need to actually
perform the calculation.)
\end{enumerate}\medskip
\noindent {\bf Problem 2}. {\it (Exercise 7.2 from MU -- 5 points)} Consider the
two-state Markov chain with the following transition matrix.
\[ {\bf P} = \left[ \begin{array}{cc} p & 1 - p \\ 1 - p & p \end{array}\right].
\]
Find a simple expression for ${\bf P}^t_{0,0}$. \medskip \\
\noindent {\bf Problem 3}. {\it (Exercise 7.3 from MU -- 5 points)} Prove that
the communicating relation defines an equivalence relation. \medskip \\
\noindent {\bf Problem 4}. {\it (Exercise 7.6 from MU -- 5 points)} In studying
the 2-SAT algorithm, we considered a $1$-dimensional random walk with a
completely reflecting boundary at $0$. That is, whenever position $0$ is
reached, with probability $1$ the walk moves to position $1$ at the next step.
Consider now a random walk with a partially reflecting boundary at $0$. Whenever
position $0$ is reached, with probability $1/2$ the walk moves to position $1$
and with probability $1/2$ the walk stays at $0$. Everywhere else the random
walk moves either up or down $1$, each with probability $1/2$. Find the expected
number of moves to reach $n$, starting from position $i$ and using a random walk
with a partially reflecting boundary. \medskip \\
\noindent {\bf Problem 5}. {\it (7 points)} A property of states in a Markov
chain is called a \emph{class property} if, whenever states $i$ and $j$
communicate (\ie each is reachable from the other), either both states have the
property or neither does. Show that being periodic is a class property.
\end{document}