\documentclass[11pt]{article}
\input{math_macros}
\usepackage{amssymb,amsmath,amsthm}
\usepackage[margin=1in]{geometry}
\begin{document}
\begin{flushleft}
\fbox{
\begin{minipage}{\textwidth}
{\bf CS 174: Combinatorics and Discrete Probability} \hfill Fall 2012 \bigskip \\
\centering{\Large Homework 11} \medskip \\
\centering{This homework is not for submission.}
\end{minipage}
} \bigskip \\
\end{flushleft}
\noindent {\it In this homework, we will see a few of the numerous applications
of the weighted majority algorithm. It is remarkable how powerful a simple
algorithm, such as the multiplicative update rule, can be. The problem
statements here may appear long, but that is mainly because this homework is
combining some aspects of lecture notes and practice problems. It is suggested
that you attempt these problems in preparation for the final exam. The solutions
to these problems will be posted in the middle of next week.} \medskip \\
\noindent {\bf Problem 1}. ({\it von Neumann's Minmax Theorem}). The first
problem is one in game theory. We consider a game between two players, where
player $1$ can choose from among $n$ options and player $2$ can choose from
among $m$ options. The payoff to player $1$ is given by a payoff matrix, $A_{n
\times m}$, where $A_{ij}$ is the payoff received by player $1$ if she plays $i$
and player $2$ plays $j$. We will consider \emph{zero-sum} games, thus, the
payoff matrix from the second player will simply be $-A$. We will assume that
each entry of the matrix $A$ is bounded in the range $[-M, M]$.
The players may play according to \emph{mixed strategies}, \ie instead of
choosing a single option, they choose a distribution over the available options.
Suppose, player $1$ chooses a distribution, $\vecx \in \Delta_n$, and player $2$
chooses a distribution, $\vecy \in \Delta_m$, then the expected payoff received
by player $1$ is $\vecx^T A \vecy$.
In order to consider the value of the game, it is useful to think of one player
going first and announcing her strategy, and the second player using \emph{best
response}. Suppose player $1$ plays first; then she can guarantee that her expected
payoff is $v_1 = \max_{\vecx \in \Delta_n} \min_{\vecy \in \Delta_m} \vecx^T A
\vecy$. This is the \emph{max-min} value, denoted by $v_1$. Here, the second
player tries to minimize the expected payoff of player $1$ (since it is a
zero-sum game). One way to think of this is as follows: suppose player $1$ is going
to use $\vecx$ as her strategy and she announces it. In that case, player $2$ will
choose $\vecy$ that minimizes, $\vecx^T A \vecy$. Player $1$ should choose the
vector $\vecx$ that maximizes this \emph{minimum value}.
Similarly, suppose player $2$ goes first and announces $\vecy$. Then the maximum
expected value that player $1$ can obtain is $\min_{\vecy \in \Delta_m}
\max_{\vecx \in \Delta_n} \vecx^T A \vecy$. This is the \emph{min-max} value,
denoted by $\bar{v}_1$. The min-max theorem states that $v_1 = \bar{v}_1$, or
that the max-min and min-max values are the same. Thus, who plays first is
irrelevant.
We will prove the min-max theorem using the repeated $n$-decision problem.
Suppose, the two players play the same game for time steps $t = 1, \ldots, T$.
Player $1$ goes first and plays using the weighted majority algorithm. Player
$2$ plays best response, \ie if player $1$ plays $\vecx^t \in \Delta_n$ at time
step $t$, player $2$ chooses $\vecy^t$, that minimizes $(\vecx^t)^T A \vecy^t$.
%
\begin{enumerate}
%
\item First show that the \emph{max-min} value is at most the
\emph{min-max} value, \ie $v_1 \leq \bar{v}_1$. (This should obviously be the
case, since going second is an advantage.)
%
\item Show that the average payoff of player $1$, $\frac{1}{T}
\sum_{t=1}^T (\vecx^t)^T A \vecy^t$ is at most ${v_1}$.
%
\item Show that the average payoff that player $1$ could have obtained in
hindsight is at least $\bar{v}_1$.
%
\item Use the weighted majority theorem in the limit as $T \rightarrow \infty$
to conclude the fact that $v_1 = \bar{v}_1$.
%
\end{enumerate} \medskip
\noindent {\bf Problem 2}. ({\it Linear Programming}) A typical linear program has
the following form:
%
\begin{align*}
\min &~~\vecc^T \cdot \vecx \\
\mbox{subject to:} & \\
A\vecx &\leq \vecb \\
\vecx &\geq 0
\end{align*}
%
where $\vecx \in \reals^m$, $x_i \geq 0$ for $i = 1, \ldots, m$. $A$ is an $n
\times m$ matrix, $\vecb \in \reals^n$. Thus, there are $n$ constraints; and
$\vecc \in \reals^m$. Let $\veca_i$ denote the $i\th$ row of $A$ and $b_i$
denote the $i\th$ entry of the vector $\vecb$. Thus, $\veca_i^T \vecx \leq b_i$
is simply the $i\th$ constraint of a linear program. One way to solve a linear
program is to guess the value of the objective function, say $z^*$ (by binary
search) and try to find a vector in the set ${\mathcal P} = \{ \vecx ~|~ \vecx
\geq 0, \vecc^T \vecx = z^* \}$. The superscript, $^T$, here means the transpose.
%
\begin{enumerate}
%
\item Let $\veca \in \reals^m$ and $b \in \reals$. Show that if there is only
one constraint, $\veca^T\vecx \leq b$, then it is easy to determine if there
exists $\vecx \in {\mathcal P}$ for which the constraint is satisfied.
%
\item Now, let $\rho = \max\{ \max_{i, \vecx \in {\mathcal P}} |\veca_i^T \vecx
- b_i|, 1\}$. We will set up a repeated $n$-decisions problem. Each constraint
is thought of as one of $n$ choices. Suppose $\vecw^t$ is the distribution
over the $n$ constraints obtained by playing weighted majority at time step
$t$. Let $\veca^t = (\vecw^t)^T A$ and let $b^t = (\vecw^t)^T \vecb$. We consider
the constraint $(\veca^t)^T \vecx \leq b^t$. Show that if there is no $\vecx
\in {\mathcal P}$ that satisfies $(\veca^t)^T \vecx \leq b^t$, then the
original program is \emph{infeasible}, \ie there is no $\vecx \in {\mathcal P}$,
such that $A \vecx \leq \vecb$.
%
\item Otherwise, let $\vecx^t$ be any vector in the set ${\mathcal P}$ that
satisfies the constraint, $(\veca^t)^T \vecx^t \leq b^t$. The payoff for action
(constraint) $i$ is $(\veca_i)^T \cdot \vecx^t - b_i$. Show that the payoff of
the algorithm is non-positive at each round, unless the program was declared
\emph{infeasible}.
%
\item Let $\vecx^* = \frac{1}{T}\sum_{t=1}^T \vecx^t$ and suppose $T = 16 \rho^2
\ln(n)/ \epsilon^2$. Then show that, $\veca_i^T \vecx^* \leq b_i + \epsilon$ for
each $i$.
%
\end{enumerate} \medskip
\noindent {\bf Remark}: What we have shown is that we have \emph{almost} solved
the linear program. Note that each constraint may be violated slightly (by
$\epsilon$), and a tighter guarantee may be obtained using a larger $T$. This is
not the most efficient method to solve linear programs. More involved algorithms,
such as the ellipsoid algorithm, do give \emph{truly} polynomial time algorithms
for linear programming. Nonetheless, the above approach can be used to obtain
interesting polynomial time approximation algorithms, when it is sufficient to
find an \emph{approximately} feasible solution to a linear program. \medskip \\
\noindent {\bf Problem 3}. ({\it Sleeping Experts}). A variation of the standard
$n$-decision problem, is the \emph{sleeping experts problem}. Here, each of the
$n$ decisions is just some expert advice. However, at any given time step $t$,
the decision-maker (your algorithm) may only have access to a subset, $S^t
\subseteq [n]$ of experts. The remaining experts may be \emph{sleeping}.
The notion of \emph{regret} in hindsight is more involved in this case. Note
that the payoff of the best expert makes little sense, because some experts may
not be available (awake) on every round. We consider the following to be the
best \emph{strategy} in hindsight: we consider a ranking over the experts. A
ranking, $\sigma$ is simply a permutation of $n$ elements. At time-step $t$, the
ranking strategy according to $\sigma$, is implemented as follows: let
$\sigma(S^t)$ denote the \emph{highest-ranked} expert that is in $S^t$ according
to ranking $\sigma$. For example, if $n = 5$, $\sigma = (3, 2, 4, 1, 5)$ and
$S^t = \{1, 4\}$, then the strategy would be to follow the advice of expert $4$,
since the $4\th$ expert is awake and ranked higher than other awake experts (in
this case just expert $1$). Thus the regret is measured with respect to the best
\emph{ranking} in hindsight (when the payoffs for all experts are known). We
assume that the payoffs for each expert lie in the range $[-M, M]$.
Then,
%
\[ \mathrm{regret} = \max_{\sigma \in \mathrm{perm}(n)} \frac{1}{T} \sum_{t=1}^T
p^t_{\sigma(S^t)} - \frac{1}{T} \sum_{t=1}^T p^t_{d^t}, \]
%
where $d^t$ is the expert (from the set $S^t$), chosen by the decision-maker,
and $\sigma(S^t)$ is the expert that would have been chosen by the ranking
$\sigma$.
Show that it is possible to obtain an algorithm that after $T$ time-steps has
regret $O(M \sqrt{\frac{n \log(n)}{T}})$. To do this, treat each of the $n!$
possible rankings as a new \emph{meta-expert}, and implement the weighted
majority algorithm as if you were playing a repeated $n!$-decision game
with $n!$ experts. You may assume that at the end of the round, the
\emph{entire} payoff vector is revealed, including the payoffs that experts who
were sleeping (and were not available to the decision-maker) would have
received. What can you say about the running time of your algorithm? \medskip \\
\end{document}