\documentstyle[12pt]{book}
%\setlength{\oddsidemargin}{-.1 in}
%\setlength{\evensidemargin}{-.3 in}
\setlength{\evensidemargin}{.0 in}
\addtolength{\topmargin}{-1in}
\setlength{\textwidth}{6.5in}
\setlength{\textheight}{8.5in}

%
% The following macro is used to generate the header.
%
%

\newcommand{\handout}[5]{
   \pagestyle{headings}
   \thispagestyle{plain}
   \newpage
%   \setcounter{chapter}{#1}
   \setcounter{page}{#2}
%  \set\thechapter{#3}
   \noindent
   \begin{center}
   \framebox{
      \vbox{
    \hbox to 6.28in { {\bf Reinforcement Learning
                        \hfill Fall Semester, 1999/2000} }
       \vspace{4mm}
       \hbox to 6.28in { {\Large \hfill #1: #3  \hfill} }
       \vspace{2mm}
       \hbox to 6.28in { {\it Lecturer: #4 \hfill} }
      }
   }
   \end{center}
   \markboth{Handout #1: #3}{Handout #1: #3}
   \vspace*{4mm}
}

%
% Use these macros for organizing sections of your notes.
% Each command takes two arguments: (1) the title of the section and and
% (2) a keyword for that section to appear in the index.  (See examples.)
% Please don't use \section, \subsection, and \subsubsection directly!
%

\newcommand{\topic}[2]{\section{#1} \index{#2} \markright{#1}}
\newcommand{\subtopic}[2]{\subsection{#1} \index{#2}}
\newcommand{\subsubtopic}[2]{\subsubsection{#1} \index{#2}}
 
%
% Convention for citations is first author's last name followed by other
% authors' last initials, followed by the year.  For example, to cite the
% seventh entry in the course bibliography, you would type: \cite{BurnsL80}
% (To avoid bibliography problems, for now we redefine the \cite command.)
%
     
\renewcommand{\cite}[1]{[#1]}

%
% These are just to make things a little easier:
%
\newcommand{\bi}{\begin{itemize}}
\newcommand{\ei}{\end{itemize}}
\newcommand{\be}{\begin{enumerate}}
\newcommand{\ee}{\end{enumerate}}
\newcommand{\blank}{\vspace{1ex}}   % generates a blank line in the output

%
% Use these for theorems, lemmas, proofs, etc.
%
\newtheorem{theorem}{Theorem}[chapter]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{corollary}[theorem]{Corollary}
\newenvironment{proof}{{\em Proof:}}{\hfill\rule{2mm}{2mm}}

%
% Use the following for definitions.
% \bigdef is for definitions to be set off by themselves; \smalldef is for
% definitions given in the middle of a paragraph.
%
\newenvironment{dfn}{{\vspace*{1ex} \noindent \bf Definition }}{\vspace*{1ex}}
\newcommand{\bigdef}[2]{\index{#1}\begin{dfn} {\rm #2} \end{dfn}}
\newcommand{\smalldef}[1]{\index{#1} {\em #1}}


\begin{document}
%\handout{**HANDOUT-NUMBER**}{**1ST-PAGE**}{**DATE**}{**LECTURER**}
\handout{Homework 2}{1}{Nov. 25}{Yishay Mansour}

\section*{Homework number 2.}

\noindent{\bf Question 1}

Show an example of an MDP with $n$ states and two actions, where 
the Policy Iteration algorithm requires $n$ iterations to converge to the
optimal policy. \\
\noindent ({\bf Open problem:} Show an example that requires more than 
$n$ iterations.)\\

\noindent{\bf Question 2}

Let $\pi_1$ and $\pi_2$ be two deterministic
stationary policies for a given MDP $M$.
Construct a new policy $\pi$ in the following way,
\[
\pi(s)= \left\{\begin{array}{ll}
				\pi_1(s) & \mbox{ if } v^{\pi_1}_\lambda(s)\geq v^{\pi_2}_\lambda(s)\\
				\pi_2(s) & \mbox{ if } v^{\pi_1}_\lambda(s) < v^{\pi_2}_\lambda(s)
				\end{array} \right.
\]
Show that $v^{\pi}_\lambda \geq \max\{v^{\pi_1}_\lambda,
v^{\pi_2}_\lambda\}$.\\

\noindent{\bf Question 3}

We can use the approximate value 
function, computed in the Value Iteration algorithm, to define a policy. 

Prove that if the number of states and actions are finite,
then after a finite number of iterations this policy equals the optimal
policy and never changes.

Give an example of an MDP where the number of iterations until the
policy defined by value iteration converges to the optimal policy is
at least $1/(1-\lambda)$.
(Hint: build an MDP such that from the start state you can branch two
two different states, once you reach a state you remain in it.)\\




\noindent{\bf The homework is due in two weeks}

\end{document}


