\documentclass[10pt]{article}
\newtheorem{define}{Definition}
%\newcommand{\Z}{{\mathbb{Z}}}
%\usepackage{psfig}
\usepackage{amssymb,amsmath}

\oddsidemargin=0.15in
\evensidemargin=0.15in
\topmargin=-.5in
\textheight=9in
\textwidth=6.25in

\begin{document}
\input{preamble.tex}

\lecture{9}{May 20, 2009}{Ronitt Rubinfeld}{Mateus de Oliveira Oliveira and Daniel Shahaf}

%%%% body goes in here %%%%

%Blah Blah Blah

\def\zo{\{\,0,1\,\}}
\def\pmo{\{\,\pm1\,\}}
\def\GF{{\rm GF}}
% for source readability
\let\lxor\oplus
\let\biglxor\bigoplus
\def\fail{\textsf{fail}}
\def\pass{\textsf{pass}}
\def\symdiff{\mathbin{\triangle}}
\def\hatf{{\hat f}}

\section{The Boolean Function}

\[ f : \zo^n \to \zo \]
\[ f : \pmo^n \to \pmo \]

Can be viewed as: a truth table, a circuit, a 2-coloring of the $n$-dimensional
discrete cube, an indicator of a set ($f(x) = 1 \iff x \in S$).

% voting
% Fourier/harmonic analysis
% $\GF(2^n)$, linearity

Some concepts that are studied: ``simple'' functions --- $k$-juntas and
dictatorships (only $k$~inputs, respectively one input, affect the function's
value); fairness (each input bit has the `same' influence over the output) and
noise-sensitivity (behavior under flipping of some input bits); symmetry
(behavior under permutations of inputs).

\subsection{Linear (homomorphic) functions}

Our goal today: linearity testing (homomorphism testing): to decide whether
a function~$f$, given as a blackbox (oracle), is \emph{linear}
(\emph{homomorphic}).

\begin{definition}
    A function $f : \zo^n \to \zo$ is  \emph{linear} (\emph{homomorphic}) 
    if \[ f(x) + f(y) = f(x+y) \] for every $x,y \in \zo^n$.
\end{definition}
(The addition $x+y$ is addition modulo~$2$ in the vector space $(\Z_2)^n$;
that is, $x+y = (x_1, \dots, x_n) + (y_1, \dots, y_n) = (x_1 \lxor y_1,
\dots, x_n \lxor y_n)$.)

\paragraph{Examples}
\begin{itemize}
    \item The constant function $f(x) \equiv 0$ is homomorphic ($f(x) + f(y) = 0+0 = 0 = f(x+y)$).
    \item The constant function $f(x) \equiv 1$ is not ($1+1 \ne 1$).
    \item The projection function $f(x) = x_i$ (for some fixed~$i$) is a homomorphism.
    \item As is the function $f(x) = \biglxor_{i=1}^n x_i$.
\end{itemize}
\begin{claim}
    A function $f : \zo^n \to \zo$ is homomorphic iff it is one of the functions
    $f_S(x) = \biglxor_{i\in S} x_i$ (for $S \subset [n]$).
\end{claim}
\begin{proof-sketch}
    Every homomorphic function is uniquely determined by the values on the
    vectors $e_i = (0, \dots, 0, 1, 0, \dots, 0)$ (with $1$ at the $i$th
    coordinate, $1 \le i \le n$).  Since there are $2^n$ possible settings
    for the values $f(e_i)$ ($1 \le i \le n$), there are $2^n$ linear functions.
    It is easy to see that all functions $f_S$ are linear, and there are 
    $\abs{\set{S : S \subset [n]}} = 2^n$ of them.
    \qed
\end{proof-sketch}
\paragraph{Note}  If $S=\emptyset$ then $f_S(x) \equiv 1$, i.e., $f_\emptyset$
is a constant function.

\subsection{Testing for linearity}

Given a function $f$ as a blackbox, in order to check whether or not it is
linear, we have to query it on \emph{every} possible input.  For example, the
function given by $f(x) = 1$ if $x = e_{17}$ and $f(x) = 0$ otherwise is not
homomorphic, but agrees with the zero homomorphism everywhere except at $x
= e_{17}$.

\begin{definition}
    A function $f$ is \emph{$\epsilon$-close to linear} if there exists
    a linear function~$g$ that agrees with $f$ on all but an $\epsilon$-fraction
    of the domain; that is,
    \[ \Pr_x[f(x) = g(x)] = \frac{\abs{\set{x : f(x) = g(x)}}}{2^n} \ge 1-\epsilon. \]
    Otherwise, $f$ is \emph{$\epsilon$-far from linear}.
\end{definition}

% $\epsilon$ is a small constant

\subsubsection{Proposed tester}
\label{proposedTester}
\begin{itemize}
    \item Repeat $r = \bigO(\frac{1}{\epsilon} \log \frac{1}{\delta^*})$ times:
    \begin{itemize}
        \item Pick $x, y \in_R \zo^n$ independently and uniformly.
        \item If $f(x) + f(y) \ne f(x+y)$:
        \begin{itemize}
            \item Output \fail{} and halt.
        \end{itemize}
    \end{itemize}
    \item Output \pass{}.
\end{itemize}

\subsubsection{Analysis}
\begin{claim}\label{l9:linear:iff:Prob:eq:1}
    $f$ is linear if and only if $\Pr[\pass] = 1$.
\end{claim}
\begin{claim}
    If $\Pr_{x,y}[f(x) + f(y) \ne f(x+y)] \ge \epsilon$
    then $\Pr[\fail] \ge 1-\delta^*$.
\end{claim}
\begin{claim}\label{l9:claim:epsclose:then:lowfailprob}
    If $f$ is $\epsilon$-close to linear, then the test fails with probability
    at most $3\epsilon$. 
\end{claim}
\begin{proof-idea}
    Let $A_x$ denote the event $f(x) \ne g(x)$.  Then
    $\Pr_x[A_x] \le \epsilon$ and thus
    $\Pr[\fail] \le \Pr_{x,y}[A_x \lor A_y \lor A_{x+y}] \le 3\epsilon$ by union bound.
\end{proof-idea}

\paragraph{Plan}
By claim~\ref{l9:linear:iff:Prob:eq:1}, the test \fail{}s with probabiliy zero
iff the distance of $f$ from linear is zero.  By
claim~\ref{l9:claim:epsclose:then:lowfailprob}, a similar relation also holds
--- in one direction --- if we say `small' instead of `zero'.  The remainder of
this lecture shows the converse of
claim~\ref{l9:claim:epsclose:then:lowfailprob}.

\subsection{Notational switch}

We now consider boolean functions as $f:\pmo^n \to \pmo$ rather than
$f:\zo^n\to\zo$:  we map $0 \mapsto +1$ and $1 \mapsto -1$, and write the
operation as multiplication ($x\cdot y = (x_1 y_1, \dots, x_n y_n)$ for $x,
y \in \pmo^n$) rather than addition ($x + y = (x_1 \lxor y_1, \dots, x_n \lxor
y_n)$ for $x,y \in \zo^n$).  (In other words, we switch our representation from
the group $\Z_2$ of integers modulo~$2$ to the group~$\mu_2$ of square roots of
unity.)

\paragraph{Example}  The homomorphic functions are now written as $f_S(x)
= \prod_{i\in S} x_i$.  The rejection condition of the proposed linearity tester
is $f(x) \cdot f(y) \ne f(x\cdot y)$, where $x\cdot y$ is as defined in the
previous paragraph (and \emph{not} an inner product).

\subsection{Rejection probability}

It will be more convenient to represent the rejection condition of the proposed
tester in terms of equality rather than inequality.  We have the equivalence:
\[ f(x)f(y) \ne f(xy) \iff \bigl(f(x) f(y)\bigr) f(xy) = -1 \]
which suggests to consider the following indicator:
\[
    \frac{1 - f(x)f(y)f(xy)}{2} =
    \begin{cases}
        0, & \mbox{if the test accepts;} \\
        1, & \mbox{if the test rejects.}
    \end{cases}
\]
We also define
\[ \delta = \E_{x,y} \left[\frac{1 - f(x)f(y)f(xy)}{2}\right] \]
as the \emph{rejection probability} of one loop-iteration of the proposed
tester.  This gives the \emph{acceptance probability} of one loop-iteration of
that tester as:
\[ 1-\delta = \E_{x,y} \left[\frac{1 + f(x)f(y)f(xy)}{2}\right]. \]

\section{Basics of Fourier analysis of parity functions}

$\mathcal{G} = \set{g : \pmo^n \to \R}$ is a $2^n$-dimensional vector space (over
the field~$\R$, i.e., linear combinations are to be taken with real
coefficients).  This space is equipped with the inner product
\[ \ip{f,g} = \frac{1}{2^n} \sum_{x \in \pmo^n} f(x) g(x). \]

\subsection{Looking for a basis}

We look for a convenient basis of~$\mathcal{G}$.
\begin{itemize}
    \item The first idea is the \emph{indicator functions}: the functions~$e_a$
    (for $a \in \pmo^n$) given by $e_a(x) = 1$ if $a=x$ and $0$ otherwise.

    It is easy to see that $\set{e_a : a \in \pmo^n}$ is a basis, and that $g
    = \sum_a g(a) \cdot e_a$ (i.e., $g(x) = \sum_a g(a) \cdot e_a(x)$) for any
    function~$g$.

    \item However, the basis of \emph{character functions} $\chi_S(x) = \prod_{i
    \in S} x_i$ will be more convenient.  (These are the functions we used to
    call~$f_S$.)
\end{itemize}

\clearpage
\begin{lemma}
    $\set{\chi_S : S \subset [n]}$ is an orthonormal basis.
\end{lemma}
\begin{proof}
    Let $S \ne T$ be two distinct subsets of~$[n]$, and let $j \in S \symdiff
    T = \set{x : (x \in S) \ne (x \in T)}$.  Denote ``$x$ with the $j$th bit
    flipped'' by `$x^{\lxor j}$'.  Then
    \[ \ip{\chi_S, \chi_S} = \frac{1}{2^n} \sum_x \underbrace{\chi_S(x)^2}_{=1} = 1 \]
    and
    \begin{displaymath}
    \begin{split}
        \ip{\chi_S, \chi_T}
            &= \frac{1}{2^n} \sum_x \chi_S(x) \chi_T(x)
             = \frac{1}{2^n} \sum_x \biggl(\prod_{i \in S} x_j \cdot \prod_{j \in T} x_j\biggr)
          \\&= \frac{1}{2^n} \sum_x \prod_{i \in S \symdiff T} x_i
               \qquad\mbox{(because $\set{x_i:i\in S\cap T}$ cancel out)}
          \\&= \frac{1}{2^n} \sum_{\set{x,x^{\lxor j}}}
                             \biggl( \prod_{i \in S \symdiff T} x_i
                                    +\prod_{i \in S \symdiff T} (x^{\lxor j})_i \biggr)
          \\&= \frac{1}{2^n} \sum_{\set{x,x^{\lxor j}}}
                             \biggl(           x_j  \cdot \prod_{j\ne i \in S \symdiff T} x_i
                                    +\overline{x_j} \cdot \prod_{j\ne i \in S \symdiff T} (x^{\lxor j})_i \biggr)
          \\&= \frac{1}{2^n} \sum_{\set{x,x^{\lxor j}}}
                             \biggl(           x_j  \cdot \prod_{j\ne i \in S \symdiff T} x_i
                                    +\overline{x_j} \cdot \prod_{j\ne i \in S \symdiff T} x_i \biggr)
          \\&= \frac{1}{2^n} \sum_{\set{x,x^{\lxor j}}}
                             (x_j + \overline{x_j})
                             \biggl( \prod_{i \in S \symdiff T, i\ne j} x_i \biggr)
             = \frac{1}{2^n} \sum 0 = 0.
    \end{split}
    \end{displaymath}
\end{proof}

\begin{remark}
    The technique of separating out $x_j$ and its complement is an example of a \emph{pairing argument}.
    It considers together all pairs of words that differ only on a specific
    coordinate; for instance,
    $(+1, +1, -1, +1)$ with $(+1, +1, +1, +1)$,
    $(+1, +1, -1, -1)$ with $(+1, +1, +1, -1)$,
    $(-1, -1, -1, +1)$ with $(-1, -1, +1, +1)$,
    etc.
\end{remark}

\begin{corollary}
    We can write every function $f$  as $f = \sum_{S \subset [n]} \hatf(S)
    \chi_S$, where $\hatf(S) = \ip{f,\chi_S}$.
\end{corollary}


For example, if $f:x\mapsto x_i$ is the projection function,  we have that $f=\chi_i$, thus the Fourier coefficients of $f$ 
are $\hat{f}(S)=\langle \chi_{i}, \chi_S \rangle$ which is equal to $1$ if $S=\{i\}$ and $0$ otherwise. 
Similarly, if $f:x\mapsto 1$ is the constant function, then $f=\chi_{\emptyset}$ and $\hat{f}(S)$ will be equal to 
$1$ if $S=\emptyset$ and $0$ otherwise. 



\subsection{Some useful facts about the Fourier Transform}

\begin{lemma} 
$\chi_S\cdot \chi_T = \chi_{S\Delta T}$
\end{lemma}

\begin{lemma}{Fourier Coefficient of any parity function}
\label{lemma:Coefficient}
$$f(x)=\chi_S(x) \Leftrightarrow \forall Z\subseteq [n],\hspace{0.2cm} \hat{f}(Z) = \left\{\begin{array}{ll} 1 & \mbox{when $S=Z$} \\ 0 & \mbox{Otherwise} \end{array}\right. $$
\end{lemma}

\begin{lemma}{Agreement with linear functions vs max Fourier coefficient}
\label{lemma:Agreement}
$$ \hat{f}(S) = 1 - 2 \Pr[f(x) \neq \chi_S(x)] \Leftrightarrow \DIST(f,\chi_S) = \frac{1-\hat{f}(S)}{2} $$ 

or equivalently

$$ \hat{f}(S) = -1 + 2 \Pr[f(x) \neq \chi_S(x)] \Leftrightarrow \DIST(f,\chi_S) = \frac{1-\hat{f}(S)}{2} $$ 
\end{lemma}
\begin{proof}

Its enough to prove that 

$$\DIST(f, \chi_S) = \Pr_{x\in \{\pm 1\}^n}[f(x) -\chi_S(x)].$$

The proof of this fact proceeds as follows: 

\begin{equation}
\begin{array}{rcl}
\hat{f}(S) & = & \frac{1}{2^n} \sum_{x}f(x)\chi_S(x) \\
	   &   & \\
	   & = & \frac{1}{2^n} \left[ \sum_{x,f(x)=\chi_S(x)} 1  +  \sum_{x,f(x)\neq\chi_S(x)} -1\right] \\
	   &   & \\
	   & = & (1- \DIST(f,\chi_S))\cdot 1  + \DIST(f,\chi_S)\cdot(-1) \\  
	   &   & \\
	   & = & 1 - 2 \DIST(f,\chi_S) \\
\end{array}
\end{equation}
\end{proof}

\begin{lemma}
If $S\neq T$ then $\DIST(\chi_S, \chi_T) = \frac{1}{2}$. 
\end{lemma}
\begin{proof}
Let  $f=\chi_T$. Then 
\begin{equation}
\begin{array}{rcl}
\hat{f}(S) & = & 0 \mbox{\hspace{0.4cm}( by lemma \ref{lemma:Coefficient})} \\
	   &   & \\
	   & = & 1-2\DIST(f,\chi_S)  \mbox{ \hspace{0.4cm}(by lemma \ref{lemma:Agreement})} \\ 
	   &   & \\
	   & \Rightarrow & \DIST(f,\chi_S)=\frac{1}{2}  \\  
	   &   & \\
	   & \Rightarrow & \DIST(\chi_T,\chi_S)= \frac{1}{2} \\
\end{array}
\end{equation}

\end{proof}

A very important theorem in Fourier Analysis is the following: 

\begin{theorem}[Plancherel's theorem]
Let $f,g:\{\pm 1\}\rightarrow \mathbb{R}$. Then 
$$\langle f, g \rangle = \E_{x\in \{\pm 1\}^n}[f(x)g(x)]= \sum_{S\subseteq [n]} \hat{f}(S)\hat{g}(S).$$
\end{theorem}
\begin{proof}
$$
\begin{array}{rcl}
\langle f, g\rangle  & = &  \langle \sum_{S}\hat{f}(S)\chi_S , \sum_{T} \hat{g}(T)\chi_{T}  \rangle\\
	   	     &   & \\
	   	     & = & \sum_{S}\sum_{T} \hat{f}(S)\hat{g}(T) \langle \chi_S, \chi_T \rangle \mbox{\hspace{0.4cm} by bilinearity of $\langle,\rangle $}\\
	   	     &   & \\
		     & = & \sum_{S} \hat{f}(S)\hat{g}(S)  \mbox{\hspace{0.3cm} (because $\langle \chi_S,\chi_T\rangle=1$ if $S=T$ and $0$ if $S\neq T$)}
\end{array}
$$
\end{proof}

We call special attention to the following corollary of Plancherel's theorem: 

\begin{corollary}[Parseval's Theorem]
If $f:\{\pm 1\}^n \rightarrow \mathbb{R}$ then $\langle f,f\rangle= \E[f(x)^2] = \sum_S \hat{f}(S)^2$. 
\end{corollary}

Which for boolean functions $f:\{0,1\}^n\rightarrow \{\pm 1\}$ reduces to the next corollary, by observing that 
in this case $f(x)^2=1$ for every $x$.

\begin{corollary}[Boolean Parseval's Theorem]
If $f:\{\pm 1\}^n\rightarrow\{\pm 1\}$ then $\sum_S \hat{f}(S)^2=1$.
\end{corollary}



\begin{lemma}
$\E[f]=\E[f(x)\cdot 1] = \hat{f}(\emptyset)\chi_{\emptyset}(\emptyset)= \hat{f}(\emptyset)$. 
\end{lemma}

\begin{lemma}
$\E[\chi_S(x)] = \left\{
\begin{array}{cl} 
1 & \mbox{if $S=\emptyset$} \\ 
0 & \mbox{Otherwise} \\
\end{array}
\right.$
\end{lemma}


%\begin{figure}[htp]
%\centering
%\includegraphics[scale=0.8]{inspectionQuestion2}
%\caption{Question 2}\label{figure:question2}
%\end{figure}

\section{Linearity Testing}

The goal of this section is to prove the converse of claim~\ref{l9:claim:epsclose:then:lowfailprob}, i.e, to show that if $f$ is $\epsilon$-far from linear, then the probability that 
the algorithm described in subsection \ref{proposedTester} finds two $x,y$ for which $f(x + y) \neq f(x) + f(y) $
is high. More precisely,

$$\Pr[f(x)f(y)f(x\cdot y)= -1] \geq \epsilon $$

\begin{lemma}[Main Lemma]
$$ 1- \delta = \Pr[f(x)f(y)f(x y) = 1] = \frac{1}{2} + \frac{1}{2}\sum_{S\in [n]} \hat{f}(s)^3 $$
\end{lemma}
\begin{proof}
$$1-\delta = \E_{xy}\left[ \frac{1+ f(x)f(y)f(xy)}{2}\right] = \frac{1}{2} + \frac{1}{2} \E_{xy}[f(x)f(y)f(xy)]$$

and 

$$
\begin{array}{rcl}
 \E_{xy}[f(x)f(y)f(xy)] & = &   \E_{xy}[(\sum_{S}\hat{f}(S)\chi_S(x) ) (\sum_{T}\hat{f}(T)\chi_T(y) )(\sum_{U}\hat{f}(U)\chi_T(xy) ) ] \\ 
	   	     &   & \\
	   	     & = & \E_{xy}[\sum_{STU}\hat{f}(S)\hat{f}(T)\hat{f}(U) \chi_{S}(x)\chi_{T}(y)\chi_{U}(xy)] \\
	   	     &   & \\
		     & = & \sum_{STU}\hat{f}(S)\hat{f}(T)\hat{f}(U) \E[\chi_{S}(x)\chi_{T}(y)\chi_{U}(xy)] \\
		     &   & \\
		     & = & \sum_{S=T=U} \hat{f}(S)^3.
\end{array}
$$

The last equality follows from the fact that 

$\E_{xy}[\chi_S(x)\chi_T(y)\chi_U(xy)] = \E[\chi_S(x)\chi_U(x)] \cdot \E[\chi_T(y)\chi_U(y)] = 
\left\{\begin{array}{cl} 
1 & \mbox{if $S=U$ and $T=U$} \\
0 & \mbox{otherwise}
\end{array} \right. $


\end{proof}


Now we are ready to prove the goal stated in the beginning of this section. 

\begin{proof}
Assume $\Pr[f(x)f(y)f(xy) = -1] < \epsilon$. Then we show that $f$ is $\epsilon$-close to linear. 
$$ 1- \epsilon = \Pr[f(x)f(y)f(x y) = 1] = \frac{1}{2} + \frac{1}{2}\sum_{S\subseteq [n]} \hat{f}(S)^3 $$

then 

$$
\begin{array}{rcl}
1-2\epsilon  & \leq &  \sum_{S\subseteq [n]}\hat{f}(S)^3 \\
	   	     &   & \\
	   	     & \leq & \max_{S}{\hat{f}(S)} \sum_{S\subseteq [n]} \hat{f}(S)^2 \\ 
	   	     &   & \\
		     & \leq  & \max_S{\hat{f}(S)} \\ 
\end{array}
$$
Now let $T$ be such that $\hat{f}(T)=\max_{S}{\hat{f}(S)}$. Then $1-2\epsilon \leq \hat{f}(T)$. By lemma \ref{lemma:Agreement} $\DIST(f,\chi_{T}) \leq \epsilon$.  
\end{proof}


\end{document}





