Changeset - ad16c1eb532f
András Gilyén - 2017-09-10 16:12:37
gilyen@cwi.nl
counter example
1 file changed with 1 insertion and 0 deletions:
main.tex
 
\documentclass[a4paper,11pt,english,final]{article}
 
\pdfoutput=1
 

	
 
\usepackage[utf8]{inputenc}
 
\usepackage[english]{babel}
 
\usepackage{fullpage}
 

	
 
\usepackage{graphics}
 
\usepackage{diagbox}
 
\usepackage[table]{xcolor}% http://ctan.org/pkg/xcolor
 
\usepackage{graphicx}
 
\usepackage{wrapfig}
 
\usepackage{caption}
 
\captionsetup{compatibility=false}
 
\graphicspath{{./}}
 

	
 

	
 
\usepackage{tikz}
 
\usepackage{amssymb}
 
\usepackage{mathtools}
 
\usepackage{bm}
 
\usepackage{bbm}
 
%\usepackage{bbold}
 
\usepackage{verbatim}
 

	
 
%for correcting large brackets spacing
 
\usepackage{mleftright}\mleftright
 

	
 
\usepackage{algorithm}
 
\usepackage{algorithmic}
 
\usepackage{enumitem}
 
\usepackage{float}
 

	
 
%\usepackage{titling}
 

	
 
%\setlength{\droptitle}{-5mm}  
 

	
 
%\usepackage{MnSymbol}
 
\newcommand{\cupdot}{\overset{.}{\cup}}
 
\newcommand{\pvp}{\vec{p}{\kern 0.45mm}'}
 

	
 
\DeclarePairedDelimiter\bra{\langle}{\rvert}
 
\DeclarePairedDelimiter\ket{\lvert}{\rangle}
 
\DeclarePairedDelimiterX\braket[2]{\langle}{\rangle}{#1 \delimsize\vert #2}
 
\newcommand{\underflow}[2]{\underset{\kern-60mm \overbrace{#1} \kern-60mm}{#2}}
 

	
 
\def\Ind(#1){{{\tt Ind}({#1})}}
 
\def\Id{\mathrm{Id}}
 
\def\Pr{\mathrm{Pr}}
 
\def\Tr{\mathrm{Tr}}
 
\def\im{\mathrm{im}}
 
\newcommand{\bOt}[1]{\widetilde{\mathcal O}\left(#1\right)}
 
\newcommand{\bigO}[1]{\mathcal O\left(#1\right)}
 
\newcommand{\Res}[1]{\#\mathrm{Res}\left(#1\right)}
 

	
 
\newcommand{\QMAo}{\textsf{QMA$_1$}}
 
\newcommand{\BQP}{\textsf{BQP}}
 
\newcommand{\NP}{\textsf{NP}}
 
\newcommand{\SharpP}{\textsf{\# P}}
 

	
 
\newcommand{\diam}[1]{\mathcal{D}\left(#1\right)}
 
\newcommand{\paths}[1]{\mathcal{P}\left(#1\to\mathbf{1}\right)}
 
\newcommand{\start}[1]{\textsc{start}\left(#1\right)}
 
\newcommand{\patch}[1]{\textsc{Patch}\left(#1\right)}
 
\newcommand{\patches}[1]{\textsc{Patches}\left(#1\right)}
 
\newcommand{\maxgap}[1]{\mathrm{maxgap}\left(#1\right)}
 
\newcommand{\gaps}[1]{#1_{\mathrm{gaps}}}
 
\renewcommand{\P}{\mathbb{P}}
 
\newcommand{\E}{\mathbb{E}}
 
\newcommand{\NZ}[1]{\mathrm{NZ}^{(#1)}}
 
\newcommand{\Z}[1]{\mathrm{Z}^{(#1)}}
 
%\newcommand{\dist}[1]{d_{\!\!\not\,#1}}
 
\newcommand{\dist}[1]{d_{\neg #1}}
 

	
 
\newcommand{\todo}[1]{{\color{red}\textbf{TODO:} #1}}
 

	
 
\long\def\ignore#1{}
 

	
 
\newtheorem{theorem}{Theorem}
 
\newtheorem{corollary}[theorem]{Corollary}%[theorem]
 
\newtheorem{lemma}[theorem]{Lemma}
 
\newtheorem{prop}[theorem]{Proposition}
 
\newtheorem{definition}[theorem]{Definition}
 
\newtheorem{claim}[theorem]{Claim}
 
\newtheorem{remark}[theorem]{Remark}
 

	
 
\newenvironment{proof}
 
{\noindent {\bf Proof. }}
 
{{\hfill $\Box$}\\	\smallskip}
 

	
 
\usepackage[final]{hyperref}
 
\hypersetup{
 
	colorlinks = true,
 
	allcolors = {blue},
 
}
 
\usepackage{ifpdf} 
 
\ifpdf
 
	\typeout{^^J *** PDF mode *** } 
 
%	\input{myBiblatex.tex}
 
%	\addbibresource{LLL.bib}	
 
%\else
 
%	\typeout{^^J *** DVI mode ***} 
 
%	\hypersetup{breaklinks = true}
 
%	\usepackage[quadpoints=false]{hypdvips}
 
	\let\oldthebibliography=\thebibliography
 
	\let\endoldthebibliography=\endthebibliography
 
	\renewenvironment{thebibliography}[1]{%
 
		\begin{oldthebibliography}{#1}%
 
			\setlength{\itemsep}{-.3ex}%
 
	}%
 
	{%
 
		\end{oldthebibliography}%
 
	}
 
\fi 
 

	
 
%opening
 
\title{Criticality of resampling on the cycle / in the evolution model}
 
%\author{?\thanks{QuSoft, CWI and University of Amsterdam, the Netherlands. \texttt{?@cwi.nl} }
 
	%\and
 
	%?%
 
%}
 
%\thanksmarkseries{arabic}
 
%\renewcommand{\thefootnote}{\fnsymbol{footnote}}
 
%\date{\vspace{-12mm}}
 

	
 
\begin{document}
 
	
 
	\maketitle
 

	
 
	\begin{abstract}
 
		The model we consider is the following~\cite{ResampleLimit}: we have a cycle of length $n\geq 3$. Initially each site is set to $0$ or $1$ independently, taking the value $0$ with probability $p$. After that, in each step we select a uniformly random vertex with value $0$ and resample it together with its two neighbours, assigning $0$ with probability $p$ to each of the three vertices, just as in the initialization. The question we try to answer is: what is the expected number of resamplings performed before reaching the all-$1$ state?
 
		
 
		We present strong evidence for a remarkable critical behaviour. We conjecture that there exists some $p_c\approx0.62$ such that for all $p\in[0,p_c)$ the expected number of resamplings is bounded by a $p$-dependent constant times $n$, whereas for all $p\in(p_c,1]$ the expected number of resamplings grows exponentially in $n$.
 
	\end{abstract}
 
	%Let $R(n)$ denote this quantity for a length $n\geq 3$ cycle.
 
	
 
	We can think of the resampling procedure as a Markov chain. To describe the corresponding matrix we introduce some notation. For $b\in\{0,1\}^n$ let $r(b,i,(x_{-1},x_0,x_1))$ denote the bit string which differs from $b$ by replacing the bits at indices $i-1$, $i$ and $i+1$ with the values in $x$, where the indices are interpreted modulo $n$. Also, for $x\in\{0,1\}^k$ let $p(x)=p((x_1,\ldots,x_k))=\prod_{i=1}^{k}p^{(1-x_i)}(1-p)^{x_i}$. Now we can describe the matrix of the Markov chain. We represent probability distributions as row vectors indexed by bitstrings of length $n$. Let $M_{(n)}$ denote the matrix of the leaking Markov chain:
 
	$$
 
		M_{(n)}=\sum_{b\in\{0,1\}^n\setminus{\{1\}^n}}\sum_{i\in[n]:b_i=0}\sum_{x\in\{0,1\}^3}E_{(b,r(b,i,x))}\frac{p(x)}{n-|b|},
 
	$$
 
	where $E_{(i,j)}$ denotes the matrix that is all $0$ except for a $1$ at the $(i,j)$th entry, and $|b|$ denotes the number of $1$s in $b$ (so $n-|b|$ is the number of $0$s).
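
	For concreteness, a small worked example of the notation (our own illustration, not from the original note): take $n=4$, $b=0110$ (so $n-|b|=2$ sites hold a $0$), $i=1$ and $x=(x_{-1},x_0,x_1)=(1,0,1)$. The resampling touches sites $4$, $1$ and $2$, so $r(b,1,x)=0111$ and $p(x)=(1-p)\cdot p\cdot(1-p)=p(1-p)^2$; this transition therefore contributes $\frac{p(1-p)^2}{2}$ to the $(0110,0111)$ entry of $M_{(4)}$.
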
 

	
 
	We want to calculate the average number of resamplings $R^{(n)}$, which we define as the expected number of resamplings divided by $n$. For this let $\rho,\mathbbm{1}\in[0,1]^{2^n}$ be indexed with elements of $\{0,1\}^n$ such that $\rho_b=p(b)$ and $\mathbbm{1}_b=1$. Then we use that the expected number of resamplings is just the hitting time of the Markov chain:
 
	\begin{align*}
 
		R^{(n)}:&=\mathbb{E}(\#\{\text{resamplings before termination}\})/n\\
 
		&=\sum_{k=1}^{\infty}P(\text{at least } k \text{ resamplings are performed})/n\\
 
		&=\sum_{k=1}^{\infty}\rho M_{(n)}^k \mathbbm{1}/n\\
 
		&=\sum_{k=0}^{\infty}a^{(n)}_k p^k
 
	\end{align*}
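
	The hitting-time formula above is straightforward to check numerically for small $n$. The following Python sketch (our own illustration, not part of this note; the encoding of bitstrings as integers is an arbitrary choice) builds $M_{(n)}$ for a numerical value of $p$ and evaluates $R^{(n)}(p)$ using $\sum_{k=1}^{\infty}\rho M_{(n)}^k \mathbbm{1}=\rho M_{(n)}(\Id-M_{(n)})^{-1}\mathbbm{1}$:
\begin{verbatim}
# Numerical sanity check (not from the note): build the leaking matrix M_(n)
# and compute R^(n)(p) = rho M (I - M)^{-1} 1 / n for small n.
import itertools
import numpy as np

def R(n, p):
    full = (1 << n) - 1                      # the all-1 state
    M = np.zeros((1 << n, 1 << n))
    for b in range(1 << n):
        if b == full:
            continue                         # leaking chain: no transitions out of 1^n
        zeros = [i for i in range(n) if not (b >> i) & 1]
        for i in zeros:
            for x in itertools.product([0, 1], repeat=3):
                bp = b
                for off, xi in zip((-1, 0, 1), x):
                    j = (i + off) % n
                    bp = (bp | (1 << j)) if xi else (bp & ~(1 << j))
                M[b, bp] += p**(3 - sum(x)) * (1 - p)**sum(x) / len(zeros)
    rho = np.array([p**(n - bin(b).count('1')) * (1 - p)**bin(b).count('1')
                    for b in range(1 << n)])
    ones = np.ones(1 << n)
    return rho @ M @ np.linalg.solve(np.eye(1 << n) - M, ones) / n

# R(3, 0.3) agrees with (1 - 0.7**3) / (3 * 0.7**3) up to numerical precision.
\end{verbatim}
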
 

	
 
	\begin{table}[!htb]
 
	\centering
 
	\caption{Table of the coefficients $a^{(n)}_k$}
 
	\label{tab:coeffs}
 
	\resizebox{\columnwidth}{!}{%
 
		\begin{tabular}{c|ccccccccccccccccccccc}
 
			\backslashbox[10mm]{$n$}{$k$} & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 11 & 12 & 13 & 14 & 15 & 16 & 17 & 18 & 19 & 20 \\		\hline
 
			3 &	0 & 1 & \cellcolor{blue!25}2 & 3+1/3 & 5.00 & 7.00 & 9.33 & 12.00 & 15.00 & 18.33 & 22.00 & 26.00 & 30.33 & 35.00 & 40.00 & 45.333 & 51.000 & 57.000 & 63.333 & 70.000 & 77.000 \\
 
			4 &	0 & 1 & 2 & \cellcolor{blue!25}3+2/3 & 6.16 & 9.66 & 14.3 & 20.33 & 27.83 & 37.00 & 48.00 & 61.00 & 76.16 & 93.66 & 113.6 & 136.33 & 161.83 & 190.33 & 222.00 & 257.00 & 295.50 \\
 
			5 &	0 & 1 & 2 & 3+2/3 & \cellcolor{blue!25}6.44 & 10.8 & 17.3 & 26.65 & 39.43 & 56.48 & 78.65 & 106.9 & 142.2 & 185.8 & 238.7 & 302.41 & 378.05 & 467.13 & 571.14 & 691.69 & 830.44 \\
 
			6 &	0 & 1 & 2 & 3+2/3 & 6.44 & \cellcolor{blue!25}11.0 & 18.5 & 30.02 & 47.10 & 71.68 & 106.0 & 152.9 & 215.4 & 297.4 & 403.1 & 537.21 & 705.25 & 913.31 & 1168.2 & 1477.4 & 1849.1 \\
 
			7 &	0 & 1 & 2 & 3+2/3 & 6.44 & 11.0 & \cellcolor{blue!25}18.7 & 31.21 & 50.83 & 80.80 & 125.3 & 189.7 & 280.8 & 407.0 & 578.6 & 808.13 & 1110.2 & 1502.6 & 2005.6 & 2643.2 & 3443.1 \\
 
			8 &	0 & 1 & 2 & 3+2/3 & 6.44 & 11.0 & 18.7 & \cellcolor{blue!25}31.44 & 52.08 & 84.95 & 136.0 & 213.6 & 328.9 & 496.5 & 735.6 & 1070.7 & 1532.5 & 2159.5 & 2998.8 & 4108.1 & 5556.7 \\
 
			9 &	0 & 1 & 2 & 3+2/3 & 6.44 & 11.0 & 18.7 & 31.44 & \cellcolor{blue!25}52.30 & 86.27 & 140.7 & 226.3 & 358.4 & 558.4 & 855.4 & 1289.0 & 1911.5 & 2791.4 & 4017.2 & 5701.4 & 7985.9 \\
 
			10&	0 & 1 & 2 & 3+2/3 & 6.44 & 11.0 & 18.7 & 31.44 & 52.30 & \cellcolor{blue!25}86.49 & 142.1 & 231.6 & 373.4 & 594.8 & 934.4 & 1447.1 & 2209.0 & 3324.6 & 4934.8 & 7226.9 & 10447. \\
 
            \vdots \\
 
            15& 0 & 1 & 2 & 3+2/3 & 6.44 & 11.08 & 18.76 & 31.45 & 52.31 & 86.49 & 142.33 & 233.31 & 381.17 & 621.02 & \cellcolor{blue!25}1009.38 & 1637.13 & 2650.74 & 4285.68 & 6913.55 & 11171.2 & 18052.2
 
        \end{tabular}
 
	}
 
	\end{table}
 

	
 
	We observe that $R^{(n)}(p)$ is a power series in $p$, and we discovered a very regular structure in it: it seems that for every $k\in\mathbb{N}$ the coefficient $a^{(n)}_k$ is the same for all $n>k$. We verified this conjecture by computer up to $n=14$.
 
	\newpage
 
	\noindent Based on our calculations presented in Table~\ref{tab:coeffs} and Figure~\ref{fig:coeffs_conv_radius} we make the following conjectures:
 
	\begin{enumerate}[label=(\roman*)]
 
		\item $\forall k\in\mathbb{N}, \forall n\geq 3 : a^{(n)}_k\geq 0$	\label{it:pos}	
 
        (A simpler version: $\forall k>0: a_k^{(3)}=(k+1)(k+2)/6$)
 
		\item $\forall k\in\mathbb{N}, \forall n>m\geq 3 : a^{(n)}_k\geq a^{(m)}_k$ \label{it:geq}		
 
		\item $\forall k\in\mathbb{N}, \forall n,m > \max(k,3) : a^{(n)}_k=a^{(m)}_k$ \label{it:const}		
 
  		\item $\exists p_c=\lim\limits_{k\rightarrow\infty}1\left/\sqrt[k]{a_{k}^{(k+1)}}\right.$ \label{it:lim}			
 
	\end{enumerate}
 
	\colorbox{red}{\ref{it:pos}-\ref{it:geq} are false, since $a_{1114}^{(10)}<0$ -- this needs to be double checked!}
 
	I figured this out by observing that $R^{(10)}(p)$ has a pole inside the disk of radius $0.96$. This also means that the identity $R^{(10)}(p)=\sum_{k=0}^{\infty}a_k^{(10)}p^k$ only holds in the sense of analytic continuation, since for $p>0.96$ the right-hand side does not converge.
 
	
 
	We also conjecture that $p_c\approx0.61$, see Figure~\ref{fig:coeffs_conv_radius}.
 

	
 
	\begin{figure}[!htb]\centering
 
	\includegraphics[width=0.5\textwidth]{coeffs_conv_radius.pdf}
 
	%\includegraphics[width=0.5\textwidth]{log_coeffs.pdf}	
 
	\caption{$1\left/\sqrt[k]{a_{k}^{(k+1)}}\right.$} %$\frac{1}{\sqrt[k]{a_k^{(k+1)}}}$
 
	\label{fig:coeffs_conv_radius}
 
	\end{figure}
 
    
 
    For reference, we also give explicit formulas for $R^{(n)}(p)$ for small $n$. We additionally express them in terms of $q=1-p$, because they sometimes look nicer that way.
 
    \begin{align*}
 
    	R^{(3)}(p) &= \frac{1-(1-p)^3}{3(1-p)^3}
 
        			= \frac{1-q^3}{3q^3}\\
 
    	R^{(4)}(p) &= \frac{p(6-12p+10p^2-3p^3)}{6(1-p)^4}
 
                    = \frac{(1-q)(1+q+q^2+3q^3)}{6q^4}\\
 
        R^{(5)}(p) &= \frac{p(90-300p+435p^2-325p^3+136p^4-36p^5+6p^6)}{15(1-p)^5(6-2p+p^2)}\\
 
                   &= \frac{(1-q)(6+5q+6q^2+21q^3+46q^4+6q^6)}{15q^5(5+q^2)}
 
    \end{align*}
 
    For $n=3$ the system becomes very simple because, regardless of the current state, the probability that a resampling produces $111$ is always equal to $(1-p)^3$. Therefore, conditioned on not starting in the all-$1$ state (which happens with probability $1-(1-p)^3$), the number of resamplings is geometrically distributed with mean $(1-p)^{-3}$, which gives the formula for $R^{(3)}(p)$ shown above. Note that the $k$-th coefficient of the power series of a function $f(p)$ is given by $\frac{1}{k!}\left.\frac{d^k f}{dp^k}\right|_{p=0}$, i.e.\ the $k$-th derivative with respect to $p$ evaluated at $0$, divided by $k!$. For the function $R^{(3)}(p) =\frac{(1-p)^{-3} - 1}{3}$ this yields $a^{(3)}_k = (k+2)(k+1)/6$ for $k\geq 1$ and $a^{(3)}_0=0$.
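
    To spell out the last step (a routine expansion, added for completeness): the binomial series gives
    $$\frac{1}{(1-p)^{3}}=\sum_{k=0}^{\infty}\binom{k+2}{2}p^{k},$$
    so $R^{(3)}(p)=\frac{(1-p)^{-3}-1}{3}=\sum_{k=1}^{\infty}\frac{(k+1)(k+2)}{6}\,p^{k}$, which is exactly the stated formula for the coefficients $a^{(3)}_k$.
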
 

	
 
    We can do the same for $n=4,5$, which gives, for $k\geq 1$ (with Mathematica):
 
    \begin{align*}
 
        a^{(3)}_k &= \frac{(k+2)(k+1)}{6}\\
 
        a^{(4)}_k &= \frac{1}{6}\left(2+\frac{(k+3)(k+2)(k+1)}{6}\right)\\
 
        a^{(5)}_k &= \frac{1}{15}\left(\frac{(k+4)(k+3)(k+2)(k+1)}{20} - \frac{(k+3)(k+2)(k+1)}{30} - \frac{(k+2)(k+1)}{50} + \frac{76(k+1)}{25}\right.\\
 
                  &  \qquad\quad \left. + \frac{626}{125} - \frac{4}{250}
 
                  \left( \left(\frac{1+i\sqrt{5}}{6}\right)^k(94-25\sqrt{5}i)+\left(\frac{1-i\sqrt{5}}{6}\right)^k(94+25\sqrt{5}i) \right)
 
                  \right)
 
    \end{align*}
 
    From $n=6$ onwards the expressions become complicated, and Mathematica can only give closed forms involving roots of polynomials.
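
    As a quick sanity check (ours), these closed forms reproduce the entries of Table~\ref{tab:coeffs}: for instance $a^{(3)}_4=\frac{5\cdot 6}{6}=5$ and $a^{(4)}_4=\frac{1}{6}\left(2+\frac{5\cdot 6\cdot 7}{6}\right)=\frac{37}{6}\approx 6.17$.
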
 

	
 
    ~
 

	
 
	If statements \ref{it:pos}-\ref{it:lim} are true, then we can define the function 
 
	$$R^{(\infty)}(p):=\sum_{k=0}^{\infty}a^{(k+1)}_k p^k,$$
 
	which would then have radius of convergence $p_c$; moreover, it would satisfy $R^{(n)}(p)\leq R^{(\infty)}(p)$ and $\lim\limits_{n\rightarrow\infty}R^{(n)}(p)=R^{(\infty)}(p)$ for all $p\in[0,p_c)$.
	It would also imply that for all $p\in(p_c,1]$ we get $R^{(n)}(p)=\Omega\left(\left(\frac{p}{p_c}\right)^{n/2}\right)$.
	This would then imply a very strong critical behaviour: for all $p\in[0,p_c)$ the expected number of resamplings is bounded by the constant $R^{(\infty)}(p)$ times $n$, whereas for all $p\in(p_c,1]$ it grows exponentially in $n$.
 
	
 
	Now we turn to the possible proof techniques for justifying the conjectures \ref{it:pos}-\ref{it:lim}.
 
	First note that $\forall n\geq 3$ we have $a^{(n)}_0=0$, since for $p=0$ the expected number of resamplings is $0$.
 
	Also note that the expected number of initial $0$s is $p\cdot n$. If $p\ll1/n$, then with high probability there is at most one $0$ initially, and when there is exactly one, the first resampling fixes it with probability close to $1$; so the linear term of the expected number of resamplings is $np$, and therefore $\forall n\geq 3:\ a^{(n)}_1=1$.
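
	Spelling this bookkeeping out (our own elaboration): the probability of exactly one initial $0$ is $np(1-p)^{n-1}=np+\bigO{p^2}$, in which case the expected number of resamplings is $1+\bigO{p}$, while configurations with at least two $0$s have probability $\bigO{p^2}$ and, for fixed $n$, contribute only $\bigO{p^2}$ to the expectation. Hence
	$$\E\left(\#\{\text{resamplings}\}\right)=np+\bigO{p^2},\qquad\text{and so}\quad a^{(n)}_1=1.$$
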
 
	
 
	For the second order coefficient it is a bit harder to argue, but one can use the structure of $M_{(n)}$ to come up with a combinatorial proof. To see this, first consider a vector $e_b$ whose only non-zero entry is a $1$ at the index given by the bitstring $b$.
	Observe that $e_bM_{(n)}$ is a vector with polynomial entries such that the only indices $b'$ having a non-zero constant term satisfy $|b'|\geq|b|+1$, since whenever a resampling produces a $0$ entry it also introduces a factor of $p$. Using this observation one can see that the second order term can be read off from $\rho M_{(n)}\mathbbm{1}+\rho M_{(n)}^2\mathbbm{1}$,
	which happens to be $2n$. (Note that this is already a bit surprising: from the steps of the combinatorial proof one would expect $n^2$ terms to appear, but they happen to cancel each other.) Using similar logic one should be able to prove the claim for $k=3$, but for larger $k$ it seems to quickly get more involved.
 
	
 
	The question is how could we prove the statements \ref{it:pos}-\ref{it:lim} for a general $k$?
 
	
 
    \appendix
 
    
 
    \section{Lower bound on $R^{(n)}(p)$}
 
    We prove that \ref{it:pos} and \ref{it:lim} imply that for any fixed $p>p_c$ we have $R^{(n)}(p)\in\Omega\left(\left(\frac{p}{p_c}\right)^{n/2}\right)$.
 
    
 
    By definition of $p_c = \lim_{k\to\infty} 1\left/ \sqrt[k]{a_k^{(k+1)}} \right.$ we know that for any $\epsilon>0$ there exists a $k_\epsilon$ such that for all $k\geq k_\epsilon$ we have $a_k^{(k+1)}\geq (p_c + \epsilon)^{-k}$. Now note that $R^{(n)}(p) \geq a_{n-1}^{(n)}p^{n-1}$, since by \ref{it:pos} all terms of the power series are non-negative, so for $n> k_\epsilon$ we have $R^{(n)}(p)\geq (p_c +\epsilon)^{-(n-1)}p^{n-1}$. Note that
 
    \begin{align*}
 
    	R^{(n)}(p)\geq(p_c+\epsilon)^{-(n-1)}p^{n-1}=\left(\frac{p}{p_c+\epsilon}\right)^{n-1} \geq \left(\frac{p}{p_c}\right)^{\frac{n-1}{2}},
 
    \end{align*}
 
    where the last inequality holds for $\epsilon\leq\sqrt{p_c}(\sqrt{p}-\sqrt{p_c})$.
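
    Indeed, spelling out this condition (our own one-line check): $\left(\frac{p}{p_c+\epsilon}\right)^{n-1}\geq\left(\frac{p}{p_c}\right)^{\frac{n-1}{2}}$ is equivalent to $\frac{p}{p_c+\epsilon}\geq\sqrt{\frac{p}{p_c}}$, i.e.\ to $p_c+\epsilon\leq\sqrt{p\,p_c}$, which rearranges to $\epsilon\leq\sqrt{p_c}\left(\sqrt{p}-\sqrt{p_c}\right)$; note that this bound is positive since $p>p_c$.
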
 
    
 
    \section{Calculating the coefficients $a_k^{(n)}$}
 
    Let $\rho'\in\mathbb{R}[p]^{2^n}$ be a vector of polynomials, and let $\text{rank}(\rho')$ be defined in the following way: 
 
    $$\text{rank}(\rho'):=\min_{b\in\{0,1\}^n}\left( |b|+ \text{maximal } k\in\mathbb{N} \text{ such that } p^k \text{ divides } \rho'_b\right).$$
 
	Clearly for any $\rho'$ we have that $\text{rank}(\rho' M_{(n)})\geq \text{rank}(\rho') + 1$. Another observation is that all elements of $\rho'$ are divisible by $p^{\text{rank}(\rho')-n}$.
    We observe that for the initial $\rho$ we have $\text{rank}(\rho)=n$, therefore $\text{rank}(\rho M_{(n)}^k)\geq n+k$, and so $\rho M_{(n)}^k\mathbbm{1}$ is divisible by $p^{k}$. This implies that the coefficient $a_k^{(n)}$ can be calculated by only looking at $\rho M_{(n)}\mathbbm{1}, \ldots, \rho M_{(n)}^k\mathbbm{1}$.
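
    The truncation argument above translates directly into code. The following Python/SymPy sketch (ours, not part of this note; the integer encoding of bitstrings is an arbitrary choice) computes $a^{(n)}_0,\ldots,a^{(n)}_K$ exactly by applying $M_{(n)}$ to $\rho$ only $K$ times and truncating every entry beyond degree $K$:
\begin{verbatim}
# Sketch of the truncation idea (ours, not from this note): to obtain
# a^(n)_0, ..., a^(n)_K it suffices to apply M_(n) to rho K times, dropping
# all polynomial terms of degree > K after every step.
import itertools
import sympy as sp

p = sp.symbols('p')

def truncate(expr, K):
    # drop every term of degree > K in p
    poly = sp.Poly(sp.expand(expr), p)
    return sum(c * p**e for (e,), c in poly.as_dict().items() if e <= K)

def coefficients(n, K):
    full = (1 << n) - 1
    # rho_b = p^{#zeros(b)} * (1-p)^{#ones(b)}
    cur = {b: sp.expand(p**(n - bin(b).count('1')) * (1 - p)**bin(b).count('1'))
           for b in range(1 << n)}
    hitting = sp.Integer(0)          # accumulates sum_{k>=1} rho M^k 1, truncated
    for _ in range(K):
        nxt = {b: sp.Integer(0) for b in range(1 << n)}
        for b, poly in cur.items():
            if b == full or poly == 0:
                continue
            zeros = [i for i in range(n) if not (b >> i) & 1]
            for i in zeros:
                for x in itertools.product([0, 1], repeat=3):
                    bp = b
                    for off, xi in zip((-1, 0, 1), x):
                        j = (i + off) % n
                        bp = (bp | (1 << j)) if xi else (bp & ~(1 << j))
                    w = p**(3 - sum(x)) * (1 - p)**sum(x) / sp.Integer(len(zeros))
                    nxt[bp] += poly * w
        cur = {b: truncate(q, K) for b, q in nxt.items()}
        hitting += sum(cur.values())
    series = sp.expand(hitting / n)
    return [series.coeff(p, k) for k in range(K + 1)]

# coefficients(4, 4) should give [0, 1, 2, 11/3, 37/6],
# matching the coefficients table above.
\end{verbatim}
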
 
    
 
\newpage
 
\section{Proving that $a_k^{(k+1)}=a_k^{(n)}$ for all $n>k$}
 
We consider $R^{(n)}(p)$ as a power series in $p$ and our main aim in this section is to show that $R^{(n)}(p)$ and $R^{(n+k)}(p)$ are the same up to order $n-1$.
 

	
 
The proof will consider variations of the Markov Chain:
 
\begin{itemize}
 
    \item $\P^{(n)}$ refers to the original process on the length-$n$ cycle.
 
    \item $\P^{[a,b]}$ or $\P^{[n]}$ refers to a similar Markov Chain but on a finite chain ($[a,b]$ or $[1,n]$).
 
\end{itemize}
 
The process on the finite chain has the following modification at the boundary: if a boundary site is resampled, it can only resample itself and its single neighbour, so it draws only two new bits.
 

	
 
We use the notation $\E^{(n)}$, $\E^{[a,b]}$ and $\E^{[n]}$ analogously for the corresponding expectations.
 

	
 
%Note that an \emph{event} is a subset of all possible paths of the Markov Chain.
 
\begin{definition}[Events conditioned on starting state] \label{def:conditionedevents}
 
    For any state $b\in\{0,1\}^n$, define $\start{b}$ as the event that the starting state of the chain is the state $b$. For any event $A$ and any $v,w\in[n]$, define
 
    \begin{align*}
 
        \P^{(n)}_b(A) &= \P^{(n)}(A \;|\; \start{b}) \\
 
        \P^{[n]}_{b_v=1}(A) &= \P^{[n]}(A \;|\; v\text{ is initialized to }1) \\
 
        \P^{[n]}_{b_v=b_w=1}(A) &= \P^{[n]}(A \;|\; v\text{ and }w\text{ are initialized to }1) .
 
    \end{align*}
 
    The last two probabilities are not conditioned on any other bits of the starting state.
 
\end{definition}
 
%Note that we have $\P^{(n)}(\start{b}) = (1-p)^{|b|}p^{n-|b|}$ by definition of our Markov Chain.
 
\begin{definition}[Vertex visiting event] \label{def:visitingResamplings}
 
    Denote by $\mathrm{Z}^{(v)}$ the event that site $v$ becomes zero at any point in time before the Markov chain terminates. Denote the complement by $\mathrm{NZ}^{(v)}$, i.e.\ the event that site $v$ does \emph{not} become zero before the chain terminates. Furthermore define $\mathrm{NZ}^{(v,w)} := \mathrm{NZ}^{(v)} \cap \mathrm{NZ}^{(w)}$, i.e.\ the event that \emph{both} $v$ and $w$ do not become zero before termination.
 
\end{definition}
 
%\begin{figure}
 
%	\begin{center}
 
%    	\includegraphics{diagram_groups.pdf}
 
%    \end{center}
 
%    \caption{\label{fig:separatedgroups} Illustration of setup of Lemma \ref{lemma:eventindependence}. Here $b_1,b_2\in\{0,1\}^n$ are bitstrings such that all zeroes of $b_1$ and all zeroes of $b_2$ are separated by two indices $v,w$.}
 
%\end{figure}
 
\begin{wrapfigure}[7]{r}{0.25\textwidth} % The first [] argument is number of lines that are narrowed
 
    \centering
 
    \includegraphics{diagram_groups.pdf}
 
    \caption{\label{fig:separatedgroups} Lemma \ref{lemma:eventindependence}.}
 
\end{wrapfigure}
 
The following lemma considers two vertices $v,w$ that are never ``crossed'', so that the two halves of the cycle become independent.
\begin{lemma}[Conditional independence] \label{lemma:eventindependence} \label{claim:eventindependence}
    Let $b=b_1\land b_2\in\{0,1\}^n$ be a state with two separated groups of zeroes as in Figure \ref{fig:separatedgroups}. Let $v$, $w$ be any indices in between the groups, such that $b_1$ lies on one side of them and $b_2$ on the other, as shown in the figure. Furthermore, let $A_1$ be any event that depends only on the sites ``on the $b_1$ side of $v,w$'', and similarly for $A_2$ (for example $\mathrm{Z}^{(i)}$ for an $i$ on the correct side). Then we have
 
    \begin{align*}
 
        \P^{(n)}_b(\mathrm{NZ}^{(v,w)}, A_1, A_2)
 
        &=
 
        \P^{(n)}_{b_1}(\mathrm{NZ}^{(v,w)}, A_1)
 
        \; \cdot \;
 
        \P^{(n)}_{b_2}(\mathrm{NZ}^{(v,w)}, A_2) \\
 
        \P^{(n)}_b(A_1, A_2 \mid \mathrm{NZ}^{(v,w)})
 
        &=
 
        \P^{(n)}_{b_1}(A_1 \mid \mathrm{NZ}^{(v,w)})
 
        \; \cdot \;
 
        \P^{(n)}_{b_2}(A_2 \mid \mathrm{NZ}^{(v,w)}) .%\\
 
        %R_{b,\mathrm{NZ}^{(v,w)},A_1,A_2}
 
        %&=
 
        %R_{b_1,\mathrm{NZ}^{(v,w)},A_1}
 
        %\; + \;
 
        %R_{b_2,\mathrm{NZ}^{(v,w)},A_2}
 
    \end{align*}
 
    %up to any order in $p$.
 
\end{lemma}
 

	
 
\begin{proof}
 
    From any path $\xi\in\start{b} \cap \mathrm{NZ}^{(v,w)}$ we can construct paths $\xi_1\in\start{b_1}\cap \mathrm{NZ}^{(v,w)}$ and $\xi_2\in\start{b_2}\cap\mathrm{NZ}^{(v,w)}$ as follows. Let us write the path $\xi$ as
 
    $$\xi=\left( (\text{initialize }b), (z_1, s_1, r_1), (z_2, s_2, r_2), ..., (z_{|\xi|}, s_{|\xi|}, r_{|\xi|}) \right)$$
 
    where $z_i\in[n]$ denotes the number of zeroes in the state before the $i$th step, $s_i\in [n]$ denotes the site that was resampled and $r_i\in \{0,1\}^3$ is the result of the three resampled bits. We have
 
    \begin{align*}
 
        \P^{(n)}_b[\xi] &= \P(\text{pick }s_1 | z_1) \P(r_1) \P(\text{pick }s_2 | z_2) \P(r_2) \cdots \P(\text{pick }s_{|\xi|} | z_{|\xi|}) \P(r_{|\xi|}) \\
 
                &= \frac{1}{z_1} \P(r_1) \frac{1}{z_2} \P(r_2) \cdots \frac{1}{z_{|\xi|}} \P(r_{|\xi|}) .
 
    \end{align*}
 
    To construct $\xi_1$ and $\xi_2$, start with $\xi_1 = \left( (\text{initialize }b_1) \right)$ and $\xi_2 = \left( (\text{initialize }b_2) \right)$. For each step $(z_i,s_i,r_i)$ in $\xi$ do the following: if $s_i$ is ``on the $b_1$ side of $v,w$'' then append $(z^{(1)}_i,s_i,r_i)$ to $\xi_1$, and if it is ``on the $b_2$ side of $v,w$'' then append $(z^{(2)}_i,s_i,r_i)$ to $\xi_2$. Here $z^{(1)}_i$ is the number of zeroes that were on the $b_1$ side and $z^{(2)}_i$ is the number of zeroes on the $b_2$ side, so we have $z_i = z^{(1)}_i + z^{(2)}_i$.
 
    %Let the resulting paths be
 
    %\begin{align*}
 
    %    \xi_1 &= \left( (z^{(1)}_{a_1}, s_{a_1}, r_{a_1}), (z^{(1)}_{a_2}, s_{a_2}, r_{a_2}), ..., (z^{(1)}_{a_{|\xi_1|}}, s_{a_{|\xi_1|}}, r_{a_{|\xi_1|}}) \right) \\
 
    %    \xi_2 &= \left( (z^{(2)}_{b_1}, s_{b_1}, r_{b_1}), (z^{(2)}_{b_2}, s_{b_2}, r_{b_2}), ..., (z^{(2)}_{b_{|\xi_1|}}, s_{b_{|\xi_1|}}, r_{b_{|\xi_1|}}) \right)
 
    %\end{align*}
 
    Now $\xi_1$ is a valid (terminating) path from $b_1$ to $\mathbf{1}$, because in the original path $\xi$, all zeroes ``on the $b_1$ side'' have been resampled by resamplings ``on the $b_1$ side''. Since the sites $v,w$ in between never become zero, there cannot be any zero ``on the $b_1$ side'' that was resampled by a resampling ``on the $b_2$ side''.
    Conversely, any two paths $\xi_1\in\start{b_1}\cap \mathrm{NZ}^{(v,w)}$ and $\xi_2\in\start{b_2}\cap\mathrm{NZ}^{(v,w)}$ also induce a path $\xi\in\start{b} \cap \mathrm{NZ}^{(v,w)}$ by simply interleaving the resampling positions. Note that $\xi_1,\xi_2$ actually induce $\binom{|\xi_1|+|\xi_2|}{|\xi_1|}$ paths $\xi$, because of the possible orderings of interleaving the resamplings in $\xi_1$ and $\xi_2$.
 
    For a fixed $\xi_1,\xi_2$ we will now show the following:
 
    \begin{align*}
 
        \sum_{\substack{\xi\in\start{b} \cap \mathrm{NZ}^{(v,w)} \text{ s.t.}\\ \xi \text{ decomposes into } \xi_1,\xi_2 }} \P^{(n)}_b[\xi] &=
 
        \sum_{\text{interleavings of }\xi_1,\xi_2} \P(\text{interleaving}) \cdot \P^{(n)}_{b_1}[\xi_1] \cdot \P^{(n)}_{b_2}[\xi_2] \\
 
        &= \P^{(n)}_{b_1}[\xi_1] \cdot \P^{(n)}_{b_2}[\xi_2]
 
    \end{align*}
 
    where both sums are over $\binom{|\xi_1|+|\xi_2|}{|\xi_1|}$ terms.
 
    This is best explained by an example. Let us consider the following fixed $\xi_1,\xi_2$ and an example interleaving where we choose steps from $\xi_2,\xi_1,\xi_1,\xi_2,\cdots$:
 
    \begin{align*}
 
        \xi_1 &= \left( (z_1, s_1, r_1), (z_2, s_2, r_2), (z_3, s_3, r_3), (z_4, s_4, r_4),\cdots  \right) \\
 
        \xi_2 &= \left( (z_1', s_1', r_1'), (z_2', s_2', r_2'), (z_3', s_3', r_3'), (z_4', s_4', r_4'),\cdots  \right) \\
 
        \xi   &= \left( (z_1 + z_1', s_1', r_1'), (z_1+z_2', s_1, r_1), (z_2+z_2', s_2, r_2), (z_3+z_2', s_2', r_2'), \cdots \right)
 
    \end{align*}
 
    The probability of $\xi_1$, started from $b_1$, is given by
 
    \begin{align*}
 
        \P^{(n)}_{b_1}[\xi_1] &= \P(\text{pick }s_1|z_1) \P(r_1) \P(\text{pick }s_2|z_2) \P(r_2) \cdots \P(\text{pick }s_{|\xi_1|}|z_{|\xi_1|}) \P(r_{|\xi_1|}) \\
 
                &= \frac{1}{z_1} \P(r_1) \frac{1}{z_2} \P(r_2) \cdots \frac{1}{z_{|\xi_1|}} \P(r_{|\xi_1|}) .
 
    \end{align*}
 
    The expression for $\P^{(n)}_{b_2}[\xi_2]$ is analogous, with primes.
 
    The following diagram illustrates all possible interleavings, and the red line corresponds to the particular interleaving $\xi$ in the example above.
 
    \begin{center}
 
        \includegraphics{diagram_paths2.pdf}
 
    \end{center}
 
    For the labels shown within the grid, define $p_{i,j} = \frac{z_i}{z_i + z_j'}$.
 
    The probability of $\xi$ is given by
 
    \begin{align*}
 
        \P^{(n)}_b[\xi] &= \frac{1}{z_1+z_1'} \P(r_1') \frac{1}{z_1+z_2'} \P(r_1) \frac{1}{z_2+z_2'} \P(r_2) \frac{1}{z_3+z_2'} \P(r_2') \cdots \tag{by definition}\\
 
        &=
 
        \frac{z_1'}{z_1+z_1'} \frac{1}{z_1'} \P(r_1') \;
 
        \frac{z_1 }{z_1+z_2'} \frac{1}{z_1 } \P(r_1 ) \;
 
        \frac{z_2 }{z_2+z_2'} \frac{1}{z_2 } \P(r_2 ) \;
 
        \frac{z_2'}{z_3+z_2'} \frac{1}{z_2'} \P(r_2')
 
        \cdots \tag{rewrite fractions}\\
 
        &=
 
        \frac{z_1'}{z_1+z_1'} \;
 
        \frac{z_1 }{z_1+z_2'} \;
 
        \frac{z_2 }{z_2+z_2'} \;
 
        \frac{z_2'}{z_3+z_2'}
 
        \cdots
 
        \P^{(n)}_{b_1}[\xi_1] \; \P^{(n)}_{b_2}[\xi_2] \tag{definition of $\P^{(n)}_{b_i}[\xi_i]$} \\
 
        &= (1-p_{1,1}) \; p_{1,2} \; p_{2,2} \; (1-p_{3,2}) \; \P^{(n)}_{b_1}[\xi_1] \; \P^{(n)}_{b_2}[\xi_2] \tag{definition of $p_{i,j}$} \\
 
        &= \P(\text{path in grid}) \; \P^{(n)}_{b_1}[\xi_1] \; \P^{(n)}_{b_2}[\xi_2]
 
    \end{align*}
 
    In the grid we see that at every point the outgoing probabilities sum to $1$ and we always reach the end, so the probabilities of all paths through the grid sum to $1$. This proves the required equality.
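
    The fact that the grid probabilities of all interleavings sum to $1$ can also be checked mechanically. A small Python illustration (ours, not part of this note) sums the product of grid probabilities over every monotone path for arbitrary positive zero-counts:
\begin{verbatim}
# Illustration (not from the note): the products of grid probabilities,
# summed over all interleavings, add up to 1.
from fractions import Fraction
from functools import lru_cache

def grid_total(z1, z2):
    # z1[i]: zeros on the b_1 side before the (i+1)-st step of xi_1; same for z2.
    a, c = len(z1), len(z2)

    @lru_cache(maxsize=None)
    def rest(i, j):
        # total probability of all interleavings of the remaining steps
        if i == a and j == c:
            return Fraction(1)
        zi = z1[i] if i < a else 0    # an exhausted side has no zeros left
        zj = z2[j] if j < c else 0
        total = Fraction(0)
        if i < a:
            total += Fraction(zi, zi + zj) * rest(i + 1, j)
        if j < c:
            total += Fraction(zj, zi + zj) * rest(i, j + 1)
        return total

    return rest(0, 0)

assert grid_total((2, 1, 3, 2), (1, 2, 1)) == 1   # any positive counts work
\end{verbatim}
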
 

	
 
    We obtain
 
    \begin{align*}
 
        \P^{(n)}_b(\mathrm{NZ}^{(v,w)},A_1,A_2)
 
        &= \sum_{\substack{\xi\in\start{b} \cap \\ \mathrm{NZ}^{(v,w)}\cap A_1\cap A_2}} \P^{(n)}_b(\xi) \\
0 comments (0 inline, 0 general)