AENC/resampling_chain Changeset - 9ad0b3287c70 · Centrum Wiskunde & Informatica (CWI)

Changeset - 9ad0b3287c70

Parent rev.

Child rev.

[Not reviewed]

0 1 2

Tom Bannink - 8 years ago 2017-09-11 22:50:14
tombannink@gmail.com

Add diagram for proof of splitting lemma

3 files changed with 115 insertions and 1 deletions:

diagram_paths3.pdf

bin+new file

diagram_paths3.tex

114

main.tex

0 comments (0 inline, 0 general)

diagram_paths3.pdf

➞

Show inline comments

		new file 100644
		binary diff not shown

diagram_paths3.tex

➞

Show inline comments

@@ new file 100644 @@
 \documentclass{standalone}
 \usepackage[T1]{fontenc}
 \usepackage{amsmath}
 \usepackage{amsfonts}
 \usepackage{parskip}
 \usepackage{marvosym} %Lightning symbol
 \usepackage[usenames,dvipsnames]{color}
 \usepackage[hidelinks]{hyperref}
 \renewcommand*{\familydefault}{\sfdefault}
 \usepackage{bbm} %For \mathbbm{1}
 %\usepackage{bbold}
 \usepackage{tikz}
 \begin{document}
 \begin{tikzpicture}
     \def\height{4};
     \draw[step=1cm,gray,dotted] (-0.9,-0.9) grid (8.9,\height+0.9);
+    %
     % Red line through grid
+    %
     \draw [line width=3.0,red] (0,0) -- (0,1) -- (1,1) -- (2,1) -- (2,2);
+    %
     % Arrows of the grid
+    %
     \foreach \x in {0,...,7} {
         \foreach \y in {1,...,\height} {
             \draw[->] (\x,\y-1) -- (\x+0.9,\y-1);
             \draw[->] (\x,\y-1) -- (\x,\y-1+0.9);
+        }
         \draw [->] (\x,\height) -- (\x+0.9,\height);
+    }
     \foreach \y in {1,...,\height} %somehow the loop cant go to '\height-1'
         \draw [->] (8,\y-1) -- (8,\y-1+0.9); % so we fix it like this with '\y-1'
+    %
     % Move labels
+    %
     \foreach \y in {1,...,\height} {
         \draw (-1.2, \y - 0.5) node {$(z^Y_\y,v^Y_\y,r^Y_\y)$};
+    }
     \foreach \x in {1,...,8} {
         \draw (\x-0.6, -1.7) node[rotate=70] {$(z^X_\x,v^X_\x,r^X_\x)$};
+    }
+    %
     % bitstring labels
+    %
     \draw(-0.1,-0.4) node {$b^X\;1^S\;b^Y$};
     \draw(8.2,-0.4) node {$1^X\;1^S\;b^Y$};
     \draw (-0.2,\height+0.3) node {$b^X\;1^S\;1^Y$};
     \draw (8.2,\height+0.3) node {$\mathbf{1}$};
+    %
     % -> steps of xi
+    %
     \draw (4,-3.0) node {$\to$ steps of $\xi^{G\setminus Y}$};
     \node[rotate=90,anchor=south,xshift=2.0cm,yshift=2.3cm] {$\to$ steps of $\xi^{G\setminus X}$};
+    %
     % (Red) circles
+    %
     \draw[fill,red] (0,0) circle (0.08);
     \draw[fill    ] (8,0) circle (0.05);
     \draw[fill    ] (0,\height) circle (0.05);
     \draw[fill,red] (8,\height) circle (0.08);
+    %
     % Probability labels
+    %
     \def\x{6};
     \def\y{3};
     \draw[fill,black] (\x,\y) circle (0.07);
     \draw[fill=white,draw=black] (\x+0.23,\y-0.26) rectangle +(0.5,0.5);
     \draw[fill=white,draw=black] (\x-0.55,\y+0.26) rectangle +(1.1,0.5);
     \draw (\x+0.5,\y) node {$p_{ij}$};
     \draw (\x,\y+0.5) node {$1-p_{ij}$};
     \def\x{2};
     \def\y{1};
     \draw[fill,black] (\x,\y) circle (0.07);
     \draw[fill=white,draw=black] (\x+0.12,\y-0.26) rectangle +(0.7,0.5);
     \draw[fill=white,draw=black] (\x-0.75,\y+0.26) rectangle +(1.5,0.5);
     \draw (\x+0.5,\y) node {$p_{3,2}$};
     \draw (\x,\y+0.5) node {$1-p_{3,2}$};
     \def\x{8};
     \def\y{1};
     \draw[fill,black] (\x,\y) circle (0.07);
     \draw[fill=white,draw=black] (\x-0.25,\y+0.26) rectangle +(0.5,0.5);
     \draw (\x,\y+0.5) node {$1$};
     \def\x{3};
     \def\y{\height};
     \draw[fill,black] (\x,\y) circle (0.07);
     \draw[fill=white,draw=black] (\x+0.25,\y-0.25) rectangle +(0.5,0.5);
     \draw (\x+0.5,\y) node {$1$};
+    %
     % Probability definition
+    %
     \draw (10,2)+(-1,-0.5) rectangle +(1,0.5);
     \draw (10,2) node {$p_{ij} \equiv \frac{z^X_i}{z^X_i + z^Y_j}$};
 \end{tikzpicture}
 \end{document}

main.tex

➞

Show inline comments

             \begin{align*}
                 \P^{G}_S(A) &= \P^{G}(A \mid \text{All vertices in $S$ get initialized to }1)
             \end{align*}
             The condition does not apply to subsequent resamplings of vertices in $S$, it only specifies the initial assignment.
         \item Define $G\setminus S$ as the graph obtained by removing from $G$ all vertices in $S$ and edges adjacent to $S$.
         \item Define the $d$-neighbourhood $B^G(S;d)$ of $S$ as the set of vertices reachable from $S$ within $d$ steps.
         \item Define the boundary $\overline{\partial} S$ of $S$ as the set of vertices adjacent to $S$, excluding $S$ itself. In other words $\overline{\partial} S = B(S;1) \setminus S$.
     \end{itemize}
 \end{definition}
 The following Lemma says that if a set $S$ splits the graph in two, then those two parts become independent if the vertices in $S$ never become zero.
 \begin{center}
     \includegraphics[scale=0.8]{diagram_splittinglemma.pdf}
 \end{center}
 \begin{lemma}[Splitting lemma] \label{lemma:splitting}
     Let $G=(V,E)$ be a graph. Let $S,X,Y\subseteq V$ be a partition of the vertices, such that $X$ and $Y$ are disconnected in the graph $G\setminus S$. Furthermore, let $A^X$ and $A^Y$ be any events that depends only on the sites in $X$ and $Y$ respectively. Then we have
     \begin{align*}
         \P^{G}_S(\NZ{S} \cap A^X \cap A^Y)
         &=
         \P^{G\setminus Y}_S(\NZ{S} \cap A^X)
         \; \cdot \;
         \P^{G\setminus X}_S(\NZ{S} \cap A^Y)
     \end{align*}
 \end{lemma}
 %\newcommand{\initone}[1]{\textsc{InitOne}_#1}
 \begin{proof}
     We are considering three different Markov Chains, and the events $\NZ{S}$ in the different parts of the equation are events on different probability spaces. We will keep using the same notation for these events because it should be clear from the context which Markov Chain is being considered. We will consider paths (i.e. resampling sequences) and we will use a superscript to denote to which Markov Chain a path belongs. Let $\xi^G \in \NZ{S}$ be a path of the Markov Chain associated to the resample process on the graph $G$, that satisfies the event $\NZ{S}$.
     From $\xi^G$ we will now construct paths $\xi^{G\setminus Y} \in \NZ{S}$ and $\xi^{G \setminus X} \in \NZ{S}$ of the other Markov Chains satisfying the corresponding events on those Markov Chains.
     Let us write the path $\xi^G$ as an initialization and a sequence of resamplings:
     \begin{align*}
         \xi^G=\left( (\text{initialize to }b), (z_1, v_1, r_1), (z_2, v_2, r_2), ..., (z_{|\xi^G|}, v_{|\xi^G|}, r_{|\xi^G|}) \right)
     \end{align*}
     where $b\in\{0,1\}^V$ is the initial state, $1 \leq z_i \leq |V|$ denotes the number of zeroes in the state before the $i$th step, $v_i\in V$ denotes the site that was resampled and $r_i\in \{0,1\}^{d(v_i)+1}$ is the result of the resampled bits. Here $d(v_i)$ is the degree of vertex $v_i$. By definition of the resample process, we have
     \begin{align*}
         \P^{G}_S(\xi^G) &=
         \P(\text{initialize }b \mid \text{initialize $S$ to }1)
         \P(\text{pick }v_1 \mid z_1) \P(r_1)
         \P(\text{pick }v_2 \mid z_2) \P(r_2) \cdots \\
         &= \frac{(1-p)^{|b|} p^{|V|-|b|}}{(1-p)^{|S|}} \cdot
         \frac{1}{z_1} \P(r_1) \cdot
         \frac{1}{z_2} \P(r_2) \cdots
         \frac{1}{z_{|\xi^G|}} \P(r_{|\xi^G|}) .
     \end{align*}
     Let $b|_{G\setminus X}$ be the restriction of $b$ to $G\setminus X$ and similar for $b|_{G\setminus Y}$.
     To construct $\xi^{G\setminus Y}$ and $\xi^{G\setminus X}$, start with $\xi^{G\setminus Y} = \left( (\text{initialize }b|_{G\setminus Y}) \right)$ and $\xi^{G\setminus X} = \left( (\text{initialize }b|_{G\setminus X}) \right)$. For each step $(z_i,v_i,r_i)$ in $\xi^G$ do the following: if $v_i \in X$ then append $(z^{X}_i,v_i,r_i)$ to $\xi^{G\setminus Y}$ and if $v_i \in Y$ then append $(z^{Y}_i,v_i,r_i)$ to $\xi^{G\setminus X}$.
     Here $z^{X}_i$ is the number of zeroes that were in $X$ and $z^{Y}_i$ is the number of zeroes in $Y$. We have $z_i = z^{X}_i + z^{Y}_i$ because $\xi^G\in\NZ{S}$ so there can not be any zero in $S$; they all have to be in $X$ or $Y$. Furthermore, this also makes sure that we always have either $v_i\in X$ or $v_i\in Y$ since only vertices that are zero can be chosen to resample.
     Now $\xi^{G\setminus Y}$ is a valid path of the Markov Chain associated to the graph $G\setminus Y$ (i.e. with vertices $X\cup S$), because in the original path $\xi^G$, all zeroes in $X$ have been resampled by resamplings in $X$. There can not be a vertex $v\in Y$ such that the resampling of $v$ changed a vertex in $X$, since $X$ and $Y$ are only connected through $S$ and we know $\xi^G\in\NZ{S}$.
     Vice versa, any two paths $\xi^{G\setminus Y}\in\NZ{S}$ and $\xi^{G\setminus X}\in\NZ{S}$ also induce a path $\xi^G\in\NZ{S}$ by simply interleaving the resampling positions. Note that $\xi^{G\setminus Y},\xi^{G\setminus X}$ actually induce $\binom{|\xi^{G\setminus Y}|+|\xi^{G\setminus X}|}{|\xi^{G\setminus Y}|}$ paths $\xi^G$ because of the possible orderings of interleaving the resamplings in $\xi^{G\setminus Y}$ and $\xi^{G\setminus X}$.
     For a fixed $\xi^{G\setminus Y},\xi^{G\setminus X}$ we will now show the following:
     \begin{align*}
         \sum_{\substack{\xi^G\in\NZ{S} \text{ s.t.}\\ \xi^G \text{ decomposes into } \xi^{G\setminus Y},\xi^{G\setminus X} }} \P^{G}_S(\xi^G) &=
         \sum_{\text{interleavings of }\xi^{G\setminus Y},\xi^{G\setminus X}} \P(\text{interleaving}) \cdot \P^{G\setminus Y}_S(\xi^{G\setminus Y}) \cdot \P^{G\setminus X}_S(\xi^{G\setminus X}) \\
         &= \P^{G\setminus Y}_S(\xi^{G\setminus Y}) \cdot \P^{G\setminus X}_S(\xi^{G\setminus X})
     \end{align*}
     where both sums are over $\binom{|\xi^{G\setminus Y}|+|\xi^{G\setminus X}|}{|\xi^{G\setminus Y}|}$ terms.
     This is best explained by an example. Lets consider the following fixed $\xi^{G\setminus Y},\xi^{G\setminus X}$ and an example interleaving where we choose vertices from $Y,X,X,Y,\cdots$:
     \begin{align*}
         \xi^{G\setminus Y} &= \left( (\text{initialize to }b^X\;1^S),
         (z^X_1, v^X_1, r^X_1),
         (z^X_2, v^X_2, r^X_2),
         (z^X_3, v^X_3, r^X_3),
         (z^X_4, v^X_4, r^X_4),
         \cdots  \right) \\
         \xi^{G\setminus X} &= \left( (\text{initialize to }1^S\;b^Y),
         (z^Y_1, v^Y_1, r^Y_1),
         (z^Y_2, v^Y_2, r^Y_2),
         (z^Y_3, v^Y_3, r^Y_3),
         (z^Y_4, v^Y_4, r^Y_4),
         \cdots  \right) \\
         \xi^G             &= \big( (\text{initialize to }b^X \; 1^S \; b^Y),
         (z^X_1+z^Y_1, v^Y_1, r^Y_1),
         (z^X_1+z^Y_2, v^X_1, r^X_1), \\
         &\qquad\qquad\qquad\qquad\qquad\qquad\qquad\qquad
         (z^X_2+z^Y_2, v^X_2, r^X_2),
         (z^X_3+z^Y_2, v^Y_2, r^Y_2),
         \cdots \big)
     \end{align*}
     Here $b^X\in \{0,1\}^{X}$ and $b^Y\in\{0,1\}^Y$. Since we condition on the event that $S$ is initialized to ones, we know the initial state is of the form $b^X\;1^S$ in $\xi^{G\setminus Y}$. Similarly, since these paths satisfy the $\NZ{S}$ event, we know all the vertices $v_i$ resampled in $\xi^{G\setminus Y}$ are vertices in $X$, and the resampled bits $r_i$ are bits corresponding to vertices in $X$.
     In the newly constructed path $\xi^G$ the number of zeroes is the number of zeroes in $X$ and $Y$ together, so this starts as $z^X_1 + z^Y_1$. Then in this example, after the first step the number of zeroes is $z^X_1+z^Y_2$ since a step of $\xi^{G\setminus X}$ was done (so a vertex in $Y$ was resampled).
     The probability of $\xi^{G\setminus Y}$ is given by
     \begin{align*}
         \P^{G\setminus Y}_S(\xi^{G\setminus Y}) &=
         \P(\text{initialize }b^X\;1^S \mid \text{initialize $S$ to }1)
         \P(\text{pick }v^X_1 \mid z^X_1) \P(r^X_1)
         \P(\text{pick }v^X_2 \mid z^X_2) \P(r^X_2) \cdots \\
         &= (1-p)^{|b^X|} p^{|X|-|b^X|} \cdot
         \frac{1}{z^X_1} \P(r^X_1) \cdot
         \frac{1}{z^X_2} \P(r^X_2) \cdots
         \frac{1}{z^X_{|\xi^{G\setminus Y}|}} \P(r^X_{|\xi^{G\setminus Y}|}) .
     \end{align*}
     and similar for $\xi^{G\setminus X}$.
     Instead of choosing a step in $Y,X,X,Y,\cdots$ we could have chosen other orderings. The following diagram illustrates all possible interleavings, and the red line corresponds to the particular interleaving $Y,X,X,Y$ in the example above.
     \begin{center}
-        \includegraphics{diagram_paths2.pdf} \todo{change to paths3.pdf}
+        \includegraphics{diagram_paths3.pdf}
     \end{center}
     For the labels shown within the grid, define $p_{ij} = \frac{z^X_i}{z^X_i + z^Y_j}$.
     The probability of this particular interleaving $\xi^G$ is given by
     \begin{align*}
         \P^{G}_S(\xi^{G})
         &= (1-p)^{|b^X\; b^Y|} p^{|X\cup Y|-|b^X\;b^Y|} \quad
         \frac{1}{z^X_1+z^Y_1} \P(r^Y_1) \cdot
         \frac{1}{z^X_1+z^Y_2} \P(r^X_1) \cdots \\
         &= (1-p)^{|b^X|} p^{|X|-|b^X|} \cdot (1-p)^{|b^Y|} p^{|Y|-|b^Y|} \\
         &\qquad \cdot
         \frac{z^Y_1}{z^X_1+z^Y_1} \frac{1}{z^Y_1} \P(r^Y_1) \;
         \frac{z^X_1}{z^X_1+z^Y_2} \frac{1}{z^X_1} \P(r^X_1) \;
         \frac{z^X_2}{z^X_2+z^Y_2} \frac{1}{z^X_2} \P(r^X_2)
         \cdots \tag{rewrite fractions}\\
         &=
         \frac{z^Y_1}{z^X_1+z^Y_1}
         \frac{z^X_1}{z^X_1+z^Y_2}
         \frac{z^X_2}{z^X_2+z^Y_2}
         \cdots
         \P^{G\setminus Y}_S(\xi^{G\setminus Y}) \; \P^{G\setminus X}_S(\xi^{G\setminus X})
         \tag{definition} \\
         &= (1-p_{1,1}) \; p_{1,2} \; p_{2,2} \; (1-p_{3,2}) \cdots \P^{G\setminus Y}_S(\xi^{G\setminus Y}) \; \P^{G\setminus X}_S(\xi^{G\setminus X})
         \tag{definition of $p_{i,j}$} \\
         &= \P(\text{path in grid}) \; \P^{G\setminus Y}_S(\xi^{G\setminus Y}) \; \P^{G\setminus X}_S(\xi^{G\setminus X})
     \end{align*}
     In the grid we see that at every point the probabilities sum to 1, and we always reach the end, so we know the sum of all paths in the grid is 1. This proves the required equality.
     We obtain
     \begin{align*}
         \P^{G}_S(\NZ{S} \cap A^X \cap A^Y)
         &= \sum_{\xi^G \in \NZ{S}\cap A^X \cap A^Y} \P^{G}_S(\xi^G) \\
         &= \sum_{\xi^{G\setminus Y} \in \NZ{S}\cap A^X}
            \sum_{\xi^{G\setminus X} \in \NZ{S}\cap A^Y}
             \P^{G\setminus Y}_S(\xi^{G\setminus Y}) \cdot
             \P^{G\setminus X}_S(\xi^{G\setminus X}) \\
         &= \P^{G\setminus Y}_S(\NZ{S} \cap A^X) \; \cdot \; \P^{G\setminus X}_S(\NZ{S} \cap A^Y)
     \end{align*}
 \end{proof}
 \todo{rewrite from here}
 \begin{lemma}[Conditional independence 2] \label{lemma:eventindependenceNewGen}
 	Let $v,w \in [n]$, and let $A$ be any event that depends only on the sites $[v,w]$ (meaning the initialization and resamples) and similarly $B$ an event that depends only on the sites $[w,v]$. (For example $\mathrm{Z}^{(s)}$ or ``$s$ has been resampled at least $k$ times'' for an $s$ on the correct interval). Then we have
 	\begin{align*}
 		\P^{(n)}(\mathrm{NZ}^{(v,w)}\cap A\cap B)
+		=
 		\P_{b_v=b_w=1}^{[v,w]}(\mathrm{NZ}^{(v,w)}\cap A)
 		\; \cdot \;
 		\P^{[w,v]}(\mathrm{NZ}^{(v,w)}\cap B),
 	\end{align*}
 	and similarly
 	\begin{align*}
 		\P^{[n]}(\mathrm{NZ}^{(v)}\cap A\cap B)
+		=
 		\P_{b_v=1}^{[v]}(\mathrm{NZ}^{(v)}\cap A)
 		\; \cdot \;
 		\P^{[v,n]}(\mathrm{NZ}^{(v)}\cap B)
 	\end{align*}
 	where there is no longer a condition on the starting state.
 \end{lemma}
 \begin{proof}
     We start by relating the different Markov Chains.
     If $b$ is a starting state where all the zeroes are inside an interval $[v,w]$ (not on the boundary) then we can switch between the cycle and the finite chain:
     \begin{align*}
         \P^{(n)}_{b} (\NZ{v,w} \cap A) = \P^{[v,w]}_b (\NZ{v,w}\cap A) .
     \end{align*}
     If vertex $v$ and $w$ never become zero, then the zeroes never get outside of the interval $[v,w]$ and we can ignore the entire circle and only focus on the process within $[v,w]$.
     We can apply this to the result of Lemma \ref{lemma:splitting}, to get
     \begin{align*}
         \P^{(n)}_b(\mathrm{NZ}^{(v,w)} \cap A \cap B)
         &=
         \P^{[v,w]}_{b|_{[v,w]}}(\mathrm{NZ}^{(v,w)} \cap A)
         \; \cdot \;
         \P^{[w,v]}_{b|_{[w,v]}}(\mathrm{NZ}^{(v,w)} \cap B)
     \end{align*}
     Note that this also holds if $b$ has zeroes on the boundary (i.e. $b_v=0$ or $b_w=0$), because then both sides of the equations are zero.
     For the starting state we have the expression $\P^{(n)}(\start{b}) = (1-p)^{|b|} p^{n-|b|}$ so it splits into a product
     \begin{align*}
         \P^{(n)}(\start{b}) = \P^{[v,w]}(\start{b|_{[v+1,w-1]}}) \;\; \P^{[w,v]}(\start{b|_{[w,v]}})
     \end{align*}
     where we have to be careful to count the boudary only once.
     We now have
     \begin{align*}
 		\P^{(n)}(\mathrm{NZ}^{(v,w)}\cap A\cap B)
         &= \sum_{b\in\{0,1\}^n} \P^{(n)}_b(\mathrm{NZ}^{(v,w)}\cap A\cap B) \; \P^{(n)}(\start{b}) \\
         &= \sum_{b\in\{0,1\}^n}
             \P^{[v,w]}_{b|_{[v,w]}}(\mathrm{NZ}^{(v,w)}\cap A)
             \P^{[v,w]}(\start{b|_{[v+1,w-1]}})
             \\ &\qquad\qquad\quad\cdot
             \P^{[w,v]}_{b|_{[w,v]}}(\mathrm{NZ}^{(v,w)}\cap B)
             \P^{[w,v]}(\start{b|_{[w,v]}}) \\
         &= \left( \sum_{\substack{b_1\in\{0,1\}^{[v,w]}\\ b_v=b_w=1}}
             \P^{[v,w]}_{b_1}(\mathrm{NZ}^{(v,w)}\cap A)
             \P^{[v,w]}(\start{b_1}) \right)
             \\ &\qquad \cdot
            \left( \sum_{b_2\in\{0,1\}^{[w,v]}}
             \P^{[w,v]}_{b_2}(\mathrm{NZ}^{(v,w)}\cap B)
             \P^{[w,v]}(\start{b_2}) \right) \\

0 comments (0 inline, 0 general)