AENC/resampling_chain Changeset - 9f86b29b20b0 · Centrum Wiskunde & Informatica (CWI)

Changeset - 9f86b29b20b0

Parent rev.

Child rev.

[Not reviewed]

0 3 0

Tom Bannink - 8 years ago 2017-05-24 16:42:56
tom.bannink@cwi.nl

Update weak claim proof and path diagram

3 files changed with 38 insertions and 20 deletions:

diagram_paths.pdf

bin+mod

diagram_paths.tex

main.tex

0 comments (0 inline, 0 general)

diagram_paths.pdf

➞

Show inline comments

binary diff not shown

diagram_paths.tex

➞

Show inline comments

@@ @@ -23,19 +23,24 @@ @@
+        }
         \draw [->] (\x,6) -- (\x+0.9,6);
+    }
     \foreach \y in {0,...,5}
         \draw [->] (8,\y) -- (8,\y+0.9);
     \draw (8,-0.2) node {$n_1$};
     \draw (-0.2,6) node {$n_2$};
     \draw (4,-0.5) node {step of path 1};
     \node[rotate=90,anchor=south,xshift=3cm,yshift=0.5cm] {step of path 2};
     \draw(-0.1,-0.4) node {$b_1\land b_2$};
     \draw(8,-0.4) node {$\mathbf{1} \land b_2$};
     \draw (-0.2,6.3) node {$b_1\land\mathbf{1}$};
     \draw (8.2,6.3) node {$\mathbf{1}$};
     \draw (4,-0.5) node {$\to$ steps of $\xi_1$};
     \node[rotate=90,anchor=south,xshift=3cm,yshift=0.5cm] {$\to$ steps of $\xi_2$};
     \draw[fill,red] (0,0) circle (0.08);
     \draw[fill,red] (8,0) circle (0.05);
     \draw[fill,red] (0,6) circle (0.05);
     \draw[fill,red] (8,6) circle (0.08);
     \def\x{5};
     \def\y{3};
     \draw[fill,black] (\x,\y) circle (0.07);
     \draw[fill=white,draw=black] (\x+0.25,\y-0.26) rectangle +(0.5,0.5);

main.tex

➞

Show inline comments

@@ @@ -54,12 +54,13 @@ @@
 \newcommand{\BQP}{\textsf{BQP}}
 \newcommand{\NP}{\textsf{NP}}
 \newcommand{\SharpP}{\textsf{\# P}}
 \newcommand{\diam}[1]{\mathcal{D}\left(#1\right)}
 \newcommand{\paths}[1]{\mathcal{P}\left(#1\to\mathbf{1}\right)}
 \newcommand{\gapsum}[1]{\mathrm{gapsum}\left(#1\right)}
 \long\def\ignore#1{}
 \newtheorem{theorem}{Theorem}
 \newtheorem{corollary}[theorem]{Corollary}%[theorem]
 \newtheorem{lemma}[theorem]{Lemma}
@@ @@ -144,13 +145,15 @@ @@
 &	0 & 1 & 2 & 3+2/3 & \cellcolor{blue!25}6.44 & 10.8 & 17.3 & 26.65 & 39.43 & 56.48 & 78.65 & 106.9 & 142.2 & 185.8 & 238.7 & 302.41 & 378.05 & 467.13 & 571.14 & 691.69 & 830.44 \\
 &	0 & 1 & 2 & 3+2/3 & 6.44 & \cellcolor{blue!25}11.0 & 18.5 & 30.02 & 47.10 & 71.68 & 106.0 & 152.9 & 215.4 & 297.4 & 403.1 & 537.21 & 705.25 & 913.31 & 1168.2 & 1477.4 & 1849.1 \\
 &	0 & 1 & 2 & 3+2/3 & 6.44 & 11.0 & \cellcolor{blue!25}18.7 & 31.21 & 50.83 & 80.80 & 125.3 & 189.7 & 280.8 & 407.0 & 578.6 & 808.13 & 1110.2 & 1502.6 & 2005.6 & 2643.2 & 3443.1 \\
 &	0 & 1 & 2 & 3+2/3 & 6.44 & 11.0 & 18.7 & \cellcolor{blue!25}31.44 & 52.08 & 84.95 & 136.0 & 213.6 & 328.9 & 496.5 & 735.6 & 1070.7 & 1532.5 & 2159.5 & 2998.8 & 4108.1 & 5556.7 \\
 &	0 & 1 & 2 & 3+2/3 & 6.44 & 11.0 & 18.7 & 31.44 & \cellcolor{blue!25}52.30 & 86.27 & 140.7 & 226.3 & 358.4 & 558.4 & 855.4 & 1289.0 & 1911.5 & 2791.4 & 4017.2 & 5701.4 & 7985.9 \\
 &	0 & 1 & 2 & 3+2/3 & 6.44 & 11.0 & 18.7 & 31.44 & 52.30 & \cellcolor{blue!25}86.49 & 142.1 & 231.6 & 373.4 & 594.8 & 934.4 & 1447.1 & 2209.0 & 3324.6 & 4934.8 & 7226.9 & 10447. \\
 		\end{tabular}
             \vdots \\
 & 0 & 1 & 2 & 3+2/3 & 6.44 & 11.08 & 18.76 & 31.45 & 52.31 & 86.49 & 142.33 & 233.31 & 381.17 & 621.02 & 1009.38 & \cellcolor{blue!25}1637.13 & % 2650.74 & 4285.68 & 6913.55 & 11171.2 & 18052.2
         \end{tabular}
+	}
 	\end{table}
 	We observe that this is a power series in $p$. We discovered a very regular structure in this power series. It seems that for all $k\in\mathbb{N}$ and for all $n>k$ we have that $a^{(n)}_k$ is constant; we verified this conjecture using a computer up to $n=14$.
 	\newpage
 	\noindent Based on our calculations presented in Table~\ref{tab:coeffs} and Figure~\ref{fig:coeffs_conv_radius} we make the following conjectures:
@@ @@ -158,13 +161,13 @@ @@
 		\item $\forall k\in\mathbb{N}, \forall n\geq 3 : a^{(n)}_k\geq 0$	\label{it:pos}
         (A simpler version: $\forall k>0: a_k^{(3)}=(k+1)(k+2)/6$)
 		\item $\forall k\in\mathbb{N}, \forall n>m\geq 3 : a^{(n)}_k\geq a^{(m)}_k$ \label{it:geq}
 		\item $\forall k\in\mathbb{N}, \forall n,m\geq \max(k,3) : a^{(n)}_k=a^{(m)}_k$ \label{it:const}
   		\item $\exists p_c=\lim\limits_{k\rightarrow\infty}1\left/\sqrt[k]{a_{k}^{(k+1)}}\right.$ \label{it:lim}
 	\end{enumerate}
-	We also conjecture that $p_c\approx0.62$, see Figure~\ref{fig:coeffs_conv_radius}.
+	We also conjecture that $p_c\approx0.61$, see Figure~\ref{fig:coeffs_conv_radius}.
 	\begin{figure}[!htb]\centering
 	\includegraphics[width=0.5\textwidth]{coeffs_conv_radius.pdf}
 	%\includegraphics[width=0.5\textwidth]{log_coeffs.pdf}
 	\caption{$1\left/\sqrt[k]{a_{k}^{(k+1)}}\right.$} %$\frac{1}{\sqrt[k]{a_k^{(k+1)}}}$
 	\label{fig:coeffs_conv_radius}
     &\quad + 432.302 p^{17} + 862.926 p^{18} + 1662.05 p^{19} + 3112.9 p^{20} + \mathcal{O}(p^{21})
 \end{align*}
 and indeed the lowest order is $\diam{C}=9$.
+~
 A weaker version of the claim is that if $C$ contains a gap of size $k$, then the sum is zero up to and including order $p^{k-1}$.
+A weaker version of the claim is that if $C$ contains a gap of size $k$, then the sum is zero up to and including order $p^{|C|+k-1}$.
 \begin{claim}[Weak cancellation claim] \label{claim:weakcancel}
 	For $C\subseteq[n]$ a configuration of slot positions, the lowest order term in
     \begin{align*}
         \sum_{f\in\{0,1'\}^{|C|}} \rho_{C(f)} R_{C(f)} ,
     \end{align*}
 	is at least $p^{\mathrm{gap}(C)}$ when $n$ is large enough. Here $\mathrm{gap}(C)$ is defined as in Figure~\ref{fig:diametergap}; it is the size of the largest gap of $C$ within the diameter of $C$. All lower order terms cancel out.
+	is at least $p^{|C|+\mathrm{gap}(C)}$ when $n$ is large enough. Here $\mathrm{gap}(C)$ is defined as in Figure~\ref{fig:diametergap}; it is the size of the largest gap of $C$ within the diameter of $C$. All lower order terms cancel out.
 \end{claim}
 This weaker version would imply \ref{it:const} but for $\mathcal{O}(k^2)$ as opposed to $k+1$.
 \newpage
 The reason that claim \ref{claim:strongcancel} would prove \ref{it:const} is the following:
 For a starting configuration that \emph{does} give a nonzero contribution, you can take that same starting configuration and translate it to get $n$ other configurations that give the same contribution. Therefore the coefficient in the expected number of resamplings is a multiple of $n$ which Andr\'as already divided out in the definition of $R^{(n)}(p)$. To show \ref{it:const} we argue that this is the \emph{only} dependency on $n$. This is because there are only finitely many (depending on $k$ but not on $n$) configurations where the $k$ slots are nearby regardless of the value of $n$. So there are only finitely many nonzero contributions after translation symmetry was taken out. For example, when considering all starting configurations with 5 slots one might think there are $\binom{n}{5}$ configurations to consider which would be a dependency on $n$ (more than only the translation symmetry). But since most of these configurations have a diameter larger than $k$, they do not contribute to $a_k$. Only finitely many do and that does not depend on $n$.
 	We say two paths $\xi_i\in\paths{b_i}$ ($i=1,2$) of the Markov Chain are \emph{independent} if $\xi_1$ never resamples a site that was ever zero in $\xi_2$ and the other way around. It is allowed that $\xi_1$ resamples a $1$ to a $1$ that was also resampled from $1$ to $1$ by $\xi_2$ and vice versa. If the paths are not independent then we call the paths \emph{dependent}.
 \end{definition}
 The key ingredient of the proof is the following claim:
 \begin{claim}[Sum of expectation values] \label{claim:expectationsum}
 When $b=b_1\land b_2\in\{0,1\}^n$ is a state with two groups ($b_1\lor b_2 = 1^n$) of zeroes with $k$ $1$s in between the groups, then we have $R_b(p) = R_{b_1}(p) + R_{b_2}(p) + \mathcal{O}(p^{k})$ where $b_1$ and $b_2$ are the configurations where only one of the groups is present and the other group has been replaced by $1$s. To be precise, the sums agree up to and including order $p^{k-1}$.
 \end{claim}
 For example for $b_1 = 10111111$ and $b_2 = 11111000$ we have $b=10111000$ and $k=3$. The claim says that the expected time to reach $\mathbf{1}$ from $b$ is the time to make the first group $1$ plus the time to make the second group $1$, as if they are independent. When going up to order $p^{k}$ or higher, there will be terms where the groups interfere so they are no longer independent.
 \textbf{Example}: For $b_1 = 0111111$ and $b_2 = 1111010$ we have $b=0111010$ and $k=3$. The claim says that the expected time to reach $\mathbf{1}$ from $b$ is the time to make the first group $1$ plus the time to make the second group $1$, as if they are independent. Simulation shows that
 \begin{align*}
     R_{b_1} &= 1 + 3p + 7p^2 + 14.67p^3 + 29p^4 + \mathcal{O}(p^5)\\
     R_{b_2} &= 2 + 5p + 10.67p^2 + 21.11p^3+40.26p^4 + \mathcal{O}(p^5)\\
     R_{b} &= 3 + 8p + 17.67p^2 + 34.78p^3+65.27p^4 + \mathcal{O}(p^5)\\
     R_{b_1} + R_{b_2} &= 3 + 8p + 17.67p^2+35.78p^3 + 69.26p^4 +\mathcal{O}(p^5)
 \end{align*}
 and indeed the sums agree up to order $p^{k-1}=p^2$. When going up to order $p^{k}$ or higher, there will be terms where the groups interfere so they are no longer independent.
+~
 \begin{proof}
 Consider a path $\xi_1\in\paths{b_1}$ and a path $\xi_2\in\paths{b_2}$ such that $\xi_1$ and $\xi_2$ are independent (Definition \ref{def:independence}). The paths $\xi_1,\xi_2$ induce $\binom{|\xi_1|+|\xi_2|}{|\xi_1|}$ different paths of total length $|\xi_1|+|\xi_2|$ in $\paths{b_1\land b_2}$. In the sums $R_{b_1}$ and $R_{b_2}$, the contribution of these paths are $\mathbb{P}[\xi_1]\cdot |\xi_1|$ and $\mathbb{P}[\xi_2]\cdot |\xi_2|$. The next diagram shows how these $\binom{|\xi_1|+|\xi_2|}{|\xi_1|}$ paths contribute to $R_{b_1\land b_2}$. At every step one has to choose between doing a step of path 1 or a step of path 2. The number of zeroes in the current state determine probabilities with which this happens (aside from the probabilities associated to the two original paths already). The grid below shows that at every point one can choose to do a step of path 1 with probability $p_i$ or a step of path 2 with probability $1-p_i$. These $p_i$ could in principle be different at every point in this grid. The weight of such a new path is the weight of the path in the diagram below, multiplied by $\mathbb{P}[\xi_1]\cdot\mathbb{P}[\xi_2]$. By induction one can show that the sum over all $\binom{|\xi_1|+|\xi_2|}{|\xi_1|}$ paths in the grid is $1$. Hence the contribution of all $\binom{|\xi_1|+|\xi_2|}{|\xi_1|}$ paths together to $R_{b_1\land b_2}$ is given by
 Consider a path $\xi_1\in\paths{b_1}$ and a path $\xi_2\in\paths{b_2}$ such that $\xi_1$ and $\xi_2$ are independent (Definition \ref{def:independence}). The paths $\xi_1,\xi_2$ induce $\binom{|\xi_1|+|\xi_2|}{|\xi_1|}$ different paths of total length $|\xi_1|+|\xi_2|$ in $\paths{b_1\land b_2}$. In the sums $R_{b_1}$ and $R_{b_2}$, the contributions of these paths are $\mathbb{P}[\xi_1]\cdot |\xi_1|$ and $\mathbb{P}[\xi_2]\cdot |\xi_2|$. The next diagram shows how these $\binom{|\xi_1|+|\xi_2|}{|\xi_1|}$ paths contribute to $R_{b_1\land b_2}$. At every step one has to choose between doing a step of $\xi_1$ or a step of $\xi_2$. The number of zeroes in the current state determines the probabilities with which this happens (besides the probabilities already associated to the two original paths). The grid below shows that at every point one can choose to do a step of $\xi_1$ with probability $p_i$ or a step of $\xi_2$ with probability $1-p_i$. These $p_i$ could in principle be different at every point in this grid.
 \begin{center}
 \includegraphics{diagram_paths.pdf}
 \end{center}
 The weight of such a new path is the weight of the path in the diagram, multiplied by $\mathbb{P}[\xi_1]\cdot\mathbb{P}[\xi_2]$. By induction one can show that the sum over all $\binom{|\xi_1|+|\xi_2|}{|\xi_1|}$ paths in the grid is $1$. Hence the contribution of all $\binom{|\xi_1|+|\xi_2|}{|\xi_1|}$ paths together to $R_{b_1\land b_2}$ is given by
 \[
 \mathbb{P}[\xi_1]\cdot\mathbb{P}[\xi_2]\cdot(|\xi_1|+|\xi_2|) = \mathbb{P}[\xi_2]\cdot\mathbb{P}[\xi_1]\cdot|\xi_1| \;\; + \;\; \mathbb{P}[\xi_1]\cdot\mathbb{P}[\xi_2]\cdot|\xi_2|.
 \]
 Ideally we would now like to sum this expression over all possible paths $\xi_1,\xi_2$ and use $p_\mathrm{tot}:=\sum_{\xi\in\paths{b_i}} \mathbb{P}[\xi] = 1$ (which also holds up to arbitrary order in $p$). The above expression would then become $R_{b_1} + R_{b_2}$. However, not all paths in the sum would satisfy the independence condition so it seems we can't do this. We now argue that it works up to order $p^{k-1}$.
 For all $\xi\in\paths{b_1\land b_2}$ we have that \emph{either} $\xi$ splits into two independent paths $\xi_1,\xi_2$ as above, \emph{or} it does not. In the latter case, when $\xi$ cannot be split like that, we know $\mathbb{P}[\xi]$ contains a power $p^k$ or higher because there is a gap of size $k$ and the paths must have moved at least $k$ times `towards each other' (for example one path moves $m$ times to the right and the other path moves $k-m$ times to the left). So the total weight of such a combined path is at least order $p^k$. Therefore we have
 \[
     &= \sum_{\xi_1\in\paths{b_1}} \sum_{\xi_2\in\paths{b_2}} \mathbb{P}[\xi_2]\mathbb{P}[\xi_1]|\xi_1| + \mathcal{O}(p^k) \\
     &= \sum_{\xi_1\in\paths{b_1}} \mathbb{P}[\xi_1]|\xi_1| + \mathcal{O}(p^k) \\
     &= R_{b_1} + \mathcal{O}(p^k)
 \end{align*}
 we can do the same with the second term and this proves the claim.
 \end{proof}
 \begin{center}
 \includegraphics{diagram_paths.pdf}
 \end{center}
 \textbf{Proof of claim \ref{claim:weakcancel}}: Say we have a group on the left with $l$ slots and a group on the right with $r$ slots, with enough space between the groups. Then on the left we have strings in $\{0,1'\}^l$ as possibilities and on the right we have strings in $\{0,1'\}^r$. The combined configuration can be described by strings $(a,b)\in\{0,1'\}^{l+r}$. Such a configuration has probability $(-1)^{|a|+|b|} p^{r+l}$ in $\rho$ and by claim \ref{claim:expectationsum} we know $R_{(a,b)} = R_a + R_b + \mathcal{O}(p^\mathrm{spacing})$. The total contribution of these configurations is therefore
 ~\\
 \textbf{Proof of claim \ref{claim:weakcancel}}: We can assume $C$ consists of a group on the left with $l$ slots and a group on the right with $r$ slots (so $r+l=|C|$), with a gap of size $k=\mathrm{gap}(C)$ between these groups. Then on the left we have strings in $\{0,1'\}^l$ as possibilities and on the right we have strings in $\{0,1'\}^r$. The combined configuration can be described by strings $f=(a,b)\in\{0,1'\}^{l+r}$. The initial probability of such a state $C(a,b)$ is $\rho_{C(a,b)} = (-1)^{|a|+|b|} p^{r+l}$ and by claim \ref{claim:expectationsum} we know $R_{C(a,b)} = R_{C(a)} + R_{C(b)} + \mathcal{O}(p^k)$ where $C(a)$ indicates that only the left slots have been filled by $a$ and the other slots are filled with $1$s. The total contribution of these configurations is therefore
 \begin{align*}
 	\sum_{a\in\{0,1'\}^l} \sum_{b\in\{0,1'\}^r} (-1)^{|a|+|b|}p^{r+l} \left( R_a + R_b \right) + \mathcal{O}(p^\mathrm{spacing})
     &= p^{r+l}\sum_{a\in\{0,1'\}^l} (-1)^{|a|} R_a \sum_{b\in\{0,1'\}^r} (-1)^{|b|} \\
     &\quad + p^{r+l}\sum_{b\in\{0,1'\}^r} (-1)^{|b|} R_b \sum_{a\in\{0,1'\}^l} (-1)^{|a|} \\
     &\quad + \mathcal{O}(p^\mathrm{spacing})\\
     &= 0 + \mathcal{O}(p^\mathrm{spacing})
     \sum_{f\in\{0,1'\}^{|C|}} \rho_{C(f)} R_{C(f)}
     &= \sum_{a\in\{0,1'\}^l} \sum_{b\in\{0,1'\}^r} (-1)^{|a|+|b|}p^{r+l} \left( R_{C(a)} + R_{C(b)} + \mathcal{O}(p^k) \right) \\
     &=\;\;\; p^{r+l}\sum_{a\in\{0,1'\}^l} (-1)^{|a|} R_{C(a)} \sum_{b\in\{0,1'\}^r} (-1)^{|b|} \\
     &\quad + p^{r+l}\sum_{b\in\{0,1'\}^r} (-1)^{|b|} R_{C(b)} \sum_{a\in\{0,1'\}^l} (-1)^{|a|}
         + \mathcal{O}(p^{r+l+k})\\
     &= 0 + \mathcal{O}(p^{|C|+k})
 \end{align*}
 where we used the identity $\sum_{a\in\{0,1'\}^l} (-1)^{|a|} = 0$.
 \newpage
     \subsection{Sketch of the (false) proof of the linear bound \ref{it:const}}
     Let us interpret $[n]$ as the vertices of a length-$n$ cycle, and interpret operations on vertices mod $n$ s.t. $n+1\equiv 1$ and $1-1\equiv n$.
     %\begin{definition}[Resample sequences]
     %	A sequence of indices $(r_\ell)=(r_1,r_2,\ldots,r_k)\in[n]^k$ is called a resample sequence if our procedure performs $k$ consecutive resamplings, where the first resampling of the procedure resamples around the midpoint $r_1$, the second around $r_2$, and so on. Let $RS(k)$ denote the set of length $k$ resample sequences, and let $RS=\cup_{k\in\mathbb{N}}RS(k)$.
     %\end{definition}
     %\begin{definition}[Constrained resample sequence]\label{def:constrainedRes}

0 comments (0 inline, 0 general)