diff --git a/README b/README new file mode 100644 index 0000000000000000000000000000000000000000..9b3d09055bc0f54c4f7c0afbd559fae775e5b0c0 --- /dev/null +++ b/README @@ -0,0 +1,4 @@ +# Numerical analysis of resample markov chain + +The montecarlo directory contains cpp programs to run the markov chain and sample from it. + diff --git a/montecarlo/Makefile b/montecarlo/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d0d660a1548a472de516bb04bf48602c046fb41c --- /dev/null +++ b/montecarlo/Makefile @@ -0,0 +1,10 @@ +CXXFLAGS += -std=c++14 -O3 +CXXFLAGS += -Wall -Wextra -Wfatal-errors -Werror -pedantic -Wno-deprecated-declarations + +TARGETS += resampler_cycle resampler_segment + +all: $(TARGETS) + +clean: + rm -f $(TARGETS) + diff --git a/montecarlo/fenwicktree.hpp b/montecarlo/fenwicktree.hpp new file mode 100644 index 0000000000000000000000000000000000000000..76ef445c4897b5475c5d1185ff1bd0b98925f4e6 --- /dev/null +++ b/montecarlo/fenwicktree.hpp @@ -0,0 +1,51 @@ +// Taken from +// https://kartikkukreja.wordpress.com/2013/05/11/bit-fenwick-tree-data-structure-c-implementation/ +// Has MIT licence +// Slightly modified +#pragma once + +#define LSOne(S) (S & (-S)) + +template +class BIT { + T* ft; + size_t size; +public: + // initialize a BIT of n elements to zero + BIT(size_t n) { + size = n; + ft = new T[n+1](); // () sets it to zero + } + + ~BIT() { + delete [] ft; + } + + // returns sum of the range [1...b] + T sum(size_t b) { + T sum = 0; + for (; b; b -= LSOne(b)) sum += ft[b]; + return sum; + } + + // returns sum of the range [a...b] + T sum(size_t a, size_t b) { + return sum(b) - (a == 1 ? 0 : sum(a - 1)); + } + + // update value of the k-th element by v (v can be +ve/inc or -ve/dec) + // i.e., increment or decrement kth element by a value v + void update(size_t k, T v) { + for (; k <= size; k += LSOne(k)) ft[k] += v; + } + + // divide each original frequency by c + void scaleDown(T c){ + for (int i=1 ; i<=size ; i++) ft[i] /= c; + } + + // multiply each original frequency by c + void scaleUp(T c){ + for (int i=1 ; i<=size ; i++) ft[i] *= c; + } +}; diff --git a/montecarlo/resampler.hpp b/montecarlo/resampler.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8497dbbce98e1f4c275792248d651601b2e78c99 --- /dev/null +++ b/montecarlo/resampler.hpp @@ -0,0 +1,100 @@ +#include "fenwicktree.hpp" +#include +#include +#include + +enum VState { BAD = 0, GOOD = 1 }; + +// Base class for resampler +// Contains code for storing n vertices and efficiently picking +// a random BAD using a fenwick tree data structure. +// The graph structure should be specified by the subclass, +// by implementing a function +// ``void resampleNeighbors(int i) { +// resampleVertex(i); +// resampleVertex(j1); +// // ... +// resampleVertex(jk); +// }`` +template +class ResamplerBase { + public: + // p - probability of sampling a BAD + // n - total number of vertices + // rng - random number generator, usually std::mt19937 + // Default initial state is all-GOOD + ResamplerBase(float p_, size_t n, RNG& rng_) + : p(p_), state(n, GOOD), fwTree(n), nBads(0), rng(rng_), distr(p) { + assert(p >= 0.0f && p <= 1.0f); + } + + // Returns the chosen site or -1 if in all-good state + int doMove() { + if (nBads == 0) + return -1; + assert(fwTree.sum(state.size()) == nBads); + std::uniform_int_distribution<> intDist(1, nBads); + unsigned int a = intDist(rng); + // There are 'nBads' 1's in state + // Select the a'th Bad using a binary search on the fenwicktree + // Where a is 1-based, as well as the fenwick tree + int i = 0; + { + // The a-th Bad is in the range + // [x_lower, ..., x_upper-1] + // where lower and upper are 0-based + int lower = 0; + int upper = state.size(); + + while (upper - lower > 1) { + int cur = (upper + lower) / 2; + + // Check the number of 1's in [x0,...,x_cur-1] + // fwTree is 1-based so no -1 needed + if (fwTree.sum(cur) >= a) { + // The a-th 1 is in [x_lower,...,x_cur-1] + upper = cur; + } else { + // The a-th 1 is in the range [x_cur,...,x_upper-1] + lower = cur; + } + } + i = lower; + } + + // state[i] is now the a'th Bad + ((Derived*)this)->resampleNeighbors(i); + return i; + } + + size_t getN() const { return state.size(); }; + size_t numBads() const { return nBads; } + + const std::vector& getState() const { return state; } + + protected: + void setVertex(int i, VState value) { + if (state[i] == BAD) { + fwTree.update(i + 1, -1); + nBads--; + } + state[i] = value; + if (state[i] == BAD) { + fwTree.update(i + 1, 1); + nBads++; + } + } + + // Used by Derived::resampleNeighbors + void resampleVertex(int i) { setVertex(i, (distr(rng) ? BAD : GOOD)); } + + private: + float p; + std::vector state; + BIT fwTree; // fenwicktree + size_t nBads; + + RNG& rng; + std::bernoulli_distribution distr; +}; + diff --git a/montecarlo/resampler_cycle.cpp b/montecarlo/resampler_cycle.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bf25d72d052f1e622eed54392a8200595d3de762 --- /dev/null +++ b/montecarlo/resampler_cycle.cpp @@ -0,0 +1,124 @@ +#include +#include +#include "resampler.hpp" + +std::mt19937 mt(std::random_device{}()); + +class CycleResampler : public ResamplerBase { + public: + CycleResampler(float p_, size_t n, bool allBads = true) + : ResamplerBase(p_, n, mt) { + assert(n >= 3); + if (allBads) { + for (auto i = 0u; i < n; ++i) { + setVertex(i, BAD); + } + } else { + setVertex(0, BAD); + } + } + + // Cycle has periodic boundary conditions + void resampleNeighbors(int i) { + resampleVertex(i); + + if (i == 0) + resampleVertex(getN() - 1); + else + resampleVertex(i - 1); + + if (i + 1 == (int)getN()) + resampleVertex(0); + else + resampleVertex(i + 1); + } + + // Returns number of steps before all good + int run(int limit = -1) { + int t; + for (t = 0; t != limit && numBads() != 0; t++) + doMove(); + return t; + } +}; + +int main() { + //if (0) { + CycleResampler sampler(0.6f, 40); + int loc; + for (int i = 0; i < 2000; ++i) { + loc = sampler.doMove(); + if (loc == -1) + break; + std::cout << '|'; + int j = 0; + for (auto x : sampler.getState()) { + if (j++ == loc) { + std::cout << 'X'; + } else { + std::cout << (x == BAD ? '*' : ' '); + } + } + std::cout << '|' << std::endl; + } + //} + + // Measure mixing time for various n,p + // starting from all-1 state + if (0) { + std::cout << "{{0,0,0}"; + for (int i = 0; i <= 8; ++i) { + float p = 0.6f + 0.01f * i; + for (int n = 5; n <= 25; ++n) { + std::cerr << "Running p = " << p << " and n = " << n + << std::endl; + for (int j = 0; j < 1000; ++j) { + CycleResampler sampler(p, n); + int time = sampler.run(500000); + std::cout << ',' << '{' << p << ',' << n << ',' << time + << '}'; + } + std::cout << std::flush; + } + } + std::cout << '}' << std::endl; + } + + // Measure probability of hitting all-good + // starting from single-bad and stopping if some condition is met + // Interesting range is p in [0.60,0.75] + std::cout << "{{0,0,0}"; + for (int i = 0; i <= 100; ++i) { + float p = 0.60f + 0.001f * i; + for (size_t n = 1000; n <= 1000; n += 200) { + std::cerr << "Running p = " << p << " and n = " << n << std::endl; + constexpr int samples = 500; + int hitCount = 0; + int otherHitCount = 0; + for (int j = 0; j < samples; ++j) { + CycleResampler sampler(p, n, false); + for (int t = 0; t != 500000; t++) { + sampler.doMove(); + if (sampler.numBads() == 0) { + hitCount++; + break; + } + //if (sampler.numBads() >= (5*n)/10) { + if (sampler.getState()[n/2] == BAD) { + otherHitCount++; + break; + } + } + } + std::cout << ',' << '{' << p << ',' << n << ',' + << float(hitCount) / float(samples) << '}'; + std::cout << std::flush; + + std::cerr << (samples - otherHitCount - hitCount) << '/' << samples + << " runs with timelimit.\n"; + } + } + std::cout << '}' << std::endl; + + return 0; +} diff --git a/montecarlo/resampler_segment.cpp b/montecarlo/resampler_segment.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3321b5948cd92538331a632b884ef6602de720e5 --- /dev/null +++ b/montecarlo/resampler_segment.cpp @@ -0,0 +1,85 @@ +#include +#include +#include "resampler.hpp" + +std::mt19937 mt(std::random_device{}()); + +class SegmentResampler : public ResamplerBase { + public: + SegmentResampler(float p_, size_t n) : ResamplerBase(p_, n, mt) { + setVertex(0, BAD); + } + + // No periodic boundary + void resampleNeighbors(int i) { + resampleVertex(i); + + if (i != 0) + resampleVertex(i - 1); + if (i + 1 != (int)getN()) + resampleVertex(i + 1); + } +}; + +void doRun(float p, size_t n) { + std::cerr << "Running p = " << p << " and n = " << n << std::endl; + + constexpr int samples = 500; + + int terminateCount = 0; + int hitnCount = 0; + for (int j = 0; j < samples; ++j) { + SegmentResampler sampler(p, n); + for (int t = 0; t != 500000; t++) { + sampler.doMove(); + if (sampler.numBads() == 0) { + terminateCount++; + break; + } + if (sampler.getState()[n - 1] == BAD) { + hitnCount++; + break; + } + } + } + if (terminateCount + hitnCount != samples) { + std::cerr << (samples - terminateCount - hitnCount) << '/' << samples + << " runs with timelimit!\n"; + } + + std::cout << '{' << p << ',' << n << ','; + std::cout << hitnCount << '/' << hitnCount + terminateCount << '}'; + std::cout << std::flush; +} + +int main() { + // Measure probability of hitting vertex N starting from single-bad + // Interesting range is p in [0.60,0.75] + std::cout << "(* Probability of hitting site n when starting from single " + "BAD at 0 on segment [0,n].\n"; + std::cout << "Data-format: {p, n, probability} *)\n"; + std::cout << "{\n"; + for (size_t n = 600; ; n += 200) { + doRun(0.10f, n); + std::cout << ','; + + for (int i = 0; i <= 100; ++i) { + float p = 0.60f + 0.001f * i; + doRun(p, n); + std::cout << ','; + } + doRun(0.75f, n); + std::cout << ','; + doRun(0.80f, n); + std::cout << ','; + doRun(0.90f, n); + + if (n == 1000) + break; + else + std::cout << ','; + } + std::cout << '}' << std::endl; + + return 0; +}