Files @ fda8425fac05
Branch filter:

Location: AENC/switchchain/cpp/switchchain.cpp

Tom Bannink
Add scatter plot of GCM success rate vs mixing time
#include "exports.hpp"
#include "graph.hpp"
#include "powerlaw.hpp"
#include <algorithm>
#include <fstream>
#include <iostream>
#include <numeric>
#include <random>
#include <vector>

// Its assumed that u,v are distinct.
// Check if all four vertices are distinct
bool edgeConflicts(const Edge& e1, const Edge& e2) {
    return (e1.u == e2.u || e1.u == e2.v || e1.v == e2.u || e1.v == e2.v);
}

class SwitchChain {
  public:
    SwitchChain()
        : mt(std::random_device{}()), permutationDistribution(0.5)
    // permutationDistribution(0, 2)
    {
        // random_device uses hardware entropy if available
        // std::random_device rd;
        // mt.seed(rd());
    }
    ~SwitchChain() {}

    bool initialize(const Graph& gstart) {
        if (gstart.edgeCount() == 0)
            return false;
        g = gstart;
        edgeDistribution.param(
            std::uniform_int_distribution<>::param_type(0, g.edgeCount() - 1));
        return true;
    }

    bool doMove() {
        int e1index, e2index;
        int timeout = 0;
        // Keep regenerating while conflicting edges
        do {
            e1index = edgeDistribution(mt);
            e2index = edgeDistribution(mt);
            if (++timeout % 100 == 0) {
                std::cerr << "Warning: sampled " << timeout
                          << " random edges but they keep conflicting.\n";
            }
        } while (edgeConflicts(g.getEdge(e1index), g.getEdge(e2index)));

        // Consider one of the three possible permutations
        // 1) e1.u - e1.v and e2.u - e2.v (original)
        // 2) e1.u - e2.u and e1.v - e2.v
        // 3) e1.u - e2.v and e1.v - e2.u
        bool switchType = permutationDistribution(mt);
        return g.exchangeEdges(e1index, e2index, switchType);
    }

    Graph g;
    std::mt19937 mt;
    std::uniform_int_distribution<> edgeDistribution;
    //std::uniform_int_distribution<> permutationDistribution;
    std::bernoulli_distribution permutationDistribution;
};

//
// Assumes degree sequence does NOT contain any zeroes!
//
// method2 = true  -> take highest degree and finish its pairing completely
// method2 = false -> take new highest degree after every pairing
bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, auto& rng, bool method2) {
    // Similar to Havel-Hakimi but instead of pairing up with the highest ones
    // that remain, simply pair up with random ones
    unsigned int n = ds.size();

    // degree, vertex index
    std::vector<std::pair<unsigned int, unsigned int>> degrees(n);
    for (unsigned int i = 0; i < n; ++i) {
        degrees[i].first = ds[i];
        degrees[i].second = i;
    }

    std::vector<decltype(degrees.begin())> available;
    available.reserve(n);

    // Clear the graph
    g.reset(n);

    while (!degrees.empty()) {
        std::shuffle(degrees.begin(), degrees.end(), rng);
        // Get the highest degree:
        // If there are multiple highest ones, we pick a random one,
        // ensured by the shuffle.
        // The shuffle is needed anyway for the remaining part
        unsigned int dmax = 0;
        auto uIter = degrees.begin();
        for (auto iter = degrees.begin(); iter != degrees.end(); ++iter) {
            if (iter->first >= dmax) {
                dmax = iter->first;
                uIter = iter;
            }
        }

        if (dmax > degrees.size() - 1)
            return false;

        if (dmax == 0) {
            std::cerr << "ERROR 1 in GCM.\n";
        }

        unsigned int u = uIter->second;

        if (method2) {
            // Take the highest degree out of the vector
            degrees.erase(uIter);

            // Now assign randomly to the remaining vertices
            // Since its shuffled, we can pick the first 'dmax' ones
            auto vIter = degrees.begin();
            while (dmax--) {
                if (vIter->first == 0)
                    std::cerr << "ERROR in GCM2.\n";
                if (!g.addEdge({u, vIter->second}))
                    std::cerr << "ERROR. Could not add edge in GCM2.\n";
                vIter->first--;
                if (vIter->first == 0)
                    vIter = degrees.erase(vIter);
                else
                    vIter++;
            }
        } else {
            // Pair with a random vertex that is not u itself and to which
            // u has not paired yet!
            available.clear();
            for (auto iter = degrees.begin(); iter != degrees.end(); ++iter) {
                if (iter->second != u && !g.hasEdge({u, iter->second}))
                    available.push_back(iter);
            }
            if (available.empty())
                return false;
            std::uniform_int_distribution<> distr(0, available.size() - 1);
            auto vIter = available[distr(rng)];
            // pair u to v
            if (vIter->first == 0)
                std::cerr << "ERROR 2 in GCM1.\n";
            if (!g.addEdge({u, vIter->second}))
                std::cerr << "ERROR. Could not add edge in GCM1.\n";
            // Purge anything with degree zero
            // Be careful with invalidating the other iterator!
            // Degree of u is always greater or equal to the degree of v
            if (dmax == 1) {
                // Remove both
                // Erasure invalidates all iterators AFTER the erased one
                if (vIter > uIter) {
                    degrees.erase(vIter);
                    degrees.erase(uIter);
                } else {
                    degrees.erase(uIter);
                    degrees.erase(vIter);
                }
            } else {
                // Remove only v if it reaches zero
                uIter->first--;
                vIter->first--;
                if (vIter->first == 0)
                    degrees.erase(vIter);
            }
            //degrees.erase(std::remove_if(degrees.begin(), degrees.end(),
            //                             [](auto x) { return x.first == 0; }));
        }
    }
    return true;
}

int main() {
    // Generate a random degree sequence
    std::mt19937 rng(std::random_device{}());

    // Goal:
    // Degrees follow a power-law distribution with some parameter tau
    // Expect:  #tri = const * n^{ something }
    // The goal is to find the 'something' by finding the number of triangles
    // for different values of n and tau
    float tauValues[] = {2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f};

    Graph g;
    Graph g1;
    Graph g2;

    std::ofstream outfile("graphdata.m");
    outfile << '{';
    bool outputComma = false;

    for (int numVertices = 200; numVertices <= 2000; numVertices += 400) {
        for (float tau : tauValues) {

            DegreeSequence ds(numVertices);
            powerlaw_distribution degDist(tau, 1, numVertices);
            //std::poisson_distribution<> degDist(12);

            // For a single n,tau take samples over several instances of
            // the degree distribution.
            // 500 samples seems to give reasonable results
            for (int degreeSample = 0; degreeSample < 200; ++degreeSample) {
                // Generate a graph
                // might require multiple tries
                for (int i = 1; ; ++i) {
                    std::generate(ds.begin(), ds.end(),
                                  [&degDist, &rng] { return degDist(rng); });
                    // First make the sum even
                    unsigned int sum = std::accumulate(ds.begin(), ds.end(), 0);
                    if (sum % 2) {
                        continue;
                        // Can we do this: ??
                        ds.back()++;
                    }

                    if (g.createFromDegreeSequence(ds))
                        break;

                    // When 10 tries have not worked, output a warning
                    if (i % 10 == 0) {
                        std::cerr << "Warning: could not create graph from "
                                     "degree sequence. Trying again...\n";
                    }
                }

                //
                // Test the GCM1 and GCM2 success rate
                //
                std::vector<int> greedyTriangles1;
                std::vector<int> greedyTriangles2;
                int successrate1 = 0;
                int successrate2 = 0;
                for (int i = 0; i < 100; ++i) {
                    Graph gtemp;
                    // Take new highest degree every time
                    if (greedyConfigurationModel(ds, gtemp, rng, false)) {
                        ++successrate1;
                        greedyTriangles1.push_back(gtemp.countTriangles());
                        g1 = gtemp;
                    }
                    // Finish all pairings of highest degree first
                    if (greedyConfigurationModel(ds, gtemp, rng, true)) {
                        ++successrate2;
                        greedyTriangles2.push_back(gtemp.countTriangles());
                        g2 = gtemp;
                    }
                }

                SwitchChain chain;
                if (!chain.initialize(g)) {
                    std::cerr << "Could not initialize Markov chain.\n";
                    return 1;
                }

                SwitchChain chain1, chain2;
                bool do1 = true, do2 = true;
                if (!chain1.initialize(g1)) {
                    std::cerr << "Could not initialize Markov chain.\n";
                    do1 = false;
                }
                if (!chain2.initialize(g2)) {
                    std::cerr << "Could not initialize Markov chain.\n";
                    do2 = false;
                }

                std::cout << "Running n = " << numVertices << ", tau = " << tau
                          << ". \t" << std::flush;

                //int mixingTime = (32.0f - 26.0f*(tau - 2.0f)) * numVertices; //40000;
                //constexpr int measurements = 50;
                //constexpr int measureSkip =
                //    200; // Take a sample every ... steps
                int mixingTime = 0;
                constexpr int measurements = 50000;
                constexpr int measureSkip = 1;


                int movesDone = 0;

                int triangles[measurements];

                for (int i = 0; i < mixingTime; ++i) {
                    if (chain.doMove())
                        ++movesDone;
                }
                for (int i = 0; i < measurements; ++i) {
                    for (int j = 0; j < measureSkip; ++j)
                        if (chain.doMove())
                            ++movesDone;
                    triangles[i] = chain.g.countTriangles();
                }

                std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
                          << " moves succeeded ("
                          << 100.0f * float(movesDone) /
                                 float(mixingTime + measurements * measureSkip)
                          << "%).";
                //std::cout << std::endl;

                if (outputComma)
                    outfile << ',' << '\n';
                outputComma = true;

                std::sort(ds.begin(), ds.end());
                outfile << '{' << '{' << numVertices << ',' << tau << '}';
                outfile << ',' << triangles;
                outfile << ',' << ds;
                outfile << ',' << greedyTriangles1;
                outfile << ',' << greedyTriangles2;

                if (do1) {
                    movesDone = 0;
                    SwitchChain& c = chain1;
                    for (int i = 0; i < mixingTime; ++i) {
                        if (c.doMove())
                            ++movesDone;
                    }
                    for (int i = 0; i < measurements; ++i) {
                        for (int j = 0; j < measureSkip; ++j)
                            if (c.doMove())
                                ++movesDone;
                        triangles[i] = c.g.countTriangles();
                    }

                    std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
                        << " moves succeeded ("
                        << 100.0f * float(movesDone) /
                        float(mixingTime + measurements * measureSkip)
                        << "%).";

                    outfile << ',' << triangles;
                }
                if (do2) {
                    movesDone = 0;
                    SwitchChain& c = chain2;
                    for (int i = 0; i < mixingTime; ++i) {
                        if (c.doMove())
                            ++movesDone;
                    }
                    for (int i = 0; i < measurements; ++i) {
                        for (int j = 0; j < measureSkip; ++j)
                            if (c.doMove())
                                ++movesDone;
                        triangles[i] = c.g.countTriangles();
                    }

                    std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
                        << " moves succeeded ("
                        << 100.0f * float(movesDone) /
                        float(mixingTime + measurements * measureSkip)
                        << "%).";

                    outfile << ',' << triangles;
                }

                outfile << '}' << std::flush;

                std::cout << std::endl;
            }
        }
    }
    outfile << '}';
    return 0;
}