Changeset - 6218fdc51593
[Not reviewed]
0 3 0
Tom Bannink - 8 years ago 2017-03-31 17:21:59
tom.bannink@cwi.nl
Add greedy configuration model as initial graph
3 files changed with 83 insertions and 4 deletions:
0 comments (0 inline, 0 general)
cpp/graph.hpp
Show inline comments
 
@@ -39,48 +39,49 @@ class Graph {
 
    ~Graph() {}
 

	
 
    // Clears any previous edges and create
 
    // an empty graph on n vertices
 
    void reset(unsigned int n) {
 
        edges.clear();
 
        adj.resize(n);
 
        for (auto &v : adj)
 
            v.clear();
 
        badj.resize(n);
 
        for (auto &v : badj) {
 
            v.resize(n);
 
            v.assign(n, false);
 
        }
 
    }
 

	
 
    unsigned int edgeCount() const { return edges.size(); }
 

	
 
    const Edge &getEdge(unsigned int i) const { return edges[i].e; }
 

	
 
    // When the degree sequence is not graphics, the Graph can be
 
    // in any state, it is not neccesarily empty
 
    bool createFromDegreeSequence(const DegreeSequence &d) {
 
        // Havel-Hakimi algorithm
 
        // Based on Erdos-Gallai theorem
 

	
 
        unsigned int n = d.size();
 

	
 
        // degree, vertex index
 
        std::vector<std::pair<unsigned int, unsigned int>> degrees(n);
 
        for (unsigned int i = 0; i < n; ++i) {
 
            degrees[i].first = d[i];
 
            degrees[i].second = i;
 
        }
 

	
 
        // Clear the graph
 
        reset(n);
 

	
 
        while (!degrees.empty()) {
 
            std::sort(degrees.begin(), degrees.end());
 
            // Highest degree is at back of the vector
 
            // Take it out
 
            unsigned int degree = degrees.back().first;
 
            unsigned int u = degrees.back().second;
 
            degrees.pop_back();
 
            if (degree > degrees.size()) {
 
                return false;
 
            }
 
            // Now loop over the last 'degree' entries of degrees
cpp/switchchain.cpp
Show inline comments
 
@@ -42,82 +42,159 @@ class SwitchChain {
 
        do {
 
            e1index = edgeDistribution(mt);
 
            e2index = edgeDistribution(mt);
 
            if (++timeout % 100 == 0) {
 
                std::cerr << "Warning: sampled " << timeout
 
                          << " random edges but they keep conflicting.\n";
 
            }
 
        } while (edgeConflicts(g.getEdge(e1index), g.getEdge(e2index)));
 

	
 
        // Consider one of the three possible permutations
 
        // 1) e1.u - e1.v and e2.u - e2.v (original)
 
        // 2) e1.u - e2.u and e1.v - e2.v
 
        // 3) e1.u - e2.v and e1.v - e2.u
 
        bool switchType = permutationDistribution(mt);
 
        return g.exchangeEdges(e1index, e2index, switchType);
 
    }
 

	
 
    Graph g;
 
    std::mt19937 mt;
 
    std::uniform_int_distribution<> edgeDistribution;
 
    //std::uniform_int_distribution<> permutationDistribution;
 
    std::bernoulli_distribution permutationDistribution;
 
};
 

	
 
bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, auto& rng) {
 
    // Same as Havel-Hakimi but instead of pairing up with the highest ones
 
    // that remain, simply pair up with random ones
 
    unsigned int n = ds.size();
 

	
 
    // degree, vertex index
 
    std::vector<std::pair<unsigned int, unsigned int>> degrees(n);
 
    for (unsigned int i = 0; i < n; ++i) {
 
        degrees[i].first = ds[i];
 
        degrees[i].second = i;
 
    }
 
    std::vector<decltype(degrees.begin())> available;
 
    available.reserve(n);
 

	
 
    // Clear the graph
 
    g.reset(n);
 

	
 
    std::uniform_int_distribution<> distr(1, n - 1);
 

	
 
    while (!degrees.empty()) {
 
        // Get the highest degree:
 
        unsigned int dmax = 0;
 
        unsigned int u = 0;
 
        auto maxIter = degrees.begin();
 
        for (auto iter = degrees.begin(); iter != degrees.end(); ++iter) {
 
            if (iter->first >= dmax) {
 
                dmax = iter->first;
 
                u = iter->second;
 
                maxIter = iter;
 
            }
 
        }
 
        // Take the highest degree out of the vector
 
        degrees.erase(maxIter);
 

	
 
        available.clear();
 
        for (auto iter = degrees.begin(); iter != degrees.end(); ++iter) {
 
            if (iter->first)
 
                available.push_back(iter);
 
        }
 

	
 
        if (dmax > available.size())
 
            return false;
 

	
 
        std::shuffle(available.begin(), available.end(), rng);
 

	
 
        // Now assign randomly to the remaining vertices
 
        // Pick 'cmax' distinct integers between 1 and degrees.size()
 
        for (unsigned int i = 0; i < dmax; ++i) {
 
            if (!g.addEdge({u,available[i]->second})) {
 
                std::cerr << "ERROR. Could not add edge in greedy configuration model.\n";
 
            }
 
            available[i]->first--;
 
        }
 
    }
 
    return true;
 
}
 

	
 
int main() {
 
    // Generate a random degree sequence
 
    std::mt19937 rng(std::random_device{}());
 

	
 
    // Goal:
 
    // Degrees follow a power-law distribution with some parameter tau
 
    // Expect:  #tri = const * n^{ something }
 
    // The goal is to find the 'something' by finding the number of triangles
 
    // for different values of n and tau
 
    float tauValues[] = {2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f};
 

	
 
    Graph g;
 

	
 
    std::ofstream outfile("graphdata.m");
 
    outfile << '{';
 
    bool outputComma = false;
 

	
 
    for (int numVertices = 200; numVertices <= 1000; numVertices += 100) {
 
        for (float tau : tauValues) {
 

	
 
            DegreeSequence ds(numVertices);
 
            powerlaw_distribution degDist(tau, 1, numVertices);
 
            //std::poisson_distribution<> degDist(12);
 

	
 
            // For a single n,tau take samples over several instances of
 
            // the degree distribution
 
            for (int degreeSample = 0; degreeSample < 500; ++degreeSample) {
 
                // Generate a graph
 
                // might require multiple tries
 
                for (int i = 1; ; ++i) {
 
                    std::generate(ds.begin(), ds.end(),
 
                                  [&degDist, &rng] { return degDist(rng); });
 
                    if (g.createFromDegreeSequence(ds))
 
                    // First make the sum even
 
                    unsigned int sum = std::accumulate(ds.begin(), ds.end(), 0);
 
                    if (sum % 2) {
 
                        continue;
 
                        // Can we do this: ??
 
                        ds.back()++;
 
                    }
 

	
 
                    // Option 1:
 
                    if (g.createFromDegreeSequence(ds)) {
 
                        // Test option 2:
 
                        //int good = 0;
 
                        //for (int i = 0; i < 100; ++i) {
 
                        //    if (greedyConfigurationModel(ds, g, rng))
 
                        //        ++good;
 
                        //}
 
                        //std::cerr << "Greedy configuration model success rate: " << good << "%\n";
 

	
 
                        //g.createFromDegreeSequence(ds);
 
                        break;
 
                    }
 

	
 
                    // When 10 tries have not worked, output a warning
 
                    if (i % 10 == 0) {
 
                        std::cerr << "Warning: could not create graph from "
 
                                     "degree sequence. Trying again...\n";
 
                    }
 
                }
 

	
 
                SwitchChain chain;
 
                if (!chain.initialize(g)) {
 
                    std::cerr << "Could not initialize Markov chain.\n";
 
                    return 1;
 
                }
 

	
 
                std::cout << "Running n = " << numVertices << ", tau = " << tau
 
                          << ". \t" << std::flush;
 

	
 
                int mixingTime = (32.0f - 26.0f*(tau - 2.0f)) * numVertices; //40000;
 
                constexpr int measurements = 50;
 
                constexpr int measureSkip =
 
                    200; // Take a sample every ... steps
 
                int movesDone = 0;
 

	
 
                int triangles[measurements];
 

	
showgraphs.m
Show inline comments
 
(* ::Package:: *)
 

	
 
Needs["ErrorBarPlots`"]
 

	
 

	
 
(* ::Section:: *)
 
(*TODO*)
 

	
 

	
 
(* ::Text:: *)
 
(*- Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*)
 
(**)
 
(*- Use different starting point for switch chain that is closer to uniform:*)
 
(*   Do configuration model, starting with the vertex with highest degree and keeping track of a "forbidden list" meaning dont pair something that is not allowed*)
 
(*   (a) How close is this to uniform ? At least w.r.t. the measure of #triangles*)
 
(*   (b) How often does this procedure work/fail. Might still be faster to do switchings from Erdos-Gallai.*)
 
(**)
 
(*- Count #moves that result in +-k triangles (one move could create many triangles at once!)*)
 
(**)
 
(*- Improve runtime*)
 
(*   (a) Better direct triangle counting? (I doubt it)*)
 
(*   (b) Better triangle counting by only keeping track of CHANGES in #triangles*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Done*)
 

	
 

	
 
(* ::Text:: *)
 
(*- Do a single very long run: nothing weird seems to happen with the triangle counts. Tried 10 million steps.*)
 
(**)
 
(*- Compute  Sum over i<j<k  of  (1-Exp[- d_i d_j / (2E)]) * (1 - Exp[-d_j d_k / (2E)]) * (1 - Exp[-d_k d_i / (2E)]) .*)
 
(*  Computing the f(i,j) = (1-Exp[..]) terms is fine, but then computing Sum[ f(i,j) f(j,k) f(i,k) ) ] over n^3 entries is very slow.*)
 
(*  *)
 
(*  - Improve runtime*)
 
(*   (a) Don't remove/add edges from the std::vector. Simply replace them. Done, is way faster for large n.*)
 
(*   (b) Do not choose the three permutations with 1/3 probability: choose the "staying" one with zero probability. Should still be a valid switch chain?*)
 
(*   *)
 
(*   - Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*)
 
(*     Done. Seems to be something like  (1/2)(32-26(tau-2))n  so we run it for that time without the factor (1/2).*)
 
(*  *)
 
(*  *)
 

	
 

	
 
(* ::Section:: *)
 
(*Visualize graphs*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"graphdata.m"];
 

	
 

	
 
ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}]
 

	
 

	
 
gs=Map[Graph[#,GraphLayout->"CircularEmbedding"]&,gsraw[[1]]];
 
gs2=Map[Graph[#,GraphLayout->Automatic]&,gsraw[[1]]];
 

	
 

	
 
Grid[Partition[gs,10],Frame->All]
 

	
 

	
 
(* ::Section:: *)
 
(*Plot triangle counts*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Data import and data merge*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata2.m"];
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata.m"];
 
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 

	
 

	
 
newData=Import[NotebookDirectory[]<>"data/graphdata_3.m"];
 
mergedData=Import[NotebookDirectory[]<>"data/graphdata_merged.m"];
 
Export[NotebookDirectory[]<>"data/graphdata_merged_new.m",Join[mergedData,newData]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot empirical distribution of maximum degree*)
 

	
 

	
 
maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw];
 
maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}];
 
(* maxDegrees[[ tau index, n index, run index,  ntau or dmax ]] *)
 

	
 

	
 
Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[2,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[3,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
0 comments (0 inline, 0 general)