Changeset - 6218fdc51593
[Not reviewed]
0 3 0
Tom Bannink - 8 years ago 2017-03-31 17:21:59
tom.bannink@cwi.nl
Add greedy configuration model as initial graph
3 files changed with 83 insertions and 4 deletions:
0 comments (0 inline, 0 general)
cpp/graph.hpp
Show inline comments
 
@@ -15,96 +15,97 @@ class Edge {
 
class StoredEdge {
 
  public:
 
    Edge e;
 
    // indices into adjacency lists
 
    // adj[u][u2vindex] = v;
 
    // adj[v][v2uindex] = u;
 
    unsigned int u2vindex, v2uindex;
 
};
 

	
 
class DiDegree {
 
  public:
 
    unsigned int in;
 
    unsigned int out;
 
};
 

	
 
typedef std::vector<unsigned int> DegreeSequence;
 
typedef std::vector<DiDegree> DiDegreeSequence;
 

	
 
class Graph {
 
  public:
 
    Graph() {}
 

	
 
    Graph(unsigned int n) { reset(n); }
 

	
 
    ~Graph() {}
 

	
 
    // Clears any previous edges and create
 
    // an empty graph on n vertices
 
    void reset(unsigned int n) {
 
        edges.clear();
 
        adj.resize(n);
 
        for (auto &v : adj)
 
            v.clear();
 
        badj.resize(n);
 
        for (auto &v : badj) {
 
            v.resize(n);
 
            v.assign(n, false);
 
        }
 
    }
 

	
 
    unsigned int edgeCount() const { return edges.size(); }
 

	
 
    const Edge &getEdge(unsigned int i) const { return edges[i].e; }
 

	
 
    // When the degree sequence is not graphics, the Graph can be
 
    // in any state, it is not neccesarily empty
 
    bool createFromDegreeSequence(const DegreeSequence &d) {
 
        // Havel-Hakimi algorithm
 
        // Based on Erdos-Gallai theorem
 

	
 
        unsigned int n = d.size();
 

	
 
        // degree, vertex index
 
        std::vector<std::pair<unsigned int, unsigned int>> degrees(n);
 
        for (unsigned int i = 0; i < n; ++i) {
 
            degrees[i].first = d[i];
 
            degrees[i].second = i;
 
        }
 

	
 
        // Clear the graph
 
        reset(n);
 

	
 
        while (!degrees.empty()) {
 
            std::sort(degrees.begin(), degrees.end());
 
            // Highest degree is at back of the vector
 
            // Take it out
 
            unsigned int degree = degrees.back().first;
 
            unsigned int u = degrees.back().second;
 
            degrees.pop_back();
 
            if (degree > degrees.size()) {
 
                return false;
 
            }
 
            // Now loop over the last 'degree' entries of degrees
 
            auto rit = degrees.rbegin();
 
            for (unsigned int i = 0; i < degree; ++i) {
 
                if (rit->first == 0 || !addEdge({u, rit->second})) {
 
                    return false;
 
                }
 
                rit->first--;
 
                ++rit;
 
            }
 
        }
 
        return true;
 
    }
 

	
 
    DegreeSequence getDegreeSequence() const {
 
        DegreeSequence d(adj.size());
 
        std::transform(adj.begin(), adj.end(), d.begin(),
 
                       [](const auto &u) { return u.size(); });
 
        return d;
 
    }
 

	
 
    // Assumes valid vertex indices
 
    bool hasEdge(const Edge& e_) const {
 
        return badj[e_.u][e_.v];
 
        //Edge e;
 
        //if (adj[e_.u].size() <= adj[e_.v].size()) {
cpp/switchchain.cpp
Show inline comments
 
@@ -18,130 +18,207 @@ class SwitchChain {
 
  public:
 
    SwitchChain()
 
        : mt(std::random_device{}()), permutationDistribution(0.5)
 
    // permutationDistribution(0, 2)
 
    {
 
        // random_device uses hardware entropy if available
 
        // std::random_device rd;
 
        // mt.seed(rd());
 
    }
 
    ~SwitchChain() {}
 

	
 
    bool initialize(const Graph& gstart) {
 
        if (gstart.edgeCount() == 0)
 
            return false;
 
        g = gstart;
 
        edgeDistribution.param(
 
            std::uniform_int_distribution<>::param_type(0, g.edgeCount() - 1));
 
        return true;
 
    }
 

	
 
    bool doMove() {
 
        int e1index, e2index;
 
        int timeout = 0;
 
        // Keep regenerating while conflicting edges
 
        do {
 
            e1index = edgeDistribution(mt);
 
            e2index = edgeDistribution(mt);
 
            if (++timeout % 100 == 0) {
 
                std::cerr << "Warning: sampled " << timeout
 
                          << " random edges but they keep conflicting.\n";
 
            }
 
        } while (edgeConflicts(g.getEdge(e1index), g.getEdge(e2index)));
 

	
 
        // Consider one of the three possible permutations
 
        // 1) e1.u - e1.v and e2.u - e2.v (original)
 
        // 2) e1.u - e2.u and e1.v - e2.v
 
        // 3) e1.u - e2.v and e1.v - e2.u
 
        bool switchType = permutationDistribution(mt);
 
        return g.exchangeEdges(e1index, e2index, switchType);
 
    }
 

	
 
    Graph g;
 
    std::mt19937 mt;
 
    std::uniform_int_distribution<> edgeDistribution;
 
    //std::uniform_int_distribution<> permutationDistribution;
 
    std::bernoulli_distribution permutationDistribution;
 
};
 

	
 
bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, auto& rng) {
 
    // Same as Havel-Hakimi but instead of pairing up with the highest ones
 
    // that remain, simply pair up with random ones
 
    unsigned int n = ds.size();
 

	
 
    // degree, vertex index
 
    std::vector<std::pair<unsigned int, unsigned int>> degrees(n);
 
    for (unsigned int i = 0; i < n; ++i) {
 
        degrees[i].first = ds[i];
 
        degrees[i].second = i;
 
    }
 
    std::vector<decltype(degrees.begin())> available;
 
    available.reserve(n);
 

	
 
    // Clear the graph
 
    g.reset(n);
 

	
 
    std::uniform_int_distribution<> distr(1, n - 1);
 

	
 
    while (!degrees.empty()) {
 
        // Get the highest degree:
 
        unsigned int dmax = 0;
 
        unsigned int u = 0;
 
        auto maxIter = degrees.begin();
 
        for (auto iter = degrees.begin(); iter != degrees.end(); ++iter) {
 
            if (iter->first >= dmax) {
 
                dmax = iter->first;
 
                u = iter->second;
 
                maxIter = iter;
 
            }
 
        }
 
        // Take the highest degree out of the vector
 
        degrees.erase(maxIter);
 

	
 
        available.clear();
 
        for (auto iter = degrees.begin(); iter != degrees.end(); ++iter) {
 
            if (iter->first)
 
                available.push_back(iter);
 
        }
 

	
 
        if (dmax > available.size())
 
            return false;
 

	
 
        std::shuffle(available.begin(), available.end(), rng);
 

	
 
        // Now assign randomly to the remaining vertices
 
        // Pick 'cmax' distinct integers between 1 and degrees.size()
 
        for (unsigned int i = 0; i < dmax; ++i) {
 
            if (!g.addEdge({u,available[i]->second})) {
 
                std::cerr << "ERROR. Could not add edge in greedy configuration model.\n";
 
            }
 
            available[i]->first--;
 
        }
 
    }
 
    return true;
 
}
 

	
 
int main() {
 
    // Generate a random degree sequence
 
    std::mt19937 rng(std::random_device{}());
 

	
 
    // Goal:
 
    // Degrees follow a power-law distribution with some parameter tau
 
    // Expect:  #tri = const * n^{ something }
 
    // The goal is to find the 'something' by finding the number of triangles
 
    // for different values of n and tau
 
    float tauValues[] = {2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f};
 

	
 
    Graph g;
 

	
 
    std::ofstream outfile("graphdata.m");
 
    outfile << '{';
 
    bool outputComma = false;
 

	
 
    for (int numVertices = 200; numVertices <= 1000; numVertices += 100) {
 
        for (float tau : tauValues) {
 

	
 
            DegreeSequence ds(numVertices);
 
            powerlaw_distribution degDist(tau, 1, numVertices);
 
            //std::poisson_distribution<> degDist(12);
 

	
 
            // For a single n,tau take samples over several instances of
 
            // the degree distribution
 
            for (int degreeSample = 0; degreeSample < 500; ++degreeSample) {
 
                // Generate a graph
 
                // might require multiple tries
 
                for (int i = 1; ; ++i) {
 
                    std::generate(ds.begin(), ds.end(),
 
                                  [&degDist, &rng] { return degDist(rng); });
 
                    if (g.createFromDegreeSequence(ds))
 
                    // First make the sum even
 
                    unsigned int sum = std::accumulate(ds.begin(), ds.end(), 0);
 
                    if (sum % 2) {
 
                        continue;
 
                        // Can we do this: ??
 
                        ds.back()++;
 
                    }
 

	
 
                    // Option 1:
 
                    if (g.createFromDegreeSequence(ds)) {
 
                        // Test option 2:
 
                        //int good = 0;
 
                        //for (int i = 0; i < 100; ++i) {
 
                        //    if (greedyConfigurationModel(ds, g, rng))
 
                        //        ++good;
 
                        //}
 
                        //std::cerr << "Greedy configuration model success rate: " << good << "%\n";
 

	
 
                        //g.createFromDegreeSequence(ds);
 
                        break;
 
                    }
 

	
 
                    // When 10 tries have not worked, output a warning
 
                    if (i % 10 == 0) {
 
                        std::cerr << "Warning: could not create graph from "
 
                                     "degree sequence. Trying again...\n";
 
                    }
 
                }
 

	
 
                SwitchChain chain;
 
                if (!chain.initialize(g)) {
 
                    std::cerr << "Could not initialize Markov chain.\n";
 
                    return 1;
 
                }
 

	
 
                std::cout << "Running n = " << numVertices << ", tau = " << tau
 
                          << ". \t" << std::flush;
 

	
 
                int mixingTime = (32.0f - 26.0f*(tau - 2.0f)) * numVertices; //40000;
 
                constexpr int measurements = 50;
 
                constexpr int measureSkip =
 
                    200; // Take a sample every ... steps
 
                int movesDone = 0;
 

	
 
                int triangles[measurements];
 

	
 
                for (int i = 0; i < mixingTime; ++i) {
 
                    if (chain.doMove())
 
                        ++movesDone;
 
                }
 
                for (int i = 0; i < measurements; ++i) {
 
                    for (int j = 0; j < measureSkip; ++j)
 
                        if (chain.doMove())
 
                            ++movesDone;
 
                    triangles[i] = chain.g.countTriangles();
 
                }
 

	
 
                std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
 
                          << " moves succeeded ("
 
                          << 100.0f * float(movesDone) /
 
                                 float(mixingTime + measurements * measureSkip)
 
                          << "%)." << std::endl;
 

	
 
                if (outputComma)
 
                    outfile << ',';
 
                outputComma = true;
 

	
 
                std::sort(ds.begin(), ds.end());
 
                outfile << '{' << '{' << numVertices << ',' << tau << '}';
 
                outfile << ',' << triangles << ',' << ds << '}' << std::flush;
showgraphs.m
Show inline comments
 
(* ::Package:: *)
 

	
 
Needs["ErrorBarPlots`"]
 

	
 

	
 
(* ::Section:: *)
 
(*TODO*)
 

	
 

	
 
(* ::Text:: *)
 
(*- Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*)
 
(**)
 
(*- Use different starting point for switch chain that is closer to uniform:*)
 
(*   Do configuration model, starting with the vertex with highest degree and keeping track of a "forbidden list" meaning dont pair something that is not allowed*)
 
(*   (a) How close is this to uniform ? At least w.r.t. the measure of #triangles*)
 
(*   (b) How often does this procedure work/fail. Might still be faster to do switchings from Erdos-Gallai.*)
 
(**)
 
(*- Count #moves that result in +-k triangles (one move could create many triangles at once!)*)
 
(**)
 
(*- Improve runtime*)
 
(*   (a) Better direct triangle counting? (I doubt it)*)
 
(*   (b) Better triangle counting by only keeping track of CHANGES in #triangles*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Done*)
 

	
 

	
 
(* ::Text:: *)
 
(*- Do a single very long run: nothing weird seems to happen with the triangle counts. Tried 10 million steps.*)
 
(**)
 
(*- Compute  Sum over i<j<k  of  (1-Exp[- d_i d_j / (2E)]) * (1 - Exp[-d_j d_k / (2E)]) * (1 - Exp[-d_k d_i / (2E)]) .*)
 
(*  Computing the f(i,j) = (1-Exp[..]) terms is fine, but then computing Sum[ f(i,j) f(j,k) f(i,k) ) ] over n^3 entries is very slow.*)
 
(*  *)
 
(*  - Improve runtime*)
 
(*   (a) Don't remove/add edges from the std::vector. Simply replace them. Done, is way faster for large n.*)
 
(*   (b) Do not choose the three permutations with 1/3 probability: choose the "staying" one with zero probability. Should still be a valid switch chain?*)
 
(*   *)
 
(*   - Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*)
 
(*     Done. Seems to be something like  (1/2)(32-26(tau-2))n  so we run it for that time without the factor (1/2).*)
 
(*  *)
 
(*  *)
 

	
 

	
 
(* ::Section:: *)
 
(*Visualize graphs*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"graphdata.m"];
 

	
 

	
 
ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}]
 

	
 

	
 
gs=Map[Graph[#,GraphLayout->"CircularEmbedding"]&,gsraw[[1]]];
 
gs2=Map[Graph[#,GraphLayout->Automatic]&,gsraw[[1]]];
 

	
 

	
 
Grid[Partition[gs,10],Frame->All]
 

	
 

	
 
(* ::Section:: *)
 
(*Plot triangle counts*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Data import and data merge*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata2.m"];
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata.m"];
 
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 

	
 

	
 
newData=Import[NotebookDirectory[]<>"data/graphdata_3.m"];
 
mergedData=Import[NotebookDirectory[]<>"data/graphdata_merged.m"];
 
Export[NotebookDirectory[]<>"data/graphdata_merged_new.m",Join[mergedData,newData]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot empirical distribution of maximum degree*)
 

	
 

	
 
maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw];
 
maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}];
 
(* maxDegrees[[ tau index, n index, run index,  ntau or dmax ]] *)
 

	
 

	
 
Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[2,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[3,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot #trianges vs some degree-sequence-property*)
 

	
 

	
 
getProperty[ds1_]:=Module[{ds,n=Length[ds1],tmp=ConstantArray[0,{Length[ds1],Length[ds1]}]},
 
ds=N[ds1/Sqrt[N[Total[ds1]]]]; (* scale degrees by 1/Sqrt[total] *)
 
(* The next table contains unneeded entries, but its faster to have a square table for the sum *)
 
tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n},{j,1,n}];
 
Sum[tmp[[i,j]]*tmp[[j,k]]*tmp[[i,k]],{i,3,n},{j,2,i-1},{k,1,j-1}] (* somehow i>j>k is about 60x faster than doing i<j<k !!! *)
 
(* This sparser table is slower
 
tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n-1},{j,i+1,n}];
 
(* tmp[[a,b]] is now with  ds[[a]]*ds[[a+b]] *)
 
Sum[tmp[[i,j-i]]*tmp[[j,k-j]]*tmp[[i,k-i]],{i,1,n-2},{j,i+1,n-1},{k,j+1,n}]
 
*)
 
];
 

	
 

	
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
avgAndProp=ParallelMap[{getProperty[#[[3]]],Mean[#[[2,1;;-1]]]}&,gdata[[2,2,1;;100]]];
 

	
 

	
 
Show[ListPlot[avgAndProp,AxesOrigin->{0,0},AxesLabel->{"degree-sequence-property","<#triangles>"},AspectRatio->1],Plot[x,{x,1,1000}]]
0 comments (0 inline, 0 general)