Changeset - 0290e63ba024
[Not reviewed]
0 3 0
Tom Bannink - 8 years ago 2017-03-06 16:58:42
tombannink@gmail.com
Add start of faster cpp code
3 files changed with 50 insertions and 32 deletions:
0 comments (0 inline, 0 general)
cpp/graph.hpp
Show inline comments
 
@@ -43,97 +43,104 @@ class Graph {
 
        // Havel-Hakimi algorithm
 

	
 
        unsigned int n = d.size();
 

	
 
        // degree, vertex index
 
        std::vector<std::pair<unsigned int, unsigned int>> degrees(n);
 
        for (unsigned int i = 0; i < n; ++i) {
 
            degrees[i].first = d[i];
 
            degrees[i].second = i;
 
        }
 

	
 
        edges.clear();
 
        adj.resize(n);
 
        while (!degrees.empty()) {
 
            std::sort(degrees.begin(), degrees.end());
 
            // Highest degree is at back of the vector
 
            // Take it out
 
            unsigned int degree = degrees.back().first;
 
            unsigned int u = degrees.back().second;
 
            degrees.pop_back();
 
            if (degree > degrees.size()) {
 
                edges.clear();
 
                adj.clear();
 
                return false;
 
            }
 
            // Now loop over the last 'degree' entries of degrees
 
            auto rit = degrees.rbegin();
 
            for (unsigned int i = 0; i < degree; ++i) {
 
                if (rit->first == 0 || !addEdge({u, rit->second})) {
 
                    edges.clear();
 
                    adj.clear();
 
                    return false;
 
                }
 
                rit->first--;
 
                ++rit;
 
            }
 
        }
 
        return true;
 
    }
 

	
 
    DegreeSequence getDegreeSequence() const {
 
        DegreeSequence d(adj.size());
 
        std::transform(adj.begin(), adj.end(), d.begin(),
 
                       [](const auto &u) { return u.size(); });
 
        return d;
 
    }
 

	
 
    // Assumes valid vertex indices
 
    bool hasEdge(const Edge &e) const {
 
    bool hasEdge(const Edge& e_) const {
 
        Edge e;
 
        if (adj[e_.u].size() <= adj[e_.v].size()) {
 
            e = e_;
 
        } else {
 
            e.u = e_.v;
 
            e.v = e_.u;
 
        }
 
        for (unsigned int v : adj[e.u]) {
 
            if (v == e.v)
 
                return true;
 
        }
 
        return false;
 
    }
 

	
 
    bool addEdge(const Edge &e) {
 
        if (e.u >= adj.size() || e.v >= adj.size())
 
            return false;
 
        if (hasEdge(e))
 
            return false;
 
        edges.push_back(e);
 
        adj[e.u].push_back(e.v);
 
        adj[e.v].push_back(e.u);
 
        return true;
 
    }
 

	
 
    // There are two possible edge exchanges
 
    // switchType indicates which one is desired
 
    // Returns false if the switch is not possible
 
    bool exchangeEdges(const Edge &e1, const Edge &e2, bool switchType) {
 
        // The new edges configuration is one of these two
 
        // A) e1.u - e2.u and e1.v - e2.v
 
        // B) e1.u - e2.v and e1.v - e2.u
 
        // First check if the move is possible
 
        if (switchType) {
 
            if (hasEdge({e1.u, e2.u}) || hasEdge({e1.v, e2.v}))
 
                return false; // conflicting edges
 
        } else {
 
            if (hasEdge({e1.u, e2.v}) || hasEdge({e1.v, e2.u}))
 
                return false; // conflicting edges
 
        }
 

	
 
        // Find the edges in the adjacency lists
 
        unsigned int i1, j1, i2, j2;
 
        for (i1 = 0; i1 < adj[e1.u].size(); ++i1) {
 
            if (adj[e1.u][i1] == e1.v)
 
                break;
 
        }
 
        for (j1 = 0; j1 < adj[e1.v].size(); ++j1) {
 
            if (adj[e1.v][j1] == e1.u)
 
                break;
 
        }
 
        for (i2 = 0; i2 < adj[e2.u].size(); ++i2) {
 
            if (adj[e2.u][i2] == e2.v)
 
                break;
 
        }
cpp/switchchain.cpp
Show inline comments
 
#include "exports.hpp"
 
#include "graph.hpp"
 
#include "powerlaw.hpp"
 
#include <algorithm>
 
#include <fstream>
 
#include <iostream>
 
#include <numeric>
 
#include <random>
 
#include <vector>
 

	
 
// Its assumed that u,v are distinct.
 
// Check if all four vertices are distinct
 
bool edgeConflicts(const Edge& e1, const Edge& e2) {
 
    return (e1.u == e2.u || e1.u == e2.v || e1.v == e2.u || e1.v == e2.v);
 
}
 

	
 
class SwitchChain {
 
  public:
 
    SwitchChain() : mt(std::random_device{}()), permutationDistribution(0, 2) {
 
    SwitchChain()
 
        : mt(std::random_device{}()), permutationDistribution(0.5)
 
    // permutationDistribution(0, 2)
 
    {
 
        // random_device uses hardware entropy if available
 
        // std::random_device rd;
 
        // mt.seed(rd());
 
    }
 
    ~SwitchChain() {}
 

	
 
    bool initialize(const Graph& gstart) {
 
        if (gstart.edgeCount() == 0)
 
            return false;
 
        g = gstart;
 
        edgeDistribution.param(
 
            std::uniform_int_distribution<>::param_type(0, g.edgeCount() - 1));
 
        return true;
 
    }
 

	
 
    bool doMove() {
 
        Edge e1 = g.getEdge(edgeDistribution(mt));
 
        Edge e2 = g.getEdge(edgeDistribution(mt));
 
        // Keep regenerating while conflicting edges
 
        Edge e1, e2;
 
        int e1index, e2index;
 
        int timeout = 0;
 
        while (edgeConflicts(e1, e2)) {
 
            e1 = g.getEdge(edgeDistribution(mt));
 
            e2 = g.getEdge(edgeDistribution(mt));
 
        // Keep regenerating while conflicting edges
 
        do {
 
            e1index = edgeDistribution(mt);
 
            e2index = edgeDistribution(mt);
 
            e1 = g.getEdge(e1index);
 
            e2 = g.getEdge(e2index);
 
            ++timeout;
 
            if (timeout % 100 == 0) {
 
                std::cerr << "Warning: sampled " << timeout
 
                          << " random edges but they keep conflicting.\n";
 
            }
 
        }
 
        } while (edgeConflicts(e1, e2));
 

	
 
        // Consider one of the three possible permutations
 
        // 1) e1.u - e1.v and e2.u - e2.v (original)
 
        // 2) e1.u - e2.u and e1.v - e2.v
 
        // 3) e1.u - e2.v and e1.v - e2.u
 

	
 
        // Note that it might be that these new edges already exist
 
        // in which case we also reject the move
 
        // This is checked in exchangeEdges
 
        // in which case we reject the move
 
        bool switchType = permutationDistribution(mt);
 
        if (switchType) {
 
            if (g.hasEdge({e1.u, e2.u}) || g.hasEdge({e1.v, e2.v}))
 
                return false; // conflicting edges
 
        } else {
 
            if (g.hasEdge({e1.u, e2.v}) || g.hasEdge({e1.v, e2.u}))
 
                return false; // conflicting edges
 
        }
 

	
 
        int perm = permutationDistribution(mt);
 
        if (perm == 0) // Original permutation
 
            return false;
 
        return g.exchangeEdges(e1, e2, perm == 1);
 
        // TODO
 
        // rest of the switching process
 

	
 
        // int perm = permutationDistribution(mt);
 
        // if (perm == 0) // Original permutation
 
        //    return false;
 
        // return g.exchangeEdges(e1, e2, perm == 1);
 
        return g.exchangeEdges(e1, e2, switchType);
 
    }
 

	
 
    Graph g;
 
    std::mt19937 mt;
 
    std::uniform_int_distribution<> edgeDistribution;
 
    std::uniform_int_distribution<> permutationDistribution;
 
    //std::uniform_int_distribution<> permutationDistribution;
 
    std::bernoulli_distribution permutationDistribution;
 
};
 

	
 
int main() {
 
    // Generate a random degree sequence
 
    std::mt19937 rng(std::random_device{}());
 

	
 
    // Goal:
 
    // Degrees follow a power-law distribution with some parameter tau
 
    // Expect:  #tri = const * n^{ something }
 
    // The goal is to find the 'something' by finding the number of triangles
 
    // for different values of n and tau
 
    float tauValues[] = {2.2f, 2.35f, 2.5f, 2.65f, 2.8f};
 
    float tauValues[] = {2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f};
 

	
 
    Graph g;
 

	
 
    std::ofstream outfile("graphdata.m");
 
    outfile << '{';
 
    bool outputComma = false;
 

	
 
    for (int numVertices = 200; numVertices <= 1000; numVertices += 100) {
 
        for (float tau : tauValues) {
 

	
 
            DegreeSequence ds(numVertices);
 
            powerlaw_distribution degDist(tau, 1, numVertices);
 
            //std::poisson_distribution<> degDist(12);
 

	
 
            // For a single n,tau take samples over several instances of
 
            // the degree distribution
 
            for (int degreeSample = 0; degreeSample < 500; ++degreeSample) {
 
                // Generate a graph
 
                // might require multiple tries
 
                for (int i = 1; ; ++i) {
 
                    std::generate(ds.begin(), ds.end(),
 
                                  [&degDist, &rng] { return degDist(rng); });
 
                    if (g.createFromDegreeSequence(ds))
 
                        break;
 
                    // When 10 tries have not worked, output a warning
 
                    if (i % 10 == 0) {
 
                        std::cerr << "Warning: could not create graph from "
 
                                     "degree sequence. Trying again...\n";
 
                    }
 
                }
 

	
 
                SwitchChain chain;
 
                if (!chain.initialize(g)) {
 
                    std::cerr << "Could not initialize Markov chain.\n";
 
                    return 1;
 
                }
 

	
 
                std::cout << "Starting switch Markov chain with n = "
 
                          << numVertices << ", tau = " << tau << ". \t"
 
                          << std::flush;
 
                std::cout << "Running n = " << numVertices << ", tau = " << tau
 
                          << ". \t" << std::flush;
 

	
 
                constexpr int mixingTime = 30000;
 
                constexpr int mixingTime = 40000;
 
                constexpr int measureTime = 20000;
 
                constexpr int measureSkip =
 
                    200; // Take a sample every ... steps
 
                constexpr int measurements =
 
                    (measureTime - 1) / measureSkip + 1;
 
                int movesDone = 0;
 

	
 
                int triangles[measurements];
 

	
 
                for (int i = 0; i < mixingTime; ++i) {
 
                    if (chain.doMove())
 
                        ++movesDone;
 
                }
 
                for (int i = 0; i < measureTime; ++i) {
 
                    if (chain.doMove())
 
                        ++movesDone;
 
                    if (i % measureSkip == 0)
 
                        triangles[i / measureSkip] = chain.g.countTriangles();
 
                }
 

	
 
                std::cout << movesDone << '/' << mixingTime + measureTime
 
                          << " moves succeeded." << std::endl;
 

	
 
                if (outputComma)
 
                    outfile << ',';
 
                outputComma = true;
 

	
 
                std::sort(ds.begin(), ds.end());
 
                outfile << '{' << '{' << numVertices << ',' << tau << '}';
 
                outfile << ',' << triangles << ',' << ds << '}' << std::flush;
 
            }
 
        }
 
    }
 
    outfile << '}';
 
    return 0;
 
}
showgraphs.m
Show inline comments
 
(* ::Package:: *)
 

	
 
Needs["ErrorBarPlots`"]
 

	
 

	
 
(* ::Section:: *)
 
(*TODO*)
 

	
 

	
 
(* ::Text:: *)
 
(*- Use different starting point for switch chain that is closer to uniform:*)
 
(*   Do configuration model, starting with the vertex with highest degree and keeping track of a "forbidden list" meaning dont pair something that is not allowed*)
 
(*   (a) How close is this to uniform ? At least w.r.t. the measure of #triangles*)
 
(*   (b) How often does this procedure work/fail. Might still be faster to do switchings from Erdos-Gallai.*)
 
(**)
 
(*- Improve runtime*)
 
(*   (a) Don't remove/add edges from the std::vector. Simply replace them*)
 
(*   (b) Better direct triangle counting? (I doubt it)*)
 
(*   (b') Better triangle counting by only keeping track of CHANGES in #triangles*)
 
(*   (c) Do not choose the three permutations with 1/3 probability: choose the "staying" one with a very low probability. Should still be a valid switch chain?*)
 
(*   (c) Do not choose the three permutations with 1/3 probability: choose the "staying" one with zero probability. Should still be a valid switch chain?*)
 
(**)
 
(*- Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*)
 
(**)
 
(*- Count #moves that result in +-k triangles (one move could create many triangles at once!)*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Done*)
 

	
 

	
 
(* ::Text:: *)
 
(*- Do a single very long run: nothing weird seems to happen with the triangle counts. Tried 10 million steps.*)
 
(**)
 
(*- Compute  Sum over i<j<k  of  (1-Exp[- d_i d_j / (2E)]) * (1 - Exp[-d_j d_k / (2E)]) * (1 - Exp[-d_k d_i / (2E)]) .*)
 
(*  Computing the f(i,j) = (1-Exp[..]) terms is fine, but then computing Sum[ f(i,j) f(j,k) f(i,k) ) ] over n^3 entries is very slow.*)
 
(*  *)
 
(*  *)
 

	
 

	
 
(* ::Section:: *)
 
(*Visualize graphs*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"graphdata.m"];
 

	
 

	
 
ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}]
 

	
 

	
 
gs=Map[Graph[#,GraphLayout->"CircularEmbedding"]&,gsraw[[1]]];
 
gs2=Map[Graph[#,GraphLayout->Automatic]&,gsraw[[1]]];
 

	
 

	
 
Grid[Partition[gs,10],Frame->All]
 

	
 

	
 
(* ::Section:: *)
 
(*Plot triangle counts*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Data import and data merge*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/min_deg_two/graphdata_merged.m"];
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_merged.m"];
 
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 

	
 

	
 
newData=Import[NotebookDirectory[]<>"graphdata_tau_multi4.m"];
 
mergedData=Import[NotebookDirectory[]<>"graphdata_merged.m"];
 
Export[NotebookDirectory[]<>"graphdata_merged_new.m",Join[mergedData,newData]]
 
newData=Import[NotebookDirectory[]<>"data/graphdata_3.m"];
 
mergedData=Import[NotebookDirectory[]<>"data/graphdata_merged.m"];
 
Export[NotebookDirectory[]<>"data/graphdata_merged_new.m",Join[mergedData,newData]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot empirical distribution of maximum degree*)
 

	
 

	
 
maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw];
 
maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}];
 
(* maxDegrees[[ tau index, n index, run index,  ntau or dmax ]] *)
 

	
 

	
 
Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[2,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[3,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot #trianges vs some degree-sequence-property*)
 

	
 

	
 
getProperty[ds1_]:=Module[{ds,n=Length[ds1],tmp=ConstantArray[0,{Length[ds1],Length[ds1]}]},
 
ds=N[ds1/Sqrt[N[Total[ds1]]]]; (* scale degrees by 1/Sqrt[total] *)
 
(* The next table contains unneeded entries, but its faster to have a square table for the sum *)
 
tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n},{j,1,n}];
 
Sum[tmp[[i,j]]*tmp[[j,k]]*tmp[[i,k]],{i,3,n},{j,2,i-1},{k,1,j-1}] (* somehow i>j>k is about 60x faster than doing i<j<k !!! *)
 
(* This sparser table is slower
 
tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n-1},{j,i+1,n}];
 
(* tmp[[a,b]] is now with  ds[[a]]*ds[[a+b]] *)
 
Sum[tmp[[i,j-i]]*tmp[[j,k-j]]*tmp[[i,k-i]],{i,1,n-2},{j,i+1,n-1},{k,j+1,n}]
 
*)
 
];
 

	
 

	
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
avgAndProp=ParallelMap[{getProperty[#[[3]]],Mean[#[[2,1;;-1]]]}&,gdata[[2,4,1;;100]]];
 

	
 

	
 
Show[ListPlot[avgAndProp,AxesOrigin->{0,0},AxesLabel->{"degree-sequence-property","<#triangles>"},AspectRatio->1],Plot[x,{x,1,1000}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot triangle count over "time" in Markov chain instances*)
 

	
 

	
 
numPlots=20;
 
selectedData=gsraw[[-numPlots;;-1]];
 
minCount=Min[Map[Min[#[[2]]]&,selectedData]];
 
maxCount=Max[Map[Max[#[[2]]]&,selectedData]];
 
maxTime=Max[Map[Length[#[[2]]]&,selectedData]];
 
skipPts=Max[1,Round[maxTime/100]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *)
 
coarseData=Map[#[[2,1;;-1;;skipPts]]&,selectedData];
 
labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData];
 
ListPlot[coarseData,Joined->True,PlotRange->{minCount,maxCount},DataRange->{0,maxTime},PlotLegends->labels]
 
(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot average #triangles vs n*)
 

	
 

	
 
averages=Map[{#[[1]],Mean[#[[2,1;;-1]]]}&,gsraw];
 
(* averages=SortBy[averages,#[[1,1]]&]; (* Sort by n *) *)
 
averagesGrouped=GatherBy[averages,{#[[1,2]]&,#[[1,1]]&}]; (* Split by n,tau *)
 
(* averagesGrouped[[ tau index, n index, run index , {ntau, avgtri} ]] *)
 
nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]];
 
taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]];
 
averagesErrorBars=Map[
 
{{#[[1,1,1]],Mean[#[[All,2]]]},
 
ErrorBar[StandardDeviation[#[[All,2]]]/Sqrt[Length[#]]]
 
}&,averagesGrouped,{2}];
 

	
 

	
 
ErrorListPlot[averagesErrorBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels]
 

	
 

	
 
ListLogLogPlot[averagesErrorBars[[All,All,1]],Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels]
 

	
 

	
 
averagesErrorBars=Map[
 
{{Log[#[[1,1,1]]],Log[Mean[#[[All,2]]]]},
 
ErrorBar[ (StandardDeviation[#[[All,2]]] )]
 
}&,averagesGrouped,{2}];
 

	
 

	
 
(* ::Subsection:: *)
 
(*Fitting the log-log-plot*)
 

	
 

	
 
loglogdata=Log[averagesErrorBars[[All,All,1]]];
 
fits=Map[Fit[#,{1,logn},logn]&,loglogdata];
 

	
 

	
 
Show[ListLogLogPlot[averagesErrorBars[[All,All,1]],PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels],Plot[fits,{logn,1,2000}]]
 

	
 

	
 
tauValues=averagesGrouped[[All,1,1,1,2]];
 
exponents=Transpose[{tauValues,fits[[All,2,1]]}];
 
Show[ListPlot[exponents,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","triangle-law-exponent"}],Plot[3/2(3-tau),{tau,2,3}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot #triangles distribution for specific (n,tau)*)
 

	
 

	
 
plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.3<tau<2.7},{{0,800},tau>2.7}},Automatic]
 
histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}];
 
nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]];
 
taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]];
 

	
 

	
 
(* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *)
 
TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}]
0 comments (0 inline, 0 general)