From bfca8e3039c5271e7368f73c3730c801b5f101ce 2017-02-27 15:35:32 From: Tom Bannink Date: 2017-02-27 15:35:32 Subject: [PATCH] Add powerlaw sampler and plots --- diff --git a/cpp/exports.hpp b/cpp/exports.hpp index c6128061badfc045b382378814af6b0c2921cf88..58a682ccb6d467bfddd064669fd7f762dbffb095 100644 --- a/cpp/exports.hpp +++ b/cpp/exports.hpp @@ -1,5 +1,6 @@ #pragma once #include +#include #include "graph.hpp" std::ostream &operator<<(std::ostream &s, const Edge &e) { @@ -22,3 +23,36 @@ std::ostream &operator<<(std::ostream &s, const Graph &g) { return s; } +template +std::ostream& operator<<(std::ostream& s, const std::vector& v) { + if (v.empty()) { + s << '{' << '}'; + return s; + } + auto iter = v.begin(); + s << '{' << *iter++; + for (; iter != v.end(); ++iter) { + s << ',' << *iter; + } + s << '}'; + return s; +} + +// Mathematica style export for arrays +// SFINAE to disable char arrays since it will mess up +// things of the form s << "some string"; +template < + typename T, std::size_t N, + typename = typename std::enable_if::value, T>::type> +std::ostream& operator<<(std::ostream& s, const T (&v)[N]) { + if (N == 0) { + s << '{' << '}'; + return s; + } + s << '{' << v[0]; + for (size_t i = 1; i < N; ++i) + s << ',' << v[i]; + s << '}'; + return s; +} + diff --git a/cpp/graph.hpp b/cpp/graph.hpp index f17e7a43d51d97356827405ba26fbfd48d1ebf2b..feac06b5febbcda3f9b3e78018181e7022f55e9a 100644 --- a/cpp/graph.hpp +++ b/cpp/graph.hpp @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include diff --git a/cpp/powerlaw.hpp b/cpp/powerlaw.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5dd0194ce349f77bf3f82f642f43d0e0bd2656eb --- /dev/null +++ b/cpp/powerlaw.hpp @@ -0,0 +1,36 @@ +#pragma once +#include +#include +#include + +// Discrete powerlaw distribution +// obtained by rounding a continuous powerlaw distribution +class powerlaw_distribution { + public: + // xmin and xmax are inclusive + powerlaw_distribution(float a, unsigned int _xmin, unsigned int _xmax) + : alpha(a), xmin(_xmin), xmax(_xmax) { + assert(xmin > 0); + assert(xmin <= xmax); + assert(alpha > 1.0f); + _exponent = -1.0f / (alpha - 1.0f); + } + ~powerlaw_distribution() {} + + template + unsigned int operator()(Generator& rng) const { + unsigned int x = xmax + 1; + while (x > xmax) { + // Generate uniform value in [0,1) + double r = std::generate_canonical< + double, std::numeric_limits::digits>(rng); + x = std::round(std::pow(r, _exponent) * xmin); + } + return x; + } + + private: + float alpha; + double _exponent; + unsigned int xmin, xmax; +}; diff --git a/cpp/showgraphs.m b/cpp/showgraphs.m index f9efe4e7088cfebf80fddea00414d73ed856b55b..beec01993c5f52bbb41157fe018ab21c4d0297b0 100644 --- a/cpp/showgraphs.m +++ b/cpp/showgraphs.m @@ -24,18 +24,73 @@ Grid[Partition[gs,10],Frame->All] (*Plot triangle counts*) -gsraw=Import[NotebookDirectory[]<>"graphdata.m"]; +(* ::Subsection:: *) +(*Data import and data merge*) + + +gsraw=Import[NotebookDirectory[]<>"graphdata_merged.m"]; + + +newData=Import[NotebookDirectory[]<>"graphdata_tau_multi3.m"]; +mergedData=Import[NotebookDirectory[]<>"graphdata_merged.m"]; +Export[NotebookDirectory[]<>"graphdata_merged_new.m",Join[mergedData,newData]] + + +(* ::Subsection:: *) +(*Plot empirical distribution of maximum degree*) + + +maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw]; +maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}]; + + +Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,1100},{0,200}},AxesLabel->{"d_max","frequency"}] +Histogram[maxDegrees[[2,-1,All,2]],PlotRange->{{0,1100},{0,200}},AxesLabel->{"d_max","frequency"}] +Histogram[maxDegrees[[3,-1,All,2]],PlotRange->{{0,1100},{0,200}},AxesLabel->{"d_max","frequency"}] -Map[ListPlot[#[[2]],Joined->True,PlotRange->All]&,gsraw[[1;;3]]] +(* ::Subsection:: *) +(*Plot triangle count over "time" in Markov chain instances*) -averages=Map[{#[[1]],Mean[#[[2,-1000;;-1]]]}&,gsraw]; -averagesGrouped=GatherBy[averages,#[[1]]&]; +numPlots=10; +minCount=Min[Map[Min[#[[2]]]&,gsraw[[1;;numPlots]]]]; +maxCount=Max[Map[Max[#[[2]]]&,gsraw[[1;;numPlots]]]]; +maxTime=Max[Map[Length[#[[2]]]&,gsraw[[1;;numPlots]]]]; +skipPts=Round[maxTime/100]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *) +coarseData=Map[#[[2,1;;-1;;skipPts]]&,gsraw[[1;;numPlots]]]; +labels=Map["{n,tau} = "<>ToString[#[[1]]]&,gsraw[[1;;numPlots]]]; +ListPlot[coarseData,Joined->True,PlotRange->{minCount,maxCount},DataRange->{0,maxTime},PlotLegends->labels] +(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *) + + +(* ::Subsection:: *) +(*Plot average #triangles vs n*) + + +averages=Map[{#[[1]],Mean[#[[2,1;;-1]]]}&,gsraw]; +averagesGrouped=GatherBy[averages,{#[[1,1]]&,#[[1,2]]&}]; +(* averagesGroupes[[ n index, tau index , run index, {ntau, tri, ds} ]] *) +averagesGrouped=Transpose[averagesGrouped]; +(* averagesGrouped[[ tau index, n index, run index ]] *) +nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]]; +taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]]; averagesErrorBars=Map[ -{{#[[1,1]],Mean[#[[All,2]]]}, +{{#[[1,1,1]],Mean[#[[All,2]]]}, ErrorBar[StandardDeviation[#[[All,2]]]/Sqrt[Length[#]]] -}&,averagesGrouped]; +}&,averagesGrouped,{2}]; + + +ErrorListPlot[averagesErrorBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels] + + +(* ::Subsection:: *) +(*Plot #triangles distribution for specific (n,tau)*) + + +histograms=Map[Histogram[#[[All,2]]]&,averagesGrouped,{2}]; +nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]]; +taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]]; -ErrorListPlot[averagesErrorBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"}] +TableForm[histograms,TableHeadings->{taulabels,nlabels}] diff --git a/cpp/switchchain.cpp b/cpp/switchchain.cpp index 3b85a8cbcacfa7d6af9f5bab490f4f207c2a0c7d..cdfa68cea8fdcf7ca0f345b3fd82f9c4e20320c0 100644 --- a/cpp/switchchain.cpp +++ b/cpp/switchchain.cpp @@ -1,5 +1,6 @@ #include "exports.hpp" #include "graph.hpp" +#include "powerlaw.hpp" #include #include #include @@ -67,11 +68,15 @@ class SwitchChain { }; int main() { + // Generate a random degree sequence + std::mt19937 rng(std::random_device{}()); + // Goal: // Degrees follow a power-law distribution with some parameter tau // Expect: #tri = const * n^{ something } // The goal is to find the 'something' by finding the number of triangles // for different values of n and tau + float tauValues[] = {2.2f, 2.5f, 2.8f}; Graph g; @@ -79,74 +84,72 @@ int main() { outfile << '{'; bool outputComma = false; - for (int numVertices = 500; numVertices <= 1000; numVertices += 100) { - // Generate a random degree sequence - std::mt19937 gen(std::random_device{}()); - - // average degree 12 - DegreeSequence ds(numVertices); - std::poisson_distribution<> degDist(12); - - // For a single n, take samples over several instances of - // the degree distribution - for (int degreeSample = 0; degreeSample < 10; ++degreeSample) { - - // Try at most 10 times to generate a valid sequence - bool validGraph = false; - for (int i = 0; i < 10; ++i) { - std::generate(ds.begin(), ds.end(), - [°Dist, &gen] { return degDist(gen); }); - if (g.createFromDegreeSequence(ds)) { - validGraph = true; - break; + for (int numVertices = 200; numVertices <= 2000; numVertices += 200) { + for (float tau : tauValues) { + + DegreeSequence ds(numVertices); + powerlaw_distribution degDist(tau, 2, numVertices); + //std::poisson_distribution<> degDist(12); + + // For a single n,tau take samples over several instances of + // the degree distribution + for (int degreeSample = 0; degreeSample < 200; ++degreeSample) { + // Generate a graph + // might require multiple tries + for (int i = 1; ; ++i) { + std::generate(ds.begin(), ds.end(), + [°Dist, &rng] { return degDist(rng); }); + if (g.createFromDegreeSequence(ds)) + break; + // When 10 tries have not worked, output a warning + if (i % 10 == 0) { + std::cerr << "Warning: could not create graph from " + "degree sequence. Trying again...\n"; + } } - } - if (!validGraph) { - std::cerr << "Could not create graph from degree sequence.\n"; - return 1; - } - std::sort(ds.begin(), ds.end()); - //std::cout << "Degree sequence:"; - //for (auto i : ds) - // std::cout << ' ' << i; - //std::cout << std::endl; + SwitchChain chain; + if (!chain.initialize(g)) { + std::cerr << "Could not initialize Markov chain.\n"; + return 1; + } - SwitchChain chain; - if (!chain.initialize(g)) { - std::cerr << "Could not initialize Markov chain.\n"; - return 1; - } + std::cout << "Starting switch Markov chain with n = " + << numVertices << ", tau = " << tau << ". \t" + << std::flush; - std::cout << "Starting switch Markov chain" << std::endl; + constexpr int mixingTime = 15000; + constexpr int measureTime = 10000; + constexpr int measureSkip = + 100; // Take a sample every 100 steps + constexpr int measurements = + (measureTime - 1) / measureSkip + 1; + int movesDone = 0; - constexpr int mixingTime = 15000; - constexpr int measureTime = 5000; - int movesDone = 0; + int triangles[measurements]; - int triangles[measureTime]; - for (int i = 0; i < mixingTime; ++i) { - if (chain.doMove()) - ++movesDone; - } - for (int i = 0; i < measureTime; ++i) { - if (chain.doMove()) - ++movesDone; - triangles[i] = chain.g.countTriangles(); - } + for (int i = 0; i < mixingTime; ++i) { + if (chain.doMove()) + ++movesDone; + } + for (int i = 0; i < measureTime; ++i) { + if (chain.doMove()) + ++movesDone; + if (i % measureSkip == 0) + triangles[i / measureSkip] = chain.g.countTriangles(); + } - if (outputComma) - outfile << ','; - outputComma = true; + std::cout << movesDone << '/' << mixingTime + measureTime + << " moves succeeded." << std::endl; - outfile << '{' << numVertices << ',' << '{'; - outfile << triangles[0]; - for (int i = 1; i < measureTime; ++i) - outfile << ',' << triangles[i]; - outfile << '}' << '}'; + if (outputComma) + outfile << ','; + outputComma = true; - std::cout << movesDone << '/' << mixingTime + measureTime - << " moves succeeded." << std::endl; + std::sort(ds.begin(), ds.end()); + outfile << '{' << '{' << numVertices << ',' << tau << '}'; + outfile << ',' << triangles << ',' << ds << '}' << std::flush; + } } } outfile << '}';