Changeset - fda8425fac05
[Not reviewed]
0 3 0
Tom Bannink - 8 years ago 2017-05-08 17:45:02
tombannink@gmail.com
Add scatter plot of GCM success rate vs mixing time
3 files changed with 46 insertions and 22 deletions:
0 comments (0 inline, 0 general)
cpp/switchchain.cpp
Show inline comments
 
@@ -57,24 +57,26 @@ class SwitchChain {
 
    }
 

	
 
    Graph g;
 
    std::mt19937 mt;
 
    std::uniform_int_distribution<> edgeDistribution;
 
    //std::uniform_int_distribution<> permutationDistribution;
 
    std::bernoulli_distribution permutationDistribution;
 
};
 

	
 
//
 
// Assumes degree sequence does NOT contain any zeroes!
 
//
 
// method2 = true  -> take highest degree and finish its pairing completely
 
// method2 = false -> take new highest degree after every pairing
 
bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, auto& rng, bool method2) {
 
    // Similar to Havel-Hakimi but instead of pairing up with the highest ones
 
    // that remain, simply pair up with random ones
 
    unsigned int n = ds.size();
 

	
 
    // degree, vertex index
 
    std::vector<std::pair<unsigned int, unsigned int>> degrees(n);
 
    for (unsigned int i = 0; i < n; ++i) {
 
        degrees[i].first = ds[i];
 
        degrees[i].second = i;
 
    }
 

	
 
@@ -180,25 +182,25 @@ int main() {
 
    // The goal is to find the 'something' by finding the number of triangles
 
    // for different values of n and tau
 
    float tauValues[] = {2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f};
 

	
 
    Graph g;
 
    Graph g1;
 
    Graph g2;
 

	
 
    std::ofstream outfile("graphdata.m");
 
    outfile << '{';
 
    bool outputComma = false;
 

	
 
    for (int numVertices = 200; numVertices <= 1000; numVertices += 100) {
 
    for (int numVertices = 200; numVertices <= 2000; numVertices += 400) {
 
        for (float tau : tauValues) {
 

	
 
            DegreeSequence ds(numVertices);
 
            powerlaw_distribution degDist(tau, 1, numVertices);
 
            //std::poisson_distribution<> degDist(12);
 

	
 
            // For a single n,tau take samples over several instances of
 
            // the degree distribution.
 
            // 500 samples seems to give reasonable results
 
            for (int degreeSample = 0; degreeSample < 200; ++degreeSample) {
 
                // Generate a graph
 
                // might require multiple tries
 
@@ -223,29 +225,31 @@ int main() {
 
                    }
 
                }
 

	
 
                //
 
                // Test the GCM1 and GCM2 success rate
 
                //
 
                std::vector<int> greedyTriangles1;
 
                std::vector<int> greedyTriangles2;
 
                int successrate1 = 0;
 
                int successrate2 = 0;
 
                for (int i = 0; i < 100; ++i) {
 
                    Graph gtemp;
 
                    // Take new highest degree every time
 
                    if (greedyConfigurationModel(ds, gtemp, rng, false)) {
 
                        ++successrate1;
 
                        greedyTriangles1.push_back(gtemp.countTriangles());
 
                        g1 = gtemp;
 
                    }
 
                    // Finish all pairings of highest degree first
 
                    if (greedyConfigurationModel(ds, gtemp, rng, true)) {
 
                        ++successrate2;
 
                        greedyTriangles2.push_back(gtemp.countTriangles());
 
                        g2 = gtemp;
 
                    }
 
                }
 

	
 
                SwitchChain chain;
 
                if (!chain.initialize(g)) {
 
                    std::cerr << "Could not initialize Markov chain.\n";
 
                    return 1;
 
                }
 
@@ -260,25 +264,25 @@ int main() {
 
                    std::cerr << "Could not initialize Markov chain.\n";
 
                    do2 = false;
 
                }
 

	
 
                std::cout << "Running n = " << numVertices << ", tau = " << tau
 
                          << ". \t" << std::flush;
 

	
 
                //int mixingTime = (32.0f - 26.0f*(tau - 2.0f)) * numVertices; //40000;
 
                //constexpr int measurements = 50;
 
                //constexpr int measureSkip =
 
                //    200; // Take a sample every ... steps
 
                int mixingTime = 0;
 
                constexpr int measurements = 5000;
 
                constexpr int measurements = 50000;
 
                constexpr int measureSkip = 1;
 

	
 

	
 
                int movesDone = 0;
 

	
 
                int triangles[measurements];
 

	
 
                for (int i = 0; i < mixingTime; ++i) {
 
                    if (chain.doMove())
 
                        ++movesDone;
 
                }
 
                for (int i = 0; i < measurements; ++i) {
data/README
Show inline comments
 
Contents of each file
 

	
 
graphdata_exponent.m
 
    output: {{n,tau},avgTriangles}
 
    n from 200 to 2000 with step 200
 
    degreeSamples = 500 + 1000
 
    initial ErdosGallai
 
    mixingTime = (32.0f - 26.0f*(tau - 2.0f)) * n
 
    measurements = 50
 
    measureSkip = 200
 

	
 
graphdata_gcm_partial.m
 
    ??
 

	
 
graphdata_partial.m
 
    output: {{n,tau},triangleseq,ds,greedyTriangles1,greedyTriangles2,greedySeq1,greedySeq2}
 
    degreeSamples = 200
 
    mixingTime = 0
 
    measurements = 50000
 
    measureSkip = 1
showgraphs.m
Show inline comments
 
@@ -43,30 +43,32 @@ Needs["ErrorBarPlots`"]
 
(*- Do a single very long run: nothing weird seems to happen with the triangle counts. Tried 10 million steps.*)
 
(**)
 
(*- Compute  Sum over i<j<k  of  (1-Exp[- d_i d_j / (2E)]) * (1 - Exp[-d_j d_k / (2E)]) * (1 - Exp[-d_k d_i / (2E)]) .*)
 
(*  Computing the f(i,j) = (1-Exp[..]) terms is fine, but then computing Sum[ f(i,j) f(j,k) f(i,k) ) ] over n^3 entries is very slow.*)
 
(*  *)
 
(*- Improve runtime*)
 
(*   (a) Don't remove/add edges from the std::vector. Simply replace them. Done, is way faster for large n.*)
 
(*   (b) Do not choose the three permutations with 1/3 probability: choose the "staying" one with zero probability. Should still be a valid switch chain?*)
 
(*   *)
 
(*- Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*)
 
(*   Done. Seems to be something like  (1/2)(32-26(tau-2))n  so we run it for that time without the factor (1/2).*)
 
(**)
 
(*- GCM1: Greedy Configuration Model 1: take highest degree and completely do all its pairings (at random)*)
 
(*- GCM2: Greedy Configuration Model 2: take highest degree and do a single pairing, then take new highest degree. So this matters mostly if there are multiple high degree nodes*)
 
(*- GCM1: Greedy Configuration Model 1: take highest degree and do a single pairing, then take new highest degree*)
 
(*- GCM2: Greedy Configuration Model 2: take highest degree and completely do all its pairings (at random)*)
 
(*The difference does not matter if one node is by far the highest.*)
 
(*The success rates, conditioned on the degree sequence being graphical, is almost always higher using GCM2. For certain degree sequences the success rate of GCM2 can be 0.9 higher than that of GCM1. (i.e. amost always works vs almost always fails).*)
 
(*For tau > ~2.3 the success rate of GCM2 seems to be higher than 80% for most sequences.*)
 
(*For tau < ~2.3 the success rate of GCM2 can drop to less than 10% for some sequences but for many sequences it is still larger than 80%.*)
 
(**)
 
(*Success rate of GCM seems to be correlated with mixing time from Erdos-Gallai: higher success rate implies lower mixing time.*)
 
(**)
 
(*  *)
 

	
 

	
 
(* ::Section:: *)
 
(*Visualize graphs*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"graphdata.m"];
 

	
 

	
 
ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}]
 
@@ -74,25 +76,25 @@ ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}]
 

	
 
gs=Map[Graph[#,GraphLayout->"CircularEmbedding"]&,gsraw[[1]]];
 
gs2=Map[Graph[#,GraphLayout->Automatic]&,gsraw[[1]]];
 

	
 

	
 
Grid[Partition[gs,10],Frame->All]
 

	
 

	
 
(* ::Section:: *)
 
(*Data import*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata.m"];
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_partial.m"];
 
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
nlabels=Map["n = "<>ToString[#]&,gdata[[1,All,1,1,1]]];
 
taulabels=Map["tau = "<>ToString[#]&,gdata[[All,1,1,1,2]]];
 

	
 

	
 
(* ::Subsection:: *)
 
(*Merge data*)
 

	
 
@@ -166,48 +168,60 @@ ListPlot[coarseData,Joined->True,PlotRange->{minCount,maxCount},DataRange->{0,me
 

	
 

	
 
(* Compute average of last part and check when the value drops below that for the first time *)
 
getMixingTime[values_]:=Module[{avg},
 
    (* average over the last 20 percent *)
 
    avg=Mean[values[[-Round[Length[values]/5];;-1]]];
 
    FirstPosition[values,_?(#<=avg&)][[1]]
 
]
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
measureSkip=1;
 
mixingTimes=Map[{#[[1,1]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}];
 
mixingTimesBars=Map[
 
    {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]]/Sqrt[Length[#]]]}&
 
    {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]](*/Sqrt[Length[#]]*)]}&
 
,mixingTimes,{2}];
 
ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","~~mixing time divided by n"},PlotLegends->taulabels]
 

	
 

	
 
(* For n fixed, look at function of tau *)
 
measureSkip=1;
 
mixingTimes=Map[{#[[1,2]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}];
 
mixingTimesBars=Map[
 
    {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]]]}&
 
,mixingTimes[[All,-1]],{1}];
 

	
 

	
 
Show[
 
ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","~~mixing time divided by n, for n = 1000"},PlotRange->{{2,3},{0,30}}]
 
,Plot[(32-26(tau-2)),{tau,2,3}]]
 
,Plot[(32-20(tau-2)),{tau,2,3}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Triangle exponent: Plot average #triangles vs n*)
 
(*Plot #triangles distribution for specific (n,tau)*)
 

	
 

	
 
plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.3<tau<2.7},{{0,800},tau>2.7}},Automatic]
 
histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}];
 

	
 

	
 
(* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *)
 
TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}]
 

	
 

	
 
(* ::Section:: *)
 
(*Triangle exponent*)
 

	
 

	
 
(* When importing from exponent-only-data file *)
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_partial.m"];
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_exponent.m"];
 
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
 
averagesGrouped=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 

	
 

	
 
(* When importing from general file *)
 
averages=Map[{#[[1]],Mean[#[[2,1;;-1]]]}&,gsraw];
 
(* averages=SortBy[averages,#[[1,1]]&]; (* Sort by n *) *)
 
averagesGrouped=GatherBy[averages,{#[[1,2]]&,#[[1,1]]&}]; (* Split by n,tau *)
 

	
 

	
 
(* averagesGrouped[[ tau index, n index, run index , {ntau, avgtri} ]] *)
 
nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]];
 
@@ -244,65 +258,62 @@ Show[ListLogLogPlot[averagesErrorBars[[All,All,1]],Joined->True,PlotMarkers->Aut
 

	
 
tauValues=averagesGrouped[[All,1,1,1,2]];
 
exponents=Transpose[{tauValues,fits[[All,2,1]]}];
 
Show[ListPlot[exponents,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","triangle-law-exponent"},PlotRange->{{2,3},{0,1.6}}],Plot[3/2(3-tau),{tau,2,3}]]
 

	
 

	
 
tauValues=averagesGrouped[[All,1,1,1,2]];
 
exponentsErrorBars=Map[{{#[[1]],#[[2]]["BestFitParameters"][[2]]},ErrorBar[#[[2]]["ParameterConfidenceIntervals"][[2]]-#[[2]]["BestFitParameters"][[2]]]}&,
 
Transpose[{tauValues,fitsExtra}]];
 
Show[ErrorListPlot[exponentsErrorBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","triangle-law-exponent"},PlotRange->{{2,3},{0,1.6}}],Plot[3/2(3-tau),{tau,2,3}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot #triangles distribution for specific (n,tau)*)
 

	
 

	
 
plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.3<tau<2.7},{{0,800},tau>2.7}},Automatic]
 
histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}];
 

	
 

	
 
(* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *)
 
TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}]
 

	
 

	
 
(* ::Section:: *)
 
(*Greedy configuration model*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*#triangles(GCM) distribution vs #triangles(SwitchChain)*)
 

	
 

	
 
timeWindow=Round[Length[gdata[[1,1,1,2]]]/10];
 
getStats[run_]:=Module[{avg,stddev},
 
    avg=N[Mean[run[[2,-timeWindow;;-1]]]];
 
    stddev=N[StandardDeviation[run[[2,timeWindow;;-1]]]];
 
    {run[[1]],stddev/avg,(run[[2,1]])/avg,Map[N[#/avg]&,run[[4]]]}
 
]
 
stats=Map[getStats,gdata,{3}];
 

	
 

	
 
histograms=Map[Histogram[{#[[1,4]]},PlotRange->{{0,2},Automatic},PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[1,3]],3]]<>"\[Cross]average. stddev="<>ToString[NumberForm[#[[1,2]],3]]<>"\[Cross]average"]&,stats,{2}];
 
histograms=Map[Histogram[{#[[1,4]]},PlotRange->{{0,5},Automatic},PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[1,3]],3]]<>"\[Cross]average. stddev="<>ToString[NumberForm[#[[1,2]],3]]<>"\[Cross]average"]&,stats,{2}];
 

	
 

	
 
TableForm[histograms,TableHeadings->{taulabels,nlabels}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Greedy CM success rates*)
 

	
 

	
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds, greedyTriangles} ]] *)
 
successrates=Map[{Length[#[[4]]],Length[#[[5]]]}&,gdata,{3}];
 
successrates=Map[Transpose,successrates,{2}];
 
successratesDelta=Map[Length[#[[5]]]-Length[#[[4]]]&,gdata,{3}];
 

	
 
rateHistograms=Map[Histogram[#,{10},PlotRange->{{0,100},Automatic}]&,successrates,{2}];
 
TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}]
 

	
 
rateHistograms=Map[Histogram[#,{10},PlotRange->{{-100,100},Automatic}]&,successratesDelta,{2}];
 
TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}]
 
(*TableForm[Transpose[rateHistograms],TableHeadings->{nlabels,taulabels}]*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Compare success rate with mixing time*)
 

	
 

	
 
successrates2=Map[{Length[#[[4]]],Length[#[[5]]],getMixingTime[#[[2]]]}&,gdata,{3}];
 
(* { GCM1 rate, GCM2 rate, mixing time from ErdosGallai } *)
 

	
 

	
 
scatterPlots=Map[ListPlot[#[[All,{1,3}]],PlotRange->{All,All},PlotStyle->PointSize[Large]]&,successrates2,{2}];
 
TableForm[scatterPlots,TableHeadings->{taulabels,nlabels}]
0 comments (0 inline, 0 general)