Changeset - fda8425fac05
[Not reviewed]
0 3 0
Tom Bannink - 8 years ago 2017-05-08 17:45:02
tombannink@gmail.com
Add scatter plot of GCM success rate vs mixing time
3 files changed with 46 insertions and 22 deletions:
0 comments (0 inline, 0 general)
cpp/switchchain.cpp
Show inline comments
 
@@ -63,12 +63,14 @@ class SwitchChain {
 
    std::bernoulli_distribution permutationDistribution;
 
};
 

	
 
//
 
// Assumes degree sequence does NOT contain any zeroes!
 
//
 
// method2 = true  -> take highest degree and finish its pairing completely
 
// method2 = false -> take new highest degree after every pairing
 
bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, auto& rng, bool method2) {
 
    // Similar to Havel-Hakimi but instead of pairing up with the highest ones
 
    // that remain, simply pair up with random ones
 
    unsigned int n = ds.size();
 

	
 
    // degree, vertex index
 
@@ -186,13 +188,13 @@ int main() {
 
    Graph g2;
 

	
 
    std::ofstream outfile("graphdata.m");
 
    outfile << '{';
 
    bool outputComma = false;
 

	
 
    for (int numVertices = 200; numVertices <= 1000; numVertices += 100) {
 
    for (int numVertices = 200; numVertices <= 2000; numVertices += 400) {
 
        for (float tau : tauValues) {
 

	
 
            DegreeSequence ds(numVertices);
 
            powerlaw_distribution degDist(tau, 1, numVertices);
 
            //std::poisson_distribution<> degDist(12);
 

	
 
@@ -229,17 +231,19 @@ int main() {
 
                std::vector<int> greedyTriangles1;
 
                std::vector<int> greedyTriangles2;
 
                int successrate1 = 0;
 
                int successrate2 = 0;
 
                for (int i = 0; i < 100; ++i) {
 
                    Graph gtemp;
 
                    // Take new highest degree every time
 
                    if (greedyConfigurationModel(ds, gtemp, rng, false)) {
 
                        ++successrate1;
 
                        greedyTriangles1.push_back(gtemp.countTriangles());
 
                        g1 = gtemp;
 
                    }
 
                    // Finish all pairings of highest degree first
 
                    if (greedyConfigurationModel(ds, gtemp, rng, true)) {
 
                        ++successrate2;
 
                        greedyTriangles2.push_back(gtemp.countTriangles());
 
                        g2 = gtemp;
 
                    }
 
                }
 
@@ -266,13 +270,13 @@ int main() {
 

	
 
                //int mixingTime = (32.0f - 26.0f*(tau - 2.0f)) * numVertices; //40000;
 
                //constexpr int measurements = 50;
 
                //constexpr int measureSkip =
 
                //    200; // Take a sample every ... steps
 
                int mixingTime = 0;
 
                constexpr int measurements = 5000;
 
                constexpr int measurements = 50000;
 
                constexpr int measureSkip = 1;
 

	
 

	
 
                int movesDone = 0;
 

	
 
                int triangles[measurements];
data/README
Show inline comments
 
@@ -6,6 +6,15 @@ graphdata_exponent.m
 
    degreeSamples = 500 + 1000
 
    initial ErdosGallai
 
    mixingTime = (32.0f - 26.0f*(tau - 2.0f)) * n
 
    measurements = 50
 
    measureSkip = 200
 

	
 
graphdata_gcm_partial.m
 
    ??
 

	
 
graphdata_partial.m
 
    output: {{n,tau},triangleseq,ds,greedyTriangles1,greedyTriangles2,greedySeq1,greedySeq2}
 
    degreeSamples = 200
 
    mixingTime = 0
 
    measurements = 50000
 
    measureSkip = 1
showgraphs.m
Show inline comments
 
@@ -49,18 +49,20 @@ Needs["ErrorBarPlots`"]
 
(*   (a) Don't remove/add edges from the std::vector. Simply replace them. Done, is way faster for large n.*)
 
(*   (b) Do not choose the three permutations with 1/3 probability: choose the "staying" one with zero probability. Should still be a valid switch chain?*)
 
(*   *)
 
(*- Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*)
 
(*   Done. Seems to be something like  (1/2)(32-26(tau-2))n  so we run it for that time without the factor (1/2).*)
 
(**)
 
(*- GCM1: Greedy Configuration Model 1: take highest degree and completely do all its pairings (at random)*)
 
(*- GCM2: Greedy Configuration Model 2: take highest degree and do a single pairing, then take new highest degree. So this matters mostly if there are multiple high degree nodes*)
 
(*- GCM1: Greedy Configuration Model 1: take highest degree and do a single pairing, then take new highest degree*)
 
(*- GCM2: Greedy Configuration Model 2: take highest degree and completely do all its pairings (at random)*)
 
(*The difference does not matter if one node is by far the highest.*)
 
(*The success rates, conditioned on the degree sequence being graphical, is almost always higher using GCM2. For certain degree sequences the success rate of GCM2 can be 0.9 higher than that of GCM1. (i.e. amost always works vs almost always fails).*)
 
(*For tau > ~2.3 the success rate of GCM2 seems to be higher than 80% for most sequences.*)
 
(*For tau < ~2.3 the success rate of GCM2 can drop to less than 10% for some sequences but for many sequences it is still larger than 80%.*)
 
(**)
 
(*Success rate of GCM seems to be correlated with mixing time from Erdos-Gallai: higher success rate implies lower mixing time.*)
 
(**)
 
(*  *)
 

	
 

	
 
(* ::Section:: *)
 
(*Visualize graphs*)
 
@@ -80,13 +82,13 @@ Grid[Partition[gs,10],Frame->All]
 

	
 

	
 
(* ::Section:: *)
 
(*Data import*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata.m"];
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_partial.m"];
 
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
nlabels=Map["n = "<>ToString[#]&,gdata[[1,All,1,1,1]]];
 
@@ -172,13 +174,13 @@ getMixingTime[values_]:=Module[{avg},
 
    FirstPosition[values,_?(#<=avg&)][[1]]
 
]
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
measureSkip=1;
 
mixingTimes=Map[{#[[1,1]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}];
 
mixingTimesBars=Map[
 
    {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]]/Sqrt[Length[#]]]}&
 
    {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]](*/Sqrt[Length[#]]*)]}&
 
,mixingTimes,{2}];
 
ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","~~mixing time divided by n"},PlotLegends->taulabels]
 

	
 

	
 
(* For n fixed, look at function of tau *)
 
measureSkip=1;
 
@@ -187,21 +189,33 @@ mixingTimesBars=Map[
 
    {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]]]}&
 
,mixingTimes[[All,-1]],{1}];
 

	
 

	
 
Show[
 
ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","~~mixing time divided by n, for n = 1000"},PlotRange->{{2,3},{0,30}}]
 
,Plot[(32-26(tau-2)),{tau,2,3}]]
 
,Plot[(32-20(tau-2)),{tau,2,3}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Triangle exponent: Plot average #triangles vs n*)
 
(*Plot #triangles distribution for specific (n,tau)*)
 

	
 

	
 
plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.3<tau<2.7},{{0,800},tau>2.7}},Automatic]
 
histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}];
 

	
 

	
 
(* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *)
 
TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}]
 

	
 

	
 
(* ::Section:: *)
 
(*Triangle exponent*)
 

	
 

	
 
(* When importing from exponent-only-data file *)
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_partial.m"];
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_exponent.m"];
 
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
 
averagesGrouped=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 

	
 

	
 
(* When importing from general file *)
 
averages=Map[{#[[1]],Mean[#[[2,1;;-1]]]}&,gsraw];
 
@@ -250,24 +264,12 @@ Show[ListPlot[exponents,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","t
 
tauValues=averagesGrouped[[All,1,1,1,2]];
 
exponentsErrorBars=Map[{{#[[1]],#[[2]]["BestFitParameters"][[2]]},ErrorBar[#[[2]]["ParameterConfidenceIntervals"][[2]]-#[[2]]["BestFitParameters"][[2]]]}&,
 
Transpose[{tauValues,fitsExtra}]];
 
Show[ErrorListPlot[exponentsErrorBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","triangle-law-exponent"},PlotRange->{{2,3},{0,1.6}}],Plot[3/2(3-tau),{tau,2,3}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot #triangles distribution for specific (n,tau)*)
 

	
 

	
 
plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.3<tau<2.7},{{0,800},tau>2.7}},Automatic]
 
histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}];
 

	
 

	
 
(* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *)
 
TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}]
 

	
 

	
 
(* ::Section:: *)
 
(*Greedy configuration model*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*#triangles(GCM) distribution vs #triangles(SwitchChain)*)
 
@@ -279,13 +281,13 @@ getStats[run_]:=Module[{avg,stddev},
 
    stddev=N[StandardDeviation[run[[2,timeWindow;;-1]]]];
 
    {run[[1]],stddev/avg,(run[[2,1]])/avg,Map[N[#/avg]&,run[[4]]]}
 
]
 
stats=Map[getStats,gdata,{3}];
 

	
 

	
 
histograms=Map[Histogram[{#[[1,4]]},PlotRange->{{0,2},Automatic},PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[1,3]],3]]<>"\[Cross]average. stddev="<>ToString[NumberForm[#[[1,2]],3]]<>"\[Cross]average"]&,stats,{2}];
 
histograms=Map[Histogram[{#[[1,4]]},PlotRange->{{0,5},Automatic},PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[1,3]],3]]<>"\[Cross]average. stddev="<>ToString[NumberForm[#[[1,2]],3]]<>"\[Cross]average"]&,stats,{2}];
 

	
 

	
 
TableForm[histograms,TableHeadings->{taulabels,nlabels}]
 

	
 

	
 
(* ::Subsection:: *)
 
@@ -302,7 +304,16 @@ TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}]
 

	
 
rateHistograms=Map[Histogram[#,{10},PlotRange->{{-100,100},Automatic}]&,successratesDelta,{2}];
 
TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}]
 
(*TableForm[Transpose[rateHistograms],TableHeadings->{nlabels,taulabels}]*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Compare success rate with mixing time*)
 

	
 

	
 
successrates2=Map[{Length[#[[4]]],Length[#[[5]]],getMixingTime[#[[2]]]}&,gdata,{3}];
 
(* { GCM1 rate, GCM2 rate, mixing time from ErdosGallai } *)
 

	
 

	
 
scatterPlots=Map[ListPlot[#[[All,{1,3}]],PlotRange->{All,All},PlotStyle->PointSize[Large]]&,successrates2,{2}];
 
TableForm[scatterPlots,TableHeadings->{taulabels,nlabels}]
0 comments (0 inline, 0 general)