Changeset - 9c4226491043
[Not reviewed]
0 2 1
Tom Bannink - 8 years ago 2017-05-22 17:08:04
tombannink@gmail.com
Split gcm-initial-triangles notebook
3 files changed with 97 insertions and 10 deletions:
0 comments (0 inline, 0 general)
cpp/switchchain_initialtris.cpp
Show inline comments
 
@@ -130,172 +130,174 @@ bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, auto& rng, bool meth
 
            }
 
        } else {
 
            // Pair with a random vertex that is not u itself and to which
 
            // u has not paired yet!
 
            available.clear();
 
            for (auto iter = degrees.begin(); iter != degrees.end(); ++iter) {
 
                if (iter->second != u && !g.hasEdge({u, iter->second}))
 
                    available.push_back(iter);
 
            }
 
            if (available.empty())
 
                return false;
 
            std::uniform_int_distribution<> distr(0, available.size() - 1);
 
            auto vIter = available[distr(rng)];
 
            // pair u to v
 
            if (vIter->first == 0)
 
                std::cerr << "ERROR 2 in GCM1.\n";
 
            if (!g.addEdge({u, vIter->second}))
 
                std::cerr << "ERROR. Could not add edge in GCM1.\n";
 
            // Purge anything with degree zero
 
            // Be careful with invalidating the other iterator!
 
            // Degree of u is always greater or equal to the degree of v
 
            if (dmax == 1) {
 
                // Remove both
 
                // Erasure invalidates all iterators AFTER the erased one
 
                if (vIter > uIter) {
 
                    degrees.erase(vIter);
 
                    degrees.erase(uIter);
 
                } else {
 
                    degrees.erase(uIter);
 
                    degrees.erase(vIter);
 
                }
 
            } else {
 
                // Remove only v if it reaches zero
 
                uIter->first--;
 
                vIter->first--;
 
                if (vIter->first == 0)
 
                    degrees.erase(vIter);
 
            }
 
            //degrees.erase(std::remove_if(degrees.begin(), degrees.end(),
 
            //                             [](auto x) { return x.first == 0; }));
 
        }
 
    }
 
    return true;
 
}
 

	
 
int main() {
 
    // Generate a random degree sequence
 
    std::mt19937 rng(std::random_device{}());
 

	
 
    // Goal:
 
    // Degrees follow a power-law distribution with some parameter tau
 
    // Expect:  #tri = const * n^{ something }
 
    // The goal is to find the 'something' by finding the number of triangles
 
    // for different values of n and tau
 
    float tauValues[] = {2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f};
 

	
 
    Graph g;
 

	
 
    std::ofstream outfile("graphdata_initialtris.m");
 
    outfile << '{';
 
    bool outputComma = false;
 

	
 
    for (int numVertices = 200; numVertices <= 2000; numVertices += 400) {
 
        for (float tau : tauValues) {
 

	
 
            DegreeSequence ds(numVertices);
 
            powerlaw_distribution degDist(tau, 1, numVertices);
 
            //std::poisson_distribution<> degDist(12);
 

	
 
            // For a single n,tau take samples over several instances of
 
            // the degree distribution.
 
            // 500 samples seems to give reasonable results
 
            for (int degreeSample = 0; degreeSample < 200; ++degreeSample) {
 
                // Generate a graph
 
                // might require multiple tries
 
                for (int i = 1; ; ++i) {
 
                    std::generate(ds.begin(), ds.end(),
 
                                  [&degDist, &rng] { return degDist(rng); });
 
                    // First make the sum even
 
                    unsigned int sum = std::accumulate(ds.begin(), ds.end(), 0);
 
                    if (sum % 2) {
 
                        continue;
 
                        // Can we do this: ??
 
                        ds.back()++;
 
                    }
 

	
 
                    if (g.createFromDegreeSequence(ds))
 
                        break;
 

	
 
                    // When 10 tries have not worked, output a warning
 
                    if (i % 10 == 0) {
 
                        std::cerr << "Warning: could not create graph from "
 
                                     "degree sequence. Trying again...\n";
 
                    }
 
                }
 

	
 
                std::cout << "Running n = " << numVertices << ", tau = " << tau
 
                          << "." << std::flush;
 

	
 
                //
 
                // Test the GCM1 and GCM2 success rate
 
                //
 
                long long gcmTris1 = 0;
 
                long long gcmTris2 = 0;
 
                int successrate1 = 0;
 
                int successrate2 = 0;
 
                for (int i = 0; i < 100; ++i) {
 
                    Graph gtemp;
 
                    // Take new highest degree every time
 
                    if (greedyConfigurationModel(ds, gtemp, rng, false)) {
 
                        ++successrate1;
 
                        gcmTris1 += gtemp.countTriangles();
 
                    }
 
                    // Finish all pairings of highest degree first
 
                    if (greedyConfigurationModel(ds, gtemp, rng, true)) {
 
                        ++successrate2;
 
                        gcmTris2 += gtemp.countTriangles();
 
                    }
 
                }
 

	
 
                SwitchChain chain;
 
                if (!chain.initialize(g)) {
 
                    std::cerr << "Could not initialize Markov chain.\n";
 
                    return 1;
 
                }
 

	
 
                std::cout << "Running n = " << numVertices << ", tau = " << tau
 
                          << ". \t" << std::flush;
 

	
 
                int mixingTime = (32.0f - 20.0f * (tau - 2.0f)) * numVertices;
 
                constexpr int measurements = 20;
 
                constexpr int measureSkip = 200;
 

	
 
                int movesDone = 0;
 

	
 
                long long trianglesTotal = 0;
 

	
 
                std::cout << "  .. \t" << std::flush;
 

	
 
                for (int i = 0; i < mixingTime; ++i) {
 
                    if (chain.doMove())
 
                        ++movesDone;
 
                }
 
                for (int i = 0; i < measurements; ++i) {
 
                    for (int j = 0; j < measureSkip; ++j)
 
                        if (chain.doMove())
 
                            ++movesDone;
 
                    trianglesTotal = chain.g.countTriangles();
 
                    trianglesTotal += chain.g.countTriangles();
 
                }
 

	
 
                std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
 
                          << " moves succeeded ("
 
                          << 100.0f * float(movesDone) /
 
                                 float(mixingTime + measurements * measureSkip)
 
                          << "%).";
 
                //std::cout << std::endl;
 

	
 
                if (outputComma)
 
                    outfile << ',' << '\n';
 
                outputComma = true;
 

	
 
                float avgTriangles =
 
                    float(trianglesTotal) / float(measurements);
 
                outfile << '{';
 
                outfile << '{' << numVertices << ',' << tau << '}';
 
                outfile << ',' << avgTriangles;
 
                outfile << ',' << '{' << gcmTris1 << ',' << successrate1 << '}';
 
                outfile << ',' << '{' << gcmTris2 << ',' << successrate2 << '}';
 
                outfile << '}' << std::flush;
 

	
 
                std::cout << std::endl;
 
            }
 
        }
 
    }
 
    outfile << '}';
 
    return 0;
 
}
triangle_analysis.m
Show inline comments
 
@@ -133,169 +133,171 @@ tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n-1},{j,i+1,n}];
 
Sum[tmp[[i,j-i]]*tmp[[j,k-j]]*tmp[[i,k-i]],{i,1,n-2},{j,i+1,n-1},{k,j+1,n}]
 
*)
 
];
 

	
 

	
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
avgAndProp=ParallelMap[{getProperty[#[[3]]],Mean[#[[2,1;;-1]]]}&,gdata[[2,2,1;;100]]];
 

	
 

	
 
Show[ListPlot[avgAndProp,AxesOrigin->{0,0},AxesLabel->{"degree-sequence-property","<#triangles>"},AspectRatio->1],Plot[x,{x,1,1000}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot triangle count over "time" in Markov chain instances*)
 

	
 

	
 
numPlots=20;
 
selectedData=gdata[[4,-1]][[-numPlots;;-1]];
 
measureSkip=1;
 
minCount=Min[Map[Min[#[[2]]]&,selectedData]];
 
maxCount=Max[Map[Max[#[[2]]]&,selectedData]];
 
maxTime=Max[Map[Length[#[[2]]]&,selectedData]];
 
skipPts=Max[1,Round[maxTime/200]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *)
 
coarseData=Map[#[[2,1;;-1;;skipPts]]&,selectedData];
 
labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData];
 
ListPlot[coarseData,Joined->True,PlotRange->{minCount,maxCount},DataRange->{0,measureSkip*maxTime},PlotLegends->labels]
 
(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Compute 'mixing time'*)
 

	
 

	
 
(* Compute average of last part and check when the value drops below that for the first time *)
 
getMixingTime[values_]:=Module[{avg},
 
    (* average over the last 20 percent *)
 
    avg=Mean[values[[-Round[Length[values]/5];;-1]]];
 
    FirstPosition[values,_?(#<=avg&)][[1]]
 
]
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
measureSkip=1;
 
mixingTimes=Map[{#[[1,1]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}];
 
mixingTimesBars=Map[
 
    {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]](*/Sqrt[Length[#]]*)]}&
 
,mixingTimes,{2}];
 
ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","~~mixing time divided by n"},PlotLegends->taulabels]
 

	
 

	
 
(* For n fixed, look at function of tau *)
 
measureSkip=1;
 
mixingTimes=Map[{#[[1,2]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}];
 
mixingTimesBars=Map[
 
    {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]]]}&
 
,mixingTimes[[All,-1]],{1}];
 

	
 

	
 
Show[
 
ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","~~mixing time divided by n, for n = 1000"},PlotRange->{{2,3},{0,30}}]
 
,Plot[(32-20(tau-2)),{tau,2,3}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot #triangles distribution for specific (n,tau)*)
 

	
 

	
 
plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.3<tau<2.7},{{0,800},tau>2.7}},Automatic]
 
histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}];
 

	
 

	
 
(* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *)
 
TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}]
 

	
 

	
 
(* ::Section:: *)
 
(*Greedy configuration model*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Distribution of initial #triangles for GCM1,GCM2,EG compared to average #triangles.*)
 

	
 

	
 
(* Data format: *)
 
(* gdata[[ tau index, n index, run index , datatype index ]] *)
 
(* datatype index:
 
1: {n,tau}
 
2: #triangles time sequence
 
3: degree sequence
 
4: GCM1 starting triangle counts
 
5: GCM2 starting triangle counts
 
6: GCM1 time sequence
 
7: GCM2 time sequence
 
*)
 

	
 

	
 
 (* Stats for a single run at every (n,tau) *)
 
timeWindow=Round[Length[gdata[[1,1,1,2]]]/10];
 
skipPts=Max[1,Round[timeWindow/100]];
 
getSingleStats[runs_]:=Module[{run,avg,stddev},
 
    run=runs[[1]]; (* Select some run *)
 
    avg=N[Mean[run[[2,-timeWindow;;-1]]]];
 
    stddev=N[StandardDeviation[run[[2,timeWindow;;-1]]]];
 
    {run[[1]], (* {n,tau} *)
 
    stddev/avg,
 
    (run[[2,1]])/avg, (* EG starting point *)
 
    Map[N[#/avg]&,run[[4]]],  (* GCM1 counts *)
 
    Map[N[#/avg]&,run[[5]]] (* GCM2 counts *)
 
    Map[N[#/avg]&,run[[5]]],  (* GCM2 counts *)
 
    (run[[2,-timeWindow;;-1;;skipPts]])/avg  (* counts in uniform distribution *)
 
    }
 
]
 
singleStats=Map[getSingleStats,gdata,{2}];
 

	
 

	
 
(* Yellow: GCM1 (take new highest everytime *)
 
(* Blue: GCM2 (finish highest first, more similar to EG) *)
 
histogramsSingle=Map[Histogram[{#[[4]],#[[5]]},PlotRange->{{0,5},Automatic},ImageSize->300,PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[3]],3]]<>"\[Cross]average. stddev="<>ToString[NumberForm[#[[2]],3]]<>"\[Cross]average"]&,singleStats,{2}];
 
(* Green: Actual uniform distribution *)
 
histogramsSingle=Map[Histogram[{#[[4]],#[[5]],#[[6]]},PlotRange->{{0,3},Automatic},ImageSize->300,PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[3]],3]]<>"\[Cross]average"]&,singleStats,{2}];
 

	
 

	
 
TableForm[histogramsSingle,TableHeadings->{taulabels,nlabels}]
 

	
 

	
 
 (* Consider all runs *)
 
timeWindow=Round[Length[gdata[[1,1,1,2]]]/10];
 
skipPts=Max[1,Round[timeWindow/100]];
 
getAverage[run_]:=Module[{avg,stddev},
 
    avg=N[Mean[run[[2,-timeWindow;;-1]]]];
 
    {
 
    Mean[run[[4]]]/avg,(* GCM1 counts *)
 
    Mean[run[[5]]]/avg, (* GCM2 counts *)
 
    (run[[2,1]])/avg (* EG starting point *)
 
    (run[[2,1]])/avg, (* EG starting point *)
 
    }
 
]
 
getTotalStats[runs_]:=Transpose[Map[getAverage,runs]];
 
totalStats=Map[getTotalStats,gdata,{2}];
 

	
 

	
 
(* Yellow: GCM1 (take new highest everytime *)
 
(* Blue: GCM2 (finish\[AliasDelimiter] highest first, more similar to EG) *)
 
(* Blue: GCM2 (finish highest first, more similar to EG) *)
 
histogramsTotal=Map[Histogram[#,{0.1},PlotRange->{{0,5},Automatic},ImageSize->300]&,totalStats,{2}];
 

	
 

	
 
TableForm[histogramsTotal,TableHeadings->{taulabels,nlabels}]
 

	
 

	
 

	
 

	
 
(* ::Subsection:: *)
 
(*GCM1 vs GCM2 success rates*)
 

	
 

	
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds, greedyTriangles} ]] *)
 
successrates=Map[{Length[#[[4]]],Length[#[[5]]]}&,gdata,{3}];
 
successrates=Map[Transpose,successrates,{2}];
 
successratesDelta=Map[Length[#[[5]]]-Length[#[[4]]]&,gdata,{3}];
 

	
 
rateHistograms=Map[Histogram[#,{10},PlotRange->{{0,100},Automatic}]&,successrates,{2}];
 
TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}]
 

	
 
rateHistograms=Map[Histogram[#,{10},PlotRange->{{-100,100},Automatic}]&,successratesDelta,{2}];
 
TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}]
 
(*TableForm[Transpose[rateHistograms],TableHeadings->{nlabels,taulabels}]*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Compare success rate with mixing time*)
 

	
 

	
 
successrates2=Map[{Length[#[[4]]],Length[#[[5]]],getMixingTime[#[[2]]]}&,gdata,{3}];
 
(* { GCM1 rate, GCM2 rate, mixing time from ErdosGallai } *)
 

	
 

	
 
scatterPlots=Map[ListPlot[#[[All,{1,3}]],PlotRange->{All,All},PlotStyle->PointSize[Large]]&,successrates2,{2}];
 
TableForm[scatterPlots,TableHeadings->{taulabels,nlabels}]
triangle_gcm_initial_analysis.m
Show inline comments
 
new file 100644
 
(* ::Package:: *)
 

	
 
Needs["ErrorBarPlots`"]
 

	
 

	
 
(* ::Section:: *)
 
(*Data import*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_initialtris.m"];
 
(* gsraw=SortBy[gsraw,{#[[1,1]]&,#[[1,2]]&}]; (* Sort by n and then by tau. The {} forces a *stable* sort because otherwise Mathematica sorts also on triangle count and other things. *) *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* Data format: *)
 
(* gdata[[ tau index, n index, run index , datatype index ]] *)
 
(* datatype index:
 
1: {n,tau}
 
2: avgtriangles when mixed
 
3: {GCM1 starting triangle counts summed, GCM1 number of successes}
 
4: {GCM2 starting triangle counts summed, GCM2 number of successes}
 
*)
 
nlabels=Map["n = "<>ToString[#]&,gdata[[1,All,1,1,1]]];
 
taulabels=Map["tau = "<>ToString[#]&,gdata[[All,1,1,1,2]]];
 

	
 

	
 
(* ::Section:: *)
 
(*Greedy configuration model*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Distribution of initial #triangles for GCM1,GCM2,EG compared to average #triangles.*)
 

	
 

	
 
 (* Consider all runs *)
 
getIt[x_,avg_]:=If[x[[2]]>=5, x[[1]]/x[[2]]/avg,-0.5]
 
getAverage[run_]:=Module[{avg},
 
    avg=20*run[[2]];
 
    If[avg>0,
 
    { getIt[run[[3]],avg], getIt[run[[4]],avg] }
 
    , {3,3}]
 
]
 
getTotalStats[runs_]:=Transpose[Map[getAverage,runs]];
 
totalStats=Map[getTotalStats,gdata,{2}];
 

	
 

	
 
(* Yellow: GCM1 (take new highest everytime *)
 
(* Blue: GCM2 (finish highest first, more similar to EG) *)
 
histogramsTotal=Map[Histogram[#,{0.05},PlotRange->{{-0.5,2},Automatic},ImageSize->300,AxesOrigin->{0,0}]&,totalStats,{2}];
 

	
 

	
 
TableForm[histogramsTotal,TableHeadings->{taulabels,nlabels}]
 

	
 

	
 
(* ::Subsubsection:: *)
 
(*Exporting plots*)
 

	
 

	
 
tauIndices={2,5,8};
 
nIndices={2};
 
makeHistogram[datasets_,n_,tau_]:=Histogram[datasets,{0.05},PlotRange->{{-0.5,1.5},Automatic},ImageSize->300,AxesOrigin->{0,0}];
 
histogramsTotal=Map[makeHistogram[#,1000,tau]&,totalStats[[tauIndices,nIndices]],{2}];
 
TableForm[histogramsTotal,TableHeadings->{taulabels[[tauIndices]],nlabels[[nIndices]]}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*GCM1 vs GCM2 success rates*)
 

	
 

	
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds, greedyTriangles} ]] *)
 
successrates=Map[{#[[3,2]],#[[4,2]]}&,gdata,{3}];
 
successrates=Map[Transpose,successrates,{2}];
 
successratesDelta=Map[#[[3,2]]-#[[4,2]]&,gdata,{3}];
 

	
 
rateHistograms=Map[Histogram[#,{10},PlotRange->{{0,100},Automatic}]&,successrates,{2}];
 
TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}]
 

	
 
rateHistograms=Map[Histogram[#,{10},PlotRange->{{-100,100},Automatic}]&,successratesDelta,{2}];
 
TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}]
 
(*TableForm[Transpose[rateHistograms],TableHeadings->{nlabels,taulabels}]*)
 

	
 

	
 

	
0 comments (0 inline, 0 general)