Changeset - 2c2c8fc81176
[Not reviewed]
3 1 3
Tom Bannink - 8 years ago 2017-03-06 15:34:02
tombannink@gmail.com
Add computation of degree-sequence-property and more
5 files changed with 194 insertions and 128 deletions:
0 comments (0 inline, 0 general)
cpp/showgraphs.m
Show inline comments
 
deleted file
cpp/switchchain.cpp
Show inline comments
 
@@ -76,7 +76,7 @@ int main() {
 
    // Expect:  #tri = const * n^{ something }
 
    // The goal is to find the 'something' by finding the number of triangles
 
    // for different values of n and tau
 
    float tauValues[] = {2.2f, 2.5f, 2.8f};
 
    float tauValues[] = {2.2f, 2.35f, 2.5f, 2.65f, 2.8f};
 

	
 
    Graph g;
 

	
 
@@ -84,16 +84,16 @@ int main() {
 
    outfile << '{';
 
    bool outputComma = false;
 

	
 
    for (int numVertices = 4000; numVertices <= 8000; numVertices += 1000) {
 
    for (int numVertices = 200; numVertices <= 1000; numVertices += 100) {
 
        for (float tau : tauValues) {
 

	
 
            DegreeSequence ds(numVertices);
 
            powerlaw_distribution degDist(tau, 2, numVertices);
 
            powerlaw_distribution degDist(tau, 1, numVertices);
 
            //std::poisson_distribution<> degDist(12);
 

	
 
            // For a single n,tau take samples over several instances of
 
            // the degree distribution
 
            for (int degreeSample = 0; degreeSample < 200; ++degreeSample) {
 
            for (int degreeSample = 0; degreeSample < 500; ++degreeSample) {
 
                // Generate a graph
 
                // might require multiple tries
 
                for (int i = 1; ; ++i) {
 
@@ -118,10 +118,10 @@ int main() {
 
                          << numVertices << ", tau = " << tau << ". \t"
 
                          << std::flush;
 

	
 
                constexpr int mixingTime = 20000;
 
                constexpr int measureTime = 10000;
 
                constexpr int mixingTime = 30000;
 
                constexpr int measureTime = 20000;
 
                constexpr int measureSkip =
 
                    100; // Take a sample every 100 steps
 
                    200; // Take a sample every ... steps
 
                constexpr int measurements =
 
                    (measureTime - 1) / measureSkip + 1;
 
                int movesDone = 0;
power_law_sampling_0706.1062.pdf
Show inline comments
 
file renamed from cpp/power_law_sampling_0706.1062.pdf to power_law_sampling_0706.1062.pdf
showgraphs.m
Show inline comments
 
new file 100644
 
(* ::Package:: *)
 

	
 
Needs["ErrorBarPlots`"]
 

	
 

	
 
(* ::Section:: *)
 
(*TODO*)
 

	
 

	
 
(* ::Text:: *)
 
(*- Use different starting point for switch chain that is closer to uniform:*)
 
(*   Do configuration model, starting with the vertex with highest degree and keeping track of a "forbidden list" meaning dont pair something that is not allowed*)
 
(*   (a) How close is this to uniform ? At least w.r.t. the measure of #triangles*)
 
(*   (b) How often does this procedure work/fail. Might still be faster to do switchings from Erdos-Gallai.*)
 
(**)
 
(*- Improve runtime*)
 
(*   (a) Don't remove/add edges from the std::vector. Simply replace them*)
 
(*   (b) Better direct triangle counting? (I doubt it)*)
 
(*   (b') Better triangle counting by only keeping track of CHANGES in #triangles*)
 
(*   (c) Do not choose the three permutations with 1/3 probability: choose the "staying" one with a very low probability. Should still be a valid switch chain?*)
 
(**)
 
(*- Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*)
 
(**)
 
(*- Count #moves that result in +-k triangles (one move could create many triangles at once!)*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Done*)
 

	
 

	
 
(* ::Text:: *)
 
(*- Do a single very long run: nothing weird seems to happen with the triangle counts. Tried 10 million steps.*)
 
(**)
 
(*- Compute  Sum over i<j<k  of  (1-Exp[- d_i d_j / (2E)]) * (1 - Exp[-d_j d_k / (2E)]) * (1 - Exp[-d_k d_i / (2E)]) .*)
 
(*  Computing the f(i,j) = (1-Exp[..]) terms is fine, but then computing Sum[ f(i,j) f(j,k) f(i,k) ) ] over n^3 entries is very slow.*)
 
(*  *)
 
(*  *)
 

	
 

	
 
(* ::Section:: *)
 
(*Visualize graphs*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"graphdata.m"];
 

	
 

	
 
ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}]
 

	
 

	
 
gs=Map[Graph[#,GraphLayout->"CircularEmbedding"]&,gsraw[[1]]];
 
gs2=Map[Graph[#,GraphLayout->Automatic]&,gsraw[[1]]];
 

	
 

	
 
Grid[Partition[gs,10],Frame->All]
 

	
 

	
 
(* ::Section:: *)
 
(*Plot triangle counts*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Data import and data merge*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/min_deg_two/graphdata_merged.m"];
 
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 

	
 

	
 
newData=Import[NotebookDirectory[]<>"graphdata_tau_multi4.m"];
 
mergedData=Import[NotebookDirectory[]<>"graphdata_merged.m"];
 
Export[NotebookDirectory[]<>"graphdata_merged_new.m",Join[mergedData,newData]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot empirical distribution of maximum degree*)
 

	
 

	
 
maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw];
 
maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}];
 
(* maxDegrees[[ tau index, n index, run index,  ntau or dmax ]] *)
 

	
 

	
 
Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[2,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[3,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot #trianges vs some degree-sequence-property*)
 

	
 

	
 
getProperty[ds1_]:=Module[{ds,n=Length[ds1],tmp=ConstantArray[0,{Length[ds1],Length[ds1]}]},
 
ds=N[ds1/Sqrt[N[Total[ds1]]]]; (* scale degrees by 1/Sqrt[total] *)
 
(* The next table contains unneeded entries, but its faster to have a square table for the sum *)
 
tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n},{j,1,n}];
 
Sum[tmp[[i,j]]*tmp[[j,k]]*tmp[[i,k]],{i,3,n},{j,2,i-1},{k,1,j-1}] (* somehow i>j>k is about 60x faster than doing i<j<k !!! *)
 
(* This sparser table is slower
 
tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n-1},{j,i+1,n}];
 
(* tmp[[a,b]] is now with  ds[[a]]*ds[[a+b]] *)
 
Sum[tmp[[i,j-i]]*tmp[[j,k-j]]*tmp[[i,k-i]],{i,1,n-2},{j,i+1,n-1},{k,j+1,n}]
 
*)
 
];
 

	
 

	
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
avgAndProp=ParallelMap[{getProperty[#[[3]]],Mean[#[[2,1;;-1]]]}&,gdata[[2,4,1;;100]]];
 

	
 

	
 
Show[ListPlot[avgAndProp,AxesOrigin->{0,0},AxesLabel->{"degree-sequence-property","<#triangles>"},AspectRatio->1],Plot[x,{x,1,1000}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot triangle count over "time" in Markov chain instances*)
 

	
 

	
 
numPlots=20;
 
selectedData=gsraw[[-numPlots;;-1]];
 
minCount=Min[Map[Min[#[[2]]]&,selectedData]];
 
maxCount=Max[Map[Max[#[[2]]]&,selectedData]];
 
maxTime=Max[Map[Length[#[[2]]]&,selectedData]];
 
skipPts=Max[1,Round[maxTime/100]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *)
 
coarseData=Map[#[[2,1;;-1;;skipPts]]&,selectedData];
 
labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData];
 
ListPlot[coarseData,Joined->True,PlotRange->{minCount,maxCount},DataRange->{0,maxTime},PlotLegends->labels]
 
(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot average #triangles vs n*)
 

	
 

	
 
averages=Map[{#[[1]],Mean[#[[2,1;;-1]]]}&,gsraw];
 
(* averages=SortBy[averages,#[[1,1]]&]; (* Sort by n *) *)
 
averagesGrouped=GatherBy[averages,{#[[1,2]]&,#[[1,1]]&}]; (* Split by n,tau *)
 
(* averagesGrouped[[ tau index, n index, run index , {ntau, avgtri} ]] *)
 
nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]];
 
taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]];
 
averagesErrorBars=Map[
 
{{#[[1,1,1]],Mean[#[[All,2]]]},
 
ErrorBar[StandardDeviation[#[[All,2]]]/Sqrt[Length[#]]]
 
}&,averagesGrouped,{2}];
 

	
 

	
 
ErrorListPlot[averagesErrorBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels]
 

	
 

	
 
ListLogLogPlot[averagesErrorBars[[All,All,1]],Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels]
 

	
 

	
 
averagesErrorBars=Map[
 
{{Log[#[[1,1,1]]],Log[Mean[#[[All,2]]]]},
 
ErrorBar[ (StandardDeviation[#[[All,2]]] )]
 
}&,averagesGrouped,{2}];
 

	
 

	
 
(* ::Subsection:: *)
 
(*Fitting the log-log-plot*)
 

	
 

	
 
loglogdata=Log[averagesErrorBars[[All,All,1]]];
 
fits=Map[Fit[#,{1,logn},logn]&,loglogdata];
 

	
 

	
 
Show[ListLogLogPlot[averagesErrorBars[[All,All,1]],PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels],Plot[fits,{logn,1,2000}]]
 

	
 

	
 
tauValues=averagesGrouped[[All,1,1,1,2]];
 
exponents=Transpose[{tauValues,fits[[All,2,1]]}];
 
Show[ListPlot[exponents,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","triangle-law-exponent"}],Plot[3/2(3-tau),{tau,2,3}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot #triangles distribution for specific (n,tau)*)
 

	
 

	
 
plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.3<tau<2.7},{{0,800},tau>2.7}},Automatic]
 
histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}];
 
nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]];
 
taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]];
 

	
 

	
 
(* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *)
 
TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}]
triangle_counting_algorithms.pdf
Show inline comments
 
file renamed from cpp/triangle_counting_algorithms.pdf to triangle_counting_algorithms.pdf
0 comments (0 inline, 0 general)