diff --git a/cpp/showgraphs.m b/cpp/showgraphs.m deleted file mode 100644 index aacc73ca46ba52499d44a4f91317698f98955a93..0000000000000000000000000000000000000000 --- a/cpp/showgraphs.m +++ /dev/null @@ -1,121 +0,0 @@ -(* ::Package:: *) - -Needs["ErrorBarPlots`"] - - -(* ::Section:: *) -(*TODO*) - - -(* ::Text:: *) -(*- Compute Sum over i"graphdata.m"]; - - -ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}] - - -gs=Map[Graph[#,GraphLayout->"CircularEmbedding"]&,gsraw[[1]]]; -gs2=Map[Graph[#,GraphLayout->Automatic]&,gsraw[[1]]]; - - -Grid[Partition[gs,10],Frame->All] - - -(* ::Section:: *) -(*Plot triangle counts*) - - -(* ::Subsection:: *) -(*Data import and data merge*) - - -gsraw=Import[NotebookDirectory[]<>"graphdata_merged.m"]; - - -newData=Import[NotebookDirectory[]<>"graphdata_tau_multi4.m"]; -mergedData=Import[NotebookDirectory[]<>"graphdata_merged.m"]; -Export[NotebookDirectory[]<>"graphdata_merged_new.m",Join[mergedData,newData]] - - -(* ::Subsection:: *) -(*Plot empirical distribution of maximum degree*) - - -maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw]; -maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}]; -(* maxDegrees[[ tau index, n index, run index, ntau or dmax ]] *) - - -Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] -Histogram[maxDegrees[[2,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] -Histogram[maxDegrees[[3,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] - - -(* ::Subsection:: *) -(*Plot triangle count over "time" in Markov chain instances*) - - -numPlots=15; -selectedData=gsraw[[-numPlots-1;;-1]]; -minCount=Min[Map[Min[#[[2]]]&,selectedData]]; -maxCount=Max[Map[Max[#[[2]]]&,selectedData]]; -maxTime=Max[Map[Length[#[[2]]]&,selectedData]]; -skipPts=Round[maxTime/100]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *) -coarseData=Map[#[[2,1;;-1;;skipPts]]&,selectedData]; -labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData]; -ListPlot[coarseData,Joined->True,PlotRange->{minCount,maxCount},DataRange->{0,maxTime},PlotLegends->labels] -(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *) - - -(* ::Subsection:: *) -(*Plot average #triangles vs n*) - - -averages=Map[{#[[1]],Mean[#[[2,1;;-1]]]}&,gsraw]; -(* Sort by n *) -averages=SortBy[averages,#[[1,1]]&]; -(* Split by n,tau *) -averagesGrouped=GatherBy[averages,{#[[1,2]]&,#[[1,1]]&}]; -(* averagesGrouped[[ tau index, n index, run index , {ntau, tri, ds} ]] *) -nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]]; -taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]]; -averagesErrorBars=Map[ -{{#[[1,1,1]],Mean[#[[All,2]]]}, -ErrorBar[StandardDeviation[#[[All,2]]]/Sqrt[Length[#]]] -}&,averagesGrouped,{2}]; - - -ErrorListPlot[averagesErrorBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels] - - -ListLogPlot[averagesErrorBars[[All,All,1]],Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels] - - -(* ::Subsection:: *) -(*Plot #triangles distribution for specific (n,tau)*) - - -histograms=Map[Histogram[#[[All,2]]]&,averagesGrouped,{2}]; -nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]]; -taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]]; - - -TableForm[histograms,TableHeadings->{taulabels,nlabels}] diff --git a/cpp/switchchain.cpp b/cpp/switchchain.cpp index d46956ceedb2e54c40cc04157f92a297309c1731..442b4b280f94c9bd1a03a978aab5b9eac04f4295 100644 --- a/cpp/switchchain.cpp +++ b/cpp/switchchain.cpp @@ -76,7 +76,7 @@ int main() { // Expect: #tri = const * n^{ something } // The goal is to find the 'something' by finding the number of triangles // for different values of n and tau - float tauValues[] = {2.2f, 2.5f, 2.8f}; + float tauValues[] = {2.2f, 2.35f, 2.5f, 2.65f, 2.8f}; Graph g; @@ -84,16 +84,16 @@ int main() { outfile << '{'; bool outputComma = false; - for (int numVertices = 4000; numVertices <= 8000; numVertices += 1000) { + for (int numVertices = 200; numVertices <= 1000; numVertices += 100) { for (float tau : tauValues) { DegreeSequence ds(numVertices); - powerlaw_distribution degDist(tau, 2, numVertices); + powerlaw_distribution degDist(tau, 1, numVertices); //std::poisson_distribution<> degDist(12); // For a single n,tau take samples over several instances of // the degree distribution - for (int degreeSample = 0; degreeSample < 200; ++degreeSample) { + for (int degreeSample = 0; degreeSample < 500; ++degreeSample) { // Generate a graph // might require multiple tries for (int i = 1; ; ++i) { @@ -118,10 +118,10 @@ int main() { << numVertices << ", tau = " << tau << ". \t" << std::flush; - constexpr int mixingTime = 20000; - constexpr int measureTime = 10000; + constexpr int mixingTime = 30000; + constexpr int measureTime = 20000; constexpr int measureSkip = - 100; // Take a sample every 100 steps + 200; // Take a sample every ... steps constexpr int measurements = (measureTime - 1) / measureSkip + 1; int movesDone = 0; diff --git a/cpp/power_law_sampling_0706.1062.pdf b/power_law_sampling_0706.1062.pdf similarity index 100% rename from cpp/power_law_sampling_0706.1062.pdf rename to power_law_sampling_0706.1062.pdf diff --git a/showgraphs.m b/showgraphs.m new file mode 100644 index 0000000000000000000000000000000000000000..48127315c852e3851039ad2d997733725205fc14 --- /dev/null +++ b/showgraphs.m @@ -0,0 +1,187 @@ +(* ::Package:: *) + +Needs["ErrorBarPlots`"] + + +(* ::Section:: *) +(*TODO*) + + +(* ::Text:: *) +(*- Use different starting point for switch chain that is closer to uniform:*) +(* Do configuration model, starting with the vertex with highest degree and keeping track of a "forbidden list" meaning dont pair something that is not allowed*) +(* (a) How close is this to uniform ? At least w.r.t. the measure of #triangles*) +(* (b) How often does this procedure work/fail. Might still be faster to do switchings from Erdos-Gallai.*) +(**) +(*- Improve runtime*) +(* (a) Don't remove/add edges from the std::vector. Simply replace them*) +(* (b) Better direct triangle counting? (I doubt it)*) +(* (b') Better triangle counting by only keeping track of CHANGES in #triangles*) +(* (c) Do not choose the three permutations with 1/3 probability: choose the "staying" one with a very low probability. Should still be a valid switch chain?*) +(**) +(*- Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*) +(**) +(*- Count #moves that result in +-k triangles (one move could create many triangles at once!)*) + + +(* ::Subsection:: *) +(*Done*) + + +(* ::Text:: *) +(*- Do a single very long run: nothing weird seems to happen with the triangle counts. Tried 10 million steps.*) +(**) +(*- Compute Sum over i"graphdata.m"]; + + +ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}] + + +gs=Map[Graph[#,GraphLayout->"CircularEmbedding"]&,gsraw[[1]]]; +gs2=Map[Graph[#,GraphLayout->Automatic]&,gsraw[[1]]]; + + +Grid[Partition[gs,10],Frame->All] + + +(* ::Section:: *) +(*Plot triangle counts*) + + +(* ::Subsection:: *) +(*Data import and data merge*) + + +gsraw=Import[NotebookDirectory[]<>"data/min_deg_two/graphdata_merged.m"]; +gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *) + + +gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}]; +(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *) + + +newData=Import[NotebookDirectory[]<>"graphdata_tau_multi4.m"]; +mergedData=Import[NotebookDirectory[]<>"graphdata_merged.m"]; +Export[NotebookDirectory[]<>"graphdata_merged_new.m",Join[mergedData,newData]] + + +(* ::Subsection:: *) +(*Plot empirical distribution of maximum degree*) + + +maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw]; +maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}]; +(* maxDegrees[[ tau index, n index, run index, ntau or dmax ]] *) + + +Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] +Histogram[maxDegrees[[2,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] +Histogram[maxDegrees[[3,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] + + +(* ::Subsection:: *) +(*Plot #trianges vs some degree-sequence-property*) + + +getProperty[ds1_]:=Module[{ds,n=Length[ds1],tmp=ConstantArray[0,{Length[ds1],Length[ds1]}]}, +ds=N[ds1/Sqrt[N[Total[ds1]]]]; (* scale degrees by 1/Sqrt[total] *) +(* The next table contains unneeded entries, but its faster to have a square table for the sum *) +tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n},{j,1,n}]; +Sum[tmp[[i,j]]*tmp[[j,k]]*tmp[[i,k]],{i,3,n},{j,2,i-1},{k,1,j-1}] (* somehow i>j>k is about 60x faster than doing i{0,0},AxesLabel->{"degree-sequence-property","<#triangles>"},AspectRatio->1],Plot[x,{x,1,1000}]] + + +(* ::Subsection:: *) +(*Plot triangle count over "time" in Markov chain instances*) + + +numPlots=20; +selectedData=gsraw[[-numPlots;;-1]]; +minCount=Min[Map[Min[#[[2]]]&,selectedData]]; +maxCount=Max[Map[Max[#[[2]]]&,selectedData]]; +maxTime=Max[Map[Length[#[[2]]]&,selectedData]]; +skipPts=Max[1,Round[maxTime/100]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *) +coarseData=Map[#[[2,1;;-1;;skipPts]]&,selectedData]; +labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData]; +ListPlot[coarseData,Joined->True,PlotRange->{minCount,maxCount},DataRange->{0,maxTime},PlotLegends->labels] +(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *) + + +(* ::Subsection:: *) +(*Plot average #triangles vs n*) + + +averages=Map[{#[[1]],Mean[#[[2,1;;-1]]]}&,gsraw]; +(* averages=SortBy[averages,#[[1,1]]&]; (* Sort by n *) *) +averagesGrouped=GatherBy[averages,{#[[1,2]]&,#[[1,1]]&}]; (* Split by n,tau *) +(* averagesGrouped[[ tau index, n index, run index , {ntau, avgtri} ]] *) +nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]]; +taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]]; +averagesErrorBars=Map[ +{{#[[1,1,1]],Mean[#[[All,2]]]}, +ErrorBar[StandardDeviation[#[[All,2]]]/Sqrt[Length[#]]] +}&,averagesGrouped,{2}]; + + +ErrorListPlot[averagesErrorBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels] + + +ListLogLogPlot[averagesErrorBars[[All,All,1]],Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels] + + +averagesErrorBars=Map[ +{{Log[#[[1,1,1]]],Log[Mean[#[[All,2]]]]}, +ErrorBar[ (StandardDeviation[#[[All,2]]] )] +}&,averagesGrouped,{2}]; + + +(* ::Subsection:: *) +(*Fitting the log-log-plot*) + + +loglogdata=Log[averagesErrorBars[[All,All,1]]]; +fits=Map[Fit[#,{1,logn},logn]&,loglogdata]; + + +Show[ListLogLogPlot[averagesErrorBars[[All,All,1]],PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels],Plot[fits,{logn,1,2000}]] + + +tauValues=averagesGrouped[[All,1,1,1,2]]; +exponents=Transpose[{tauValues,fits[[All,2,1]]}]; +Show[ListPlot[exponents,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","triangle-law-exponent"}],Plot[3/2(3-tau),{tau,2,3}]] + + +(* ::Subsection:: *) +(*Plot #triangles distribution for specific (n,tau)*) + + +plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.32.7}},Automatic] +histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}]; +nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]]; +taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]]; + + +(* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *) +TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}] diff --git a/cpp/triangle_counting_algorithms.pdf b/triangle_counting_algorithms.pdf similarity index 100% rename from cpp/triangle_counting_algorithms.pdf rename to triangle_counting_algorithms.pdf