diff --git a/triangle_analysis.m b/triangle_analysis.m new file mode 100644 index 0000000000000000000000000000000000000000..a665023c7c105737590d4831948ff15431fb23b8 --- /dev/null +++ b/triangle_analysis.m @@ -0,0 +1,263 @@ +(* ::Package:: *) + +Needs["ErrorBarPlots`"] + + +(* ::Section:: *) +(*TODO*) + + +(* ::Text:: *) +(*- Triangle law exponent: gather more data*) +(**) +(*- Why does GCM-2 start with very low #triangles*) +(* Do not only consider number of standard deviations but also relative number of triangles.*) +(* Look at the following: for all triangles (v1, v2, v3) consider the degrees d1 ~2.3 the success rate of GCM2 seems to be higher than 80% for most sequences.*) +(*For tau < ~2.3 the success rate of GCM2 can drop to less than 10% for some sequences but for many sequences it is still larger than 80%.*) +(**) +(*Success rate of GCM seems to be correlated with mixing time from Erdos-Gallai: higher success rate implies lower mixing time. *) +(**) +(*The initial #triangles in GCM2 is somewhere between 0 and 5 standard deviations below the average #triangles, whereas the #triangles in Erdos-Gallai can be as high as 100 standard deviations above the average.*) + + +(* ::Section:: *) +(*Visualize graphs*) + + +gsraw=Import[NotebookDirectory[]<>"graphdata.m"]; + + +ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}] + + +gs=Map[Graph[#,GraphLayout->"CircularEmbedding"]&,gsraw[[1]]]; +gs2=Map[Graph[#,GraphLayout->Automatic]&,gsraw[[1]]]; + + +Grid[Partition[gs,10],Frame->All] + + +(* ::Section:: *) +(*Data import*) + + +gsraw=Import[NotebookDirectory[]<>"data/graphdata_partial.m"]; +gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *) + + +gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}]; +(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *) +nlabels=Map["n = "<>ToString[#]&,gdata[[1,All,1,1,1]]]; +taulabels=Map["tau = "<>ToString[#]&,gdata[[All,1,1,1,2]]]; + + +(* ::Subsection:: *) +(*Merge data*) + + +newData=Import[NotebookDirectory[]<>"data/graphdata_3.m"]; +mergedData=Import[NotebookDirectory[]<>"data/graphdata_merged.m"]; +Export[NotebookDirectory[]<>"data/graphdata_merged_new.m",Join[mergedData,newData]] + + +(* ::Section:: *) +(*Plot triangle counts*) + + +(* ::Subsection:: *) +(*Plot empirical distribution of maximum degree*) + + +maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw]; +maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}]; +(* maxDegrees[[ tau index, n index, run index, ntau or dmax ]] *) + + +Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] +Histogram[maxDegrees[[2,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] +Histogram[maxDegrees[[3,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] + + +(* ::Subsection:: *) +(*Plot #trianges vs some degree-sequence-property*) + + +getProperty[ds1_]:=Module[{ds,n=Length[ds1],tmp=ConstantArray[0,{Length[ds1],Length[ds1]}]}, +ds=N[ds1/Sqrt[N[Total[ds1]]]]; (* scale degrees by 1/Sqrt[total] *) +(* The next table contains unneeded entries, but its faster to have a square table for the sum *) +tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n},{j,1,n}]; +Sum[tmp[[i,j]]*tmp[[j,k]]*tmp[[i,k]],{i,3,n},{j,2,i-1},{k,1,j-1}] (* somehow i>j>k is about 60x faster than doing i{0,0},AxesLabel->{"degree-sequence-property","<#triangles>"},AspectRatio->1],Plot[x,{x,1,1000}]] + + +(* ::Subsection:: *) +(*Plot triangle count over "time" in Markov chain instances*) + + +numPlots=20; +selectedData=gdata[[1,-1]][[-numPlots;;-1]]; +measureSkip=1; +minCount=Min[Map[Min[#[[2]]]&,selectedData]]; +maxCount=Max[Map[Max[#[[2]]]&,selectedData]]; +maxTime=Max[Map[Length[#[[2]]]&,selectedData]]; +skipPts=Max[1,Round[maxTime/200]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *) +coarseData=Map[#[[2,1;;-1;;skipPts]]&,selectedData]; +labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData]; +ListPlot[coarseData,Joined->True,PlotRange->{minCount,maxCount},DataRange->{0,measureSkip*maxTime},PlotLegends->labels] +(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *) + + +(* ::Subsection:: *) +(*Compute 'mixing time'*) + + +(* Compute average of last part and check when the value drops below that for the first time *) +getMixingTime[values_]:=Module[{avg}, + (* average over the last 20 percent *) + avg=Mean[values[[-Round[Length[values]/5];;-1]]]; + FirstPosition[values,_?(#<=avg&)][[1]] +] +(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *) +measureSkip=1; +mixingTimes=Map[{#[[1,1]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}]; +mixingTimesBars=Map[ + {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]](*/Sqrt[Length[#]]*)]}& +,mixingTimes,{2}]; +ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","~~mixing time divided by n"},PlotLegends->taulabels] + + +(* For n fixed, look at function of tau *) +measureSkip=1; +mixingTimes=Map[{#[[1,2]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}]; +mixingTimesBars=Map[ + {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]]]}& +,mixingTimes[[All,-1]],{1}]; + + +Show[ +ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","~~mixing time divided by n, for n = 1000"},PlotRange->{{2,3},{0,30}}] +,Plot[(32-20(tau-2)),{tau,2,3}]] + + +(* ::Subsection:: *) +(*Plot #triangles distribution for specific (n,tau)*) + + +plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.32.7}},Automatic] +histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}]; + + +(* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *) +TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}] + + +(* ::Section:: *) +(*Greedy configuration model*) + + +(* ::Subsection:: *) +(*#triangles(GCM) distribution vs #triangles(SwitchChain)*) + + +timeWindow=Round[Length[gdata[[1,1,1,2]]]/10]; +getStats[run_]:=Module[{avg,stddev}, + avg=N[Mean[run[[2,-timeWindow;;-1]]]]; + stddev=N[StandardDeviation[run[[2,timeWindow;;-1]]]]; + {run[[1]],stddev/avg,(run[[2,1]])/avg,Map[N[#/avg]&,run[[4]]]} +] +stats=Map[getStats,gdata,{3}]; + + +histograms=Map[Histogram[{#[[1,4]]},PlotRange->{{0,5},Automatic},PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[1,3]],3]]<>"\[Cross]average. stddev="<>ToString[NumberForm[#[[1,2]],3]]<>"\[Cross]average"]&,stats,{2}]; + + +TableForm[histograms,TableHeadings->{taulabels,nlabels}] + + +(* ::Subsection:: *) +(*Greedy CM success rates*) + + +(* gdata[[ tau index, n index, run index , {ntau, #tris, ds, greedyTriangles} ]] *) +successrates=Map[{Length[#[[4]]],Length[#[[5]]]}&,gdata,{3}]; +successrates=Map[Transpose,successrates,{2}]; +successratesDelta=Map[Length[#[[5]]]-Length[#[[4]]]&,gdata,{3}]; + +rateHistograms=Map[Histogram[#,{10},PlotRange->{{0,100},Automatic}]&,successrates,{2}]; +TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}] + +rateHistograms=Map[Histogram[#,{10},PlotRange->{{-100,100},Automatic}]&,successratesDelta,{2}]; +TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}] +(*TableForm[Transpose[rateHistograms],TableHeadings->{nlabels,taulabels}]*) + + +(* ::Subsection:: *) +(*Compare success rate with mixing time*) + + +successrates2=Map[{Length[#[[4]]],Length[#[[5]]],getMixingTime[#[[2]]]}&,gdata,{3}]; +(* { GCM1 rate, GCM2 rate, mixing time from ErdosGallai } *) + + +scatterPlots=Map[ListPlot[#[[All,{1,3}]],PlotRange->{All,All},PlotStyle->PointSize[Large]]&,successrates2,{2}]; +TableForm[scatterPlots,TableHeadings->{taulabels,nlabels}]