(* ::Package:: *) Needs["ErrorBarPlots`"] (* ::Section:: *) (*TODO*) (* ::Text:: *) (*- Triangle law exponent: gather more data*) (**) (*- Why does GCM-2 start with very low #triangles*) (* Do not only consider number of standard deviations but also relative number of triangles.*) (* Look at the following: for all triangles (v1, v2, v3) consider the degrees d1 ~2.3 the success rate of GCM2 seems to be higher than 80% for most sequences.*) (*For tau < ~2.3 the success rate of GCM2 can drop to less than 10% for some sequences but for many sequences it is still larger than 80%.*) (**) (*Success rate of GCM seems to be correlated with mixing time from Erdos-Gallai: higher success rate implies lower mixing time. *) (**) (*Initial #triangles in both GCM1 and GCM2 is always below the average #triangles whereas Erdos-Gallai is usually many times higher than average.*) (* ::Section:: *) (*Data import*) gsraw=Import[NotebookDirectory[]<>"data/graphdata_partial.m"]; (* gsraw=SortBy[gsraw,{#[[1,1]]&,#[[1,2]]&}]; (* Sort by n and then by tau. The {} forces a *stable* sort because otherwise Mathematica sorts also on triangle count and other things. *) *) gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}]; (* Data format: *) (* gdata[[ tau index, n index, run index , datatype index ]] *) (* datatype index: 1: {n,tau} 2: #triangles time sequence 3: degree sequence 4: GCM1 starting triangle counts 5: GCM2 starting triangle counts 6: GCM1 time sequence 7: GCM2 time sequence *) nlabels=Map["n = "<>ToString[#]&,gdata[[1,All,1,1,1]]]; taulabels=Map["tau = "<>ToString[#]&,gdata[[All,1,1,1,2]]]; (* ::Subsection:: *) (*Merge data*) newData=Import[NotebookDirectory[]<>"data/graphdata_3.m"]; mergedData=Import[NotebookDirectory[]<>"data/graphdata_merged.m"]; Export[NotebookDirectory[]<>"data/graphdata_merged_new.m",Join[mergedData,newData]] (* ::Section:: *) (*Plot triangle counts*) (* ::Subsection:: *) (*Plot empirical distribution of maximum degree*) maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw]; maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}]; (* maxDegrees[[ tau index, n index, run index, ntau or dmax ]] *) Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] Histogram[maxDegrees[[4,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] Histogram[maxDegrees[[-1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}] (* ::Subsection:: *) (*Plot #trianges vs some degree-sequence-property*) getProperty[ds1_]:=Module[{ds,n=Length[ds1],tmp=ConstantArray[0,{Length[ds1],Length[ds1]}]}, ds=N[ds1/Sqrt[N[Total[ds1]]]]; (* scale degrees by 1/Sqrt[total] *) (* The next table contains unneeded entries, but its faster to have a square table for the sum *) tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n},{j,1,n}]; Sum[tmp[[i,j]]*tmp[[j,k]]*tmp[[i,k]],{i,3,n},{j,2,i-1},{k,1,j-1}] (* somehow i>j>k is about 60x faster than doing i{0,0},AxesLabel->{"degree-sequence-property","<#triangles>"},AspectRatio->1],Plot[x,{x,1,1000}]] (* ::Subsection:: *) (*Plot triangle count over "time" in Markov chain instances*) numPlots=20; selectedData=gdata[[4,-1]][[-numPlots;;-1]]; measureSkip=1; minCount=Min[Map[Min[#[[2]]]&,selectedData]]; maxCount=Max[Map[Max[#[[2]]]&,selectedData]]; maxTime=Max[Map[Length[#[[2]]]&,selectedData]]; skipPts=Max[1,Round[maxTime/200]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *) coarseData=Map[#[[2,1;;-1;;skipPts]]&,selectedData]; labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData]; ListPlot[coarseData,Joined->True,PlotRange->{minCount,maxCount},DataRange->{0,measureSkip*maxTime},PlotLegends->labels] (* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *) (* ::Subsection:: *) (*Compute 'mixing time'*) (* Compute average of last part and check when the value drops below that for the first time *) getMixingTime[values_]:=Module[{avg}, (* average over the last 20 percent *) avg=Mean[values[[-Round[Length[values]/5];;-1]]]; FirstPosition[values,_?(#<=avg&)][[1]] ] (* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *) measureSkip=1; mixingTimes=Map[{#[[1,1]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}]; mixingTimesBars=Map[ {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]](*/Sqrt[Length[#]]*)]}& ,mixingTimes,{2}]; ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","~~mixing time divided by n"},PlotLegends->taulabels] (* For n fixed, look at function of tau *) measureSkip=1; mixingTimes=Map[{#[[1,2]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}]; mixingTimesBars=Map[ {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]]]}& ,mixingTimes[[All,-1]],{1}]; Show[ ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","~~mixing time divided by n, for n = 1000"},PlotRange->{{2,3},{0,30}}] ,Plot[(32-20(tau-2)),{tau,2,3}]] (* ::Subsection:: *) (*Plot #triangles distribution for specific (n,tau)*) plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.32.7}},Automatic] histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}]; (* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *) TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}] (* ::Section:: *) (*Greedy configuration model*) (* ::Subsection:: *) (*Distribution of initial #triangles for GCM1,GCM2,EG compared to average #triangles.*) (* Data format: *) (* gdata[[ tau index, n index, run index , datatype index ]] *) (* datatype index: 1: {n,tau} 2: #triangles time sequence 3: degree sequence 4: GCM1 starting triangle counts 5: GCM2 starting triangle counts 6: GCM1 time sequence 7: GCM2 time sequence *) (* Stats for a single run at every (n,tau) *) timeWindow=Round[Length[gdata[[1,1,1,2]]]/10]; getSingleStats[runs_]:=Module[{run,avg,stddev}, run=runs[[1]]; (* Select some run *) avg=N[Mean[run[[2,-timeWindow;;-1]]]]; stddev=N[StandardDeviation[run[[2,timeWindow;;-1]]]]; {run[[1]], (* {n,tau} *) stddev/avg, (run[[2,1]])/avg, (* EG starting point *) Map[N[#/avg]&,run[[4]]], (* GCM1 counts *) Map[N[#/avg]&,run[[5]]] (* GCM2 counts *) } ] singleStats=Map[getSingleStats,gdata,{2}]; (* Yellow: GCM1 (take new highest everytime *) (* Blue: GCM2 (finish highest first, more similar to EG) *) histogramsSingle=Map[Histogram[{#[[4]],#[[5]]},PlotRange->{{0,5},Automatic},ImageSize->300,PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[3]],3]]<>"\[Cross]average. stddev="<>ToString[NumberForm[#[[2]],3]]<>"\[Cross]average"]&,singleStats,{2}]; TableForm[histogramsSingle,TableHeadings->{taulabels,nlabels}] (* Consider all runs *) timeWindow=Round[Length[gdata[[1,1,1,2]]]/10]; getAverage[run_]:=Module[{avg,stddev}, avg=N[Mean[run[[2,-timeWindow;;-1]]]]; { Mean[run[[4]]]/avg,(* GCM1 counts *) Mean[run[[5]]]/avg, (* GCM2 counts *) (run[[2,1]])/avg (* EG starting point *) } ] getTotalStats[runs_]:=Transpose[Map[getAverage,runs]]; totalStats=Map[getTotalStats,gdata,{2}]; (* Yellow: GCM1 (take new highest everytime *) (* Blue: GCM2 (finish\[AliasDelimiter] highest first, more similar to EG) *) histogramsTotal=Map[Histogram[#,{0.1},PlotRange->{{0,5},Automatic},ImageSize->300]&,totalStats,{2}]; TableForm[histogramsTotal,TableHeadings->{taulabels,nlabels}] (* ::Subsection:: *) (*GCM1 vs GCM2 success rates*) (* gdata[[ tau index, n index, run index , {ntau, #tris, ds, greedyTriangles} ]] *) successrates=Map[{Length[#[[4]]],Length[#[[5]]]}&,gdata,{3}]; successrates=Map[Transpose,successrates,{2}]; successratesDelta=Map[Length[#[[5]]]-Length[#[[4]]]&,gdata,{3}]; rateHistograms=Map[Histogram[#,{10},PlotRange->{{0,100},Automatic}]&,successrates,{2}]; TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}] rateHistograms=Map[Histogram[#,{10},PlotRange->{{-100,100},Automatic}]&,successratesDelta,{2}]; TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}] (*TableForm[Transpose[rateHistograms],TableHeadings->{nlabels,taulabels}]*) (* ::Subsection:: *) (*Compare success rate with mixing time*) successrates2=Map[{Length[#[[4]]],Length[#[[5]]],getMixingTime[#[[2]]]}&,gdata,{3}]; (* { GCM1 rate, GCM2 rate, mixing time from ErdosGallai } *) scatterPlots=Map[ListPlot[#[[All,{1,3}]],PlotRange->{All,All},PlotStyle->PointSize[Large]]&,successrates2,{2}]; TableForm[scatterPlots,TableHeadings->{taulabels,nlabels}]