Changeset - 5cec3a409ef3
[Not reviewed]
0 1 2
Tom Bannink - 8 years ago 2017-06-12 17:26:59
tombannink@gmail.com
Add triangle creation frequency plots
3 files changed with 85 insertions and 0 deletions:
0 comments (0 inline, 0 general)
triangle_analysis.m
Show inline comments
 
@@ -100,192 +100,196 @@ mergedData=Import[NotebookDirectory[]<>"data/graphdata_merged.m"];
 
Export[NotebookDirectory[]<>"data/graphdata_merged_new.m",Join[mergedData,newData]]
 

	
 

	
 
(* ::Section:: *)
 
(*Plot triangle counts*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot empirical distribution of maximum degree*)
 

	
 

	
 
maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw];
 
maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}];
 
(* maxDegrees[[ tau index, n index, run index,  ntau or dmax ]] *)
 

	
 

	
 
Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[4,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[-1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot #trianges vs some degree-sequence-property*)
 

	
 

	
 
getProperty[ds1_]:=Module[{ds,n=Length[ds1],tmp=ConstantArray[0,{Length[ds1],Length[ds1]}]},
 
ds=N[ds1/Sqrt[N[Total[ds1]]]]; (* scale degrees by 1/Sqrt[total] *)
 
(* The next table contains unneeded entries, but its faster to have a square table for the sum *)
 
tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n},{j,1,n}];
 
Sum[tmp[[i,j]]*tmp[[j,k]]*tmp[[i,k]],{i,3,n},{j,2,i-1},{k,1,j-1}] (* somehow i>j>k is about 60x faster than doing i<j<k !!! *)
 
(* This sparser table is slower
 
tmp=Table[1.-Exp[-ds[[i]]ds[[j]]],{i,1,n-1},{j,i+1,n}];
 
(* tmp[[a,b]] is now with  ds[[a]]*ds[[a+b]] *)
 
Sum[tmp[[i,j-i]]*tmp[[j,k-j]]*tmp[[i,k-i]],{i,1,n-2},{j,i+1,n-1},{k,j+1,n}]
 
*)
 
];
 

	
 

	
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
avgAndProp=ParallelMap[{getProperty[#[[3]]],Mean[#[[2,1;;-1]]]}&,gdata[[2,2,1;;100]]];
 

	
 

	
 
Show[ListPlot[avgAndProp,AxesOrigin->{0,0},AxesLabel->{"degree-sequence-property","<#triangles>"},AspectRatio->1],Plot[x,{x,1,1000}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot triangle count over "time" in Markov chain instances*)
 

	
 

	
 
numPlots=20;
 
selectedData=gdata[[1,1]][[-numPlots;;-1]];
 
measureSkip=1;
 
minCount=Min[Map[Min[#[[2]]]&,selectedData]];
 
maxCount=Max[Map[Max[#[[2]]]&,selectedData]];
 
maxTime=Max[Map[Length[#[[2]]]&,selectedData]];
 
(* maxTime=30000; *)
 
skipPts=Max[1,Round[maxTime/500]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *)
 
coarseData=Map[#[[2,1;;maxTime;;skipPts]]&,selectedData];
 
labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData];
 
ListPlot[coarseData,Joined->True,PlotRange->{0*minCount,maxCount},DataRange->{0,measureSkip*maxTime},PlotLegends->labels]
 
(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *)
 

	
 

	
 
selectedData=gdata[[1,1]];
 
measureSkip=1;
 
minCount=Min[Map[Min[#[[2]]]&,selectedData]];
 
maxCount=Max[Map[Max[#[[2]]]&,selectedData]];
 
maxTime=Max[Map[Length[#[[2]]]&,selectedData]];
 
maxTime=30000;
 
skipPts=Max[1,Round[maxTime/100]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *)
 
coarseData=Map[#[[2,1;;maxTime;;skipPts]]&,selectedData];
 
labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData];
 
plot1=ListPlot[coarseData[[1;;5]],Joined->True,PlotRange->{0*minCount,maxCount},DataRange->{0,measureSkip*maxTime}]
 
plot2=ListPlot[coarseData[[6;;10]],Joined->True,PlotRange->{0*minCount,maxCount},DataRange->{0,measureSkip*maxTime}]
 
plot3=ListPlot[coarseData[[11;;15]],Joined->True,PlotRange->{0*minCount,maxCount},DataRange->{0,measureSkip*maxTime}]
 
plot4=ListPlot[coarseData[[16;;20]],Joined->True,PlotRange->{0*minCount,maxCount},DataRange->{0,measureSkip*maxTime}]
 

	
 

	
 
(* For export *)
 
numPlots=20;
 
selectedData=gdata[[2,1]][[-numPlots;;-1]];
 
measureSkip=1;
 
minCount=Min[Map[Min[#[[2]]]&,selectedData]];
 
maxCount=Max[Map[Max[#[[2]]]&,selectedData]];
 
maxTime=Max[Map[Length[#[[2]]]&,selectedData]];
 
(* maxTime=30000; *)
 
skipPts=Max[1,Round[maxTime/5000]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *)
 
coarseData=Map[#[[2,1;;maxTime;;skipPts]]&,selectedData];
 
labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData];
 
plotTimeEvol=ListPlot[coarseData,Joined->True,PlotRange->{0*minCount,maxCount},DataRange->{0,measureSkip*maxTime},Frame->True,FrameLabel->{"timesteps","number of triangles"},ImageSize->300]
 
(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *)
 

	
 

	
 
Export[NotebookDirectory[]<>"plots/timeevol.pdf",plotTimeEvol]
 

	
 

	
 
movingAvg=Map[MovingAverage[#[[2]],2000][[1;;-1;;skipPts]]-Mean[#[[2,-20000;;-1]]]&,selectedData[[1;;-1;;5]]];
 
plotMovingAvg=ListPlot[movingAvg,Joined->True,PlotRange->All,DataRange->{0,measureSkip*maxTime},Frame->True,FrameLabel->{"timesteps","number of triangles"}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot success rate over "time"*)
 

	
 

	
 
numPlots=10;
 
selectedData=gdata[[1,-1]][[-numPlots;;-1]];
 
measureSkip=100;
 
maxTime=Max[Map[Length[#[[4]]]&,selectedData]];
 
maxTime=10000;
 
coarseData=Map[#[[4,1;;maxTime/measureSkip]]&,selectedData];
 
labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData];
 
ListPlot[coarseData,Joined->True,PlotRange->{0,100},DataRange->{0,maxTime},PlotLegends->labels]
 
(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Correlation of avgsuccess rate vs other things*)
 

	
 

	
 
compare1=Map[{Mean[#[[4]]],Mean[#[[2]]]}&,gdata,{3}];
 
(* { GCM1 rate, GCM2 rate, mixing time from ErdosGallai } *)
 

	
 

	
 
scatterPlots=Map[ListPlot[#,PlotRange->{{0,100},All},PlotStyle->PointSize[Large]]&,compare1,{2}];
 
TableForm[scatterPlots,TableHeadings->{taulabels,nlabels}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Compute 'mixing time'*)
 

	
 

	
 
(* Compute average of last part and check when the value drops below that for the first time *)
 
getMixingTime[values_]:=Module[{avg},
 
    (* average over the last 20 percent *)
 
    avg=Mean[values[[-Round[Length[values]/5];;-1]]];
 
    FirstPosition[values,_?(#<=avg&)][[1]]
 
]
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
measureSkip=1;
 
mixingTimes=Map[{#[[1,1]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}];
 
mixingTimesBars=Map[
 
    {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]](*/Sqrt[Length[#]]*)]}&
 
,mixingTimes,{2}];
 
ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","~~mixing time divided by n"},PlotLegends->taulabels]
 

	
 

	
 
(* For n fixed, look at function of tau *)
 
measureSkip=1;
 
mixingTimes=Map[{#[[1,2]],(1/#[[1,1]])measureSkip * getMixingTime[#[[2]]]}&,gdata,{3}];
 
mixingTimesBars=Map[
 
    {{#[[1,1]],Mean[#[[All,2]]]},ErrorBar[StandardDeviation[#[[All,2]]]]}&
 
,mixingTimes[[All,-1]],{1}];
 

	
 

	
 
Show[
 
ErrorListPlot[mixingTimesBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","~~mixing time divided by n, for n = 1000"},PlotRange->{{2,3},{0,30}}]
 
,Plot[(32-20(tau-2)),{tau,2,3}]]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot #triangles distribution for specific (n,tau)*)
 

	
 

	
 
plotRangeByTau[tau_]:=Piecewise[{{{0,30000},tau<2.3},{{0,4000},2.3<tau<2.7},{{0,800},tau>2.7}},Automatic]
 
histograms=Map[Histogram[#[[All,2]],PlotRange->{plotRangeByTau[#[[1,1,2]]],Automatic}]&,averagesGrouped,{2}];
 

	
 

	
 
(* TableForm[histograms,TableHeadings->{taulabels,nlabels}] *)
 
TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}]
 

	
 

	
 
(* ::Section:: *)
 
(*Greedy configuration model*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Distribution of initial #triangles for GCM1,GCM2,EG compared to average #triangles.*)
 

	
 

	
 
(* Data format: *)
 
(* gdata[[ tau index, n index, run index , datatype index ]] *)
 
(* datatype index:
 
1: {n,tau}
 
2: #triangles time sequence
 
3: degree sequence
 
4: GCM1 starting triangle counts
 
5: GCM2 starting triangle counts
 
6: GCM1 time sequence
 
7: GCM2 time sequence
 
*)
 

	
 

	
 
 (* Stats for a single run at every (n,tau) *)
 
timeWindow=Round[Length[gdata[[1,1,1,2]]]/10];
 
skipPts=Max[1,Round[timeWindow/100]];
 
getSingleStats[runs_]:=Module[{run,avg,stddev},
triangle_creation_frequency_plots.m
Show inline comments
 
new file 100644
 
(* ::Package:: *)
 

	
 
Needs["ErrorBarPlots`"]
 

	
 

	
 
(* ::Section:: *)
 
(*Data import*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_timeevol.m"];
 
(* gsraw=SortBy[gsraw,{#[[1,1]]&,#[[1,2]]&}]; (* Sort by n and then by tau. The {} forces a *stable* sort because otherwise Mathematica sorts also on triangle count and other things. *) *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* Data format: *)
 
(* gdata[[ tau index, n index, run index , datatype index ]] *)
 
(* datatype index:
 
1: {n,tau}
 
2: #triangles time sequence
 
*)
 
nlabels=Map["n = "<>ToString[#]&,gdata[[1,All,1,1,1]]];
 
taulabels=Map["tau = "<>ToString[#]&,gdata[[All,1,1,1,2]]];
 

	
 

	
 
(* ::Section:: *)
 
(*Triangle creation frequencies*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot triangle count over "time" in Markov chain instances*)
 

	
 

	
 
numPlots=20;
 
selectedData=gdata[[1,1]][[-numPlots;;-1]];
 
measureSkip=1;
 
minCount=Min[Map[Min[#[[2]]]&,selectedData]];
 
maxCount=Max[Map[Max[#[[2]]]&,selectedData]];
 
maxTime=Max[Map[Length[#[[2]]]&,selectedData]];
 
(* maxTime=30000; *)
 
skipPts=Max[1,Round[maxTime/500]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *)
 
coarseData=Map[#[[2,1;;maxTime;;skipPts]]&,selectedData];
 
labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData];
 
ListPlot[coarseData,Joined->True,PlotRange->{0*minCount,maxCount},DataRange->{0,measureSkip*maxTime},PlotLegends->labels]
 
(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *)
 

	
 

	
 
differences=Map[Differences[#[[2,25000;;-1]]]&,gdata,{3}];
 

	
 

	
 
(* Take three runs of each *)
 
histograms=Map[Histogram[#[[{1,6,11,16}]],{-20,20,1},"LogCount",ImageSize->300]&,differences,{2}];
 

	
 

	
 
TableForm[histograms,TableHeadings->{taulabels,nlabels}]
triangle_spectrum_plots.m
Show inline comments
 
new file 100644
 
(* ::Package:: *)
 

	
 
gsraw=Import[NotebookDirectory[]<>"cpp/graphdata_spectrum.m"];
 
(* gsraw=SortBy[gsraw,{#[[1,1]]&,#[[1,2]]&}]; (* Sort by n and then by tau. The {} forces a *stable* sort because otherwise Mathematica sorts also on triangle count and other things. *) *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* Data format: *)
 
(* gdata[[ tau index, n index, run index , datatype index ]] *)
 
(* datatype index:
 
1: {n,tau}
 
2: etmt
 
*)
 
tauvalues=gdata[[All,1,1,1,2]];
 
nlabels=Map["n = "<>ToString[#]&,gdata[[1,All,1,1,1]]];
 
taulabels=Map["\[Tau] = "<>ToString[#]&,gdata[[All,1,1,1,2]]];
 

	
 

	
 
ListPlot[gdata[[1,1,1,{4,6}]]]
 
ListPlot[gdata[[1,1,1,{5,7}]]]
 

	
 

	
 
Histogram[gdata[[1,1,1,{4,6}]],50]
 
Histogram[gdata[[1,1,1,{5,7}]],50]
 

	
 

	
 

	
0 comments (0 inline, 0 general)