Changeset - 9c99596eadb5
[Not reviewed]
0 1 0
Tom Bannink - 8 years ago 2017-05-15 16:54:24
tombannink@gmail.com
Add more plots of initial triangles in GCM
1 file changed with 70 insertions and 32 deletions:
0 comments (0 inline, 0 general)
triangle_analysis.m
Show inline comments
 
@@ -12,7 +12,7 @@ Needs["ErrorBarPlots`"]
 
(**)
 
(*- Why does GCM-2 start with very low #triangles*)
 
(*  Do not only consider number of standard deviations but also relative number of triangles.*)
 
(*  Look at the following: for all triangles (v1, v2, v3) consider the degrees d1<d2<d3) and make a scatter plot of di vs dj. Make such a scatter plot for the initial GCM-2 graph and for a mixed graph and see how it changes.*)
 
(*  Look at the following: for all triangles (v1, v2, v3) consider the degrees d1<d2<d3 and make a scatter plot of di vs dj. Make such a scatter plot for the initial GCM-2 graph and for a mixed graph and see how it changes.*)
 
(**)
 
(*- GCM success rates: for the degree sequences where it "always fails", look at the degree sequence. Does it have a low/high number of degree 1 nodes? Is the maximum degree very low/high?*)
 
(**)
 
@@ -20,7 +20,6 @@ Needs["ErrorBarPlots`"]
 
(*   (a) How close to uniform ? At least w.r.t. the measure of #triangles*)
 
(*   (b) How often does this procedure work/fail. Might still be faster to do switchings from Erdos-Gallai.*)
 
(*   (d) Time evolution for GCM on top of Erdos-Gallai time evolution.*)
 
(* TODO: Investigate #triangles not only in number of standard deviations but also percentage above/below average.*)
 
(**)
 
(*- Count #moves that result in +-k triangles (one move could create many triangles at once!)*)
 
(**)
 
@@ -65,24 +64,7 @@ Needs["ErrorBarPlots`"]
 
(**)
 
(*Success rate of GCM seems to be correlated with mixing time from Erdos-Gallai: higher success rate implies lower mixing time.  *)
 
(**)
 
(*The initial #triangles in GCM2 is somewhere between 0 and 5 standard deviations below the average #triangles, whereas the #triangles in Erdos-Gallai can be as high as 100 standard deviations above the average.*)
 

	
 

	
 
(* ::Section:: *)
 
(*Visualize graphs*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"graphdata.m"];
 

	
 

	
 
ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}]
 

	
 

	
 
gs=Map[Graph[#,GraphLayout->"CircularEmbedding"]&,gsraw[[1]]];
 
gs2=Map[Graph[#,GraphLayout->Automatic]&,gsraw[[1]]];
 

	
 

	
 
Grid[Partition[gs,10],Frame->All]
 
(*Initial #triangles in both GCM1 and GCM2 is always below the average #triangles whereas Erdos-Gallai is usually many times higher than average.*)
 

	
 

	
 
(* ::Section:: *)
 
@@ -90,11 +72,21 @@ Grid[Partition[gs,10],Frame->All]
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_partial.m"];
 
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
 
(* gsraw=SortBy[gsraw,{#[[1,1]]&,#[[1,2]]&}]; (* Sort by n and then by tau. The {} forces a *stable* sort because otherwise Mathematica sorts also on triangle count and other things. *) *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
(* Data format: *)
 
(* gdata[[ tau index, n index, run index , datatype index ]] *)
 
(* datatype index:
 
1: {n,tau}
 
2: #triangles time sequence
 
3: degree sequence
 
4: GCM1 starting triangle counts
 
5: GCM2 starting triangle counts
 
6: GCM1 time sequence
 
7: GCM2 time sequence
 
*)
 
nlabels=Map["n = "<>ToString[#]&,gdata[[1,All,1,1,1]]];
 
taulabels=Map["tau = "<>ToString[#]&,gdata[[All,1,1,1,2]]];
 

	
 
@@ -122,8 +114,8 @@ maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}];
 

	
 

	
 
Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[2,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[3,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[4,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[-1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 

	
 

	
 
(* ::Subsection:: *)
 
@@ -155,7 +147,7 @@ Show[ListPlot[avgAndProp,AxesOrigin->{0,0},AxesLabel->{"degree-sequence-property
 

	
 

	
 
numPlots=20;
 
selectedData=gdata[[1,-1]][[-numPlots;;-1]];
 
selectedData=gdata[[4,-1]][[-numPlots;;-1]];
 
measureSkip=1;
 
minCount=Min[Map[Min[#[[2]]]&,selectedData]];
 
maxCount=Max[Map[Max[#[[2]]]&,selectedData]];
 
@@ -216,26 +208,72 @@ TableForm[Transpose[histograms],TableHeadings->{nlabels,taulabels}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*#triangles(GCM) distribution vs #triangles(SwitchChain)*)
 
(*Distribution of initial #triangles for GCM1,GCM2,EG compared to average #triangles.*)
 

	
 

	
 
(* Data format: *)
 
(* gdata[[ tau index, n index, run index , datatype index ]] *)
 
(* datatype index:
 
1: {n,tau}
 
2: #triangles time sequence
 
3: degree sequence
 
4: GCM1 starting triangle counts
 
5: GCM2 starting triangle counts
 
6: GCM1 time sequence
 
7: GCM2 time sequence
 
*)
 

	
 

	
 
 (* Stats for a single run at every (n,tau) *)
 
timeWindow=Round[Length[gdata[[1,1,1,2]]]/10];
 
getStats[run_]:=Module[{avg,stddev},
 
getSingleStats[runs_]:=Module[{run,avg,stddev},
 
    run=runs[[1]]; (* Select some run *)
 
    avg=N[Mean[run[[2,-timeWindow;;-1]]]];
 
    stddev=N[StandardDeviation[run[[2,timeWindow;;-1]]]];
 
    {run[[1]],stddev/avg,(run[[2,1]])/avg,Map[N[#/avg]&,run[[4]]]}
 
    {run[[1]], (* {n,tau} *)
 
    stddev/avg,
 
    (run[[2,1]])/avg, (* EG starting point *)
 
    Map[N[#/avg]&,run[[4]]],  (* GCM1 counts *)
 
    Map[N[#/avg]&,run[[5]]] (* GCM2 counts *)
 
    }
 
]
 
singleStats=Map[getSingleStats,gdata,{2}];
 

	
 

	
 
(* Yellow: GCM1 (take new highest everytime *)
 
(* Blue: GCM2 (finish highest first, more similar to EG) *)
 
histogramsSingle=Map[Histogram[{#[[4]],#[[5]]},PlotRange->{{0,5},Automatic},ImageSize->300,PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[3]],3]]<>"\[Cross]average. stddev="<>ToString[NumberForm[#[[2]],3]]<>"\[Cross]average"]&,singleStats,{2}];
 

	
 

	
 
TableForm[histogramsSingle,TableHeadings->{taulabels,nlabels}]
 

	
 

	
 
 (* Consider all runs *)
 
timeWindow=Round[Length[gdata[[1,1,1,2]]]/10];
 
getAverage[run_]:=Module[{avg,stddev},
 
    avg=N[Mean[run[[2,-timeWindow;;-1]]]];
 
    {
 
    Mean[run[[4]]]/avg,(* GCM1 counts *)
 
    Mean[run[[5]]]/avg, (* GCM2 counts *)
 
    (run[[2,1]])/avg (* EG starting point *)
 
    }
 
]
 
stats=Map[getStats,gdata,{3}];
 
getTotalStats[runs_]:=Transpose[Map[getAverage,runs]];
 
totalStats=Map[getTotalStats,gdata,{2}];
 

	
 

	
 
(* Yellow: GCM1 (take new highest everytime *)
 
(* Blue: GCM2 (finish\[AliasDelimiter] highest first, more similar to EG) *)
 
histogramsTotal=Map[Histogram[#,{0.1},PlotRange->{{0,5},Automatic},ImageSize->300]&,totalStats,{2}];
 

	
 

	
 
histograms=Map[Histogram[{#[[1,4]]},PlotRange->{{0,5},Automatic},PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[1,3]],3]]<>"\[Cross]average. stddev="<>ToString[NumberForm[#[[1,2]],3]]<>"\[Cross]average"]&,stats,{2}];
 
TableForm[histogramsTotal,TableHeadings->{taulabels,nlabels}]
 

	
 

	
 
TableForm[histograms,TableHeadings->{taulabels,nlabels}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Greedy CM success rates*)
 
(*GCM1 vs GCM2 success rates*)
 

	
 

	
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds, greedyTriangles} ]] *)
0 comments (0 inline, 0 general)