diff --git a/cpp/exports.hpp b/cpp/exports.hpp index 261f9fda3374aeb1ea5e43aabaebbefda9cedfcb..6172e831d63aff837e72b386cfd771630ac3818f 100644 --- a/cpp/exports.hpp +++ b/cpp/exports.hpp @@ -1,5 +1,6 @@ #pragma once #include "graph.hpp" +#include "histogram.hpp" #include #include #include @@ -64,3 +65,9 @@ template std::ostream& operator<<(std::ostream& s, const std::array& v) { return output_array(s, std::begin(v), std::end(v)); } + +std::ostream &operator<<(std::ostream &s, const Histogram &h) { + s << h.getHistogram(); + return s; +} + diff --git a/cpp/switchchain_canonical_mixingtime.cpp b/cpp/switchchain_canonical_mixingtime.cpp index cfd5fe08e37e6db0bba97084220234f33facc37b..77d47dd39c6ae922e1b1a0b9f80c7e50052101a0 100644 --- a/cpp/switchchain_canonical_mixingtime.cpp +++ b/cpp/switchchain_canonical_mixingtime.cpp @@ -2,6 +2,7 @@ #include "graph.hpp" #include "graph_powerlaw.hpp" #include "graph_spectrum.hpp" +#include "histogram.hpp" #include "switchchain.hpp" #include #include @@ -12,19 +13,21 @@ int main(int argc, char* argv[]) { // Simulation parameters - const int numVerticesMin = 200; - const int numVerticesMax = 1000; - const int numVerticesStep = 200; + const int numVerticesMin = 2000; + const int numVerticesMax = 5000; + const int numVerticesStep = 1000; float tauValues[] = {2.1f, 2.3f, 2.5f, 2.7f, 2.9f}; + const int sampleRuns = 100000; + auto getMixingTime = [](int n, float tau) { return int(50.0f * (50.0f - 5.0f * (tau - 2.0f)) * n); }; auto getMeasurements = [](int n, float tau) { (void)n; (void)tau; - return 10000; + return 100000; }; auto getMeasureSkip = [](int n, float tau) { (void)tau; @@ -48,17 +51,16 @@ int main(int argc, char* argv[]) { << " step " << numVerticesStep << std::endl; outfile << "tauValues: " << tauValues << std::endl; outfile << "Canonical degree sequence.\n"; - outfile << "max time: 20 n\n"; - outfile << "samples per time: 1000\n"; - outfile << "time skip: n\n"; + outfile << "sample runs: " << sampleRuns << std::endl; + outfile << "time stamps: {0.1 n, 0.2 n, ... , 20.0 n}\n"; outfile << "For uniform samples:\n"; outfile << "mixingTime: 50 * (50 - 5 (tau - 2)) n\n"; - outfile << "measurements: 10000\n"; + outfile << "measurements: 100000\n"; outfile << "measureSkip: 30 n\n"; outfile << "data:\n"; outfile << "1: {n,tau}\n"; - outfile << "2: { {timestamp 1, {samples}}, {timestamp 2, {samples}} }\n"; - outfile << "3: {uniform samples}}\n"; + outfile << "2: { {timestamp 1, {histogram}}, {timestamp 2, {histogram}} }\n"; + outfile << "3: {uniform histogram}\n"; outfile << "*)" << std::endl; // Mathematica does not accept normal scientific notation @@ -100,19 +102,18 @@ int main(int argc, char* argv[]) { } outfile << '}'; #else - std::vector>> samples; - for (int maxTime = numVertices; maxTime <= 20 * numVertices; - maxTime += numVertices) { - samples.push_back(make_pair(maxTime, std::vector())); + std::vector> samples; + for (int i = 1; i <= 200; i++) { + samples.push_back({(i * numVertices / 10), Histogram()}); } - for (int sample = 0; sample < 5000; ++sample) { + for (int sample = 0; sample < 100000; ++sample) { chain.initialize(g, true); int curTime = 0; for (auto &piv : samples) { for (; curTime < piv.first; ++curTime) chain.doMove(true); - piv.second.push_back(chain.g.getTrackedTriangles()); + piv.second.add(chain.g.getTrackedTriangles()); } } outfile << ',' << samples; @@ -128,11 +129,11 @@ int main(int argc, char* argv[]) { chain.g.getTrackedTriangles() = chain.g.countTriangles(); int measurements = getMeasurements(numVertices, tau); int measureSkip = getMeasureSkip(numVertices, tau); - std::vector usamples; + Histogram usamples; for (int i = 0; i < measurements; ++i) { for (int j = 0; j < measureSkip; ++j) chain.doMove(true); - usamples.push_back(chain.g.getTrackedTriangles()); + usamples.add(chain.g.getTrackedTriangles()); } outfile << ',' << usamples; outfile << '}' << std::flush; diff --git a/triangle_canonical_mixingtime.m b/triangle_canonical_mixingtime.m index cb8ff6ad2e8391563f88f11de4691def6141c1e9..08f52fab664f79e78f60d1b5a0117648398ddca5 100644 --- a/triangle_canonical_mixingtime.m +++ b/triangle_canonical_mixingtime.m @@ -1,57 +1,152 @@ (* ::Package:: *) -gsraw=Import[NotebookDirectory[]<>"data/graphdata_canonical_mixingtime.m"]; +gsraw=Import[NotebookDirectory[]<>"data/graphdata_canonical_mixingtime_histogram.m"]; gdata=GatherBy[gsraw,{#[[1,2]]&}]; (* Data format: *) (* gdata[[ tau index, n index, datatype index ]] *) (* datatype index: 1: {n,tau} -2: { {time1, {samples1}}, {time2, {samples2}} , ... } +2: { {time1, {samples1}}, {time2, {samples2}} , ... } samples can also be a HistogramList kind of list 3: {uniform samples} *) -choices={1,2,3,6,8,12,14}; (*Histogram[histogramdata[[All,2]],Automatic,"Probability",ChartLegends\[Rule]histogramdata[[All,1]]]*) -makeHistogram[run_,choices_]:=Module[{gridsize,labels}, -gridsize=Max[0.5,Mean[run[[3]]]/200]; -labels="t = "<>ToString[#/run[[1,1]]]<>"\[CenterDot]n"&/@run[[2,choices,1]]; +makeHistogram[run_,plotrange_,choices_]:=Module[{gridsize,labels,wd,uni}, +uni=WeightedData[run[[3,All,1]],run[[3,All,2]]];(* Only in case of histogram-type file input *) +gridsize=Max[1,Mean[uni]/100]; +labels="t = "<>ToString[NumberForm[N[#/run[[1,1]]],{3,1}]]<>" n"&/@run[[2,choices,1]]; +wd=run[[2,choices,2]]; +wd=Map[WeightedData[#[[All,1]],#[[All,2]]]&,wd];(* Only in case of histogram-type file input *) Show[ -SmoothHistogram[run[[2,choices,2]],gridsize,PlotRange->{All,Automatic},PlotLegends->labels,PlotLabel->"n = "<>ToString[run[[1,1]]],ImageSize->500], -SmoothHistogram[run[[3]],gridsize,PlotLegends->{"uniform"},PlotStyle->{Thick,Black}]]] +SmoothHistogram[wd,gridsize, +PlotRange->plotrange, +PlotLegends->Placed[labels,Scaled[{0.7,0.7}]], +Epilog->Text["n = "<>ToString[run[[1,1]]]<>", \[Tau] = "<>ToString[run[[1,2]]],Scaled[{0.15,0.95}]], +ImageSize->300, +Frame->True, +FrameLabel->{"triangles","probability"} +], +SmoothHistogram[uni,gridsize,PlotRange->plotrange,PlotLegends->Placed[{"uniform"},Scaled[{0.7,0.7}]],PlotStyle->{Thick,Black}]]] -makeHistogram[gdata[[1,-1]],choices] -makeHistogram[gdata[[-1,-1]],choices] +normalize[h_]:=Transpose[{h[[All,1]],h[[All,2]]/Total[h[[All,2]]]}]; + +makeHistogram2[run_,plotrange_,choices_]:=Module[{gridsize,labels,uni,probs}, +uni=WeightedData[run[[3,All,1]],run[[3,All,2]]];(* Only in case of histogram-type file input *) +labels="t = "<>ToString[NumberForm[N[#/run[[1,1]]],{3,1}]]<>" n"&/@run[[2,choices,1]]; +probs=run[[2,choices,2]]; +probs=Map[normalize,probs]; +Show[ +ListPlot[probs,Joined->True, +PlotRange->plotrange, +PlotLegends->Placed[labels,Scaled[{0.7,0.7}]], +Epilog->Text["n = "<>ToString[run[[1,1]]]<>", \[Tau] = "<>ToString[run[[1,2]]],Scaled[{0.15,0.95}]], +ImageSize->300, +Frame->True, +FrameLabel->{"triangles","probability"} +], +ListPlot[normalize[run[[3]]],Joined->True,PlotRange->plotrange,PlotLegends->Placed[{"uniform"},Scaled[{0.7,0.7}]],PlotStyle->{Thick,Black}]]] + +plot1=makeHistogram[gdata[[1,-1]],{{3500,7800},{0,0.005}},{10,50,100}] +plot2=makeHistogram2[gdata[[3,-1]],{{50,300},{0,0.04}},{10,20,30}] +plot3=makeHistogram2[gdata[[5,-1]],{{0,70},{0,0.18}},{5,10,20}] + + +Export[NotebookDirectory[]<>"plots/triangle_distributions_over_time.pdf",plot2] + + +(* ::Section:: *) +(*Total variation distance*) (* Get some sort of total variation distance between to sets of samples *) -getTVDistance[samples1_,samples2_]:=Module[{max,probs1,probs2}, +getTVDistance1[samples1_,samples2_]:=Module[{max,probs1,probs2,binsize}, max=Max[Max[samples1],Max[samples2]]; -probs1=BinCounts[samples1,{0,max+1,1}]; +binsize=Max[1,Floor[Mean[samples2]/500]]; +binsize=1; +probs1=BinCounts[samples1,{0,max+binsize,binsize}]; probs1=probs1/Total[probs1]; -probs2=BinCounts[samples2,{0,max+1,1}]; +probs2=BinCounts[samples2,{0,max+binsize,binsize}]; probs2=probs2/Total[probs2]; Total[Abs[probs1-probs2]]/2 ] -getRatios[run_]:=Module[{avg,sd,scalefactor}, +(* Get some sort of total variation distance between to HistogramList-like objects *) +getTVDistance2[hist1_,hist2_]:=Module[{m,M,probs,probs1,probs2}, +m=Min[hist1[[1,1]],hist2[[1,1]]]; +M=Max[hist1[[-1,1]],hist2[[-1,1]]]; +probs=ConstantArray[0,M-m+1]; +probs1=hist1[[All,2]]; +probs1=probs1/Total[probs1]; +probs2=hist2[[All,2]]; +probs2=probs2/Total[probs2]; +probs[[1+hist1[[1,1]]-m;;1+hist1[[-1,1]]-m]]+=probs1; +probs[[1+hist2[[1,1]]-m;;1+hist2[[-1,1]]-m]]-=probs2; +Total[Abs[probs]]/2 +] + + +(* ::Subsection:: *) +(*Plots*) + + +(*getRatios[run_]:=Module[{avg,sd,scalefactor}, avg=Mean[run[[3]]]; sd=StandardDeviation[run[[3]]]; -scalefactor=1/(run[[1,1]]*Log[run[[1,1]]]^0); +scalefactor=1/(run[[1,1]]*Log[2,run[[1,1]]]^(1*(4-run[[1,2]]))); {"n,tau = "<>ToString[run[[1]]], Map[{#[[1]]*scalefactor,Mean[#[[2]]]/avg}&,run[[2]]], Map[{#[[1]]*scalefactor,(Mean[#[[2]]]-avg)/sd}&,run[[2]]], -Map[{#[[1]]*scalefactor,getTVDistance[#[[2]],run[[3]]]}&,run[[2]]] } ] ratios=Map[getRatios,gdata,{2}]; +(*Map[ListPlot[#[[All,2]],Joined->True,PlotMarkers->Automatic,PlotRange->(1+{-0.15,+0.15}),PlotLegends->#[[All,1]],ImageSize->300]&,ratios]*) +(*Map[ListPlot[#[[All,3]],Joined->True,PlotMarkers->Automatic,PlotRange->(0+{-1,+1}),PlotLegends->#[[All,1]],ImageSize->300]&,ratios]*) +*) +getTVDs[run_]:=Module[{scalefactor}, +scalefactor=1/(run[[1,1]]*Log[2,run[[1,1]]]^(1*(4-run[[1,2]]))); +{"\[Tau] = "<>ToString[run[[1,2]]], +Map[{#[[1]]*scalefactor,getTVDistance2[#[[2]],run[[3]]]}&,run[[2]]]} +] +TVDs=Map[getTVDs,gdata,{2}]; + + +Map[Show[ListPlot[#[[All,2]], +Joined->True,(*PlotMarkers->Automatic,*) +PlotLegends->#[[All,1]],ImageSize->500, +PlotRange->{0,1},Frame->True,FrameLabel->{"steps / (n (\!\(\*SubscriptBox[\(log\), \(2\)]\)n\!\(\*SuperscriptBox[\()\), \(4 - \[Tau]\)]\))","TV distance"}], +Plot[{0.1,0.05},{x,0,20},PlotStyle->Directive[Black,Dashed]] +]&,TVDs] -Map[ListPlot[#[[All,2]],Joined->True,PlotMarkers->Automatic,PlotRange->(1+{-0.15,+0.15}),PlotLegends->#[[All,1]],ImageSize->300]&,ratios] -Map[ListPlot[#[[All,3]],Joined->True,PlotMarkers->Automatic,PlotRange->(0+{-1,+1}),PlotLegends->#[[All,1]],ImageSize->300]&,ratios] -Map[ListPlot[#[[All,4]],Joined->True,PlotMarkers->Automatic,PlotRange->({0,1}),PlotLegends->#[[All,1]],ImageSize->300]&,ratios] +(* ::Subsection:: *) +(*Mixing time from TV distance*) +getMixingTime[run_]:=Module[{i=1,timestamp=0,prevtimestamp=0}, +While[i<=Length[run[[2]]], + timestamp=run[[2,i,1]]; + If[getTVDistance2[run[[2,i,2]],run[[3]]]<=0.1, + Break[]; + ]; + prevtimestamp=timestamp; + i++; +]; +{run[[1,1]],run[[1,2]],prevtimestamp, timestamp} (* Range in which mixingtime lies *) +] +mixingtimes=Map[getMixingTime,gdata,{2}]; + +scaling[n_,tau_]:=n*Log[n]^(4-tau); +scaling[n_,tau_]:=n; +plotData=Map[{#[[1]],#[[4]]/scaling[#[[1]],#[[2]]]}&,mixingtimes,{2}]~Join~Map[{#[[1]],#[[3]]/scaling[#[[1]],#[[2]]]}&,mixingtimes,{2}]; +fillings={1->{6},2->{7},3->{8},4->{9},5->{10}}; +taulabels=Map["\[Tau] = "<>ToString[#[[1,2]]]&,mixingtimes]; +ListPlot[plotData, +Joined->True,PlotMarkers->Automatic,ImageSize->500, +PlotRange->All, +PlotLegends->taulabels, +Filling->fillings,PlotStyle->{Automatic,Automatic,Automatic,Automatic,Automatic,None,None,None,None,None}, +Frame->True,FrameLabel->{"n","ETMT / n"}]