Changeset - 530154e12814
[Not reviewed]
0 3 0
Tom Bannink - 8 years ago 2017-08-29 17:51:39
tom.bannink@cwi.nl
Add Histogram class
3 files changed with 140 insertions and 37 deletions:
0 comments (0 inline, 0 general)
cpp/exports.hpp
Show inline comments
 
#pragma once
 
#include "graph.hpp"
 
#include "histogram.hpp"
 
#include <array>
 
#include <iostream>
 
#include <vector>
 
@@ -64,3 +65,9 @@ template <typename T, std::size_t N>
 
std::ostream& operator<<(std::ostream& s, const std::array<T, N>& v) {
 
    return output_array(s, std::begin(v), std::end(v));
 
}
 

	
 
std::ostream &operator<<(std::ostream &s, const Histogram &h) {
 
    s << h.getHistogram();
 
    return s;
 
}
 

	
cpp/switchchain_canonical_mixingtime.cpp
Show inline comments
 
@@ -2,6 +2,7 @@
 
#include "graph.hpp"
 
#include "graph_powerlaw.hpp"
 
#include "graph_spectrum.hpp"
 
#include "histogram.hpp"
 
#include "switchchain.hpp"
 
#include <algorithm>
 
#include <fstream>
 
@@ -12,19 +13,21 @@
 

	
 
int main(int argc, char* argv[]) {
 
    // Simulation parameters
 
    const int numVerticesMin = 200;
 
    const int numVerticesMax = 1000;
 
    const int numVerticesStep = 200;
 
    const int numVerticesMin = 2000;
 
    const int numVerticesMax = 5000;
 
    const int numVerticesStep = 1000;
 

	
 
    float tauValues[] = {2.1f, 2.3f, 2.5f, 2.7f, 2.9f};
 

	
 
    const int sampleRuns = 100000;
 

	
 
    auto getMixingTime = [](int n, float tau) {
 
        return int(50.0f * (50.0f - 5.0f * (tau - 2.0f)) * n);
 
    };
 
    auto getMeasurements = [](int n, float tau) {
 
        (void)n;
 
        (void)tau;
 
        return 10000;
 
        return 100000;
 
    };
 
    auto getMeasureSkip = [](int n, float tau) {
 
        (void)tau;
 
@@ -48,17 +51,16 @@ int main(int argc, char* argv[]) {
 
            << " step " << numVerticesStep << std::endl;
 
    outfile << "tauValues: " << tauValues << std::endl;
 
    outfile << "Canonical degree sequence.\n";
 
    outfile << "max time: 20 n\n";
 
    outfile << "samples per time: 1000\n";
 
    outfile << "time skip: n\n";
 
    outfile << "sample runs: " << sampleRuns << std::endl;
 
    outfile << "time stamps: {0.1 n, 0.2 n, ... , 20.0 n}\n";
 
    outfile << "For uniform samples:\n";
 
    outfile << "mixingTime: 50 * (50 - 5 (tau - 2)) n\n";
 
    outfile << "measurements: 10000\n";
 
    outfile << "measurements: 100000\n";
 
    outfile << "measureSkip: 30 n\n";
 
    outfile << "data:\n";
 
    outfile << "1: {n,tau}\n";
 
    outfile << "2: { {timestamp 1, {samples}}, {timestamp 2, {samples}} }\n";
 
    outfile << "3: {uniform samples}}\n";
 
    outfile << "2: { {timestamp 1, {histogram}}, {timestamp 2, {histogram}} }\n";
 
    outfile << "3: {uniform histogram}\n";
 
    outfile << "*)" << std::endl;
 

	
 
    // Mathematica does not accept normal scientific notation
 
@@ -100,19 +102,18 @@ int main(int argc, char* argv[]) {
 
            }
 
            outfile << '}';
 
#else
 
            std::vector<std::pair<int, std::vector<int>>> samples;
 
            for (int maxTime = numVertices; maxTime <= 20 * numVertices;
 
                 maxTime += numVertices) {
 
                samples.push_back(make_pair(maxTime, std::vector<int>()));
 
            std::vector<std::pair<int, Histogram>> samples;
 
            for (int i = 1; i <= 200; i++) {
 
                samples.push_back({(i * numVertices / 10), Histogram()});
 
            }
 

	
 
            for (int sample = 0; sample < 5000; ++sample) {
 
            for (int sample = 0; sample < 100000; ++sample) {
 
                chain.initialize(g, true);
 
                int curTime = 0;
 
                for (auto &piv : samples) {
 
                    for (; curTime < piv.first; ++curTime)
 
                        chain.doMove(true);
 
                    piv.second.push_back(chain.g.getTrackedTriangles());
 
                    piv.second.add(chain.g.getTrackedTriangles());
 
                }
 
            }
 
            outfile << ',' << samples;
 
@@ -128,11 +129,11 @@ int main(int argc, char* argv[]) {
 
            chain.g.getTrackedTriangles() = chain.g.countTriangles();
 
            int measurements = getMeasurements(numVertices, tau);
 
            int measureSkip = getMeasureSkip(numVertices, tau);
 
            std::vector<int> usamples;
 
            Histogram usamples;
 
            for (int i = 0; i < measurements; ++i) {
 
                for (int j = 0; j < measureSkip; ++j)
 
                    chain.doMove(true);
 
                usamples.push_back(chain.g.getTrackedTriangles());
 
                usamples.add(chain.g.getTrackedTriangles());
 
            }
 
            outfile << ',' << usamples;
 
            outfile << '}' << std::flush;
triangle_canonical_mixingtime.m
Show inline comments
 
(* ::Package:: *)
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_canonical_mixingtime.m"];
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata_canonical_mixingtime_histogram.m"];
 
gdata=GatherBy[gsraw,{#[[1,2]]&}];
 
(* Data format: *)
 
(* gdata[[ tau index, n index, datatype index ]] *)
 
(* datatype index:
 
1: {n,tau}
 
2: { {time1, {samples1}}, {time2, {samples2}} , ... }
 
2: { {time1, {samples1}}, {time2, {samples2}} , ... }  samples can also be a HistogramList kind of list
 
3: {uniform samples}
 
*)
 

	
 

	
 
choices={1,2,3,6,8,12,14};
 
(*Histogram[histogramdata[[All,2]],Automatic,"Probability",ChartLegends\[Rule]histogramdata[[All,1]]]*)
 

	
 
makeHistogram[run_,choices_]:=Module[{gridsize,labels},
 
gridsize=Max[0.5,Mean[run[[3]]]/200];
 
labels="t = "<>ToString[#/run[[1,1]]]<>"\[CenterDot]n"&/@run[[2,choices,1]];
 
makeHistogram[run_,plotrange_,choices_]:=Module[{gridsize,labels,wd,uni},
 
uni=WeightedData[run[[3,All,1]],run[[3,All,2]]];(* Only in case of histogram-type file input *)
 
gridsize=Max[1,Mean[uni]/100];
 
labels="t = "<>ToString[NumberForm[N[#/run[[1,1]]],{3,1}]]<>" n"&/@run[[2,choices,1]];
 
wd=run[[2,choices,2]];
 
wd=Map[WeightedData[#[[All,1]],#[[All,2]]]&,wd];(* Only in case of histogram-type file input *)
 
Show[
 
SmoothHistogram[run[[2,choices,2]],gridsize,PlotRange->{All,Automatic},PlotLegends->labels,PlotLabel->"n = "<>ToString[run[[1,1]]],ImageSize->500],
 
SmoothHistogram[run[[3]],gridsize,PlotLegends->{"uniform"},PlotStyle->{Thick,Black}]]]
 
SmoothHistogram[wd,gridsize,
 
PlotRange->plotrange,
 
PlotLegends->Placed[labels,Scaled[{0.7,0.7}]],
 
Epilog->Text["n = "<>ToString[run[[1,1]]]<>", \[Tau] = "<>ToString[run[[1,2]]],Scaled[{0.15,0.95}]],
 
ImageSize->300,
 
Frame->True,
 
FrameLabel->{"triangles","probability"}
 
],
 
SmoothHistogram[uni,gridsize,PlotRange->plotrange,PlotLegends->Placed[{"uniform"},Scaled[{0.7,0.7}]],PlotStyle->{Thick,Black}]]]
 

	
 
makeHistogram[gdata[[1,-1]],choices]
 
makeHistogram[gdata[[-1,-1]],choices]
 
normalize[h_]:=Transpose[{h[[All,1]],h[[All,2]]/Total[h[[All,2]]]}];
 

	
 
makeHistogram2[run_,plotrange_,choices_]:=Module[{gridsize,labels,uni,probs},
 
uni=WeightedData[run[[3,All,1]],run[[3,All,2]]];(* Only in case of histogram-type file input *)
 
labels="t = "<>ToString[NumberForm[N[#/run[[1,1]]],{3,1}]]<>" n"&/@run[[2,choices,1]];
 
probs=run[[2,choices,2]];
 
probs=Map[normalize,probs];
 
Show[
 
ListPlot[probs,Joined->True,
 
PlotRange->plotrange,
 
PlotLegends->Placed[labels,Scaled[{0.7,0.7}]],
 
Epilog->Text["n = "<>ToString[run[[1,1]]]<>", \[Tau] = "<>ToString[run[[1,2]]],Scaled[{0.15,0.95}]],
 
ImageSize->300,
 
Frame->True,
 
FrameLabel->{"triangles","probability"}
 
],
 
ListPlot[normalize[run[[3]]],Joined->True,PlotRange->plotrange,PlotLegends->Placed[{"uniform"},Scaled[{0.7,0.7}]],PlotStyle->{Thick,Black}]]]
 

	
 
plot1=makeHistogram[gdata[[1,-1]],{{3500,7800},{0,0.005}},{10,50,100}]
 
plot2=makeHistogram2[gdata[[3,-1]],{{50,300},{0,0.04}},{10,20,30}]
 
plot3=makeHistogram2[gdata[[5,-1]],{{0,70},{0,0.18}},{5,10,20}]
 

	
 

	
 
Export[NotebookDirectory[]<>"plots/triangle_distributions_over_time.pdf",plot2]
 

	
 

	
 
(* ::Section:: *)
 
(*Total variation distance*)
 

	
 

	
 
(* Get some sort of total variation distance between to sets of samples *)
 
getTVDistance[samples1_,samples2_]:=Module[{max,probs1,probs2},
 
getTVDistance1[samples1_,samples2_]:=Module[{max,probs1,probs2,binsize},
 
max=Max[Max[samples1],Max[samples2]];
 
probs1=BinCounts[samples1,{0,max+1,1}];
 
binsize=Max[1,Floor[Mean[samples2]/500]];
 
binsize=1;
 
probs1=BinCounts[samples1,{0,max+binsize,binsize}];
 
probs1=probs1/Total[probs1];
 
probs2=BinCounts[samples2,{0,max+1,1}];
 
probs2=BinCounts[samples2,{0,max+binsize,binsize}];
 
probs2=probs2/Total[probs2];
 
Total[Abs[probs1-probs2]]/2
 
]
 

	
 

	
 
getRatios[run_]:=Module[{avg,sd,scalefactor},
 
(* Get some sort of total variation distance between to HistogramList-like objects *)
 
getTVDistance2[hist1_,hist2_]:=Module[{m,M,probs,probs1,probs2},
 
m=Min[hist1[[1,1]],hist2[[1,1]]];
 
M=Max[hist1[[-1,1]],hist2[[-1,1]]];
 
probs=ConstantArray[0,M-m+1];
 
probs1=hist1[[All,2]];
 
probs1=probs1/Total[probs1];
 
probs2=hist2[[All,2]];
 
probs2=probs2/Total[probs2];
 
probs[[1+hist1[[1,1]]-m;;1+hist1[[-1,1]]-m]]+=probs1;
 
probs[[1+hist2[[1,1]]-m;;1+hist2[[-1,1]]-m]]-=probs2;
 
Total[Abs[probs]]/2
 
]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plots*)
 

	
 

	
 
(*getRatios[run_]:=Module[{avg,sd,scalefactor},
 
avg=Mean[run[[3]]];
 
sd=StandardDeviation[run[[3]]];
 
scalefactor=1/(run[[1,1]]*Log[run[[1,1]]]^0);
 
scalefactor=1/(run[[1,1]]*Log[2,run[[1,1]]]^(1*(4-run[[1,2]])));
 
{"n,tau = "<>ToString[run[[1]]],
 
Map[{#[[1]]*scalefactor,Mean[#[[2]]]/avg}&,run[[2]]],
 
Map[{#[[1]]*scalefactor,(Mean[#[[2]]]-avg)/sd}&,run[[2]]],
 
Map[{#[[1]]*scalefactor,getTVDistance[#[[2]],run[[3]]]}&,run[[2]]]
 
}
 
]
 
ratios=Map[getRatios,gdata,{2}];
 
(*Map[ListPlot[#[[All,2]],Joined->True,PlotMarkers->Automatic,PlotRange->(1+{-0.15,+0.15}),PlotLegends->#[[All,1]],ImageSize->300]&,ratios]*)
 
(*Map[ListPlot[#[[All,3]],Joined->True,PlotMarkers->Automatic,PlotRange->(0+{-1,+1}),PlotLegends->#[[All,1]],ImageSize->300]&,ratios]*)
 
*)
 
getTVDs[run_]:=Module[{scalefactor},
 
scalefactor=1/(run[[1,1]]*Log[2,run[[1,1]]]^(1*(4-run[[1,2]])));
 
{"\[Tau] = "<>ToString[run[[1,2]]],
 
Map[{#[[1]]*scalefactor,getTVDistance2[#[[2]],run[[3]]]}&,run[[2]]]}
 
]
 
TVDs=Map[getTVDs,gdata,{2}];
 

	
 

	
 
Map[Show[ListPlot[#[[All,2]],
 
Joined->True,(*PlotMarkers->Automatic,*)
 
PlotLegends->#[[All,1]],ImageSize->500,
 
PlotRange->{0,1},Frame->True,FrameLabel->{"steps / (n (\!\(\*SubscriptBox[\(log\), \(2\)]\)n\!\(\*SuperscriptBox[\()\), \(4 - \[Tau]\)]\))","TV distance"}],
 
Plot[{0.1,0.05},{x,0,20},PlotStyle->Directive[Black,Dashed]]
 
]&,TVDs]
 

	
 

	
 
Map[ListPlot[#[[All,2]],Joined->True,PlotMarkers->Automatic,PlotRange->(1+{-0.15,+0.15}),PlotLegends->#[[All,1]],ImageSize->300]&,ratios]
 
Map[ListPlot[#[[All,3]],Joined->True,PlotMarkers->Automatic,PlotRange->(0+{-1,+1}),PlotLegends->#[[All,1]],ImageSize->300]&,ratios]
 
Map[ListPlot[#[[All,4]],Joined->True,PlotMarkers->Automatic,PlotRange->({0,1}),PlotLegends->#[[All,1]],ImageSize->300]&,ratios]
 
(* ::Subsection:: *)
 
(*Mixing time from TV distance*)
 

	
 

	
 
getMixingTime[run_]:=Module[{i=1,timestamp=0,prevtimestamp=0},
 
While[i<=Length[run[[2]]],
 
    timestamp=run[[2,i,1]];
 
    If[getTVDistance2[run[[2,i,2]],run[[3]]]<=0.1,
 
        Break[];
 
    ];
 
    prevtimestamp=timestamp;
 
    i++;
 
];
 
{run[[1,1]],run[[1,2]],prevtimestamp, timestamp} (* Range in which mixingtime lies *)
 
]
 
mixingtimes=Map[getMixingTime,gdata,{2}];
 

	
 

	
 
scaling[n_,tau_]:=n*Log[n]^(4-tau);
 
scaling[n_,tau_]:=n;
 
plotData=Map[{#[[1]],#[[4]]/scaling[#[[1]],#[[2]]]}&,mixingtimes,{2}]~Join~Map[{#[[1]],#[[3]]/scaling[#[[1]],#[[2]]]}&,mixingtimes,{2}];
 
fillings={1->{6},2->{7},3->{8},4->{9},5->{10}};
 
taulabels=Map["\[Tau] = "<>ToString[#[[1,2]]]&,mixingtimes];
 
ListPlot[plotData,
 
Joined->True,PlotMarkers->Automatic,ImageSize->500,
 
PlotRange->All,
 
PlotLegends->taulabels,
 
Filling->fillings,PlotStyle->{Automatic,Automatic,Automatic,Automatic,Automatic,None,None,None,None,None},
 
Frame->True,FrameLabel->{"n","ETMT / n"}]
0 comments (0 inline, 0 general)