Changeset e65372736dde (merge)
Author: Tom Bannink <tom.bannink@cwi.nl>
Date: 2017-06-02 11:50:31
Merge branch 'master' of https://scm.cwi.nl/AC/switchchain
7 files changed with 105 insertions and 45 deletions:
0 comments (0 inline, 0 general)
cpp/Makefile
 
#CXX=clang++

INCLUDES += -I.

CXXFLAGS += -std=c++14 -O3 -Wall -Wextra -Wfatal-errors -Wno-deprecated-declarations $(INCLUDES)
CXXFLAGS += -std=c++14 -O3 -Wall -Wextra -Wfatal-errors -Werror -pedantic -Wno-deprecated-declarations $(INCLUDES)

all: switchchain switchchain_exponent switchchain_initialtris

switchchain:

switchchain_exponent:

switchchain_initialtris:
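The only substantive Makefile change is the stricter warning set: -Werror and -pedantic are added to CXXFLAGS. The C++ edits below in this same changeset follow from that, since `auto` function parameters are a compiler extension in C++14 and single-brace std::array initializers can trigger missing-braces warnings. A minimal standalone illustration (not project code) of the two patterns as they now compile cleanly:

// Minimal standalone illustration of the two fixes motivated by
// -Werror -pedantic: a named template parameter instead of an 'auto'
// parameter (non-standard in C++14), and double braces for std::array
// aggregate initialization (silences missing-braces warnings on some compilers).
#include <array>
#include <cstddef>
#include <random>

template <typename RNG>
std::size_t rollDie(RNG& rng) {
    std::uniform_int_distribution<int> d(1, 6);
    return static_cast<std::size_t>(d(rng));
}

int main() {
    std::mt19937 rng(42);
    std::array<std::size_t, 3> ds = {{1, 2, rollDie(rng)}};
    return static_cast<int>(ds[0] + ds[1] + ds[2]);
}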
cpp/switchchain.cpp
 
@@ -64,42 +64,43 @@ class SwitchChain {
    std::bernoulli_distribution permutationDistribution;
};

void getTriangleDegrees(const Graph& g) {
    std::vector<std::array<std::size_t,3>> trids;
    const auto& adj = g.getAdj();
    int triangles = 0;
    for (auto& v : adj) {
        for (unsigned int i = 0; i < v.size(); ++i) {
            for (unsigned int j = i + 1; j < v.size(); ++j) {
                if (g.hasEdge({v[i], v[j]})) {
                    ++triangles;
                    std::array<std::size_t, 3> ds = {v.size(), adj[v[i]].size(),
                                                     adj[v[j]].size()};
                    std::array<std::size_t, 3> ds = {{v.size(), adj[v[i]].size(),
                                                     adj[v[j]].size()}};
                    std::sort(ds.begin(), ds.end());
                    trids.push_back(ds);
                }
            }
        }
    }
    assert(triangles % 3 == 0);
    return;
}

//
// Assumes degree sequence does NOT contain any zeroes!
//
// method2 = true  -> take highest degree and finish its pairing completely
// method2 = false -> take new highest degree after every pairing
bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, auto& rng, bool method2) {
template <typename RNG>
bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, RNG& rng, bool method2) {
    // Similar to Havel-Hakimi but instead of pairing up with the highest ones
    // that remain, simply pair up with random ones
    unsigned int n = ds.size();

    // degree, vertex index
    std::vector<std::pair<unsigned int, unsigned int>> degrees(n);
    for (unsigned int i = 0; i < n; ++i) {
        degrees[i].first = ds[i];
        degrees[i].second = i;
    }

    std::vector<decltype(degrees.begin())> available;
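For reference, a self-contained sketch of the random-pairing idea described in the comments above: like Havel-Hakimi, repeatedly take the vertex with the highest remaining degree, but pair it with randomly chosen admissible partners instead of the highest remaining ones. This is a simplified illustration only, not the project's greedyConfigurationModel: it uses a plain edge list instead of the Graph and DegreeSequence types and covers only the method2 = false behaviour (a new highest-degree vertex is picked after every single pairing).

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <random>
#include <set>
#include <utility>
#include <vector>

// Simplified sketch: connect the highest-degree vertex to randomly chosen
// partners (instead of the highest remaining ones, as Havel-Hakimi would).
// Returns true and fills 'edges' on success, false if this greedy pass gets stuck.
bool randomPairingSketch(std::vector<unsigned int> degrees, std::mt19937& rng,
                         std::vector<std::pair<unsigned int, unsigned int>>& edges) {
    const unsigned int n = static_cast<unsigned int>(degrees.size());
    std::set<std::pair<unsigned int, unsigned int>> used; // avoid parallel edges
    for (;;) {
        // Vertex with the largest remaining degree.
        unsigned int u = static_cast<unsigned int>(
            std::max_element(degrees.begin(), degrees.end()) - degrees.begin());
        if (degrees[u] == 0)
            return true; // everything paired up
        // Candidate partners: any other vertex with remaining degree > 0
        // that is not already a neighbour of u.
        std::vector<unsigned int> candidates;
        for (unsigned int v = 0; v < n; ++v)
            if (v != u && degrees[v] > 0 &&
                used.count({std::min(u, v), std::max(u, v)}) == 0)
                candidates.push_back(v);
        if (candidates.empty())
            return false; // degree left over but no admissible partner
        std::uniform_int_distribution<std::size_t> pick(0, candidates.size() - 1);
        unsigned int v = candidates[pick(rng)];
        used.insert({std::min(u, v), std::max(u, v)});
        edges.push_back({u, v});
        --degrees[u];
        --degrees[v];
    }
}

int main() {
    std::mt19937 rng(std::random_device{}());
    std::vector<unsigned int> ds = {3, 3, 2, 2, 2}; // example degree sequence
    std::vector<std::pair<unsigned int, unsigned int>> edges;
    if (randomPairingSketch(ds, rng, edges))
        for (auto& e : edges)
            std::cout << e.first << " - " << e.second << "\n";
    return 0;
}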
 
@@ -185,205 +186,228 @@ bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, auto& rng, bool meth
                uIter->first--;
                vIter->first--;
                if (vIter->first == 0)
                    degrees.erase(vIter);
            }
            //degrees.erase(std::remove_if(degrees.begin(), degrees.end(),
            //                             [](auto x) { return x.first == 0; }));
        }
    }
    return true;
}

int main() {
int main(int argc, char* argv[]) {
    // Generate a random degree sequence
    std::mt19937 rng(std::random_device{}());

    // Goal:
    // Degrees follow a power-law distribution with some parameter tau
    // Expect:  #tri = const * n^{ something }
    // The goal is to find the 'something' by finding the number of triangles
    // for different values of n and tau
    float tauValues[] = {2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f};

    Graph g;
    Graph g1;
    Graph g2;

    std::ofstream outfile("graphdata.m");
    std::ofstream outfile;

    if (argc >= 2)
        outfile.open(argv[1]);
    else
        outfile.open("graphdata.m");

    if (!outfile.is_open()) {
        std::cout << "ERROR: Could not open output file.\n";
        return 1;
    }

    outfile << '{';
    bool outputComma = false;

    for (int numVertices = 200; numVertices <= 2000; numVertices += 400) {
    for (int numVertices = 500; numVertices <= 500; numVertices += 1000) {
        for (float tau : tauValues) {

            DegreeSequence ds(numVertices);
            powerlaw_distribution degDist(tau, 1, numVertices);
            //std::poisson_distribution<> degDist(12);

            // For a single n,tau take samples over several instances of
            // the degree distribution.
            // 500 samples seems to give reasonable results
            for (int degreeSample = 0; degreeSample < 1; ++degreeSample) {
            for (int degreeSample = 0; degreeSample < 5; ++degreeSample) {
                // Generate a graph
                // might require multiple tries
                for (int i = 1; ; ++i) {
                    std::generate(ds.begin(), ds.end(),
                                  [&degDist, &rng] { return degDist(rng); });
                    // First make the sum even
                    unsigned int sum = std::accumulate(ds.begin(), ds.end(), 0);
                    if (sum % 2) {
                        continue;
                        // Can we do this: ??
                        ds.back()++;
                    }

                    if (g.createFromDegreeSequence(ds))
                        break;

                    // When 10 tries have not worked, output a warning
                    if (i % 10 == 0) {
                        std::cerr << "Warning: could not create graph from "
                                     "degree sequence. Trying again...\n";
                    }
                }

#if 0
                //
                // Test the GCM1 and GCM2 success rate
                //
                std::vector<int> greedyTriangles1;
                std::vector<int> greedyTriangles2;
                int successrate1 = 0;
                int successrate2 = 0;
                for (int i = 0; i < 100; ++i) {
                    Graph gtemp;
                    // Take new highest degree every time
                    if (greedyConfigurationModel(ds, gtemp, rng, false)) {
                        ++successrate1;
                        greedyTriangles1.push_back(gtemp.countTriangles());
                        g1 = gtemp;
                    }
                    // Finish all pairings of highest degree first
                    if (greedyConfigurationModel(ds, gtemp, rng, true)) {
                        ++successrate2;
                        greedyTriangles2.push_back(gtemp.countTriangles());
                        g2 = gtemp;
                    }
                }
#endif

                for (int i = 1; i < 5; ++i) {

                SwitchChain chain;
                if (!chain.initialize(g)) {
                    std::cerr << "Could not initialize Markov chain.\n";
                    return 1;
                }

                SwitchChain chain1, chain2;
                bool do1 = true, do2 = true;
                if (!chain1.initialize(g1)) {
                    std::cerr << "Could not initialize Markov chain.\n";
                    do1 = false;
                }
                if (!chain2.initialize(g2)) {
                    std::cerr << "Could not initialize Markov chain.\n";
                    do2 = false;
                }

                std::cout << "Running n = " << numVertices << ", tau = " << tau
                          << ". \t" << std::flush;

                //int mixingTime = (32.0f - 26.0f*(tau - 2.0f)) * numVertices; //40000;
                //constexpr int measurements = 50;
                //constexpr int measureSkip =
                //    200; // Take a sample every ... steps
                int mixingTime = 0;
                constexpr int measurements = 50000;
                constexpr int measureSkip = 1;

                int movesDone = 0;
                int movesTotal = 0;
                int movesSuccess = 0;

                int triangles[measurements];

                for (int i = 0; i < mixingTime; ++i) {
                    if (chain.doMove())
                        ++movesDone;
                    ++movesTotal;
                    if (chain.doMove()) {
                        ++movesSuccess;
                    }
                }

                std::vector<int> successRates;
                successRates.reserve(measurements * measureSkip);
                int successrate = 0;
                for (int i = 0; i < measurements; ++i) {
                    for (int j = 0; j < measureSkip; ++j)
                        if (chain.doMove())
                            ++movesDone;
                    for (int j = 0; j < measureSkip; ++j) {
                        ++movesTotal;
                        if (chain.doMove()) {
                            ++movesSuccess;
                            ++successrate;
                        }
                    }
                    triangles[i] = chain.g.countTriangles();

                    if ((i+1) % 100 == 0) {
                        successRates.push_back(successrate);
                        successrate = 0;
                    }
                }

                std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
                          << " moves succeeded ("
                          << 100.0f * float(movesDone) /
                                 float(mixingTime + measurements * measureSkip)
                          << "%).";
                //std::cout << std::endl;
                std::cout << '('
                          << 100.0f * float(movesSuccess) / float(movesTotal)
                          << "% successrate). " << std::flush;
                // std::cout << std::endl;

                if (outputComma)
                    outfile << ',' << '\n';
                outputComma = true;

                std::sort(ds.begin(), ds.end());
                outfile << '{' << '{' << numVertices << ',' << tau << '}';
                outfile << ',' << triangles;
                outfile << ',' << ds;
#if 0
                outfile << ',' << greedyTriangles1;
                outfile << ',' << greedyTriangles2;

                if (do1) {
                SwitchChain chain1, chain2;
                if (chain1.initialize(g1)) {
                    movesDone = 0;
                    SwitchChain& c = chain1;
                    for (int i = 0; i < mixingTime; ++i) {
                        if (c.doMove())
                            ++movesDone;
                    }
                    for (int i = 0; i < measurements; ++i) {
                        for (int j = 0; j < measureSkip; ++j)
                            if (c.doMove())
                                ++movesDone;
                        triangles[i] = c.g.countTriangles();
                    }

                    std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
                        << " moves succeeded ("
                        << 100.0f * float(movesDone) /
                        float(mixingTime + measurements * measureSkip)
                        << "%).";

                    outfile << ',' << triangles;
                }
                if (do2) {
                if (chain2.initialize(g2)) {
                    movesDone = 0;
                    SwitchChain& c = chain2;
                    for (int i = 0; i < mixingTime; ++i) {
                        if (c.doMove())
                            ++movesDone;
                    }
                    for (int i = 0; i < measurements; ++i) {
                        for (int j = 0; j < measureSkip; ++j)
                            if (c.doMove())
                                ++movesDone;
                        triangles[i] = c.g.countTriangles();
                    }

                    std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
                        << " moves succeeded ("
                        << 100.0f * float(movesDone) /
                        float(mixingTime + measurements * measureSkip)
                        << "%).";

                    outfile << ',' << triangles;
                }
#endif

                outfile << ',' << successRates;

                outfile << '}' << std::flush;

                std::cout << std::endl;
                }
            }
        }
    }
    outfile << '}';
    return 0;
}
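The comment block in main() states the goal: degrees follow a power-law distribution with parameter tau and the triangle count is expected to scale as #tri = const * n^{something}. The Mathematica notebooks below fit that exponent and compare it with T(tau) = 3/2 (3 - tau) (see triangle_exponent_plots.m). As a quick reference, a small standalone snippet, not part of the changeset, printing that predicted exponent for the tau values scanned here:

#include <cstdio>

// Predicted triangle-count exponent T(tau) = 3/2 * (3 - tau), the dashed
// reference curve plotted in triangle_exponent_plots.m.
int main() {
    const float tauValues[] = {2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f};
    for (float tau : tauValues)
        std::printf("tau = %.1f  ->  T(tau) = %.2f\n", tau, 1.5f * (3.0f - tau));
    return 0;
}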
cpp/switchchain_initialtris.cpp
 
@@ -59,25 +59,26 @@ class SwitchChain {
    Graph g;
    std::mt19937 mt;
    std::uniform_int_distribution<> edgeDistribution;
    //std::uniform_int_distribution<> permutationDistribution;
    std::bernoulli_distribution permutationDistribution;
};

//
// Assumes degree sequence does NOT contain any zeroes!
//
// method2 = true  -> take highest degree and finish its pairing completely
// method2 = false -> take new highest degree after every pairing
bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, auto& rng, bool method2) {
template <typename RNG>
bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, RNG& rng, bool method2) {
    // Similar to Havel-Hakimi but instead of pairing up with the highest ones
    // that remain, simply pair up with random ones
    unsigned int n = ds.size();

    // degree, vertex index
    std::vector<std::pair<unsigned int, unsigned int>> degrees(n);
    for (unsigned int i = 0; i < n; ++i) {
        degrees[i].first = ds[i];
        degrees[i].second = i;
    }

    std::vector<decltype(degrees.begin())> available;
plots/avgtris_n.pdf
new file 100644
binary diff not shown
plots/triangle_exponent.pdf
new file 100644
binary diff not shown
triangle_analysis.m
 
@@ -62,25 +62,25 @@ Needs["ErrorBarPlots`"]
(*For tau > ~2.3 the success rate of GCM2 seems to be higher than 80% for most sequences.*)
(*For tau < ~2.3 the success rate of GCM2 can drop to less than 10% for some sequences but for many sequences it is still larger than 80%.*)
(**)
(*Success rate of GCM seems to be correlated with mixing time from Erdos-Gallai: higher success rate implies lower mixing time.*)
(**)
(*Initial #triangles in both GCM1 and GCM2 is always below the average #triangles whereas Erdos-Gallai is usually many times higher than average.*)

(* ::Section:: *)
(*Data import*)

gsraw=Import[NotebookDirectory[]<>"data/graphdata_partial.m"];
gsraw=Import[NotebookDirectory[]<>"data/graphdata_temp2.m"];
(* gsraw=SortBy[gsraw,{#[[1,1]]&,#[[1,2]]&}]; (* Sort by n and then by tau. The {} forces a *stable* sort because otherwise Mathematica sorts also on triangle count and other things. *) *)

gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
(* Data format: *)
(* gdata[[ tau index, n index, run index , datatype index ]] *)
(* datatype index:
1: {n,tau}
2: #triangles time sequence
3: degree sequence
4: GCM1 starting triangle counts
5: GCM2 starting triangle counts
 
@@ -137,37 +137,65 @@ Sum[tmp[[i,j-i]]*tmp[[j,k-j]]*tmp[[i,k-i]],{i,1,n-2},{j,i+1,n-1},{k,j+1,n}]

(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
avgAndProp=ParallelMap[{getProperty[#[[3]]],Mean[#[[2,1;;-1]]]}&,gdata[[2,2,1;;100]]];

Show[ListPlot[avgAndProp,AxesOrigin->{0,0},AxesLabel->{"degree-sequence-property","<#triangles>"},AspectRatio->1],Plot[x,{x,1,1000}]]

(* ::Subsection:: *)
(*Plot triangle count over "time" in Markov chain instances*)

numPlots=20;
selectedData=gdata[[4,-1]][[-numPlots;;-1]];
numPlots=10;
selectedData=gdata[[1,-1]][[-numPlots;;-1]];
measureSkip=1;
minCount=Min[Map[Min[#[[2]]]&,selectedData]];
maxCount=Max[Map[Max[#[[2]]]&,selectedData]];
maxTime=Max[Map[Length[#[[2]]]&,selectedData]];
skipPts=Max[1,Round[maxTime/200]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *)
coarseData=Map[#[[2,1;;-1;;skipPts]]&,selectedData];
maxTime=30000;
skipPts=Max[1,Round[maxTime/400]]; (* Plotting every point is slow. Plot only once per `skipPts` timesteps *)
coarseData=Map[#[[2,1;;maxTime;;skipPts]]&,selectedData];
labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData];
ListPlot[coarseData,Joined->True,PlotRange->{minCount,maxCount},DataRange->{0,measureSkip*maxTime},PlotLegends->labels]
ListPlot[coarseData,Joined->True,PlotRange->{0*minCount,maxCount},DataRange->{0,measureSkip*maxTime},PlotLegends->labels]
(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *)

(* ::Subsection:: *)
(*Plot success rate over "time"*)

numPlots=10;
selectedData=gdata[[1,-1]][[-numPlots;;-1]];
measureSkip=100;
maxTime=Max[Map[Length[#[[4]]]&,selectedData]];
maxTime=10000;
coarseData=Map[#[[4,1;;maxTime/measureSkip]]&,selectedData];
labels=Map["{n,tau} = "<>ToString[#[[1]]]&,selectedData];
ListPlot[coarseData,Joined->True,PlotRange->{0,100},DataRange->{0,maxTime},PlotLegends->labels]
(* Map[ListPlot[#,Joined->True,PlotRange\[Rule]{minCount,maxCount},DataRange\[Rule]{0,maxTime}]&,coarseData] *)

(* ::Subsection:: *)
(*Correlation of avgsuccess rate vs other things*)

compare1=Map[{Mean[#[[4]]],Mean[#[[2]]]}&,gdata,{3}];
(* { GCM1 rate, GCM2 rate, mixing time from ErdosGallai } *)

scatterPlots=Map[ListPlot[#,PlotRange->{{0,100},All},PlotStyle->PointSize[Large]]&,compare1,{2}];
TableForm[scatterPlots,TableHeadings->{taulabels,nlabels}]

(* ::Subsection:: *)
(*Compute 'mixing time'*)

(* Compute average of last part and check when the value drops below that for the first time *)
getMixingTime[values_]:=Module[{avg},
    (* average over the last 20 percent *)
    avg=Mean[values[[-Round[Length[values]/5];;-1]]];
    FirstPosition[values,_?(#<=avg&)][[1]]
]
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
measureSkip=1;
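getMixingTime above estimates a 'mixing time' for a triangle-count trace by averaging the last 20 percent of the values and returning the first position at which the trace drops to or below that level. For readers who want the same heuristic outside Mathematica, a standalone C++ sketch (illustration only; it returns a 0-based index rather than Mathematica's 1-based FirstPosition):

#include <cstddef>
#include <iostream>
#include <numeric>
#include <vector>

// Same idea as getMixingTime: average roughly the last 20% of the trace,
// then return the first index whose value is <= that average
// (or the trace length if that never happens).
std::size_t getMixingTimeSketch(const std::vector<int>& values) {
    if (values.empty())
        return 0;
    const std::size_t n = values.size();
    const std::size_t tail = n / 5 > 0 ? n / 5 : 1;
    const double avg =
        std::accumulate(values.end() - static_cast<std::ptrdiff_t>(tail),
                        values.end(), 0.0) /
        static_cast<double>(tail);
    for (std::size_t i = 0; i < n; ++i)
        if (values[i] <= avg)
            return i;
    return n;
}

int main() {
    // Toy trace of triangle counts drifting down towards its equilibrium level.
    std::vector<int> trace = {40, 31, 25, 18, 14, 12, 11, 12, 11, 12};
    std::cout << "mixing time index: " << getMixingTimeSketch(trace) << "\n";
    return 0;
}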
triangle_exponent_plots.m
 
@@ -7,25 +7,25 @@ Needs["ErrorBarPlots`"]
(*Triangle exponent*)

(* ::Text:: *)
(*Expected number of triangles is the following powerlaw*)

(* ::DisplayFormula:: *)
(*#triangles = const\[Cross]n^(T(\[Tau]))      where    T(\[Tau])=3/2 (3-\[Tau])*)

(* When importing from exponent-only-data file *)
gsraw=Import[NotebookDirectory[]<>"data/graphdata_exponent.m"];
gsraw=Import[NotebookDirectory[]<>"data/graphdata_exponent_mix32.m"];
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
averagesGrouped=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];

(* averagesGrouped[[ tau index, n index, run index , {ntau, avgtri} ]] *)
nlabels=Map["n = "<>ToString[#]&,averagesGrouped[[1,All,1,1,1]]];
taulabels=Map["tau = "<>ToString[#]&,averagesGrouped[[All,1,1,1,2]]];
averagesErrorBars=Map[
{{#[[1,1,1]],Mean[#[[All,2]]]},
ErrorBar[StandardDeviation[#[[All,2]]]]
}&,averagesGrouped,{2}];

@@ -42,32 +42,38 @@ ListLogLogPlot[averagesErrorBars[[All,All,1]],Joined->True,PlotMarkers->Automati

loglogdata=Log[averagesErrorBars[[All,All,1]]];
fits=Map[Fit[#,{1,logn},logn]&,loglogdata];
fitsExtra=Map[LinearModelFit[#,logn,logn]&,loglogdata];

fitsExtra[[1]]["ParameterConfidenceIntervalTable"]
fitsExtra[[1]]["BestFitParameters"]
fitsExtra[[1]]["ParameterErrors"]
fitsExtra[[1]]["ParameterConfidenceIntervals"]

Show[ListLogLogPlot[averagesErrorBars[[All,All,1]],Joined->True,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels],Plot[fits,{logn,1,2000}]]
plot1=Show[ListLogLogPlot[averagesErrorBars[[All,All,1]],Joined->False,PlotMarkers->Automatic,AxesLabel->{"n","\[LeftAngleBracket]triangles\[RightAngleBracket]"},PlotLegends->taulabels],Plot[fits,{logn,1,2000}]]

Export[NotebookDirectory[]<>"plots/avgtris_n.pdf",plot1]

(* ::Subsection:: *)
(*Plot of T(\[Tau])*)

tauValues=averagesGrouped[[All,1,1,1,2]];
exponents=Transpose[{tauValues,fits[[All,2,1]]}];
Show[ListPlot[exponents,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","T(\[Tau])"},PlotRange->{{2,3},{0,1.6}}],Plot[3/2(3-tau),{tau,2,3}]]
Show[ListPlot[exponents,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","exponent"},PlotRange->{{2,3},{0,1.6}}],Plot[3/2(3-tau),{tau,2,3}]]

(* ::Subsection:: *)
(*T(\[Tau]) including error bars*)

tauValues=averagesGrouped[[All,1,1,1,2]];
exponentsErrorBars=Map[{{#[[1]],#[[2]]["BestFitParameters"][[2]]},ErrorBar[#[[2]]["ParameterConfidenceIntervals"][[2]]-#[[2]]["BestFitParameters"][[2]]]}&,
Transpose[{tauValues,fitsExtra}]];
Show[ErrorListPlot[exponentsErrorBars,Joined->True,PlotMarkers->Automatic,AxesLabel->{"tau","T(\[Tau])"},PlotRange->{{2,3},{0,1.6}}],Plot[3/2(3-tau),{tau,2,3}]]
plot2=Show[ErrorListPlot[exponentsErrorBars,Joined->True,PlotMarkers->Automatic,Frame->True,FrameLabel->{"tau","triangle exponent"},PlotRange->{{2,3},{0,1.6}},ImageSize->300],Plot[3/2(3-tau),{tau,2,3},PlotStyle->{Dashed}]]

Export[NotebookDirectory[]<>"plots/triangle_exponent.pdf",plot2]