Changeset - 5b1472bd5ca9
[Not reviewed]
Merge
0 2 0
Tom Bannink - 8 years ago 2017-05-08 12:11:51
tombannink@gmail.com
Merge branch 'master' of https://scm.cwi.nl/AC/switchchain
2 files changed with 139 insertions and 29 deletions:
0 comments (0 inline, 0 general)
cpp/switchchain.cpp
Show inline comments
 
@@ -69,205 +69,294 @@ class SwitchChain {
 
bool greedyConfigurationModel(DegreeSequence& ds, Graph& g, auto& rng, bool method2) {
 
    // Similar to Havel-Hakimi but instead of pairing up with the highest ones
 
    // that remain, simply pair up with random ones
 
    unsigned int n = ds.size();
 

	
 
    // degree, vertex index
 
    std::vector<std::pair<unsigned int, unsigned int>> degrees(n);
 
    for (unsigned int i = 0; i < n; ++i) {
 
        degrees[i].first = ds[i];
 
        degrees[i].second = i;
 
    }
 

	
 
    std::vector<decltype(degrees.begin())> available;
 
    available.reserve(n);
 

	
 
    // Clear the graph
 
    g.reset(n);
 

	
 
    while (!degrees.empty()) {
 
        std::shuffle(degrees.begin(), degrees.end(), rng);
 
        // Get the highest degree:
 
        // If there are multiple highest ones, we pick a random one,
 
        // ensured by the shuffle.
 
        // The shuffle is needed anyway for the remaining part
 
        unsigned int dmax = 0;
 
        auto uIter = degrees.begin();
 
        for (auto iter = degrees.begin(); iter != degrees.end(); ++iter) {
 
            if (iter->first >= dmax) {
 
                dmax = iter->first;
 
                uIter = iter;
 
            }
 
        }
 

	
 
        if (dmax > degrees.size() - 1)
 
            return false;
 

	
 
        if (dmax == 0) {
 
            std::cerr << "ERROR 1 in GCM.\n";
 
        }
 

	
 
        unsigned int u = uIter->second;
 

	
 
        if (method2) {
 
            // Take the highest degree out of the vector
 
            degrees.erase(uIter);
 

	
 
            // Now assign randomly to the remaining vertices
 
            // Since its shuffled, we can pick the first 'dmax' ones
 
            auto vIter = degrees.begin();
 
            while (dmax--) {
 
                if (vIter->first == 0)
 
                    std::cerr << "ERROR in GCM2.\n";
 
                if (!g.addEdge({u, vIter->second}))
 
                    std::cerr << "ERROR. Could not add edge in GCM2.\n";
 
                vIter->first--;
 
                if (vIter->first == 0)
 
                    vIter = degrees.erase(vIter);
 
                else
 
                    vIter++;
 
            }
 
        } else {
 
            // Pair with a random vertex that is not u itself
 
            std::uniform_int_distribution<> distr(0, degrees.size() - 2);
 
            auto vIter = degrees.begin() + distr(rng);
 
            if (vIter == uIter)
 
                vIter++;
 
            // Pair with a random vertex that is not u itself and to which
 
            // u has not paired yet!
 
            available.clear();
 
            for (auto iter = degrees.begin(); iter != degrees.end(); ++iter) {
 
                if (iter->second != u && !g.hasEdge({u, iter->second}))
 
                    available.push_back(iter);
 
            }
 
            if (available.empty())
 
                return false;
 
            std::uniform_int_distribution<> distr(0, available.size() - 1);
 
            auto vIter = available[distr(rng)];
 
            // pair u to v
 
            if (vIter->first == 0)
 
                std::cerr << "ERROR in GCM1.\n";
 
            if (!g.addEdge({uIter->second, vIter->second}))
 
                std::cerr << "ERROR 2 in GCM1.\n";
 
            if (!g.addEdge({u, vIter->second}))
 
                std::cerr << "ERROR. Could not add edge in GCM1.\n";
 
            // Purge anything with degree zero
 
            // Be careful with invalidating the other iterator!
 
            // Degree of u is always greater or equal to the degree of v
 
            if (dmax == 1) {
 
                // Remove both
 
                // Erasure invalidates all iterators AFTER the erased one
 
                if (vIter > uIter) {
 
                    degrees.erase(vIter);
 
                    degrees.erase(uIter);
 
                } else {
 
                    degrees.erase(uIter);
 
                    degrees.erase(vIter);
 
                }
 
            } else {
 
                // Remove only v if it reaches zero
 
                uIter->first--;
 
                vIter->first--;
 
                if (vIter->first == 0)
 
                    degrees.erase(vIter);
 
            }
 
            //degrees.erase(std::remove_if(degrees.begin(), degrees.end(),
 
            //                             [](auto x) { return x.first == 0; }));
 
        }
 
    }
 
    return true;
 
}
 

	
 
int main() {
 
    // Generate a random degree sequence
 
    std::mt19937 rng(std::random_device{}());
 

	
 
    // Goal:
 
    // Degrees follow a power-law distribution with some parameter tau
 
    // Expect:  #tri = const * n^{ something }
 
    // The goal is to find the 'something' by finding the number of triangles
 
    // for different values of n and tau
 
    float tauValues[] = {2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f};
 

	
 
    Graph g;
 
    Graph g1;
 
    Graph g2;
 

	
 
    std::ofstream outfile("graphdata.m");
 
    outfile << '{';
 
    bool outputComma = false;
 

	
 
    for (int numVertices = 200; numVertices <= 1000; numVertices += 100) {
 
        for (float tau : tauValues) {
 

	
 
            DegreeSequence ds(numVertices);
 
            powerlaw_distribution degDist(tau, 1, numVertices);
 
            //std::poisson_distribution<> degDist(12);
 

	
 
            // For a single n,tau take samples over several instances of
 
            // the degree distribution
 
            for (int degreeSample = 0; degreeSample < 500; ++degreeSample) {
 
            // the degree distribution.
 
            // 500 samples seems to give reasonable results
 
            for (int degreeSample = 0; degreeSample < 200; ++degreeSample) {
 
                // Generate a graph
 
                // might require multiple tries
 
                for (int i = 1; ; ++i) {
 
                    std::generate(ds.begin(), ds.end(),
 
                                  [&degDist, &rng] { return degDist(rng); });
 
                    // First make the sum even
 
                    unsigned int sum = std::accumulate(ds.begin(), ds.end(), 0);
 
                    if (sum % 2) {
 
                        continue;
 
                        // Can we do this: ??
 
                        ds.back()++;
 
                    }
 

	
 
                    if (g.createFromDegreeSequence(ds))
 
                        break;
 

	
 
                    // When 10 tries have not worked, output a warning
 
                    if (i % 10 == 0) {
 
                        std::cerr << "Warning: could not create graph from "
 
                                     "degree sequence. Trying again...\n";
 
                    }
 
                }
 

	
 
                //
 
                // Test the greedy configuration model success rate
 
                // Test the GCM1 and GCM2 success rate
 
                //
 
                std::vector<int> greedyTriangles;
 
                int successrate = 0;
 
                std::vector<int> greedyTriangles1;
 
                std::vector<int> greedyTriangles2;
 
                int successrate1 = 0;
 
                int successrate2 = 0;
 
                for (int i = 0; i < 100; ++i) {
 
                    if (greedyConfigurationModel(ds, g2, rng, true)) {
 
                        ++successrate;
 
                        greedyTriangles.push_back(g2.countTriangles());
 
                    Graph gtemp;
 
                    if (greedyConfigurationModel(ds, gtemp, rng, false)) {
 
                        ++successrate1;
 
                        greedyTriangles1.push_back(gtemp.countTriangles());
 
                        g1 = gtemp;
 
                    }
 
                    if (greedyConfigurationModel(ds, gtemp, rng, true)) {
 
                        ++successrate2;
 
                        greedyTriangles2.push_back(gtemp.countTriangles());
 
                        g2 = gtemp;
 
                    }
 
                }
 

	
 
                SwitchChain chain;
 
                if (!chain.initialize(g)) {
 
                    std::cerr << "Could not initialize Markov chain.\n";
 
                    return 1;
 
                }
 

	
 
                SwitchChain chain1, chain2;
 
                bool do1 = true, do2 = true;
 
                if (!chain1.initialize(g1)) {
 
                    std::cerr << "Could not initialize Markov chain.\n";
 
                    do1 = false;
 
                }
 
                if (!chain2.initialize(g2)) {
 
                    std::cerr << "Could not initialize Markov chain.\n";
 
                    do2 = false;
 
                }
 

	
 
                std::cout << "Running n = " << numVertices << ", tau = " << tau
 
                          << ". \t" << std::flush;
 

	
 
                //int mixingTime = (32.0f - 26.0f*(tau - 2.0f)) * numVertices; //40000;
 
                //constexpr int measurements = 50;
 
                //constexpr int measureSkip =
 
                //    200; // Take a sample every ... steps
 
                int mixingTime = 0;
 
                constexpr int measurements = 10000;
 
                constexpr int measurements = 5000;
 
                constexpr int measureSkip = 1;
 

	
 

	
 
                int movesDone = 0;
 

	
 
                int triangles[measurements];
 

	
 
                for (int i = 0; i < mixingTime; ++i) {
 
                    if (chain.doMove())
 
                        ++movesDone;
 
                }
 
                for (int i = 0; i < measurements; ++i) {
 
                    for (int j = 0; j < measureSkip; ++j)
 
                        if (chain.doMove())
 
                            ++movesDone;
 
                    triangles[i] = chain.g.countTriangles();
 
                }
 

	
 
                std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
 
                          << " moves succeeded ("
 
                          << 100.0f * float(movesDone) /
 
                                 float(mixingTime + measurements * measureSkip)
 
                          << "%)." << std::endl;
 
                          << "%).";
 
                //std::cout << std::endl;
 

	
 
                if (outputComma)
 
                    outfile << ',' << '\n';
 
                outputComma = true;
 

	
 
                std::sort(ds.begin(), ds.end());
 
                outfile << '{' << '{' << numVertices << ',' << tau << '}';
 
                outfile << ',' << triangles << ',' << ds;
 
                outfile << ',' << greedyTriangles << '}' << std::flush;
 
                outfile << ',' << triangles;
 
                outfile << ',' << ds;
 
                outfile << ',' << greedyTriangles1;
 
                outfile << ',' << greedyTriangles2;
 

	
 
                if (do1) {
 
                    movesDone = 0;
 
                    SwitchChain& c = chain1;
 
                    for (int i = 0; i < mixingTime; ++i) {
 
                        if (c.doMove())
 
                            ++movesDone;
 
                    }
 
                    for (int i = 0; i < measurements; ++i) {
 
                        for (int j = 0; j < measureSkip; ++j)
 
                            if (c.doMove())
 
                                ++movesDone;
 
                        triangles[i] = c.g.countTriangles();
 
                    }
 

	
 
                    std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
 
                        << " moves succeeded ("
 
                        << 100.0f * float(movesDone) /
 
                        float(mixingTime + measurements * measureSkip)
 
                        << "%).";
 

	
 
                    outfile << ',' << triangles;
 
                }
 
                if (do2) {
 
                    movesDone = 0;
 
                    SwitchChain& c = chain2;
 
                    for (int i = 0; i < mixingTime; ++i) {
 
                        if (c.doMove())
 
                            ++movesDone;
 
                    }
 
                    for (int i = 0; i < measurements; ++i) {
 
                        for (int j = 0; j < measureSkip; ++j)
 
                            if (c.doMove())
 
                                ++movesDone;
 
                        triangles[i] = c.g.countTriangles();
 
                    }
 

	
 
                    std::cout << movesDone << '/' << mixingTime + measurements * measureSkip
 
                        << " moves succeeded ("
 
                        << 100.0f * float(movesDone) /
 
                        float(mixingTime + measurements * measureSkip)
 
                        << "%).";
 

	
 
                    outfile << ',' << triangles;
 
                }
 

	
 
                outfile << '}' << std::flush;
 

	
 
                std::cout << std::endl;
 
            }
 
        }
 
    }
 
    outfile << '}';
 
    return 0;
 
}
showgraphs.m
Show inline comments
 
@@ -13,95 +13,108 @@ Needs["ErrorBarPlots`"]
 
(*- Why does GCM-2 start with very low #triangles*)
 
(*  Do not only consider number of standard deviations but also relative number of triangles.*)
 
(*  Look at the following: for all triangles (v1, v2, v3) consider the degrees d1<d2<d3) and make a scatter plot of di vs dj. Make such a scatter plot for the initial GCM-2 graph and for a mixed graph and see how it changes.*)
 
(**)
 
(*- GCM success rates: for the degree sequences where it "always fails", look at the degree sequence. Does it have a low/high number of degree 1 nodes? Is the maximum degree very low/high?*)
 
(**)
 
(*- Use different starting point for switch chain that is closer to uniform:*)
 
(*   Do configuration model, starting with the vertex with highest degree and keeping track of a "forbidden list" meaning dont pair something that is not allowed*)
 
(*   (a) How close is this to uniform ? At least w.r.t. the measure of #triangles*)
 
(*   (b) How often does this procedure work/fail. Might still be faster to do switchings from Erdos-Gallai.*)
 
(*   (c) Compare two greedy ways: (c1) first take highest and finish all its pairings  (c2) take new highest after every single pairing*)
 
(*   (d) Time evolution for GCM on top of Erdos-Gallai time evolution.*)
 
(* The initial #triangles in GCM2 is somewhere between 0 and 5 standard deviations below the average #triangles, whereas the #triangles in Erdos-Gallai can be as high as 100 standard deviations above the average.*)
 
(* TODO: Not only compare number of standard deviations but also percentage above/below average.*)
 
(**)
 
(*- Count #moves that result in +-k triangles (one move could create many triangles at once!)*)
 
(**)
 
(*- For a graph snapshot: for all V shapes, compute the number of ways to make it into a triangle:*)
 
(*  Let u1,u2 be the endpoints of the V. For all neighbors v1 of u1 and v2 of u2, see of v1,v2 has an edge. Meaning, if we were to select randomly an u1 edge and an u2 edge, then whats the probability that it can be used to switch the V into a triangle.*)
 
(**)
 
(*- Improve runtime*)
 
(*   (a) Better direct triangle counting? (I doubt it)*)
 
(*   (b) Better triangle counting by only keeping track of CHANGES in #triangles*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Done*)
 

	
 

	
 
(* ::Text:: *)
 
(*- Do a single very long run: nothing weird seems to happen with the triangle counts. Tried 10 million steps.*)
 
(**)
 
(*- Compute  Sum over i<j<k  of  (1-Exp[- d_i d_j / (2E)]) * (1 - Exp[-d_j d_k / (2E)]) * (1 - Exp[-d_k d_i / (2E)]) .*)
 
(*  Computing the f(i,j) = (1-Exp[..]) terms is fine, but then computing Sum[ f(i,j) f(j,k) f(i,k) ) ] over n^3 entries is very slow.*)
 
(*  *)
 
(*  - Improve runtime*)
 
(*- Improve runtime*)
 
(*   (a) Don't remove/add edges from the std::vector. Simply replace them. Done, is way faster for large n.*)
 
(*   (b) Do not choose the three permutations with 1/3 probability: choose the "staying" one with zero probability. Should still be a valid switch chain?*)
 
(*   *)
 
(*   - Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*)
 
(*     Done. Seems to be something like  (1/2)(32-26(tau-2))n  so we run it for that time without the factor (1/2).*)
 
(*  *)
 
(*- Experimental mixing time as function of n. At (n,tau)=(1000,2.5) it seems to be between 10.000 and 20.000 steps.*)
 
(*   Done. Seems to be something like  (1/2)(32-26(tau-2))n  so we run it for that time without the factor (1/2).*)
 
(**)
 
(*- GCM1: Greedy Configuration Model 1: take highest degree and completely do all its pairings (at random)*)
 
(*- GCM2: Greedy Configuration Model 2: take highest degree and do a single pairing, then take new highest degree. So this matters mostly if there are multiple high degree nodes*)
 
(*The success rates, conditioned on the degree sequence being graphical, is almost always higher using GCM2. For certain degree sequences the success rate of GCM2 can be 0.9 higher than that of GCM1. (i.e. amost always works vs almost always fails).*)
 
(*For tau > ~2.3 the success rate of GCM2 seems to be higher than 80% for most sequences.*)
 
(*For tau < ~2.3 the success rate of GCM2 can drop to less than 10% for some sequences but for many sequences it is still larger than 80%.*)
 
(**)
 
(**)
 
(*  *)
 

	
 

	
 
(* ::Section:: *)
 
(*Visualize graphs*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"graphdata.m"];
 

	
 

	
 
ListPlot[gsraw[[2]],Joined->True,PlotRange->All,AxesLabel->{"Step","Triangles"}]
 

	
 

	
 
gs=Map[Graph[#,GraphLayout->"CircularEmbedding"]&,gsraw[[1]]];
 
gs2=Map[Graph[#,GraphLayout->Automatic]&,gsraw[[1]]];
 

	
 

	
 
Grid[Partition[gs,10],Frame->All]
 

	
 

	
 
(* ::Section:: *)
 
(*Plot triangle counts*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Data import and data merge*)
 
(*Data import*)
 

	
 

	
 
gsraw=Import[NotebookDirectory[]<>"data/graphdata.m"];
 
gsraw=SortBy[gsraw,#[[1,1]]&]; (* Sort by n *)
 

	
 

	
 
gdata=GatherBy[gsraw,{#[[1,2]]&,#[[1,1]]&}];
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds} ]] *)
 
nlabels=Map["n = "<>ToString[#]&,gdata[[1,All,1,1,1]]];
 
taulabels=Map["tau = "<>ToString[#]&,gdata[[All,1,1,1,2]]];
 

	
 

	
 
(* ::Subsection:: *)
 
(*Merge data*)
 

	
 

	
 
newData=Import[NotebookDirectory[]<>"data/graphdata_3.m"];
 
mergedData=Import[NotebookDirectory[]<>"data/graphdata_merged.m"];
 
Export[NotebookDirectory[]<>"data/graphdata_merged_new.m",Join[mergedData,newData]]
 

	
 

	
 
(* ::Section:: *)
 
(*Plot triangle counts*)
 

	
 

	
 
(* ::Subsection:: *)
 
(*Plot empirical distribution of maximum degree*)
 

	
 

	
 
maxDegrees=Map[{#[[1]],Max[#[[3]]]}&,gsraw];
 
maxDegrees=GatherBy[maxDegrees,{#[[1,2]]&,#[[1,1]]&}];
 
(* maxDegrees[[ tau index, n index, run index,  ntau or dmax ]] *)
 

	
 

	
 
Histogram[maxDegrees[[1,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[2,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
Histogram[maxDegrees[[3,-1,All,2]],PlotRange->{{0,2000},{0,100}},AxesLabel->{"d_max","frequency"}]
 
@@ -249,17 +262,25 @@ stats=Map[getStats,gdata,{3}];
 

	
 
histograms=Map[Histogram[{#[[1,4]]},PlotRange->{{0,2},Automatic},PlotLabel->"ErdosGallai="<>ToString[NumberForm[#[[1,3]],3]]<>"\[Cross]average. stddev="<>ToString[NumberForm[#[[1,2]],3]]<>"\[Cross]average"]&,stats,{2}];
 

	
 

	
 
TableForm[histograms,TableHeadings->{taulabels,nlabels}]
 

	
 

	
 
(* ::Subsection:: *)
 
(*Greedy CM success rates*)
 

	
 

	
 
(* gdata[[ tau index, n index, run index , {ntau, #tris, ds, greedyTriangles} ]] *)
 
successrates=Map[Length[#[[4]]]&,gdata,{3}];
 
successrates=Map[{Length[#[[4]]],Length[#[[5]]]}&,gdata,{3}];
 
successrates=Map[Transpose,successrates,{2}];
 
successratesDelta=Map[Length[#[[5]]]-Length[#[[4]]]&,gdata,{3}];
 

	
 
rateHistograms=Map[Histogram[#,{10},PlotRange->{{0,100},Automatic}]&,successrates,{2}];
 
TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}]
 

	
 
rateHistograms=Map[Histogram[#,{10},PlotRange->{{-100,100},Automatic}]&,successratesDelta,{2}];
 
TableForm[rateHistograms,TableHeadings->{taulabels,nlabels}]
 
(*TableForm[Transpose[rateHistograms],TableHeadings->{nlabels,taulabels}]*)
 

	
 

	
 

	
0 comments (0 inline, 0 general)