Files @ 32a7f1c13790
Branch filter:

Location: AENC/switchchain/cpp/powerlaw.hpp - annotation

Tom Bannink
Add cannonical powerlaw ds
#pragma once
#include <cassert>
#include <cmath>
#include <iostream>
#include <random>

// Discrete powerlaw distribution
// obtained by rounding a continuous powerlaw distribution
// Let m = minimum and M = maximum and a = alpha
// Then for continuous version we have
// PDF: f(x) = C x^{-a}
// with C = (1-a)/(M^{1-a} - m^{1-a})
// This gives CDF: F(x) = (x^{1-a} - m^{1-a}) / (M^{1-a} - m^{1-a})
// with inversion F^{-1}(y) = ( y M^{1-a} + (1-y) m^{1-a} )^{1/(1-a)}
// i.e. linear interpolate between M^{1-a} < m^{1-a}
// For M = infty this becomes
//  F'^{-1}(y) = m (1-y)^{1/(1-a)}
//             = m (y')^{-1/(a-1)}
// where higher y' means lower x

class powerlaw_distribution {
  public:
    // xmin and xmax are inclusive
    powerlaw_distribution(float a, unsigned int _xmin, unsigned int _xmax)
        : alpha(a), xmin(_xmin), xmax(_xmax) {
        assert(xmin > 0);
        assert(xmin <= xmax);
        assert(alpha > 1.0f);
        _exponent = -1.0f / (alpha - 1.0f);
    }
    ~powerlaw_distribution() {}

    template <class Generator>
    unsigned int operator()(Generator& rng) const {
        unsigned int x = xmax + 1;
        while (x > xmax) {
            // Generate uniform value in [0,1)
            double r = std::generate_canonical<
                double, std::numeric_limits<double>::digits>(rng);
            x = std::round(std::pow(r, _exponent) * xmin);
        }
        return x;
    }

    // Generate non-random ``canonical'' powerlaw distribution
    // Same as above operator but where the random number between
    // 0 and 1 is replaced by stepping from 0 to 1 in fixed steps
    template <class FwdIterator>
    void getFixedDistribution(int n, FwdIterator output) const {
        // go in linear steps over interval
        // [ M^{1-a} , m^{1-a} ]
        double minVal = std::pow(double(xmax), 1.0f - alpha);
        double maxVal = std::pow(double(xmin), 1.0f - alpha);
        unsigned int x;
        // Now it outputs in increasing order
        for (int i = 0; i < n; ++i) {
            double r1 = double(i) / double(n);
            double r2 = r1 * minVal + (1.0 - r1) * maxVal;
            x = std::round(std::pow(r2, _exponent));
            if (x > xmax || x < xmin) {
                std::cerr << "ERROR: x not in [xmin,xmax] : " << x
                          << " not in [" << xmin << ',' << xmax
                          << "]; i = " << i << std::endl;
            }
            *output++ = x;
        }
    }

  private:
    float alpha;
    double _exponent;
    unsigned int xmin, xmax;
};