Making binnings from an estimated pdf

#! /usr/bin/env python

from __future__ import division, print_function
import numpy as np

def pdfspace(nbins, xmin, xmax, fn, nsample=10000):
    """Make Nbins+1 bin edges between xmin..xmax according to density function fn,
    manually integrated by the Trapezium Rule with Nsample points

    The density function @a fn will be evaluated at @a nsample uniformly
    distributed points between @a xmin and @a xmax, its integral approximated via
    the Trapezium Rule and used to normalize the distribution, and @a nbins + 1
    edges then selected to (approximately) divide into bins each containing
    fraction 1/@a nbins of the integral.

    @note The function @a fn does not need to be a normalized pdf, but it should
    be non-negative.  Any negative values returned by @a fn(x) will be truncated
    to zero and contribute nothing to the estimated integral and binning
    density.

    @note The arg ordering and the meaning of the nbins variable is
    "histogram-like", as opposed to the Numpy/Matlab version, and the xmin and
    xmax arguments are expressed in "normal" space, rather than in the
    function-mapped space as with Numpy/Matlab.

    @note The naming of this function differs from the other Matlab-inspired
    ones: the bin spacing is uniform in the CDF of the density function given,
    rather than in the function itself. For most use-cases this is more
    intuitive.
    """
    dx = (xmax-xmin)/nsample
    xs = np.linspace(xmin, xmax, nsample+1)
    ys = np.array([max(fn(x), 0) for x in xs])
    #areas = np.array([ys[i]*dx + (ys[i+1]-ys[i])*dx/2 for i in range(nsample)])
    areas = (ys[:-1] + ys[1:])*dx/2
    fracs = areas / areas.sum()
    df = 1/nbins
    xedges = [xmin]
    fsum = 0
    for i in range(nsample-1):
        fsum += fracs[i]
        if fsum > df:
            fsum = 0
            xedges.append(xs[i+1])
    xedges.append(xmax)
    # print(len(xedges), nbins+1)
    assert(len(xedges) == nbins+1)
    return xedges


NBINS = 50
from math import *
import scipy.stats as st
xss = []
xss.append( ("Uniform:", pdfspace(NBINS, 1, 11, lambda x : 1)) )
xss.append( ("Linear:",  pdfspace(NBINS, 1, 11, lambda x : 11-x)) )
xss.append( ("Recip:",   pdfspace(NBINS, 1, 11, lambda x : 1/x)) )
xss.append( ("Quad:",    pdfspace(NBINS, 1, 11, lambda x : x**2)) )
xss.append( ("Norm:",    pdfspace(NBINS, 1, 11, lambda x : 1/sqrt(2*pi*3**2) * exp(-(x-5)**2/2/3**2))) )
xss.append( ("Log:",     pdfspace(NBINS, 1, 11, lambda x : log(x))) )
xss.append( ("Exp:",     pdfspace(NBINS, 1, 11, lambda x : exp(x))) )
xss.append( ("NExp:",    pdfspace(NBINS, 1, 11, lambda x : exp(-x))) )

import matplotlib.pyplot as plt
plt.figure(figsize=(8,6))
for n, (name, xs) in enumerate(xss):
    #print(name, len(xs), xs)
    plt.text(-0.3, -n, name)
    plt.plot(xs, [-n]*len(xs), "o", markersize=1.5)
    plt.xlim(-1,12)
plt.gca().get_yaxis().set_visible(False)
plt.tight_layout()
plt.savefig("binnings.pdf")
plt.savefig("binnings.png")