Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python
- from __future__ import division, print_function
- import numpy as np
- def pdfspace(nbins, xmin, xmax, fn, nsample=10000):
- """Make Nbins+1 bin edges between xmin..xmax according to density function fn,
- manually integrated by the Trapezium Rule with Nsample points
- The density function @a fn will be evaluated at @a nsample uniformly
- distributed points between @a xmin and @a xmax, its integral approximated via
- the Trapezium Rule and used to normalize the distribution, and @a nbins + 1
- edges then selected to (approximately) divide into bins each containing
- fraction 1/@a nbins of the integral.
- @note The function @a fn does not need to be a normalized pdf, but it should
- be non-negative. Any negative values returned by @a fn(x) will be truncated
- to zero and contribute nothing to the estimated integral and binning
- density.
- @note The arg ordering and the meaning of the nbins variable is
- "histogram-like", as opposed to the Numpy/Matlab version, and the xmin and
- xmax arguments are expressed in "normal" space, rather than in the
- function-mapped space as with Numpy/Matlab.
- @note The naming of this function differs from the other Matlab-inspired
- ones: the bin spacing is uniform in the CDF of the density function given,
- rather than in the function itself. For most use-cases this is more
- intuitive.
- """
- dx = (xmax-xmin)/nsample
- xs = np.linspace(xmin, xmax, nsample+1)
- ys = np.array([max(fn(x), 0) for x in xs])
- #areas = np.array([ys[i]*dx + (ys[i+1]-ys[i])*dx/2 for i in range(nsample)])
- areas = (ys[:-1] + ys[1:])*dx/2
- fracs = areas / areas.sum()
- df = 1/nbins
- xedges = [xmin]
- fsum = 0
- for i in range(nsample-1):
- fsum += fracs[i]
- if fsum > df:
- fsum = 0
- xedges.append(xs[i+1])
- xedges.append(xmax)
- # print(len(xedges), nbins+1)
- assert(len(xedges) == nbins+1)
- return xedges
- NBINS = 50
- from math import *
- import scipy.stats as st
- xss = []
- xss.append( ("Uniform:", pdfspace(NBINS, 1, 11, lambda x : 1)) )
- xss.append( ("Linear:", pdfspace(NBINS, 1, 11, lambda x : 11-x)) )
- xss.append( ("Recip:", pdfspace(NBINS, 1, 11, lambda x : 1/x)) )
- xss.append( ("Quad:", pdfspace(NBINS, 1, 11, lambda x : x**2)) )
- xss.append( ("Norm:", pdfspace(NBINS, 1, 11, lambda x : 1/sqrt(2*pi*3**2) * exp(-(x-5)**2/2/3**2))) )
- xss.append( ("Log:", pdfspace(NBINS, 1, 11, lambda x : log(x))) )
- xss.append( ("Exp:", pdfspace(NBINS, 1, 11, lambda x : exp(x))) )
- xss.append( ("NExp:", pdfspace(NBINS, 1, 11, lambda x : exp(-x))) )
- import matplotlib.pyplot as plt
- plt.figure(figsize=(8,6))
- for n, (name, xs) in enumerate(xss):
- #print(name, len(xs), xs)
- plt.text(-0.3, -n, name)
- plt.plot(xs, [-n]*len(xs), "o", markersize=1.5)
- plt.xlim(-1,12)
- plt.gca().get_yaxis().set_visible(False)
- plt.tight_layout()
- plt.savefig("binnings.pdf")
- plt.savefig("binnings.png")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement