Advertisement
andybuckley

Making binnings from an estimated pdf

Feb 2nd, 2020
297
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.97 KB | None | 0 0
  1. #! /usr/bin/env python
  2.  
  3. from __future__ import division, print_function
  4. import numpy as np
  5.  
  6. def pdfspace(nbins, xmin, xmax, fn, nsample=10000):
  7.     """Make Nbins+1 bin edges between xmin..xmax according to density function fn,
  8.    manually integrated by the Trapezium Rule with Nsample points
  9.  
  10.    The density function @a fn will be evaluated at @a nsample uniformly
  11.    distributed points between @a xmin and @a xmax, its integral approximated via
  12.    the Trapezium Rule and used to normalize the distribution, and @a nbins + 1
  13.    edges then selected to (approximately) divide into bins each containing
  14.    fraction 1/@a nbins of the integral.
  15.  
  16.    @note The function @a fn does not need to be a normalized pdf, but it should
  17.    be non-negative.  Any negative values returned by @a fn(x) will be truncated
  18.    to zero and contribute nothing to the estimated integral and binning
  19.    density.
  20.  
  21.    @note The arg ordering and the meaning of the nbins variable is
  22.    "histogram-like", as opposed to the Numpy/Matlab version, and the xmin and
  23.    xmax arguments are expressed in "normal" space, rather than in the
  24.    function-mapped space as with Numpy/Matlab.
  25.  
  26.    @note The naming of this function differs from the other Matlab-inspired
  27.    ones: the bin spacing is uniform in the CDF of the density function given,
  28.    rather than in the function itself. For most use-cases this is more
  29.    intuitive.
  30.    """
  31.     dx = (xmax-xmin)/nsample
  32.     xs = np.linspace(xmin, xmax, nsample+1)
  33.     ys = np.array([max(fn(x), 0) for x in xs])
  34.     #areas = np.array([ys[i]*dx + (ys[i+1]-ys[i])*dx/2 for i in range(nsample)])
  35.     areas = (ys[:-1] + ys[1:])*dx/2
  36.     fracs = areas / areas.sum()
  37.     df = 1/nbins
  38.     xedges = [xmin]
  39.     fsum = 0
  40.     for i in range(nsample-1):
  41.         fsum += fracs[i]
  42.         if fsum > df:
  43.             fsum = 0
  44.             xedges.append(xs[i+1])
  45.     xedges.append(xmax)
  46.     # print(len(xedges), nbins+1)
  47.     assert(len(xedges) == nbins+1)
  48.     return xedges
  49.  
  50.  
  51. NBINS = 50
  52. from math import *
  53. import scipy.stats as st
  54. xss = []
  55. xss.append( ("Uniform:", pdfspace(NBINS, 1, 11, lambda x : 1)) )
  56. xss.append( ("Linear:",  pdfspace(NBINS, 1, 11, lambda x : 11-x)) )
  57. xss.append( ("Recip:",   pdfspace(NBINS, 1, 11, lambda x : 1/x)) )
  58. xss.append( ("Quad:",    pdfspace(NBINS, 1, 11, lambda x : x**2)) )
  59. xss.append( ("Norm:",    pdfspace(NBINS, 1, 11, lambda x : 1/sqrt(2*pi*3**2) * exp(-(x-5)**2/2/3**2))) )
  60. xss.append( ("Log:",     pdfspace(NBINS, 1, 11, lambda x : log(x))) )
  61. xss.append( ("Exp:",     pdfspace(NBINS, 1, 11, lambda x : exp(x))) )
  62. xss.append( ("NExp:",    pdfspace(NBINS, 1, 11, lambda x : exp(-x))) )
  63.  
  64. import matplotlib.pyplot as plt
  65. plt.figure(figsize=(8,6))
  66. for n, (name, xs) in enumerate(xss):
  67.     #print(name, len(xs), xs)
  68.     plt.text(-0.3, -n, name)
  69.     plt.plot(xs, [-n]*len(xs), "o", markersize=1.5)
  70.     plt.xlim(-1,12)
  71. plt.gca().get_yaxis().set_visible(False)
  72. plt.tight_layout()
  73. plt.savefig("binnings.pdf")
  74. plt.savefig("binnings.png")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement