Advertisement
andybuckley

Inverse transform sampling from a PyROOT TH2 histogram

Jan 6th, 2014
291
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.31 KB | None | 0 0
  1. import random, ROOT
  2. ROOT.gROOT.SetBatch(True)
  3. ROOT.gROOT.SetStyle("Pub")
  4. #ROOT.gROOT.SetStyle("Plain")
  5. #ROOT.gStyle.SetOptStat(False);
  6.  
  7.  
  8. def get_sampling_vars(h):
  9.     """
  10.    Get the following from a TH2 histogram h, since the ROOT API sucks:
  11.    * list of global bin IDs (not even contiguous, gee thanks ROOT)
  12.    * dict mapping global bin IDs to a tuple of axis bin IDs
  13.    * list of nbins+1 cumulative bin values, in the same order as globalbins
  14.    """
  15.     globalbin_to_axisbin = {} # for reverse axis bin lookup to get edges
  16.     globalbins = [] # because they aren't easily predicted, nor contiguous
  17.     cheights = [0] # cumulative "histogram" from which to uniformly sample
  18.     for ix in xrange(1, h.GetNbinsX()+1):
  19.         for iy in xrange(1, h.GetNbinsY()+1):
  20.             iglobal = h.GetBin(ix, iy)
  21.             globalbins.append(iglobal)
  22.             globalbin_to_axisbin[iglobal] = (ix, iy)
  23.             cheights.append(cheights[-1] + h.GetBinContent(iglobal))
  24.     return globalbins, globalbin_to_axisbin, cheights
  25.  
  26. def get_random_bin(globalbins, cheights):
  27.     """
  28.    Choose a random bin from the cumulative distribution list of nbins+1 entries.
  29.  
  30.    TODO: Search more efficiently (lin and log guesses, then lin search or
  31.    binary split depending on vector size).
  32.    """
  33.     assert len(cheights) == len(globalbins)+1
  34.     randomheight = random.uniform(0, cheights[-1])
  35.     randombin = None
  36.     for i, iglobal in enumerate(globalbins):
  37.         if randomheight >= cheights[i] and randomheight < cheights[i+1]:
  38.             return iglobal
  39.     raise Exception("Sample fell outside range of cumulative distribution?!?!")
  40.  
  41. def get_random_xy(globalbins, cheights, globalbin_to_axisbin):
  42.     """
  43.    Choose a random bin via get_random_bin, then pick a uniform random x,y
  44.    point in that bin (without any attempt at estimating the in-bin distribution).
  45.    """
  46.     irand = get_random_bin(globalbins, cheights)
  47.     axisids = globalbin_to_axisbin.get(irand)
  48.     #print irand, axisids
  49.     assert axisids is not None
  50.     xrand = random.uniform(h.GetXaxis().GetBinLowEdge(axisids[0]), h.GetXaxis().GetBinUpEdge(axisids[0]))
  51.     yrand = random.uniform(h.GetYaxis().GetBinLowEdge(axisids[1]), h.GetYaxis().GetBinUpEdge(axisids[1]))
  52.     #print "{:d} -> {:f}, {:f} ".format(irand, xrand, yrand)
  53.     return xrand, yrand
  54.  
  55.  
  56. ## Create and populate a histo to sample from
  57. h = ROOT.TH2D("/h", "", 3,0,1, 3,0,1)
  58. for ix in xrange(1, h.GetNbinsX()+1):
  59.     for iy in xrange(1, h.GetNbinsY()+1):
  60.         i = h.GetBin(ix, iy)
  61.         h.SetBinContent(i, i**2)
  62.  
  63. ## Get some necessary properties for sampling (ROOT's API is very unhelpful)
  64. globalbins, globalbin_to_axisbin, cheights = get_sampling_vars(h)
  65.  
  66. ## Sample into a new histo with different binning and wider range
  67. NUMFILLS = 2000
  68. h2 = ROOT.TH2D("/h2", "", 12,-0.1,1.1, 12,-0.1,1.1)
  69. for i in xrange(NUMFILLS):
  70.     if i % 500 == 0:
  71.         print "Fill #{:d}/{:d}".format(i, NUMFILLS)
  72.     xrand, yrand = get_random_xy(globalbins, cheights, globalbin_to_axisbin)
  73.     h2.Fill(xrand, yrand)
  74.  
  75. ## Plot sampling and sampled histos overlayed on one canvas
  76. c = ROOT.TCanvas()
  77. h2.Draw("col")
  78. sf = h2.GetBinContent(h2.GetMaximumBin()) / h.GetBinContent(h.GetMaximumBin())
  79. h.Scale(sf, "")
  80. h.SetLineColor(ROOT.kBlack)
  81. h.Draw("box,same")
  82. c.SaveAs("sampled.pdf")
  83. #c.SaveAs("sampled.png")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement