Advertisement
larsupilami73

Inverse CDF sampling

Dec 8th, 2019
405
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.77 KB | None | 0 0
  1. #!/usr/bin/python3
  2. #
  3. #How to generate new samples from an unknown distribution of which you only have a given set of samples?
  4. #
  5. #This script illustrates the Inverse-Empirical-Cumulative-Distribution-Sampling method:
  6. #1. make an ECDF of the given samples x_1...x_n of unknown distribution ECDF_x
  7. #2. sample u_1...u_k from a uniform distribution [0,1]
  8. #3. find the image of u_1...u_k under the inverse of ECDF_x.
  9. #
  10. #larsupilami73
  11.  
  12. import os
  13. from scipy import *
  14. import matplotlib.pyplot as plt
  15.  
  16.  
  17. random.seed(42)
  18. N = 10000 #number of samples to construct the ecdf from
  19.  
  20. #ecdf from samples
  21. def ecdf(x):
  22.     X = sort(x)
  23.     Y = linspace(0,1,len(X))
  24.     return X,Y
  25.  
  26. #a simple sampler class
  27. class sampler():
  28.     def __init__(self,x,name='unknown'):
  29.         self.X, self.Y = ecdf(x)
  30.         self.name = name
  31.    
  32.     def get_ecdf(self):
  33.         return self.X,self.Y
  34.        
  35.     def get_samples(self,size=1000):
  36.         """Sample the unknown distribution by first sampling a uniform distribution between 0 and 1 and
  37.         then calculating the inverse ecdf on these samples"""
  38.         U = random.uniform(low=0,high=1,size=size)
  39.         #Find the index in Y where a uniform sample u is closest to.
  40.         #Then find the corresponding X value: this is the inverse ecdf and your sample!
  41.         #Note this only gives back samples identical to the original unknown samples...
  42.         #S = [self.X[abs(u-self.Y).argmin()] for u in U] #works!
  43.        
  44.         #...however better is to interpolate X between corresponding Y values that U falls inbetween,
  45.         #assuming
  46.         S = interp(U,self.Y,self.X)
  47.         return array(S)
  48.    
  49.    
  50. #generate samples from some 'unknown' and weird distribution
  51. def some_weird_distribution(Nsamples):
  52.     X= []
  53.     n = 0
  54.     while(n<Nsamples):
  55.         x = random.beta(2,5) #see https://www.statisticshowto.datasciencecentral.com/beta-distribution/
  56.         if x<0.3 or x>0.5:
  57.             X.append(x)
  58.             n +=1
  59.        
  60.     return array(X)
  61.  
  62. #get the samples from the unknown distribution we want to 'copy'
  63. x_unknown1 = some_weird_distribution(N)
  64.  
  65. #make a sampler
  66. sampler1= sampler(x_unknown1)
  67.  
  68. #make a nice figure
  69. fig, ((ax1,ax2,ax3),(ax4,ax5,ax6)) = plt.subplots(2,3,figsize=(8,4),dpi=150)
  70.  
  71. #see more color names: plt.cm.colors.cnames
  72. c1 = 'tomato'
  73. c2 = 'steelblue'
  74. c3 = 'ivory'
  75. c4 = 'black'
  76. c5 = 'green'
  77.  
  78. #samples
  79. ax1.scatter(range(N),x_unknown1, c=c1, marker='.',alpha=0.4,)
  80. ax1.set_title('From unknown distribution',fontsize=10)
  81. #ax1.set_xlabel('sample nr.')
  82. ax1.set_ylabel('sample values')
  83. ax1.text(0,0.05,'N={}'.format(x_unknown1.shape[0]),color=c4)
  84. ax1.set_facecolor(c3)
  85.  
  86. #histogram
  87. ax2.hist(x_unknown1, bins=50,color=c1,density=True,edgecolor=c4)
  88. ax2.set_title('Histogram',fontsize=10)
  89. #ax2.set_xlabel('sample values')
  90. ax2.set_ylabel('bin values')
  91. ax2.set_facecolor(c3)
  92.  
  93. #ECDF
  94. x1,y1 = sampler1.get_ecdf()
  95. ax3.set_title('ECDF',fontsize=10)
  96. ax3.scatter(x1,y1,s=5,c=c1,marker='.',alpha=0.5)
  97. ax3.set_facecolor(c3)
  98.  
  99. #temp directory for the frames
  100. if not os.path.exists('./frames'):
  101.     os.mkdir('./frames')
  102. try:
  103.     os.system('rm ./frames/frame*.png')
  104. except:
  105.     pass
  106.  
  107. #now generate *new* samples via inverse ecdf method
  108. #and show that these follow the distribution of the given samples of the unknown distribution
  109. #same histogram, same ecdf
  110. for k,Ns in enumerate([20,20,50,100,200,500,1000,2000,4000,8000,10000]):
  111.     s = sampler1.get_samples(Ns)
  112.  
  113.     #show the generated samples
  114.     ax4.scatter(range(Ns),s, c=c2, marker='.',alpha=0.4)
  115.     ax4.set_title('Inverse ECDF samples',fontsize=10)
  116.     ax4.set_xlabel('sample nr.')
  117.     ax4.set_ylabel('sample values')
  118.     ax4.set_ylim(ax1.get_ylim())
  119.     #ax4.set_xlim(ax1.get_xlim())
  120.     #ax4.set_xticks(ax1.get_xticks())
  121.     ax4.text(0,0.05,'N={}'.format(s.shape[0]),color=c4)
  122.     ax4.set_facecolor(c3)
  123.  
  124.     #show the histogram of generated samples
  125.     ax5.hist(s, bins=50,color=c2,density=True,edgecolor=c4)
  126.     ax5.set_title('Histogram',fontsize=10)
  127.     ax5.set_xlabel('samples values')
  128.     ax5.set_ylabel('bin values')
  129.     ax5.set_xlim(ax2.get_xlim())
  130.     #ax5.set_ylim(ax2.get_ylim())
  131.     ax5.set_facecolor(c3)
  132.  
  133.     #ecdf of generated samples
  134.     xe,ye = ecdf(s)
  135.     ax6.set_title('ECDF',fontsize=10)
  136.     ax6.scatter(xe,ye,s=5,c=c2,marker='.',alpha=0.5)
  137.     ax6.set_xlabel('sample values')
  138.     ax6.set_xlim(ax3.get_xlim())
  139.     ax6.set_facecolor(c3)
  140.     #ax6.set_xticks(ax3.get_xticks())
  141.  
  142.     #save a frame
  143.     fig.tight_layout()
  144.     fig.savefig('./frames/frame%05d.png' %k)
  145.     ax4.clear()
  146.     ax5.clear()
  147.     ax6.clear()
  148.    
  149.     #fig.show()
  150.  
  151. #for some reason, there's a 'jump' between 1st and 2nd frame
  152. #simply removing duplicate of frame solves this...
  153. os.remove('./frames/frame%05d.png' %0)
  154.  
  155. plt.close('all')   
  156. #now assemble all the pictures in a movie
  157. print('converting to gif!')
  158. #requires imagemagick
  159. os.system('convert -delay 100 -loop 0 ./frames/*.png inverse_sampling.gif')
  160. #or to mp4
  161. #print('converting to mp4!')
  162. #os.system("ffmpeg -y -r 5 -i ./frames/frame%05d.png -c:v libx264 -vf fps=10 inverse_sampling.mp4")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement