Advertisement
Guest User

Untitled

a guest
Jun 17th, 2019
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.63 KB | None | 0 0
  1. from sklearn.neighbors import KernelDensity
  2. from scipy.stats import gaussian_kde
  3. from statsmodels.nonparametric.kde import KDEUnivariate
  4. from statsmodels.nonparametric.kernel_density import KDEMultivariate
  5.  
  6.  
  7. def kde_scipy(x, x_grid, bandwidth=0.2, **kwargs):
  8. """Kernel Density Estimation with Scipy"""
  9. # Note that scipy weights its bandwidth by the covariance of the
  10. # input data. To make the results comparable to the other methods,
  11. # we divide the bandwidth by the sample standard deviation here.
  12. kde = gaussian_kde(x, bw_method=bandwidth / x.std(ddof=1), **kwargs)
  13. return kde.evaluate(x_grid)
  14.  
  15.  
  16. def kde_statsmodels_u(x, x_grid, bandwidth=0.2, **kwargs):
  17. """Univariate Kernel Density Estimation with Statsmodels"""
  18. kde = KDEUnivariate(x)
  19. kde.fit(bw=bandwidth, **kwargs)
  20. return kde.evaluate(x_grid)
  21.  
  22.  
  23. def kde_statsmodels_m(x, x_grid, bandwidth=0.2, **kwargs):
  24. """Multivariate Kernel Density Estimation with Statsmodels"""
  25. kde = KDEMultivariate(x, bw=bandwidth * np.ones_like(x),
  26. var_type='c', **kwargs)
  27. return kde.pdf(x_grid)
  28.  
  29.  
  30. def kde_sklearn(x, x_grid, bandwidth=0.2, **kwargs):
  31. """Kernel Density Estimation with Scikit-learn"""
  32. kde_skl = KernelDensity(bandwidth=bandwidth, **kwargs)
  33. kde_skl.fit(x[:, np.newaxis])
  34. # score_samples() returns the log-likelihood of the samples
  35. log_pdf = kde_skl.score_samples(x_grid[:, np.newaxis])
  36. return np.exp(log_pdf)
  37.  
  38.  
  39. kde_funcs = [kde_statsmodels_u, kde_statsmodels_u, kde_scipy, kde_sklearn]
  40. kde_funcnames = ['Statsmodels-U', 'Statsmodels-M', 'Scipy', 'Scikit-learn']
  41.  
  42. print("Package Versions:")
  43. import sklearn; print(" scikit-learn:", sklearn.__version__)
  44. import scipy; print(" scipy:", scipy.__version__)
  45. import statsmodels; print(" statsmodels:", statsmodels.__version__)
  46.  
  47. import numpy as np
  48. import matplotlib.pyplot as plt
  49.  
  50. from scipy.stats.distributions import norm
  51.  
  52. # The grid we'll use for plotting
  53. #x_grid = np.linspace(0, 0.2, 1000)
  54.  
  55. # Draw points from a bimodal distribution in 1D
  56. #np.random.seed(0)
  57. #x = np.concatenate([norm(-1, 1.).rvs(400),
  58. # norm(1, 0.3).rvs(100)])
  59.  
  60. x = df_exhibitors2['AnnoFondazione'].dropna()
  61. x_grid = np.linspace(min(x), max(x), 1000)
  62. pdf_true = (0.8 * norm(-1, 1).pdf(x_grid) +
  63. 0.2 * norm(1, 0.3).pdf(x_grid))
  64.  
  65. # Plot the three kernel density estimates
  66. fig, ax = plt.subplots(1, 4, sharey=True,
  67. figsize=(13, 3))
  68. fig.subplots_adjust(wspace=0)
  69.  
  70. for i in range(4):
  71. pdf = kde_funcs[i](x, x_grid, bandwidth=0.2)
  72. ax[i].plot(x_grid, pdf, color='blue', alpha=0.5, lw=3)
  73. ax[i].fill(x_grid, pdf_true, ec='gray', fc='gray', alpha=0.4)
  74. ax[i].set_title(kde_funcnames[i])
  75. #ax[i].set_xlim(0, 0.2)
  76.  
  77. plt.show()
  78.  
  79. fig, ax = plt.subplots()
  80. for bandwidth in [0.0001, 0.003, 1.0]:
  81. ax.plot(x_grid, kde_sklearn(x, x_grid, bandwidth=bandwidth),
  82. label='bw={0}'.format(bandwidth), linewidth=3, alpha=0.5)
  83. ax.hist(x, 30, fc='gray', histtype='stepfilled', alpha=0.3, normed=True)
  84. #ax.set_xlim(0, 0.2)
  85. ax.legend(loc='upper left')
  86. plt.show()
  87.  
  88.  
  89. grid = GridSearchCV(KernelDensity(),
  90. {'bandwidth': np.linspace(0.0001, 0.5, 30)},
  91. cv=20) # 20-fold cross-validation
  92. grid.fit(x[:, None])
  93. print(grid.best_params_)
  94.  
  95. kde = grid.best_estimator_
  96. pdf = np.exp(kde.score_samples(x_grid[:, None]))
  97.  
  98. fig, ax = plt.subplots()
  99. ax.plot(x_grid, pdf, linewidth=3, alpha=0.5, label='bw=%.2f' % kde.bandwidth)
  100. ax.hist(x, 30, fc='gray', histtype='stepfilled', alpha=0.3, normed=True)
  101. ax.legend(loc='upper left')
  102. #ax.set_xlim(0, 0.2)
  103. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement