Advertisement
Guest User

Untitled

a guest
Jun 30th, 2016
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.52 KB | None | 0 0
  1. import warnings
  2. import numpy as np
  3. import pandas as pd
  4. import scipy.stats as st
  5. import statsmodels as sm
  6. import matplotlib
  7. import matplotlib.pyplot as plt
  8.  
  9. matplotlib.rcParams['figure.figsize'] = (16.0, 12.0)
  10. #matplotlib.style.use('ggplot')
  11.  
  12.  
  13. xaxis = [7.82, 7.49, 4.97, 5.14, 6.64, 7.6, 8.1, 5.78, 6.42, 6.19, 5.72, 6.71, 6.1, 7.7, 6.77, 7.89, 7.13, 5.92, 5.23, 5.97, 6.05, 7.94, 6.37, 6.44, 7.09, 7.24, 5.82, 6.5, 8.12, 6.82, 6.51, 7.25, 6.49, 6.24, 6.95, 6.3, 7.45, 5.71, 7.38, 7.61, 7.62, 7.01, 5.94, 7.08, 7.1, 6.62, 7.11, 5.89, 6.56, 7.63, 7.8, 5.93, 7.28, 6.08, 7.53, 5.88, 7.98, 7.29, 6.15, 7.23, 7.81, 8.13, 5.99, 6.96, 6.98, 7.71, 6.17, 6.01, 7.91, 7.95, 6.76, 7.85, 6.36, 7.41, 5.96, 6.33, 6.21, 6.61, 7.14, 7.64, 7.34, 6.53, 6.91, 6.97, 7.21, 7.59, 7.96, 6.86, 7.69, 6.0, 6.68, 7.77, 6.92, 7.55, 7.02, 6.83, 7.07, 7.87, 6.27, 7.51, 5.84, 7.79, 6.07, 7.0, 6.52, 5.86, 5.79, 7.05, 5.95, 6.72, 6.69, 6.88, 7.37, 6.25, 7.31, 7.9, 8.0, 6.32, 6.79, 5.69, 7.35, 6.87, 6.22, 6.18, 6.23, 6.74, 7.83, 7.48, 6.43, 6.57, 7.17, 6.78, 7.36, 7.67, 5.9, 6.67, 7.5, 8.04, 7.26, 7.65, 7.2, 6.47, 7.32, 6.46, 6.04, 7.03, 6.65, 7.22, 6.16, 5.75, 6.45, 7.3, 6.39, 6.93, 7.54, 7.99, 5.91, 6.9, 7.4, 5.85, 6.13, 6.59, 5.8, 7.33, 6.6, 7.27, 7.56, 6.26, 6.58, 7.47, 6.12, 6.34, 7.76, 6.4, 7.66, 6.8, 7.57, 6.7, 7.19, 6.14, 7.88, 6.48, 5.81, 6.02, 6.35, 6.28, 7.16, 6.03, 6.29, 7.75, 7.44, 7.58, 5.98, 6.2, 7.86, 6.75, 7.74, 7.06, 7.93, 7.18, 6.89, 6.63, 6.73, 7.78, 6.84, 7.68, 5.87, 5.83, 6.11, 7.52, 6.55, 7.04, 6.54, 6.31, 7.43, 8.14, 7.72, 7.46, 7.73, 6.99, 5.2, 6.38, 6.41, 7.92, 6.94, 6.66, 7.15, 7.39, 6.85, 7.84, 7.12, 8.05, 8.02, 6.09, 6.81, 7.42]
  14.  
  15. yaxis = [1, 120, 3, 1, 16, 16, 2, 2, 8, 9, 3, 19, 11, 9, 52, 6, 312, 39, 1, 6, 10, 2, 3, 9, 240, 338, 9, 64, 1, 96, 16, 314, 61, 7, 219, 3, 48, 3, 41, 22, 17, 213, 11, 226, 224, 14, 251, 35, 61, 14, 2, 18, 221, 8, 45, 8, 4, 155, 13, 418, 3, 1, 2, 221, 225, 14, 7, 6, 6, 2, 60, 6, 3, 44, 20, 3, 12, 37, 263, 15, 73, 54, 115, 233, 367, 17, 5, 118, 18, 5, 28, 7, 133, 36, 332, 92, 215, 8, 9, 108, 3, 9, 4, 239, 38, 6, 5, 273, 26, 49, 28, 181, 93, 5, 167, 8, 1, 6, 43, 1, 108, 173, 5, 10, 3, 26, 3, 109, 10, 63, 417, 63, 67, 8, 23, 10, 170, 1, 346, 14, 389, 33, 125, 30, 4, 248, 25, 352, 7, 1, 25, 127, 5, 206, 81, 1, 11, 169, 29, 7, 6, 18, 8, 84, 19, 209, 24, 2, 34, 103, 13, 6, 4, 3, 9, 73, 13, 26, 391, 18, 10, 4, 8, 4, 6, 4, 335, 1, 6, 8, 55, 17, 9, 9, 5, 46, 15, 213, 12, 374, 128, 13, 26, 8, 102, 6, 4, 8, 19, 106, 34, 272, 35, 8, 16, 3, 18, 107, 7, 266, 1, 8, 11, 3, 176, 13, 338, 30, 99, 8, 350, 1, 2, 16, 75, 35]
  16.  
  17. newx = []
  18. newy = []
  19. bothax = []
  20.  
  21. for (tx, ty) in sorted(zip(xaxis, yaxis)):
  22. newx.append(tx)
  23. newy.append(ty)
  24. bothax.append((tx, ty))
  25.  
  26. # Create models from data
  27. def best_fit_distribution(data, bins=200, ax=None):
  28. """Model data by finding best fit distribution to data"""
  29. # Get histogram of original data
  30. y, x = np.histogram(data, bins=bins, normed=True)
  31. x = (x + np.roll(x, -1))[:-1] / 2.0
  32.  
  33. # Distributions to check
  34. DISTRIBUTIONS = [
  35. st.alpha,st.anglit,st.arcsine,st.beta,st.betaprime,st.bradford,st.burr,st.cauchy,st.chi,st.chi2,st.cosine,
  36. st.dgamma,st.dweibull,st.erlang,st.expon,st.exponweib,st.exponpow,st.f,st.fatiguelife,st.fisk,
  37. st.foldcauchy,st.foldnorm,st.frechet_r,st.frechet_l,st.genlogistic,st.genpareto,st.genexpon,
  38. st.genextreme,st.gausshyper,st.gamma,st.gengamma,st.genhalflogistic,st.gilbrat,st.gompertz,st.gumbel_r,
  39. st.gumbel_l,st.halfcauchy,st.halflogistic,st.halfnorm,st.hypsecant,st.invgamma,st.invgauss,
  40. st.invweibull,st.johnsonsu,st.ksone,st.kstwobign,st.laplace,st.levy,st.levy_l,st.levy_stable,
  41. st.logistic,st.loggamma,st.loglaplace,st.lognorm,st.lomax,st.maxwell,st.mielke,st.nakagami,st.ncx2,st.ncf,
  42. st.nct,st.norm,st.pareto,st.pearson3,st.powerlaw,st.powerlognorm,st.powernorm,st.rdist,st.reciprocal,
  43. st.rayleigh,st.rice,st.recipinvgauss,st.semicircular,st.t,st.triang,st.truncexpon,st.truncnorm,st.tukeylambda,
  44. st.uniform,st.vonmises,st.wald,st.weibull_min,st.weibull_max,st.wrapcauchy
  45. ]
  46.  
  47. ### st.exponnorm, st.gennorm, st.halfgennorm, st.vonmises_line,st.johnsonsb,
  48.  
  49. # Best holders
  50. best_distribution = st.norm
  51. best_params = (0.0, 1.0)
  52. best_sse = np.inf
  53.  
  54. # Estimate distribution parameters from data
  55. for distribution in DISTRIBUTIONS:
  56.  
  57. # Try to fit the distribution
  58. try:
  59. # Ignore warnings from data that can't be fit
  60. with warnings.catch_warnings():
  61. warnings.filterwarnings('ignore')
  62.  
  63. # fit dist to data
  64. params = distribution.fit(data)
  65.  
  66. # Separate parts of parameters
  67. arg = params[:-2]
  68. loc = params[-2]
  69. scale = params[-1]
  70.  
  71. # Calculate fitted PDF and error with fit in distribution
  72. pdf = distribution.pdf(x, loc=loc, scale=scale, *arg)
  73. sse = np.sum(np.power(y - pdf, 2.0))
  74.  
  75. # if axis pass in add to plot
  76. try:
  77. if ax:
  78. pd.Series(pdf, x).plot(ax=ax)
  79. end
  80. except Exception:
  81. pass
  82.  
  83. # identify if this distribution is better
  84. if best_sse > sse > 0:
  85. print("Updating best fit to %s." %distribution)
  86. best_distribution = distribution
  87. best_params = params
  88. best_sse = sse
  89.  
  90. except Exception:
  91. pass
  92.  
  93. return (best_distribution.name, best_params)
  94.  
  95. def make_pdf(dist, params, size=10000):
  96. """Generate distributions's Propbability Distribution Function """
  97.  
  98. # Separate parts of parameters
  99. arg = params[:-2]
  100. loc = params[-2]
  101. scale = params[-1]
  102.  
  103. # Get sane start and end points of distribution
  104. start = dist.ppf(0.01, *arg, loc=loc, scale=scale) if arg else dist.ppf(0.01, loc=loc, scale=scale)
  105. end = dist.ppf(0.99, *arg, loc=loc, scale=scale) if arg else dist.ppf(0.99, loc=loc, scale=scale)
  106.  
  107. # Build PDF and turn into pandas Series
  108. x = np.linspace(start, end, size)
  109. y = dist.pdf(x, loc=loc, scale=scale, *arg)
  110. pdf = pd.Series(y, x)
  111.  
  112. return pdf
  113.  
  114. # Load data from statsmodels datasets
  115. #data = pd.Series(sm.datasets.elnino.load_pandas().data.set_index('YEAR').values.ravel())
  116.  
  117.  
  118. # Plot for comparison
  119. #plt.figure(figsize=(12,8))
  120. #ax = data.plot(kind='hist', bins=50, normed=True, alpha=0.5, color=plt.rcParams['axes.color_cycle'][1])
  121. ax = plt.plot(newx, newy, 'ro')
  122.  
  123.  
  124. # Save plot limits
  125. #dataYLim = ax.get_ylim()
  126.  
  127. # Find best fit distribution
  128. best_fit_name, best_fir_paramms = best_fit_distribution(bothax) #, 100, ax)
  129.  
  130. print("best_fit_name =")
  131. print(best_fit_name)
  132. print("best_fir_paramms =")
  133. print(best_fir_paramms)
  134. best_dist = getattr(st, best_fit_name)
  135.  
  136. # Update plots
  137. #ax.set_ylim(dataYLim)
  138. plt.title(u'All Fitted Distributions')
  139. plt.xlabel(u'Distance (feet)')
  140. plt.ylabel('Frequency')
  141.  
  142. # Make PDF
  143. pdf = make_pdf(best_dist, best_fir_paramms)
  144.  
  145. # Display
  146. plt.figure(figsize=(12,8))
  147. ax = pdf.plot(lw=2, label='PDF', legend=True)
  148. #plt.plot(bothax, 'ro')
  149. #data.plot(kind='hist', bins=50, normed=True, alpha=0.5, label='Data', legend=True, ax=ax)
  150. plt.plot(bothax, alpha=0.5, label='Data')
  151.  
  152.  
  153. param_names = (best_dist.shapes + ', loc, scale').split(', ') if best_dist.shapes else ['loc', 'scale']
  154. param_str = ', '.join(['{}={:0.2f}'.format(k,v) for k,v in zip(param_names, best_fir_paramms)])
  155. dist_str = '{}({})'.format(best_fit_name, param_str)
  156.  
  157. ax.set_title(u'Just the best fit..n' + dist_str)
  158. ax.set_xlabel(u'Distance')
  159. ax.set_ylabel('Frequency')
  160.  
  161. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement