Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import warnings
- import numpy as np
- import pandas as pd
- import scipy.stats as st
- import statsmodels as sm
- import matplotlib
- import matplotlib.pyplot as plt
- matplotlib.rcParams['figure.figsize'] = (16.0, 12.0)
- #matplotlib.style.use('ggplot')
- xaxis = [7.82, 7.49, 4.97, 5.14, 6.64, 7.6, 8.1, 5.78, 6.42, 6.19, 5.72, 6.71, 6.1, 7.7, 6.77, 7.89, 7.13, 5.92, 5.23, 5.97, 6.05, 7.94, 6.37, 6.44, 7.09, 7.24, 5.82, 6.5, 8.12, 6.82, 6.51, 7.25, 6.49, 6.24, 6.95, 6.3, 7.45, 5.71, 7.38, 7.61, 7.62, 7.01, 5.94, 7.08, 7.1, 6.62, 7.11, 5.89, 6.56, 7.63, 7.8, 5.93, 7.28, 6.08, 7.53, 5.88, 7.98, 7.29, 6.15, 7.23, 7.81, 8.13, 5.99, 6.96, 6.98, 7.71, 6.17, 6.01, 7.91, 7.95, 6.76, 7.85, 6.36, 7.41, 5.96, 6.33, 6.21, 6.61, 7.14, 7.64, 7.34, 6.53, 6.91, 6.97, 7.21, 7.59, 7.96, 6.86, 7.69, 6.0, 6.68, 7.77, 6.92, 7.55, 7.02, 6.83, 7.07, 7.87, 6.27, 7.51, 5.84, 7.79, 6.07, 7.0, 6.52, 5.86, 5.79, 7.05, 5.95, 6.72, 6.69, 6.88, 7.37, 6.25, 7.31, 7.9, 8.0, 6.32, 6.79, 5.69, 7.35, 6.87, 6.22, 6.18, 6.23, 6.74, 7.83, 7.48, 6.43, 6.57, 7.17, 6.78, 7.36, 7.67, 5.9, 6.67, 7.5, 8.04, 7.26, 7.65, 7.2, 6.47, 7.32, 6.46, 6.04, 7.03, 6.65, 7.22, 6.16, 5.75, 6.45, 7.3, 6.39, 6.93, 7.54, 7.99, 5.91, 6.9, 7.4, 5.85, 6.13, 6.59, 5.8, 7.33, 6.6, 7.27, 7.56, 6.26, 6.58, 7.47, 6.12, 6.34, 7.76, 6.4, 7.66, 6.8, 7.57, 6.7, 7.19, 6.14, 7.88, 6.48, 5.81, 6.02, 6.35, 6.28, 7.16, 6.03, 6.29, 7.75, 7.44, 7.58, 5.98, 6.2, 7.86, 6.75, 7.74, 7.06, 7.93, 7.18, 6.89, 6.63, 6.73, 7.78, 6.84, 7.68, 5.87, 5.83, 6.11, 7.52, 6.55, 7.04, 6.54, 6.31, 7.43, 8.14, 7.72, 7.46, 7.73, 6.99, 5.2, 6.38, 6.41, 7.92, 6.94, 6.66, 7.15, 7.39, 6.85, 7.84, 7.12, 8.05, 8.02, 6.09, 6.81, 7.42]
- yaxis = [1, 120, 3, 1, 16, 16, 2, 2, 8, 9, 3, 19, 11, 9, 52, 6, 312, 39, 1, 6, 10, 2, 3, 9, 240, 338, 9, 64, 1, 96, 16, 314, 61, 7, 219, 3, 48, 3, 41, 22, 17, 213, 11, 226, 224, 14, 251, 35, 61, 14, 2, 18, 221, 8, 45, 8, 4, 155, 13, 418, 3, 1, 2, 221, 225, 14, 7, 6, 6, 2, 60, 6, 3, 44, 20, 3, 12, 37, 263, 15, 73, 54, 115, 233, 367, 17, 5, 118, 18, 5, 28, 7, 133, 36, 332, 92, 215, 8, 9, 108, 3, 9, 4, 239, 38, 6, 5, 273, 26, 49, 28, 181, 93, 5, 167, 8, 1, 6, 43, 1, 108, 173, 5, 10, 3, 26, 3, 109, 10, 63, 417, 63, 67, 8, 23, 10, 170, 1, 346, 14, 389, 33, 125, 30, 4, 248, 25, 352, 7, 1, 25, 127, 5, 206, 81, 1, 11, 169, 29, 7, 6, 18, 8, 84, 19, 209, 24, 2, 34, 103, 13, 6, 4, 3, 9, 73, 13, 26, 391, 18, 10, 4, 8, 4, 6, 4, 335, 1, 6, 8, 55, 17, 9, 9, 5, 46, 15, 213, 12, 374, 128, 13, 26, 8, 102, 6, 4, 8, 19, 106, 34, 272, 35, 8, 16, 3, 18, 107, 7, 266, 1, 8, 11, 3, 176, 13, 338, 30, 99, 8, 350, 1, 2, 16, 75, 35]
- newx = []
- newy = []
- bothax = []
- for (tx, ty) in sorted(zip(xaxis, yaxis)):
- newx.append(tx)
- newy.append(ty)
- bothax.append((tx, ty))
- # Create models from data
- def best_fit_distribution(data, bins=200, ax=None):
- """Model data by finding best fit distribution to data"""
- # Get histogram of original data
- y, x = np.histogram(data, bins=bins, normed=True)
- x = (x + np.roll(x, -1))[:-1] / 2.0
- # Distributions to check
- DISTRIBUTIONS = [
- st.alpha,st.anglit,st.arcsine,st.beta,st.betaprime,st.bradford,st.burr,st.cauchy,st.chi,st.chi2,st.cosine,
- st.dgamma,st.dweibull,st.erlang,st.expon,st.exponweib,st.exponpow,st.f,st.fatiguelife,st.fisk,
- st.foldcauchy,st.foldnorm,st.frechet_r,st.frechet_l,st.genlogistic,st.genpareto,st.genexpon,
- st.genextreme,st.gausshyper,st.gamma,st.gengamma,st.genhalflogistic,st.gilbrat,st.gompertz,st.gumbel_r,
- st.gumbel_l,st.halfcauchy,st.halflogistic,st.halfnorm,st.hypsecant,st.invgamma,st.invgauss,
- st.invweibull,st.johnsonsu,st.ksone,st.kstwobign,st.laplace,st.levy,st.levy_l,st.levy_stable,
- st.logistic,st.loggamma,st.loglaplace,st.lognorm,st.lomax,st.maxwell,st.mielke,st.nakagami,st.ncx2,st.ncf,
- st.nct,st.norm,st.pareto,st.pearson3,st.powerlaw,st.powerlognorm,st.powernorm,st.rdist,st.reciprocal,
- st.rayleigh,st.rice,st.recipinvgauss,st.semicircular,st.t,st.triang,st.truncexpon,st.truncnorm,st.tukeylambda,
- st.uniform,st.vonmises,st.wald,st.weibull_min,st.weibull_max,st.wrapcauchy
- ]
- ### st.exponnorm, st.gennorm, st.halfgennorm, st.vonmises_line,st.johnsonsb,
- # Best holders
- best_distribution = st.norm
- best_params = (0.0, 1.0)
- best_sse = np.inf
- # Estimate distribution parameters from data
- for distribution in DISTRIBUTIONS:
- # Try to fit the distribution
- try:
- # Ignore warnings from data that can't be fit
- with warnings.catch_warnings():
- warnings.filterwarnings('ignore')
- # fit dist to data
- params = distribution.fit(data)
- # Separate parts of parameters
- arg = params[:-2]
- loc = params[-2]
- scale = params[-1]
- # Calculate fitted PDF and error with fit in distribution
- pdf = distribution.pdf(x, loc=loc, scale=scale, *arg)
- sse = np.sum(np.power(y - pdf, 2.0))
- # if axis pass in add to plot
- try:
- if ax:
- pd.Series(pdf, x).plot(ax=ax)
- end
- except Exception:
- pass
- # identify if this distribution is better
- if best_sse > sse > 0:
- print("Updating best fit to %s." %distribution)
- best_distribution = distribution
- best_params = params
- best_sse = sse
- except Exception:
- pass
- return (best_distribution.name, best_params)
- def make_pdf(dist, params, size=10000):
- """Generate distributions's Propbability Distribution Function """
- # Separate parts of parameters
- arg = params[:-2]
- loc = params[-2]
- scale = params[-1]
- # Get sane start and end points of distribution
- start = dist.ppf(0.01, *arg, loc=loc, scale=scale) if arg else dist.ppf(0.01, loc=loc, scale=scale)
- end = dist.ppf(0.99, *arg, loc=loc, scale=scale) if arg else dist.ppf(0.99, loc=loc, scale=scale)
- # Build PDF and turn into pandas Series
- x = np.linspace(start, end, size)
- y = dist.pdf(x, loc=loc, scale=scale, *arg)
- pdf = pd.Series(y, x)
- return pdf
- # Load data from statsmodels datasets
- #data = pd.Series(sm.datasets.elnino.load_pandas().data.set_index('YEAR').values.ravel())
- # Plot for comparison
- #plt.figure(figsize=(12,8))
- #ax = data.plot(kind='hist', bins=50, normed=True, alpha=0.5, color=plt.rcParams['axes.color_cycle'][1])
- ax = plt.plot(newx, newy, 'ro')
- # Save plot limits
- #dataYLim = ax.get_ylim()
- # Find best fit distribution
- best_fit_name, best_fir_paramms = best_fit_distribution(bothax) #, 100, ax)
- print("best_fit_name =")
- print(best_fit_name)
- print("best_fir_paramms =")
- print(best_fir_paramms)
- best_dist = getattr(st, best_fit_name)
- # Update plots
- #ax.set_ylim(dataYLim)
- plt.title(u'All Fitted Distributions')
- plt.xlabel(u'Distance (feet)')
- plt.ylabel('Frequency')
- # Make PDF
- pdf = make_pdf(best_dist, best_fir_paramms)
- # Display
- plt.figure(figsize=(12,8))
- ax = pdf.plot(lw=2, label='PDF', legend=True)
- #plt.plot(bothax, 'ro')
- #data.plot(kind='hist', bins=50, normed=True, alpha=0.5, label='Data', legend=True, ax=ax)
- plt.plot(bothax, alpha=0.5, label='Data')
- param_names = (best_dist.shapes + ', loc, scale').split(', ') if best_dist.shapes else ['loc', 'scale']
- param_str = ', '.join(['{}={:0.2f}'.format(k,v) for k,v in zip(param_names, best_fir_paramms)])
- dist_str = '{}({})'.format(best_fit_name, param_str)
- ax.set_title(u'Just the best fit..n' + dist_str)
- ax.set_xlabel(u'Distance')
- ax.set_ylabel('Frequency')
- plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement