Advertisement
EXTREMEXPLOIT

Estadística

Apr 6th, 2020
441
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.25 KB | None | 0 0
  1. import matplotlib.pyplot as plt
  2. from mpl_toolkits.mplot3d import Axes3D
  3. import numpy as np
  4. from numpy import quantile
  5. from math import sqrt
  6. from matplotlib import style, cm
  7. from prettytable import PrettyTable
  8. from scipy.stats import norm, t
  9.  
  10. def BasicInformation(SAMPLE, plot=True):
  11.     n = len(SAMPLE)
  12.  
  13.     SampleMean = sum(SAMPLE) / n
  14.     SampleVariance = sum([(Xi - SampleMean)**2 for Xi in SAMPLE]) / (n - 1)
  15.     def GetQuartiles(SAMPLE): return quantile(SAMPLE, .25), quantile(SAMPLE, .50), quantile(SAMPLE, .75)
  16.     Q1, Q2, Q3 = GetQuartiles(SAMPLE)
  17.     IQR = Q3 - Q1
  18.     OutlierRange = [Q1 - 1.5*IQR, Q3 + 1.5*IQR]
  19.     outliersList = [Xi for Xi in SAMPLE if Xi > OutlierRange[-1] or Xi < OutlierRange[0]]
  20.     if plot:
  21.         print('Sample Mean:', SampleMean)
  22.         print('Sample Variance:', SampleVariance)
  23.         print('Sample Standard Deviation:', sqrt(SampleVariance))
  24.         print('Q1:', Q1, '  Q2:', Q2, '  Q3:', Q3)
  25.         print('Outlier Range:', OutlierRange)
  26.         print('Outliers:', set(outliersList))
  27.         plt.boxplot(SAMPLE)
  28.         plt.show()
  29.         plt.hist(SAMPLE)
  30.         plt.show()
  31.     elif not plot:
  32.         return SampleMean, SampleVariance, sqrt(SampleVariance), n, IQR, Q1, Q2, Q3, OutlierRange, set(outliersList)
  33.  
  34. def LinearRegression(X, Y, printValues=False, prediction=None):
  35.     x = np.linspace(min(X),max(X),100)
  36.     n = len(X)
  37.     X_Mean, Y_Mean = sum(X)/len(X), sum(Y)/len(Y)
  38.     Sxx, Sxy, Syy = sum([(x - X_Mean)**2 for x in X]), sum([(X[i]-X_Mean)*(Y[i]-Y_Mean) for i in range(len(X))]), sum([(y-Y_Mean)**2 for y in Y])
  39.  
  40.     B1 = Sxy / Sxx
  41.     B0 = Y_Mean - B1*X_Mean
  42.     y = B0 + B1*x
  43.     def G(x): return B0 + B1*x
  44.  
  45.     if type(prediction) is list or type(prediction) is tuple:
  46.         for i in prediction: print(f'Predicted Value at G({i}) = {G(i)}')
  47.     elif prediction is not None:
  48.         print(f'Predicted Value at G({prediction}) = {G(prediction)}')
  49.  
  50.     SSReg = B1**2 * Sxx
  51.     SSErr = Syy-SSReg
  52.     MSReg = SSReg
  53.     MSErr = SSErr/(n-2)
  54.     SSTot = SSReg + SSErr
  55.     F = MSReg/MSErr
  56.     R2 = SSReg/Syy
  57.     temp = (1/n) + (X_Mean**2/Sxx)
  58.     t_B0 = (B0) / sqrt(MSErr*temp)
  59.     t_B1 = (B1) / sqrt(MSErr/Sxx)
  60.     if printValues:
  61.         print('Sxx:', Sxx)
  62.         print('Syy:', Syy)
  63.         print('Sxy:', Sxy)
  64.         print('B0:', B0)
  65.         print('B1:', B1)
  66.         print('B0 T-Statistic:', t_B0)
  67.         print('B1 T-Statistic:', t_B1)
  68.  
  69.     # ANOVA Table
  70.     tableObject = PrettyTable()
  71.     tableObject.field_names = ['     ', 'Sum of Squares', 'df', 'Mean of Squares', 'F']
  72.     tableObject.add_row(['Model', round(SSReg, 5), 1, round(MSReg, 5), round(F, 5)])
  73.     tableObject.add_row(['Error', round(SSErr, 5), n-2, round(MSErr, 5), ''])
  74.     tableObject.add_row(['Total', round(SSTot, 5), n-1, '', ''])
  75.     print(tableObject)
  76.     print('R2:', R2)
  77.  
  78.     for currentYear in X:
  79.         plt.plot(currentYear, G(currentYear), 'ro', color="red")
  80.      
  81.     plt.title(f'G(X) = {round(B0, 5)} + {round(B1, 5)}x')
  82.     plt.plot(x, y, color='red')
  83.     plt.plot(X, Y, 'ro', color='blue')
  84.     plt.show()
  85.  
  86. def MultipleLinearRegression(X1, X2, Y):
  87.     X = np.transpose([np.ones(len(X1)), X1, X2])
  88.     Y = np.transpose(Y)
  89.     B = np.matmul(np.linalg.inv(np.matmul(np.transpose(X), X)), np.matmul(np.transpose(X), Y))
  90.      
  91.     x, y = np.meshgrid(np.linspace(min(X1), max(X1), 10), np.linspace(min(X2), max(X2), 10))
  92.     Z = B[0] + B[1]*x + B[2]*y
  93.     print('B0:', B[0])
  94.     print('B1:', B[1])
  95.     print('B2:', B[2])
  96.  
  97.     fig = plt.figure()
  98.     ax = plt.axes(projection='3d')
  99.     ax.scatter3D(X1, X2, Y, color='red')
  100.     ax.plot_surface(x, y, Z, alpha=0.75, cmap=cm.Blues)
  101.     plt.show()
  102.  
  103. def Compute_PValue(Z, testSide):
  104.     """
  105.     Calcula el valor del P-Value para un Z-Test.
  106.     Z = Valor calculado de la Z de la Normal Estándar.
  107.     testSide = Left, Right o Double. Dependiendo del tipo de test.
  108.     """
  109.     P = None
  110.     testSide = testSide.capitalize()
  111.     if testSide == 'Right':
  112.         P = norm.cdf(Z)
  113.         print(P)
  114.     elif testSide == 'Left':
  115.         P = 1 - norm.cdf(Z)
  116.         print(P)
  117.     elif testSide == 'Double':
  118.         P = 2*(1-norm.cdf(abs(Z)))
  119.         print(P)
  120.     else:
  121.         print(f'Error con el tipo de test, {testSide} no es válido.')
  122.         print('El test debe ser Left, Right o Double.')
  123.     if P: return P
  124.  
  125. def t_Test(HypMean, SAMPLE=None, Mean=None, Sigma=None, n=None):
  126.     """
  127.     Pues calcula el valor de t y vas a suspender igualmente.
  128.     """
  129.     if SAMPLE:
  130.         sampleInfo = BasicInformation(SAMPLE, False)
  131.         Mean, Sigma, n = sampleInfo[0], sampleInfo[2], sampleInfo[3]
  132.     t = (Mean - HypMean) / (Sigma/sqrt(n))
  133.     print('T:', t)
  134.     return t, None
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement