Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import matplotlib.pyplot as plt
- from mpl_toolkits.mplot3d import Axes3D
- import numpy as np
- from numpy import quantile
- from math import sqrt
- from matplotlib import style, cm
- from prettytable import PrettyTable
- from scipy.stats import norm, t
- def BasicInformation(SAMPLE, plot=True):
- n = len(SAMPLE)
- SampleMean = sum(SAMPLE) / n
- SampleVariance = sum([(Xi - SampleMean)**2 for Xi in SAMPLE]) / (n - 1)
- def GetQuartiles(SAMPLE): return quantile(SAMPLE, .25), quantile(SAMPLE, .50), quantile(SAMPLE, .75)
- Q1, Q2, Q3 = GetQuartiles(SAMPLE)
- IQR = Q3 - Q1
- OutlierRange = [Q1 - 1.5*IQR, Q3 + 1.5*IQR]
- outliersList = [Xi for Xi in SAMPLE if Xi > OutlierRange[-1] or Xi < OutlierRange[0]]
- if plot:
- print('Sample Mean:', SampleMean)
- print('Sample Variance:', SampleVariance)
- print('Sample Standard Deviation:', sqrt(SampleVariance))
- print('Q1:', Q1, ' Q2:', Q2, ' Q3:', Q3)
- print('Outlier Range:', OutlierRange)
- print('Outliers:', set(outliersList))
- plt.boxplot(SAMPLE)
- plt.show()
- plt.hist(SAMPLE)
- plt.show()
- elif not plot:
- return SampleMean, SampleVariance, sqrt(SampleVariance), n, IQR, Q1, Q2, Q3, OutlierRange, set(outliersList)
- def LinearRegression(X, Y, printValues=False, prediction=None):
- x = np.linspace(min(X),max(X),100)
- n = len(X)
- X_Mean, Y_Mean = sum(X)/len(X), sum(Y)/len(Y)
- Sxx, Sxy, Syy = sum([(x - X_Mean)**2 for x in X]), sum([(X[i]-X_Mean)*(Y[i]-Y_Mean) for i in range(len(X))]), sum([(y-Y_Mean)**2 for y in Y])
- B1 = Sxy / Sxx
- B0 = Y_Mean - B1*X_Mean
- y = B0 + B1*x
- def G(x): return B0 + B1*x
- if type(prediction) is list or type(prediction) is tuple:
- for i in prediction: print(f'Predicted Value at G({i}) = {G(i)}')
- elif prediction is not None:
- print(f'Predicted Value at G({prediction}) = {G(prediction)}')
- SSReg = B1**2 * Sxx
- SSErr = Syy-SSReg
- MSReg = SSReg
- MSErr = SSErr/(n-2)
- SSTot = SSReg + SSErr
- F = MSReg/MSErr
- R2 = SSReg/Syy
- temp = (1/n) + (X_Mean**2/Sxx)
- t_B0 = (B0) / sqrt(MSErr*temp)
- t_B1 = (B1) / sqrt(MSErr/Sxx)
- if printValues:
- print('Sxx:', Sxx)
- print('Syy:', Syy)
- print('Sxy:', Sxy)
- print('B0:', B0)
- print('B1:', B1)
- print('B0 T-Statistic:', t_B0)
- print('B1 T-Statistic:', t_B1)
- # ANOVA Table
- tableObject = PrettyTable()
- tableObject.field_names = [' ', 'Sum of Squares', 'df', 'Mean of Squares', 'F']
- tableObject.add_row(['Model', round(SSReg, 5), 1, round(MSReg, 5), round(F, 5)])
- tableObject.add_row(['Error', round(SSErr, 5), n-2, round(MSErr, 5), ''])
- tableObject.add_row(['Total', round(SSTot, 5), n-1, '', ''])
- print(tableObject)
- print('R2:', R2)
- for currentYear in X:
- plt.plot(currentYear, G(currentYear), 'ro', color="red")
- plt.title(f'G(X) = {round(B0, 5)} + {round(B1, 5)}x')
- plt.plot(x, y, color='red')
- plt.plot(X, Y, 'ro', color='blue')
- plt.show()
def MultipleLinearRegression(X1, X2, Y):
    """Fit ``Y = B0 + B1*X1 + B2*X2`` by least squares and draw the plane.

    The coefficients come from the normal equations ``B = (X'X)^-1 X'Y``,
    where the design matrix has a leading column of ones for the intercept.
    They are printed, then the observations (red points) and the fitted
    plane (translucent blue surface over a 10x10 grid spanning the range of
    ``X1`` and ``X2``) are shown in a 3D figure. Nothing is returned.
    """
    # One row per observation: [1, x1, x2].
    design = np.transpose([np.ones(len(X1)), X1, X2])
    Y = np.transpose(Y)

    gram_inverse = np.linalg.inv(design.T @ design)
    moments = np.matmul(design.T, Y)
    B = np.matmul(gram_inverse, moments)

    axis_1 = np.linspace(min(X1), max(X1), 10)
    axis_2 = np.linspace(min(X2), max(X2), 10)
    x, y = np.meshgrid(axis_1, axis_2)
    Z = B[0] + B[1] * x + B[2] * y

    for label, coefficient in zip(('B0:', 'B1:', 'B2:'), B):
        print(label, coefficient)

    plt.figure()
    ax = plt.axes(projection='3d')
    ax.scatter3D(X1, X2, Y, color='red')
    ax.plot_surface(x, y, Z, alpha=0.75, cmap=cm.Blues)
    plt.show()
def Compute_PValue(Z, testSide):
    """Compute the p-value of a Z-test from a standard-normal statistic.

    Args:
        Z: Observed value of the standard-normal test statistic.
        testSide: ``'Left'``, ``'Right'`` or ``'Double'`` (any letter case),
            depending on the kind of test.

    Returns:
        The p-value, which is also printed:

        * ``'Right'``  -> upper tail, ``P(N >= Z)``
        * ``'Left'``   -> lower tail, ``P(N <= Z)``
        * ``'Double'`` -> both tails, ``2 * P(N >= |Z|)``

        ``None`` when ``testSide`` is not one of the three accepted values;
        in that case an error message is printed instead.
    """
    P = None
    testSide = testSide.capitalize()
    if testSide == 'Right':
        # Survival function = 1 - cdf, but accurate far out in the tail.
        P = norm.sf(Z)
        print(P)
    elif testSide == 'Left':
        P = norm.cdf(Z)
        print(P)
    elif testSide == 'Double':
        P = 2 * norm.sf(abs(Z))
        print(P)
    else:
        print(f'Error con el tipo de test, {testSide} no es válido.')
        print('El test debe ser Left, Right o Double.')
    # Compare against None explicitly: a p-value of exactly 0.0 is a valid
    # result and must still be returned.
    if P is not None:
        return P
def t_Test(HypMean, SAMPLE=None, Mean=None, Sigma=None, n=None):
    """Compute the one-sample t statistic against a hypothesised mean.

    The statistic is ``(Mean - HypMean) / (Sigma / sqrt(n))``.

    Args:
        HypMean: Mean under the null hypothesis.
        SAMPLE: Optional sized collection of observations. When given and
            non-empty, its mean, standard deviation and size are taken from
            ``BasicInformation`` and override ``Mean``, ``Sigma`` and ``n``.
        Mean: Sample mean, used when no sample is supplied.
        Sigma: Sample standard deviation, used when no sample is supplied.
        n: Sample size, used when no sample is supplied.

    Returns:
        The tuple ``(t, None)``; the second element is always ``None``. The
        statistic is also printed.

    Raises:
        TypeError: If neither a non-empty ``SAMPLE`` nor all of ``Mean``,
            ``Sigma`` and ``n`` are provided.
    """
    # Explicit None/length checks instead of truthiness, which is ambiguous
    # for NumPy arrays.
    if SAMPLE is not None and len(SAMPLE) > 0:
        sampleInfo = BasicInformation(SAMPLE, False)
        Mean, Sigma, n = sampleInfo[0], sampleInfo[2], sampleInfo[3]
    elif Mean is None or Sigma is None or n is None:
        raise TypeError(
            't_Test needs either a non-empty SAMPLE or all of Mean, Sigma and n.'
        )

    # Named t_value so the module-level scipy.stats `t` is not shadowed.
    t_value = (Mean - HypMean) / (Sigma / sqrt(n))
    print('T:', t_value)
    return t_value, None
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement