Advertisement
Guest User

Untitled

a guest
Apr 23rd, 2019
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.13 KB | None | 0 0
  1. # Multiple Linear Regression
  2.  
  3. # Importing the libraries
  4. import numpy as np
  5. import matplotlib.pyplot as plt
  6. import pandas as pd
  7.  
  8. # Importing the dataset
  9. dataset = pd.read_csv('50_Startups.csv')
  10. X = dataset.iloc[:, :-1].values
  11. y = dataset.iloc[:, 4].values
  12.  
  13. # Encoding categorical data
  14. from sklearn.preprocessing import OneHotEncoder
  15. from sklearn.compose import make_column_transformer
  16.  
  17. col_trans = make_column_transformer((OneHotEncoder(categories = 'auto'), [3]), remainder='passthrough')
  18. X = col_trans.fit_transform(X).astype(float)
  19.  
  20. # Avoiding the Dummy Trap
  21. X = X[:, 1:]
  22.  
  23. # Splitting data into Training Set and Test Set
  24. from sklearn.model_selection import train_test_split
  25. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
  26.  
  27. # Fitting Multiple Linerar Regression to the Training set
  28. from sklearn.linear_model import LinearRegression
  29. regressor = LinearRegression()
  30. regressor.fit(X_train, y_train)
  31.  
  32. # Predicting the Test set results
  33. y_pred = regressor.predict(X_test)
  34.  
  35. # Building the optimal model using backward elimination
  36. import statsmodels.formula.api as sm
  37. X = np.append(arr = np.ones((50, 1)).astype(int), values = X, axis = 1)
  38.  
  39.  
  40. def max_with_iter(arr):
  41.     max_val = arr[0]
  42.     max_iter = 0
  43.    
  44.     for i in range(0, len(arr)):
  45.         if max_val < arr[i]:
  46.             max_val = arr[i]
  47.             max_iter = i
  48.            
  49.     return max_iter, max_val
  50.  
  51. def make_step_back_prop(cols_opt, cols_opt_lbl, max_p_val_iter):
  52.    
  53.     return
  54.  
  55.  
  56. alpha = 0.05
  57. cols_opt = [0, 1, 2, 3, 4, 5]
  58. cols_opt_lbl = ["const", "Florida", "New York", "R&D", "Admin", "Market"]
  59.  
  60. X_opt = X[:, cols_opt]
  61. regressor_OLS = sm.OLS(endog=y, exog = X_opt).fit()
  62. p_vals = regressor_OLS._results.pvalues
  63. max_p_val_iter, max_p_val = max_with_iter(p_vals)
  64.  
  65. while  max_p_val > alpha:
  66.     cols_opt = cols_opt.remove(max_p_val_iter)
  67.     cols_opt_lbl = cols_opt_lbl.remove(cols_opt_lbl[max_p_val_iter])
  68.    
  69.     X_opt = X[:, cols_opt]
  70.     regressor_OLS = sm.OLS(endog=y, exog = X_opt).fit()
  71.     p_vals = regressor_OLS._results.pvalues
  72.     max_p_val_iter, max_p_val = max_with_iter(p_vals)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement