Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Multiple Linear Regression
#
# Loads '50_Startups.csv', one-hot encodes its categorical column, fits a
# scikit-learn LinearRegression on a train/test split, and finally prepends
# an intercept column to X so the matrix can be used with statsmodels OLS.

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# OLS lives in statsmodels.api; statsmodels.formula.api only provides the
# lowercase, formula-based `ols` in current releases, so `sm.OLS` would fail.
import statsmodels.api as sm
from sklearn.compose import make_column_transformer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Importing the dataset
# X: every column except the last one; y: the column at index 4.
dataset = pd.read_csv('50_Startups.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values

# Encoding categorical data
# Column 3 is one-hot encoded; all other columns are passed through unchanged.
col_trans = make_column_transformer(
    (OneHotEncoder(categories='auto'), [3]),
    remainder='passthrough',
)
X = col_trans.fit_transform(X).astype(float)

# Avoiding the Dummy Trap
# Drop the first column of the encoded matrix (presumably the first dummy
# level) so the remaining dummies are not perfectly collinear with an
# intercept.
X = X[:, 1:]

# Splitting data into Training Set and Test Set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Fitting Multiple Linear Regression to the Training set
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Building the optimal model using backward elimination
# statsmodels OLS does not add an intercept by itself, so prepend a column of
# ones. The row count is taken from X instead of being hard-coded to 50.
X = np.append(arr=np.ones((X.shape[0], 1)).astype(int), values=X, axis=1)
def max_with_iter(arr):
    """Return the position and value of the largest element of *arr*.

    Args:
        arr: A non-empty, indexable sequence of mutually comparable values
            (for example a list or a 1-D NumPy array).

    Returns:
        A tuple ``(max_iter, max_val)`` where ``max_val`` is the largest
        element and ``max_iter`` is its index. When the maximum occurs more
        than once, the index of its first occurrence is returned.

    Raises:
        IndexError: If *arr* is empty.
    """
    max_val = arr[0]
    max_iter = 0
    for i, val in enumerate(arr):
        # Strict comparison keeps the first occurrence on ties.
        if max_val < val:
            max_val = val
            max_iter = i
    return max_iter, max_val
def make_step_back_prop(cols_opt, cols_opt_lbl, max_p_val_iter):
    """Placeholder for one backward-elimination step (not implemented).

    The function currently ignores all three arguments, modifies nothing,
    and returns ``None``.

    Args:
        cols_opt: Unused.
        cols_opt_lbl: Unused.
        max_p_val_iter: Unused.

    Returns:
        None.
    """
    # TODO: implement, or remove if the inline elimination loop is kept.
    return None
# Backward elimination: repeatedly refit OLS and drop the predictor with the
# largest p-value until every remaining p-value is at most alpha.
alpha = 0.05

# Column indices of X still in the model and their labels, kept in lockstep:
# position k in cols_opt, cols_opt_lbl and p_vals refers to the same predictor.
cols_opt = [0, 1, 2, 3, 4, 5]
cols_opt_lbl = ["const", "Florida", "New York", "R&D", "Admin", "Market"]

X_opt = X[:, cols_opt]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
p_vals = regressor_OLS.pvalues
max_p_val_iter, max_p_val = max_with_iter(p_vals)

# Stop once all predictors are significant, or when only one column is left,
# so the design matrix never becomes empty.
while len(cols_opt) > 1 and max_p_val > alpha:
    # max_p_val_iter is a position in the current p-value vector, so delete
    # by position. list.remove() would delete by value and returns None,
    # which must not be assigned back to the list.
    del cols_opt[max_p_val_iter]
    del cols_opt_lbl[max_p_val_iter]

    X_opt = X[:, cols_opt]
    regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
    p_vals = regressor_OLS.pvalues
    max_p_val_iter, max_p_val = max_with_iter(p_vals)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement