Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''
- Ritam Chakraborty
- B20127
- 7439257709
- '''
- # import libraries
- import matplotlib.pyplot as plt
- import numpy as np
- import pandas as pd
- from sklearn.preprocessing import PolynomialFeatures
- from sklearn.linear_model import LinearRegression
def rmse(pred, actual):
    """Return the root-mean-square error between predictions and targets.

    Both arguments are converted to NumPy float arrays before subtracting,
    so any array-like works (list, tuple, ndarray, pandas Series) and values
    are always paired by position. Without the conversion, plain lists raise
    ``TypeError`` on ``-`` and two pandas Series with different indexes are
    aligned by label instead of by position.

    Parameters
    ----------
    pred : array-like
        Predicted values.
    actual : array-like
        Observed values, in the same order as ``pred``.

    Returns
    -------
    numpy.float64
        ``sqrt(sum((pred - actual) ** 2) / len(pred))``.
    """
    pred_values = np.asarray(pred, dtype=float)
    actual_values = np.asarray(actual, dtype=float)
    squared_error = np.square(pred_values - actual_values)
    # Divide by len(pred), as the original formula did.
    return np.sqrt(np.sum(squared_error) / len(pred_values))
# Load the training and test tables; both paths are relative to the current
# working directory.
df_train = pd.read_csv("abalone-train.csv")
df_test = pd.read_csv("abalone-test.csv")

# Correlation of every column with the target column.
# NOTE(review): `corrs` is not read anywhere else in the visible script.
corrs = df_train.corr()["Rings"]

# Polynomial degrees to evaluate.
P = [2, 3, 4, 5]

# Split each table into inputs and target. The target is picked by name, so
# the inputs are picked by name too ("everything except Rings"). Selecting
# them by position (all but the last column) would leak the target into the
# inputs whenever "Rings" is not the last column of the CSV.
X = df_train.drop(columns="Rings")
Y = df_train["Rings"]
X_test = df_test.drop(columns="Rings")
Y_test = df_test["Rings"]
# Training-set evaluation: for every degree in P, fit a polynomial regression
# on (X, Y) and score it on the same data it was fitted to.
RMSE = []
n_train = len(Y)
target_total = np.sum(Y)
for degree in P:
    # Expand the inputs into their polynomial terms for this degree.
    expander = PolynomialFeatures(degree)
    expanded_train = expander.fit_transform(X)
    # A linear model on the expanded inputs is the polynomial regression.
    model = LinearRegression().fit(expanded_train, Y)
    # Score once and reuse the value for both printouts and the plot.
    train_rmse = rmse(model.predict(expanded_train), Y)
    print("RMSE in training dataset when p =", str(degree) + ":", train_rmse)
    print("Regression accuracy =", 100 - n_train * train_rmse / target_total * 100)
    RMSE.append(train_rmse)

# Bar plot of the training RMSE for p = 2, 3, 4, 5.
plt.bar(P, RMSE)
plt.title("RMSE vs degree of Polynomial")
plt.show()
# Test-set evaluation: for every degree in P, fit a polynomial regression on
# the training data and score it on the held-out test data.
RMSE = []
for p in P:
    # Fit the polynomial expansion on the training inputs.
    polyFeat = PolynomialFeatures(p)
    poly_inp = polyFeat.fit_transform(X)
    # A linear model on the expanded inputs is the polynomial regression.
    LinReg = LinearRegression()
    LinReg.fit(poly_inp, Y)
    # Apply the already-fitted expansion to the test inputs. Use transform(),
    # not fit_transform(): a transformer must never be refitted on the data
    # it is evaluated against, and transform() also checks that the test
    # inputs match what the expansion was fitted on.
    test_poly = polyFeat.transform(X_test)
    test_pred = LinReg.predict(test_poly)
    # Score once and reuse the value for both printouts and the plot.
    test_rmse = rmse(test_pred, Y_test)
    print("RMSE in test dataset when p =", str(p) + ":", test_rmse)
    print("Regression accuracy =", 100 - len(Y_test) * test_rmse / np.sum(Y_test) * 100)
    RMSE.append(test_rmse)

# Bar plot of the test RMSE for p = 2, 3, 4, 5.
plt.bar(P, RMSE)
plt.title("RMSE vs degree of Polynomial")
plt.show()
# Pick the polynomial degree with the lowest RMSE. RMSE is read as left by
# the preceding loop, which fills it with one test-set value per degree in P.
best_fit = P[np.argmin(RMSE)]
print("Best fit is with p =", best_fit)

# Refit a polynomial regression of that degree on the training data.
polyFeat = PolynomialFeatures(best_fit)
regressor = LinearRegression().fit(polyFeat.fit_transform(X), Y)

# Scatter plot of actual vs predicted values on the test set. The test inputs
# go through transform() only: the expansion was already fitted on X and must
# not be refitted on the data it is evaluated against.
plt.scatter(Y_test, regressor.predict(polyFeat.transform(X_test)), alpha=0.5)
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.title("Predicted vs Actual")
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement