Advertisement
Ritam_C

lab5b4

Oct 19th, 2021
138
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.63 KB | None | 0 0
  1. '''
  2. Ritam Chakraborty
  3. B20127
  4. 7439257709
  5. '''
  6.  
  7. # import libraries
  8. import matplotlib.pyplot as plt
  9. import numpy as np
  10. import pandas as pd
  11. from sklearn.preprocessing import PolynomialFeatures
  12. from sklearn.linear_model import LinearRegression
  13.  
  14. # function for rmse
  15. def rmse(pred, actual):
  16.     return np.sqrt(np.sum(np.square(pred-actual))/len(pred))
  17.  
  18. # import the datasets
  19. df_train = pd.read_csv("abalone-train.csv")
  20. df_test = pd.read_csv("abalone-test.csv")
  21. corrs = df_train.corr()["Rings"]
  22.  
  23. # take out the input and target variables from the dataset
  24. P = [2, 3, 4, 5]
  25. X = pd.DataFrame(df_train[df_train.keys()[:-1]])
  26. Y = df_train["Rings"]
  27. X_test = pd.DataFrame(df_test[df_test.keys()[:-1]])
  28. Y_test = df_test["Rings"]
  29.  
  30. RMSE = []
  31. for p in P:
  32.     # transform each of the input vectors for polynomial regression
  33.     polyFeat = PolynomialFeatures(p)
  34.     poly_inp = polyFeat.fit_transform(X)
  35.     # perform linear regression on the transformed input vectors to perform polynomial regression
  36.     LinReg = LinearRegression()
  37.     LinReg.fit(poly_inp, Y)
  38.     # predict for the training sample and compute rmse with them
  39.     train_pred = LinReg.predict(poly_inp)
  40.     print("RMSE in training dataset when p =", str(p)+":", rmse(train_pred, Y))
  41.     print("Regression accuracy =", 100-len(Y)*rmse(train_pred, Y)/np.sum(Y)*100)
  42.     RMSE.append(rmse(train_pred, Y))
  43.  
  44. # bar plot of rmse with values p = 2, 3, 4, 5
  45. plt.bar(P, RMSE)
  46. plt.title("RMSE vs degree of Polynomial")
  47. plt.show()
  48.  
  49. RMSE = []
  50. for p in P:
  51.     # convert into polynomial form
  52.     polyFeat = PolynomialFeatures(p)
  53.     poly_inp = polyFeat.fit_transform(X)
  54.     # perform linear regression on transformed vectors to change into polynomial regression
  55.     LinReg = LinearRegression()
  56.     LinReg.fit(poly_inp, Y)
  57.     # predict for the test dataset and compute rmse
  58.     test_poly = polyFeat.fit_transform(X_test)
  59.     test_pred = LinReg.predict(test_poly)
  60.     print("RMSE in test dataset when p =", str(p)+":", rmse(test_pred, Y_test))
  61.     print("Regression accuracy =", 100-len(Y_test)*rmse(test_pred, Y_test)/np.sum(Y_test)*100)
  62.     RMSE.append(rmse(test_pred, Y_test))
  63.  
  64. # bar plot of rmse with values p = 2, 3, 4, 5
  65. plt.bar(P, RMSE)
  66. plt.title("RMSE vs degree of Polynomial")
  67. plt.show()
  68.  
  69. # find the best fitting polynomial curve and apply regression on it
  70. best_fit = P[np.argmin(RMSE)]
  71. print("Best fit is with p =", best_fit)
  72. polyFeat = PolynomialFeatures(best_fit)
  73. regressor = LinearRegression().fit(polyFeat.fit_transform(X), Y)
  74.  
  75. # Scatter plot of actual vs predicted values
  76. plt.scatter(Y_test, regressor.predict(polyFeat.fit_transform(X_test)), alpha = 0.5)
  77. plt.xlabel("Actual")
  78. plt.ylabel("Predicted")
  79. plt.title("Predicted vs Actual")
  80. plt.show()
  81.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement