Advertisement
mikolajmki

si_lab03

Oct 20th, 2022
192
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.25 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import sklearn
  5. from sklearn.model_selection import train_test_split
  6. from sklearn.linear_model import LinearRegression
  7. from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
  8.  
  9. # x = np.arange(-3,3, 0.1).reshape((-1,1))
  10. # y = np.tanh(x) + np.random.randn(*x.shape)*0.2
  11. # ypred = LinearRegression().fit(x,y).predict(x)
  12. # plt.scatter(x,y)
  13. # plt.xlabel('x')
  14. # plt.ylabel('y')
  15. # plt.plot(x, ypred)
  16. # plt.legend([ 'F(x) - aproksymująca',
  17. #  'f(x) - aproksymowana zaszumiona'])
  18. # plt.show()
  19.  
  20. bh_data = pd.read_excel('data.xlsx')
  21. bh_arr = bh_data.values
  22. X, y = bh_arr[:, :-1], bh_arr[:, -1]
  23. # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=221, shuffle=False)
  24. # linReg = LinearRegression()
  25. # linReg.fit(X_train, y_train)
  26. # y_pred = linReg.predict(X_test)
  27. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)
  28.  
  29. # linReg = LinearRegression()
  30. # linReg.fit(X_train, y_train)
  31. # y_pred = linReg.predict(X_test)
  32. # mse = mean_squared_error(y_test, y_pred)
  33. # mae = mean_absolute_error(y_test, y_pred)
  34. # mape = mean_absolute_percentage_error(y_test, y_pred)
  35. #
  36. # minval = min(y_test.min(), y_pred.min())
  37. # maxval = max(y_test.max(), y_pred.max())\
  38. #
  39. # plt.scatter(y_test, y_pred)
  40. # plt.plot([minval, maxval], [minval, maxval])
  41.  
  42. outliers = np.abs((y_train - y_train.mean())/
  43.  y_train.std())>3
  44.  
  45. nowe_dane = np.stack([X[:, 4]/X[:, 7],
  46.  X[:, 4]/X[:, 5],
  47.  X[:, 4]*X[:, 3],
  48. X[:, 4]/X[:, -1]], axis=-1)
  49. X_additional = np.concatenate([X, nowe_dane], axis=-1)
  50.  
  51. X_train_no_outliers = X_train[~outliers,:]
  52. y_train_no_outliers = y_train[~outliers]
  53. y_train_mean = y_train.copy()
  54. y_train_mean[outliers] = y_train.mean()
  55.  
  56. linReg = LinearRegression()
  57. linReg.fit(X_train, y_train_mean)
  58.  
  59. bh_cechy = list(bh_data.columns.values)
  60.  
  61. print(bh_cechy)
  62.  
  63. niezalezne_cechy = bh_cechy[:-1]
  64.  
  65. fig, ax = plt.subplots(1, 1)
  66. x = np.arange(len(niezalezne_cechy))
  67. wagi = linReg.coef_
  68. ax.bar(x, wagi)
  69. ax.set_xticks(x)
  70. ax.set_xticklabels(niezalezne_cechy, rotation=90)
  71.  
  72. plt.boxplot(X_additional)
  73. # plt.title("Medianowa wartosc mieszkania")
  74. plt.xlabel('y_test')
  75. plt.ylabel('y_pred')
  76.  
  77.  
  78.  
  79. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement