Advertisement
Guest User

Untitled

a guest
Dec 16th, 2018
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.85 KB | None | 0 0
  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. import pandas as pd
  4. import seaborn as sns
  5. from sklearn.datasets import load_boston
  6. from sklearn.model_selection import train_test_split
  7.  
  8. boston_dataset = load_boston()
  9.  
  10. #zad1
  11. print("Zad 1")
  12. print(boston_dataset.keys())
  13. bos = pd.DataFrame(boston_dataset.data)
  14. bos.columns = boston_dataset.feature_names
  15. bos['MEDV'] = boston_dataset.target
  16. print(bos.head(10))
  17. print(bos.tail(10))
  18.  
  19. #zad2
  20. print("Zad 2")
  21. print(bos.info(verbose=True))
  22.  
  23. #zad3
  24. print("Zad 3")
  25. print(bos.describe())
  26. # a) 3.613524 i 8.601545
  27. # b) 5.000000 i 50.000000
  28. # c) 11.360000
  29.  
  30. #zad4
  31. sns.set(rc={'figure.figsize':(11.7,8.27)})
  32. sns.distplot(bos['MEDV'], bins=30)
  33. plt.show()
  34.  
  35. #zad5
  36. correlation_matrix = bos.corr().round(2)
  37. sns.heatmap(data=correlation_matrix, annot=True)
  38. plt.show()
  39. # a) RM - liczba pokoi
  40. # b) LSTAT - % ludzi o nizszym stanie
  41. # c) RAD-TAX - dostepnosc autostrad, podatek od nierucomosci
  42.  
  43. # dodanio skolerowane - RM
  44. ax1 = sns.regplot(x=bos['MEDV'], y=bos['RM'], data=boston_dataset)
  45. plt.show()
  46. # ujemnie skolerowane - LSTAT
  47. ax2 = sns.regplot(x=bos['MEDV'], y=bos['LSTAT'], data=boston_dataset)
  48. plt.show()
  49. # najmniej skolerowane - CHAS
  50. ax3 = sns.regplot(x=bos['MEDV'], y=bos['CHAS'], data=boston_dataset)
  51. plt.show()
  52.  
  53. #zad6
  54. print("Zad 6")
  55. X = pd.DataFrame(np.c_[bos['LSTAT'], bos['RM']], columns = ['LSTAT','RM'])
  56. Y = bos['MEDV']
  57.  
  58. X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state=5)
  59. print(X_train.shape)
  60. print(X_test.shape)
  61. print(Y_train.shape)
  62. print(Y_test.shape)
  63.  
  64. #zad7
  65.  
  66. from sklearn.linear_model import LinearRegression
  67.  
  68. lm = LinearRegression()
  69. lm.fit(X_train, Y_train)
  70.  
  71. #a
  72. Y_train_predict = lm.predict(X_train)
  73. plt.scatter(Y_train, Y_train_predict)
  74. plt.xlabel("Train Prices: $Y_i$")
  75. plt.ylabel("Predicted train prices: $\hat{Y}_i$")
  76. plt.title("Train rices vs Predicted train prices: $Y_i$ vs $\hat{Y}_i$")
  77. plt.show()
  78.  
  79. #b
  80. Y_test_predict = lm.predict(X_test)
  81. plt.scatter(Y_test, Y_test_predict)
  82. plt.xlabel("Test prices: $Y_i$")
  83. plt.ylabel("Predicted test prices: $\hat{Y}_i$")
  84. plt.title("Test prices vs Predicted test prices: $Y_i$ vs $\hat{Y}_i$")
  85. plt.show()
  86.  
  87. #zad8
  88.  
  89. from sklearn.metrics import mean_absolute_error
  90. from sklearn.metrics import mean_squared_error
  91.  
  92. print("Zad 8")
  93. rmse = (np.sqrt(mean_squared_error(Y_train, Y_train_predict)))
  94. mae = mean_absolute_error(Y_train, Y_train_predict)
  95.  
  96. print("The model performance for training set")
  97. print("--------------------------------------")
  98. print('RMSE is {}'.format(rmse))
  99. print('MAE is {}'.format(mae))
  100. print("\n")
  101.  
  102. rmse = (np.sqrt(mean_squared_error(Y_test, Y_test_predict)))
  103. mae = mean_absolute_error(Y_test, Y_test_predict)
  104.  
  105. print("The model performance for testing set")
  106. print("--------------------------------------")
  107. print('RMSE is {}'.format(rmse))
  108. print('MAE is {}'.format(mae))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement