daily pastebin goal
3%
SHARE
TWEET

Untitled

a guest Dec 16th, 2018 53 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. import pandas as pd
  4. import seaborn as sns
  5. from sklearn.datasets import load_boston
  6. from sklearn.model_selection import train_test_split
  7.  
  8. boston_dataset = load_boston()
  9.  
  10. #zad1
  11. print("Zad 1")
  12. print(boston_dataset.keys())
  13. bos = pd.DataFrame(boston_dataset.data)
  14. bos.columns = boston_dataset.feature_names
  15. bos['MEDV'] = boston_dataset.target
  16. print(bos.head(10))
  17. print(bos.tail(10))
  18.  
  19. #zad2
  20. print("Zad 2")
  21. print(bos.info(verbose=True))
  22.  
  23. #zad3
  24. print("Zad 3")
  25. print(bos.describe())
  26. # a) 3.613524 i 8.601545
  27. # b) 5.000000 i 50.000000
  28. # c) 11.360000
  29.  
  30. #zad4
  31. sns.set(rc={'figure.figsize':(11.7,8.27)})
  32. sns.distplot(bos['MEDV'], bins=30)
  33. plt.show()
  34.  
  35. #zad5
  36. correlation_matrix = bos.corr().round(2)
  37. sns.heatmap(data=correlation_matrix, annot=True)
  38. plt.show()
  39. # a) RM - liczba pokoi
  40. # b) LSTAT - % ludzi o nizszym stanie
  41. # c) RAD-TAX - dostepnosc autostrad, podatek od nierucomosci
  42.  
  43. # dodanio skolerowane - RM
  44. ax1 = sns.regplot(x=bos['MEDV'], y=bos['RM'], data=boston_dataset)
  45. plt.show()
  46. # ujemnie skolerowane - LSTAT
  47. ax2 = sns.regplot(x=bos['MEDV'], y=bos['LSTAT'], data=boston_dataset)
  48. plt.show()
  49. # najmniej skolerowane - CHAS
  50. ax3 = sns.regplot(x=bos['MEDV'], y=bos['CHAS'], data=boston_dataset)
  51. plt.show()
  52.  
  53. #zad6
  54. print("Zad 6")
  55. X = pd.DataFrame(np.c_[bos['LSTAT'], bos['RM']], columns = ['LSTAT','RM'])
  56. Y = bos['MEDV']
  57.  
  58. X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state=5)
  59. print(X_train.shape)
  60. print(X_test.shape)
  61. print(Y_train.shape)
  62. print(Y_test.shape)
  63.  
  64. #zad7
  65.  
  66. from sklearn.linear_model import LinearRegression
  67.  
  68. lm = LinearRegression()
  69. lm.fit(X_train, Y_train)
  70.  
  71. #a
  72. Y_train_predict = lm.predict(X_train)
  73. plt.scatter(Y_train, Y_train_predict)
  74. plt.xlabel("Train Prices: $Y_i$")
  75. plt.ylabel("Predicted train prices: $\hat{Y}_i$")
  76. plt.title("Train rices vs Predicted train prices: $Y_i$ vs $\hat{Y}_i$")
  77. plt.show()
  78.  
  79. #b
  80. Y_test_predict = lm.predict(X_test)
  81. plt.scatter(Y_test, Y_test_predict)
  82. plt.xlabel("Test prices: $Y_i$")
  83. plt.ylabel("Predicted test prices: $\hat{Y}_i$")
  84. plt.title("Test prices vs Predicted test prices: $Y_i$ vs $\hat{Y}_i$")
  85. plt.show()
  86.  
  87. #zad8
  88.  
  89. from sklearn.metrics import mean_absolute_error
  90. from sklearn.metrics import mean_squared_error
  91.  
  92. print("Zad 8")
  93. rmse = (np.sqrt(mean_squared_error(Y_train, Y_train_predict)))
  94. mae = mean_absolute_error(Y_train, Y_train_predict)
  95.  
  96. print("The model performance for training set")
  97. print("--------------------------------------")
  98. print('RMSE is {}'.format(rmse))
  99. print('MAE  is {}'.format(mae))
  100. print("\n")
  101.  
  102. rmse = (np.sqrt(mean_squared_error(Y_test, Y_test_predict)))
  103. mae = mean_absolute_error(Y_test, Y_test_predict)
  104.  
  105. print("The model performance for testing set")
  106. print("--------------------------------------")
  107. print('RMSE is {}'.format(rmse))
  108. print('MAE  is {}'.format(mae))
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand