• API
• FAQ
• Tools
• Archive
SHARE
TWEET

# Untitled

a guest Dec 16th, 2018 53 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
"""Explore the dataset held in ``boston_dataset`` and fit a two-feature linear model.

The script prints summary statistics, plots the distribution of the target
(MEDV) and the feature correlation matrix, then regresses MEDV on the LSTAT
and RM columns and reports RMSE / MAE on a train/test split.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


def report_performance(set_name, y_true, y_pred):
    """Print RMSE and MAE for one data split and return them.

    Args:
        set_name: Label inserted into the header line (e.g. "training").
        y_true: Observed target values.
        y_pred: Model predictions aligned with ``y_true``.

    Returns:
        A ``(rmse, mae)`` tuple.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    print("The model performance for {} set".format(set_name))
    print("--------------------------------------")
    print('RMSE is {}'.format(rmse))
    print('MAE  is {}'.format(mae))
    return rmse, mae


# NOTE(review): `boston_dataset` is not defined anywhere in the visible
# listing (several numbered lines are missing from the paste). It must be
# created before this point -- presumably by a scikit-learn dataset loader;
# TODO restore the missing import and assignment.
print(boston_dataset.keys())
bos = pd.DataFrame(boston_dataset.data)
bos.columns = boston_dataset.feature_names
bos['MEDV'] = boston_dataset.target
print(bos.tail(10))

# DataFrame.info() writes its report itself and returns None, so it is not
# wrapped in print() (that would emit a stray "None" line).
bos.info(verbose=True)

print(bos.describe())
# Recorded answers (the exercise questions are not part of this listing):
# a) 3.613524 and 8.601545
# b) 5.000000 and 50.000000
# c) 11.360000

sns.set(rc={'figure.figsize': (11.7, 8.27)})
# NOTE(review): distplot is deprecated in recent seaborn releases; consider
# histplot/displot when upgrading -- kept here to preserve the plot as-is.
sns.distplot(bos['MEDV'], bins=30)
plt.show()

correlation_matrix = bos.corr().round(2)
sns.heatmap(data=correlation_matrix, annot=True)
plt.show()
# a) RM - number of rooms
# b) LSTAT - % of lower-status population

# Columns are referenced by name with data=bos; the original passed the raw
# dataset object as `data` while also supplying Series for x and y.
# positively correlated - RM
ax1 = sns.regplot(x='MEDV', y='RM', data=bos)
plt.show()
# negatively correlated - LSTAT
ax2 = sns.regplot(x='MEDV', y='LSTAT', data=bos)
plt.show()
# least correlated - CHAS
ax3 = sns.regplot(x='MEDV', y='CHAS', data=bos)
plt.show()

# Model inputs: the two features singled out above; target is MEDV.
X = bos[['LSTAT', 'RM']]
Y = bos['MEDV']

# Fixed random_state keeps the 80/20 split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=5)
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

lm = LinearRegression()
lm.fit(X_train, Y_train)

# a) observed vs. predicted prices on the training split
# Raw strings keep the LaTeX backslash in \hat without an invalid-escape warning.
Y_train_predict = lm.predict(X_train)
plt.scatter(Y_train, Y_train_predict)
plt.xlabel("Train Prices: $Y_i$")
plt.ylabel(r"Predicted train prices: $\hat{Y}_i$")
plt.title(r"Train prices vs Predicted train prices: $Y_i$ vs $\hat{Y}_i$")
plt.show()

# b) observed vs. predicted prices on the test split
Y_test_predict = lm.predict(X_test)
plt.scatter(Y_test, Y_test_predict)
plt.xlabel("Test prices: $Y_i$")
plt.ylabel(r"Predicted test prices: $\hat{Y}_i$")
plt.title(r"Test prices vs Predicted test prices: $Y_i$ vs $\hat{Y}_i$")
plt.show()

rmse, mae = report_performance("training", Y_train, Y_train_predict)
print("\n")

rmse, mae = report_performance("testing", Y_test, Y_test_predict)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top