Advertisement
Not a member of Pastebin yet?
Sign Up — it unlocks many cool features!
- import numpy as np
- import matplotlib.pyplot as plt
- import pandas as pd
- import seaborn as sns
- from sklearn.datasets import load_boston
- from sklearn.model_selection import train_test_split
# zad1 — load the Boston housing data and inspect the first/last rows.
# NOTE(review): sklearn.datasets.load_boston was deprecated in scikit-learn 1.0
# and removed in 1.2; this script requires an older pinned sklearn — confirm,
# or switch to an alternative regression dataset.
boston_dataset = load_boston()
print("Zad 1")
print(boston_dataset.keys())
# Build the feature frame in one step instead of assigning .columns afterwards.
bos = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)
# Target column: median home value (MEDV), in $1000s per the dataset docs.
bos['MEDV'] = boston_dataset.target
print(bos.head(10))
print(bos.tail(10))
# zad2 — column dtypes and non-null counts.
print("Zad 2")
print(bos.info(verbose=True))
# zad3 — summary statistics of every column.
print("Zad 3")
print(bos.describe())
# a) 3.613524 and 8.601545
# b) 5.000000 and 50.000000
# c) 11.360000
# zad4 — distribution of the target variable MEDV.
sns.set(rc={'figure.figsize': (11.7, 8.27)})
# sns.distplot was deprecated in seaborn 0.11 and removed in 0.14;
# histplot with kde=True and stat="density" reproduces its default output.
sns.histplot(bos['MEDV'], bins=30, kde=True, stat="density")
plt.show()
# zad5 — correlation structure of the features vs. the target.
correlation_matrix = bos.corr().round(2)
sns.heatmap(data=correlation_matrix, annot=True)
plt.show()
# a) RM - number of rooms
# b) LSTAT - % of lower-status population
# c) RAD-TAX - highway accessibility / property tax
# positively correlated - RM
# Fix: the original passed data=boston_dataset (a sklearn Bunch); regplot
# expects a DataFrame, so select columns from `bos` by name instead.
ax1 = sns.regplot(x='MEDV', y='RM', data=bos)
plt.show()
# negatively correlated - LSTAT
ax2 = sns.regplot(x='MEDV', y='LSTAT', data=bos)
plt.show()
# least correlated - CHAS
ax3 = sns.regplot(x='MEDV', y='CHAS', data=bos)
plt.show()
# zad6 — build the design matrix and an 80/20 train/test split.
print("Zad 6")
# Select the two predictors directly; the original rebuilt a DataFrame via
# np.c_, which yields the same values and the same default RangeIndex.
X = bos[['LSTAT', 'RM']]
Y = bos['MEDV']
# random_state fixed so the split (and all downstream metrics) is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=5)
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)
# zad7 — fit a linear regression and plot actual vs. predicted prices.
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
lm.fit(X_train, Y_train)
# a) training set: actual vs. predicted
Y_train_predict = lm.predict(X_train)
plt.scatter(Y_train, Y_train_predict)
# Raw strings: "\h" in "$\hat{Y}_i$" is an invalid escape sequence and
# raises a SyntaxWarning on modern Python.
plt.xlabel(r"Train Prices: $Y_i$")
plt.ylabel(r"Predicted train prices: $\hat{Y}_i$")
# Fixed typo in the title: "rices" -> "prices".
plt.title(r"Train prices vs Predicted train prices: $Y_i$ vs $\hat{Y}_i$")
plt.show()
# b) test set: actual vs. predicted
Y_test_predict = lm.predict(X_test)
plt.scatter(Y_test, Y_test_predict)
plt.xlabel(r"Test prices: $Y_i$")
plt.ylabel(r"Predicted test prices: $\hat{Y}_i$")
plt.title(r"Test prices vs Predicted test prices: $Y_i$ vs $\hat{Y}_i$")
plt.show()
# zad8 — RMSE and MAE for the fitted model on both splits.
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
print("Zad 8")


def _report(header, y_true, y_pred):
    # Print RMSE/MAE for one split under the given header line.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    print(header)
    print("--------------------------------------")
    print('RMSE is {}'.format(rmse))
    print('MAE is {}'.format(mae))


_report("The model performance for training set", Y_train, Y_train_predict)
print("\n")
_report("The model performance for testing set", Y_test, Y_test_predict)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement