Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Linear-regression experiment on the table stored in data.xlsx.
#
# The script loads the sheet, treats the last column as the target and all
# other columns as features, splits the rows into train/test parts, replaces
# target outliers (|z-score| > 3) in the training part with the training mean,
# fits a LinearRegression on the result and finally shows two plots:
#   * the fitted coefficient of every original feature (bar chart),
#   * box plots of the original features plus four engineered features.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

# Fixed seed so the shuffled split (and therefore the outlier mask and the
# fitted weights) is the same on every run.
RANDOM_STATE = 221
# A training target is treated as an outlier when its absolute z-score
# exceeds this value.
OUTLIER_Z_THRESHOLD = 3

# --- Load the data ----------------------------------------------------------
bh_data = pd.read_excel('data.xlsx')
# Force a float array: with an integer array the mean assigned to the outlier
# positions further down would be silently truncated.
bh_arr = bh_data.to_numpy(dtype=float)
X, y = bh_arr[:, :-1], bh_arr[:, -1]

# --- Train/test split -------------------------------------------------------
# NOTE: the test part is not used by the code below; an earlier, now removed,
# variant of this script scored predictions on it with MSE / MAE / MAPE and
# drew a y_test-vs-y_pred scatter plot.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=RANDOM_STATE
)

# --- Outlier handling on the training target --------------------------------
z_scores = (y_train - y_train.mean()) / y_train.std()
outliers = np.abs(z_scores) > OUTLIER_Z_THRESHOLD

# Strategy A (computed for comparison, not used by the fit below):
# drop the outlying rows entirely.
X_train_no_outliers = X_train[~outliers, :]
y_train_no_outliers = y_train[~outliers]

# Strategy B (used by the fit below): keep every row but replace the
# outlying targets with the training mean.
y_train_mean = y_train.copy()
y_train_mean[outliers] = y_train.mean()

# --- Engineered features ----------------------------------------------------
# Ratios/products built from feature column 4 and columns 7, 5, 3 and the
# last feature column. They are only visualised, not fed to the model.
nowe_dane = np.stack([X[:, 4] / X[:, 7],
                      X[:, 4] / X[:, 5],
                      X[:, 4] * X[:, 3],
                      X[:, 4] / X[:, -1]], axis=-1)
X_additional = np.concatenate([X, nowe_dane], axis=-1)

# --- Model ------------------------------------------------------------------
linReg = LinearRegression()
linReg.fit(X_train, y_train_mean)

# --- Feature names ----------------------------------------------------------
bh_cechy = list(bh_data.columns.values)
print(bh_cechy)
niezalezne_cechy = bh_cechy[:-1]
# Labels for the engineered columns, derived from the source column names so
# they stay in sync with the expressions in `nowe_dane`.
nowe_cechy = [
    f"{niezalezne_cechy[4]}/{niezalezne_cechy[7]}",
    f"{niezalezne_cechy[4]}/{niezalezne_cechy[5]}",
    f"{niezalezne_cechy[4]}*{niezalezne_cechy[3]}",
    f"{niezalezne_cechy[4]}/{niezalezne_cechy[-1]}",
]

# --- Plots ------------------------------------------------------------------
# Each plot gets its own axes; drawing both on one axes overlays the box plots
# on the bars and overrides the feature tick labels.
fig, (ax, ax_box) = plt.subplots(1, 2, figsize=(14, 6))

# Left: one bar per original feature showing its fitted coefficient.
x = np.arange(len(niezalezne_cechy))
wagi = linReg.coef_
ax.bar(x, wagi)
ax.set_xticks(x)
ax.set_xticklabels(niezalezne_cechy, rotation=90)
ax.set_xlabel('feature')
ax.set_ylabel('coefficient')
ax.set_title('LinearRegression coefficients')

# Right: distribution of every original and engineered feature.
ax_box.boxplot(X_additional)
# boxplot places the boxes at positions 1..N by default.
ax_box.set_xticks(np.arange(1, X_additional.shape[1] + 1))
ax_box.set_xticklabels(niezalezne_cechy + nowe_cechy, rotation=90)
ax_box.set_xlabel('feature')
ax_box.set_ylabel('value')
ax_box.set_title('Feature distributions')

fig.tight_layout()
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement