Advertisement
Guest User

Untitled

a guest
Mar 21st, 2019
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.86 KB | None | 0 0
  1. from sklearn import datasets
  2. import pandas as pd
  3. import numpy as np
  # Console display settings (list numbering from the paste removed).
# Widen pandas and NumPy console output to the same character width so wide
# frames/arrays are not wrapped, and let pandas print up to 100 columns.
desired_width = 320
pd.set_option('display.width', desired_width)
pd.set_option('display.max_columns', 100)
np.set_printoptions(linewidth=desired_width)
  8. import matplotlib.pyplot as plt
  9. from sklearn.model_selection import train_test_split
  10. from sklearn.impute import SimpleImputer
  11. from sklearn.pipeline import Pipeline
  12. import seaborn as sns
  13. from sklearn.linear_model import LinearRegression
  14. from sklearn.metrics import mean_squared_error
  15. from sklearn.tree import DecisionTreeRegressor
  16. from sklearn.ensemble import RandomForestRegressor
  17.  
  # Load the Boston housing data set and put it into a single DataFrame.
# NOTE(review): `load_boston` was deprecated and later removed from
# scikit-learn; on a recent install this call fails. Confirm the installed
# version before running, or switch to another data source.
boston = datasets.load_boston()
#print(boston.keys())
print(boston.DESCR)

# One column per feature, plus the regression target stored as 'PRICE'.
# Assigning the array directly avoids an index-alignment step through
# pd.Series; both share the default 0..n-1 index, so values are unchanged.
boston_df = pd.DataFrame(data=boston.data, columns=boston.feature_names)
boston_df['PRICE'] = boston.target
  24.  
  25. #boston_df.info()
  26. #print(boston_df.head())
  27. #print(boston_df.describe())
  28. #boston_df.plot(kind = 'scatter',x = 'CRIM',y = 'RAD')
  29. #plt.show()
  30.  
  # Train/test split (list numbering from the paste removed).
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
train, test = train_test_split(boston_df, test_size=0.2, random_state=42)
#print(len(train), len(test))

# `data` is every column except the target; `target` is a one-element list,
# so selecting with it yields a DataFrame and the target arrays below are
# 2-D column vectors of shape (n_rows, 1), not flat 1-D arrays.
data = boston_df.columns.drop('PRICE')
target = ['PRICE']

data_train = np.array(train[data])
data_test = np.array(test[data])
target_train = np.array(train[target])
target_test = np.array(test[target])
  39.  
  # Exploratory analysis (list numbering from the paste removed).
# Pairwise correlation of every column, including PRICE.
corr_matrix = boston_df.corr()
#print(corr_matrix['RAD'].sort_values(ascending=False))

# Scatter-matrix of a few selected features.
# NOTE(review): the only plt.show() in this script is commented out, so in a
# non-interactive run this figure is built but presumably never displayed.
attributes = ['CRIM', 'RAD', 'TAX', 'ZN']
sns.pairplot(boston_df[attributes])
  45.  
  # Median imputation experiments (list numbering from the paste removed).
# The imputer is fitted on the complete frame, i.e. on the PRICE column and on
# the rows that were held out for testing as well.
# NOTE(review): neither `simputer` nor `pipe` is used by the model code in
# this script; if they are wired in later, fit them on the training features
# only to avoid leaking test data.
simputer = SimpleImputer(strategy='median')
simputer.fit(boston_df)
#print(simputer.statistics_)

# Single-step pipeline wrapping an independent (unfitted) median imputer.
pipe = Pipeline([('simputer', SimpleImputer(strategy='median'))])

#boston_num = pipe.fit_transform(boston_df)
#print(boston_num.shape)
  54.  
  # Linear regression baseline (list numbering from the paste removed).
lin_reg = LinearRegression()
lin_reg.fit(data_train, target_train)

predicted_target_lin_reg = lin_reg.predict(data_test)
#print("Predicted: ", list(predicted_target_lin_reg))
#print("Original Values: ", list(target_test))

# Arguments follow scikit-learn's (y_true, y_pred) convention; the value of
# the MSE itself does not depend on the order.
lin_mse = mean_squared_error(target_test, predicted_target_lin_reg)
lin_rmse = np.sqrt(lin_mse)

# The RMSE is scaled by 1000 and reported with a "$" suffix (presumably
# because PRICE is expressed in thousands of dollars - confirm against the
# data set description). The built-in int() replaces the `np.int` alias,
# which is deprecated and no longer present in current NumPy releases; both
# truncate toward zero.
error_lin_reg = int(lin_rmse * 1000)
print("ERROR LINEAR REGRESSION: ", error_lin_reg, "$")
#plt.show()
  67.  
  # Decision-tree regressor (list numbering from the paste removed).
# Seeded like the train/test split so the reported error is repeatable from
# run to run.
tree_reg = DecisionTreeRegressor(random_state=42)
tree_reg.fit(data_train, target_train)

predicted_target_tree_reg = tree_reg.predict(data_test)
#print("Predicted: ", list(predicted_target_tree_reg))
#print("Original Values: ", list(target_test))

# Arguments follow scikit-learn's (y_true, y_pred) convention; the value of
# the MSE itself does not depend on the order.
tree_mse = mean_squared_error(target_test, predicted_target_tree_reg)
tree_rmse = np.sqrt(tree_mse)

# Built-in int() replaces the deprecated `np.int` alias; the RMSE is scaled
# by 1000 and truncated before being printed with a "$" suffix.
error_tree_reg = int(tree_rmse * 1000)
print("ERROR TREE REGRESSION: ", error_tree_reg, "$")
  79.  
  # Random-forest regressor (list numbering from the paste removed).
# Seeded like the train/test split so the reported error is repeatable from
# run to run.
forest_reg = RandomForestRegressor(random_state=42)

# The training target is built as an (n_rows, 1) column; flatten it to 1-D,
# which is the shape the forest expects for a single-output problem and
# avoids scikit-learn's column-vector conversion warning. np.ravel is a
# no-op if the target is already 1-D.
forest_reg.fit(data_train, np.ravel(target_train))

predicted_target_forest_reg = forest_reg.predict(data_test)
#print("Predicted: ", list(predicted_target_forest_reg))
#print("Original Values: ", list(target_test))

# Arguments follow scikit-learn's (y_true, y_pred) convention; the value of
# the MSE itself does not depend on the order.
forest_mse = mean_squared_error(target_test, predicted_target_forest_reg)
forest_rmse = np.sqrt(forest_mse)

# Built-in int() replaces the deprecated `np.int` alias; the RMSE is scaled
# by 1000 and truncated before being printed with a "$" suffix.
error_forest_reg = int(forest_rmse * 1000)
print("ERROR FOREST_REGRESSION: ", error_forest_reg, "$")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement