govarthenan

Untitled

Jun 19th, 2021
673
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import pandas as pd
  2. from sklearn.tree import DecisionTreeRegressor as dtr
  3. from sklearn.metrics import mean_absolute_error as mae
  4. from sklearn.model_selection import train_test_split as tts
  5.  
  6. data_path = "/home/gova/Pyscripts/study/ml/kaggle/data.csv"
  7. unfiltered_data = pd.read_csv(data_path)
  8. data = unfiltered_data.dropna(axis=0)  # To remove the columns that have any null values
  9.  
  10. # print(data.columns.tolist())
  11.  
  12. y = data.Price  # Target
  13.  
  14. data_features = ['Rooms', 'Distance', 'Postcode', 'Bedroom2', 'Bathroom', 'Car', 'Landsize', 'BuildingArea',
  15.                 'YearBuilt', 'Lattitude', 'Longtitude', 'Propertycount']
  16. # We have excluded the columns with string values as they would cause errors
  17. X = data[data_features]  # Features to learn from
  18.  
  19. train_X, test_X, train_y, test_y = tts(X, y, random_state=0)  # Splitting the table into two to train and test
  20.  
  21. real_model = dtr(random_state=1)  # Defining the model
  22. real_model.fit(train_X, train_y)  # Fitting the model with the variables to learn with and predict
  23.  
  24. real_predictions = real_model.predict(test_X)  # generating predictions
  25. print(real_predictions)
  26.  
  27. real_mae = mae(test_y, real_predictions)  # Calculating mean absolute error between ground truth values and predictions
  28. print(real_mae)
  29.  
RAW Paste Data