Guest User

Untitled

a guest
May 21st, 2018
147
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.39 KB | None | 0 0
  1. # Decision Tree Regression
  2.  
  3. rm(list = ls())
  4.  
  5. # Importing the dataset
  6. dataset <- read.csv(file.path(getwd(),'Data/Position_Salaries.csv'))
  7. dataset = dataset[2:3]
# Splitting the dataset into the Training set and Test set
# (left disabled: the dataset is too small to hold out a test set)
# install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling
# (left disabled: tree-based models are invariant to monotone feature scaling)
# training_set = scale(training_set)
# test_set = scale(test_set)
  21. # Fitting Decision Tree Regression to the dataset
  22. if (!require("rpart"))
  23. install.packages("rpart") # to install the package if you don't have it
  24.  
  25. library(rpart)
  26.  
  27. # DT without pruning
  28. regressor = rpart(formula = Salary ~ .,
  29. data = dataset,
  30. method = "anova",
  31. control = rpart.control(minsplit = 1))
  32. # if we do not specifiy the minsplit in the above code, then the result will be a straight line
  33.  
  34. # DT with pruning
  35. pruned_DT <- prune(regressor,
  36. cp = regressor$cptable[which.min(regressor$cptable[,"xerror"]),"CP"])
  37.  
  38. # Predicting a new result with Decision Tree Regression (without prunning)
  39. y_pred = predict(regressor, data.frame(Level = 6.5))
  40.  
  41. # Predicting a new result with Decision Tree Regression (with post_prunning)
  42. y_pred = predict(pruned_DT, data.frame(Level = 6.5))
  43.  
  44. # Visualising the Decision Tree Regression results (higher resolution)
  45. # if we do not use the high resolution garph, then we will have a line with slope which is not correct
  46. # the results of DT or RF regression are non-continueous values
  47. # therefore, we used 0.01 instead of 0.1
  48.  
  49. library(ggplot2)
  50. x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
  51. ggplot() +
  52. geom_point(aes(x = dataset$Level, y = dataset$Salary),
  53. colour = 'red') +
  54. geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
  55. colour = 'blue') +
  56. ggtitle('Truth or Bluff (Decision Tree Regression)') +
  57. xlab('Level') +
  58. ylab('Salary') +
  59. theme_bw() +
  60. theme(plot.title = element_text(hjust = 0.5))
  61.  
  62. # Plotting the tree
  63. # These generate poor plots
  64. plot(classifier)
  65. text(classifier)
  66.  
  67. # A better approach to plot DT
  68.  
  69. library(rpart)
  70. library(rpart.plot)
  71.  
  72. prp(regressor)
  73. prp(pruned_DT)
  74.  
  75. rpart.plot(regressor)
  76. rpart.plot(pruned_DT)
  77.  
  78. # Diplaying and ploting Complexity Parameter (cp)
  79. printcp(regressor)
  80. plotcp(regressor)
  81. rsq.rpart(regressor)
Add Comment
Please, Sign In to add comment