Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Decision Tree Regression

# NOTE: the workspace is intentionally not cleared with rm(list = ls());
# that call deletes the caller's objects without resetting attached packages
# or options. Run this script in a fresh R session instead.

# Importing the dataset
# The path is relative to the current working directory, which must contain
# Data/Position_Salaries.csv.
dataset <- read.csv(file.path("Data", "Position_Salaries.csv"))
# Keep only the second and third columns (used below as Level and Salary).
dataset <- dataset[2:3]

# Splitting the dataset into the Training set and Test set
# (left disabled: the model below is fitted on the whole dataset)
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling (left disabled)
# training_set = scale(training_set)
# test_set = scale(test_set)
# Fitting Decision Tree Regression to the dataset
# Install rpart only when it is missing, then attach it. library() stops with
# an error if the package still cannot be loaded, whereas require() would
# only return FALSE.
if (!requireNamespace("rpart", quietly = TRUE)) {
  install.packages("rpart")
}
library(rpart)

# DT without pruning
# minsplit is the minimum number of observations a node must hold before a
# split is attempted. If minsplit is not specified here, the result will be a
# straight line.
regressor <- rpart(
  formula = Salary ~ .,
  data = dataset,
  method = "anova",
  control = rpart.control(minsplit = 1)
)

# DT with pruning
# Pick the CP value from the row of the cp table with the smallest
# cross-validated error (xerror) and prune the full tree back to it.
best_cp <- regressor$cptable[which.min(regressor$cptable[, "xerror"]), "CP"]
pruned_DT <- prune(regressor, cp = best_cp)
# Predicting a new result (Level = 6.5) with Decision Tree Regression
# Each prediction gets its own name so the unpruned result is not overwritten.
new_level <- data.frame(Level = 6.5)

# Without pruning
y_pred_unpruned <- predict(regressor, new_level)

# With post-pruning
y_pred_pruned <- predict(pruned_DT, new_level)

# y_pred keeps the value it ended up with before: the pruned-tree prediction.
y_pred <- y_pred_pruned
# Visualising the Decision Tree Regression results (higher resolution)
# If we do not use the high-resolution graph, we get sloped line segments
# between points, which is not correct: the results of DT or RF regression
# are non-continuous values. Therefore a step of 0.01 is used instead of 0.1.
library(ggplot2)

x_grid <- seq(min(dataset$Level), max(dataset$Level), 0.01)

# Predictions over the fine grid, stored as a data frame so the layers can map
# columns by name instead of using `$` or function calls inside aes().
grid_pred <- data.frame(
  Level = x_grid,
  Salary = unname(predict(regressor, newdata = data.frame(Level = x_grid)))
)

ggplot() +
  geom_point(
    data = dataset,
    aes(x = Level, y = Salary),
    colour = "red"
  ) +
  geom_line(
    data = grid_pred,
    aes(x = Level, y = Salary),
    colour = "blue"
  ) +
  ggtitle("Truth or Bluff (Decision Tree Regression)") +
  xlab("Level") +
  ylab("Salary") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
# Plotting the tree
# These generate poor plots
# The fitted tree in this script is `regressor`; no object named `classifier`
# is ever created, so plotting it would fail with "object not found".
plot(regressor)
text(regressor)
# A better approach to plot DT
library(rpart)
library(rpart.plot)

# Both fitted trees, in the order they are drawn: full tree first, then pruned.
trees <- list(regressor, pruned_DT)

# Draw every tree with prp(), then every tree with rpart.plot().
for (tree in trees) {
  prp(tree)
}
for (tree in trees) {
  rpart.plot(tree)
}

# Displaying and plotting the Complexity Parameter (cp) of the unpruned tree
printcp(regressor)
plotcp(regressor)
rsq.rpart(regressor)
Add Comment
Please, Sign In to add comment