Not a member of Pastebin yet? Sign up — it unlocks many cool features!
# Simulate three candidate predictors and a response: a and b are pure
# uniform noise, while c and y each follow the same 1..100 trend plus
# Gaussian noise -- so only c carries signal about y.
a <- runif(100, 1, 100)
b <- runif(100, 1, 100)
c <- 1:100 + rnorm(100, mean = 0, sd = 5)
y <- 1:100 + rnorm(100, mean = 0, sd = 5)

# Eyeball the marginal relationships in a 2x2 grid.
par(mfrow = c(2, 2))
plot(y ~ a); plot(y ~ b); plot(y ~ c)

Data <- setNames(data.frame(matrix(c(y, a, b, c), ncol = 4)),
                 c("y", "a", "b", "c"))

# Fit a boosted regression tree and draw one partial-dependence plot
# per predictor.
library(gbm)
gbm.gaus <- gbm(y ~ a + b + c, data = Data, distribution = "gaussian")
par(mfrow = c(2, 2))
for (v in 1:3) {
  plot(gbm.gaus, i.var = v)
}
# Re-simulate the same toy data (a and b are noise, c tracks the trend
# in y) so gbm and randomForest can be compared on one data set.
a <- runif(100, 1, 100)
b <- runif(100, 1, 100)
c <- 1:100 + rnorm(100, mean = 0, sd = 5)
y <- 1:100 + rnorm(100, mean = 0, sd = 5)

par(mfrow = c(2, 2))
plot(y ~ a); plot(y ~ b); plot(y ~ c)

Data <- data.frame(matrix(c(y, a, b, c), ncol = 4))
names(Data) <- c("y", "a", "b", "c")

# Fit both model families on identical data.
library(gbm)
gbm.gaus <- gbm(y ~ a + b + c, data = Data, distribution = "gaussian")
library(randomForest)
rf.model <- randomForest(y ~ a + b + c, data = Data)

# dev.new() is portable; the original x11() call fails on systems
# without an X server (Windows, headless boxes, most macOS setups).
dev.new(height = 8, width = 5)
par(mfrow = c(3, 2))
par(oma = c(1, 1, 4, 1))  # wide top outer margin leaves room for the titles

# Left column: gbm partial dependence; right column: randomForest,
# one row per predictor.
plot(gbm.gaus, i.var = 1)
partialPlot(rf.model, Data[, 2:4], x.var = "a")
plot(gbm.gaus, i.var = 2)
partialPlot(rf.model, Data[, 2:4], x.var = "b")
plot(gbm.gaus, i.var = 3)
partialPlot(rf.model, Data[, 2:4], x.var = "c")
title(main = "Boosted regression tree", outer = TRUE, adj = 0.15)
title(main = "Random forest", outer = TRUE, adj = 0.85)
# More observations are created.
# BUG FIX: the original drew only rnorm(100) noise here; R silently
# recycled it 50x against the length-5000 trend, so the "noise"
# repeated every 100 points. Draw a full-length noise vector instead.
a <- runif(5000, 1, 100)
b <- runif(5000, 1, 100)
c <- (1:5000) / 50 + rnorm(5000, mean = 0, sd = 0.1)
y <- (1:5000) / 50 + rnorm(5000, mean = 0, sd = 0.1)

par(mfrow = c(1, 3))
plot(y ~ a); plot(y ~ b); plot(y ~ c)

Data <- data.frame(matrix(c(y, a, b, c), ncol = 4))
names(Data) <- c("y", "a", "b", "c")
library(randomForest)
# A small nodesize is "not as important" when the number of observations
# is increased. More trees can smooth the flattening so boundary regions
# get the best possible signal-to-noise; how many are needed is data specific.
# Draw partial-dependence plots for predictors a, b and c on a common
# 1..100 scale so successive randomForest fits are directly comparable.
# The model and predictor frame are now parameters; their defaults are
# evaluated lazily at call time, so a bare plot.partial() still picks up
# whatever rf.model / Data currently are (backward compatible).
plot.partial <- function(model = rf.model, predictors = Data[, 2:4]) {
  partialPlot(model, predictors, x.var = "a", xlim = c(1, 100), ylim = c(1, 100))
  partialPlot(model, predictors, x.var = "b", xlim = c(1, 100), ylim = c(1, 100))
  partialPlot(model, predictors, x.var = "c", xlim = c(1, 100), ylim = c(1, 100))
}
# Worst case: only 100 sampled rows and a large nodesize = 30 -- the
# partial-dependence curves flatten badly near the boundaries.
rf.model <- randomForest(y ~ a + b + c, data = Data[sample(5000, 100), ],
                         nodesize = 30)
plot.partial()

# More reasonable settings for least partial flattening with few
# observations: 100 samples, nodesize = 5 and ntree = 2000. More trees
# smooth the flattening so boundary regions have the best possible fidelity.
# BUG FIX: the original passed the misspelled argument 'ntress=2000',
# which randomForest's ... silently swallowed; the real argument is ntree.
rf.model <- randomForest(y ~ a + b + c, data = Data[sample(5000, 100), ],
                         nodesize = 5, ntree = 2000)
plot.partial()
# More observations is great! sample(5000, 5000) is just a row
# permutation (sampling without replacement), kept to preserve the
# original behavior.
# BUG FIX: 'ntress' was a typo silently ignored via ...; corrected to ntree.
rf.model <- randomForest(y ~ a + b + c,
                         data = Data[sample(5000, 5000), ],
                         nodesize = 5, ntree = 2000)
plot.partial()
# Fit gbm with argument names spelled out in full: the original relied
# on partial matching ('dist' -> distribution, 'interact' ->
# interaction.depth), which is fragile and flagged by R CMD check.
gbm.gaus <- gbm(y ~ ., data = Data, distribution = "gaussian")
library(plotmo)   # for plotres() and plotmo()
plotres(gbm.gaus) # plot the error per number of trees, and the residuals

gbm.gaus1 <- gbm(y ~ ., data = Data, distribution = "gaussian",
                 n.trees = 5000, interaction.depth = 3)
plotres(gbm.gaus1)

# Partial-dependence plots: all single predictors and all pairs,
# for both the boosted model and the random forest.
plotmo(gbm.gaus1, pmethod = "partdep", all1 = TRUE, all2 = TRUE)
plotmo(rf.model, pmethod = "partdep", all1 = TRUE, all2 = TRUE)
Add Comment
Please sign in to add a comment.