# Simulate 100 observations: a and b are pure noise, c follows the same
# linear trend as the response y.
a <- runif(100, 1, 100)
b <- runif(100, 1, 100)
c <- 1:100 + rnorm(100, mean = 0, sd = 5)
y <- 1:100 + rnorm(100, mean = 0, sd = 5)
par(mfrow = c(2, 2))
plot(y ~ a); plot(y ~ b); plot(y ~ c)
Data <- data.frame(matrix(c(y, a, b, c), ncol = 4))
names(Data) <- c("y", "a", "b", "c")

# Fit a boosted regression tree and draw the partial dependence plot for
# each predictor.
library(gbm)
gbm.gaus <- gbm(y ~ a + b + c, data = Data, distribution = "gaussian")
par(mfrow = c(2, 2))
plot(gbm.gaus, i.var = 1)
plot(gbm.gaus, i.var = 2)
plot(gbm.gaus, i.var = 3)

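# (Optional check, not in the original paste) plot.gbm can return the
# partial dependence values instead of drawing them via return.grid = TRUE,
# which makes any flattening at the boundaries easy to inspect numerically.
pd.a <- plot(gbm.gaus, i.var = 1, return.grid = TRUE)
head(pd.a)  # a data frame with the grid of a and the partial dependence y
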
# Same simulation again, this time to compare the boosted regression tree
# against a random forest.
a <- runif(100, 1, 100)
b <- runif(100, 1, 100)
c <- 1:100 + rnorm(100, mean = 0, sd = 5)
y <- 1:100 + rnorm(100, mean = 0, sd = 5)
par(mfrow = c(2, 2))
plot(y ~ a); plot(y ~ b); plot(y ~ c)
Data <- data.frame(matrix(c(y, a, b, c), ncol = 4))
names(Data) <- c("y", "a", "b", "c")

library(gbm)
gbm.gaus <- gbm(y ~ a + b + c, data = Data, distribution = "gaussian")

library(randomForest)
rf.model <- randomForest(y ~ a + b + c, data = Data)

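# (Optional, not in the original paste) variable importance from the forest;
# c should dominate because y shares its trend, while a and b are noise.
importance(rf.model)
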
# Draw the partial dependence plots side by side: gbm in the left column,
# random forest in the right column.
x11(height = 8, width = 5)
par(mfrow = c(3, 2))
par(oma = c(1, 1, 4, 1))
plot(gbm.gaus, i.var = 1)
partialPlot(rf.model, Data[, 2:4], x.var = "a")
plot(gbm.gaus, i.var = 2)
partialPlot(rf.model, Data[, 2:4], x.var = "b")
plot(gbm.gaus, i.var = 3)
partialPlot(rf.model, Data[, 2:4], x.var = "c")
title(main = "Boosted regression tree", outer = TRUE, adj = 0.15)
title(main = "Random forest", outer = TRUE, adj = 0.85)

# More observations are created...
a <- runif(5000, 1, 100)
b <- runif(5000, 1, 100)
c <- (1:5000)/50 + rnorm(5000, mean = 0, sd = 0.1)
y <- (1:5000)/50 + rnorm(5000, mean = 0, sd = 0.1)
par(mfrow = c(1, 3))
plot(y ~ a); plot(y ~ b); plot(y ~ c)
Data <- data.frame(matrix(c(y, a, b, c), ncol = 4))
names(Data) <- c("y", "a", "b", "c")
library(randomForest)
# A small nodesize matters less once the number of observations is increased.
# More trees smooth out the flattening, so the boundary regions get the best
# possible signal-to-noise ratio; how many trees are needed is data specific.

# Helper that draws all three random forest partial dependence plots with
# fixed axes, so the runs below are directly comparable.
plot.partial <- function() {
  partialPlot(rf.model, Data[, 2:4], x.var = "a", xlim = c(1, 100), ylim = c(1, 100))
  partialPlot(rf.model, Data[, 2:4], x.var = "b", xlim = c(1, 100), ylim = c(1, 100))
  partialPlot(rf.model, Data[, 2:4], x.var = "c", xlim = c(1, 100), ylim = c(1, 100))
}
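
# (Aside, not in the original paste) a minimal sketch of what partialPlot
# computes for a single grid value x0: fix the focal variable at x0, predict
# for every row of the data, and average the predictions.
pd.at <- function(model, pred.data, x.var, x0) {
  d <- pred.data
  d[[x.var]] <- x0
  mean(predict(model, d))
}
# e.g. pd.at(rf.model, Data[, 2:4], "c", 50)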

# Worst case: only 100 samples from Data and nodesize = 30.
rf.model <- randomForest(y ~ a + b + c, data = Data[sample(5000, 100), ], nodesize = 30)
plot.partial()

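# (Rough numeric check, not in the original paste) with so few samples and a
# large nodesize, the flattening typically shows up as a prediction range
# that is narrower than the range of the observed response:
pred <- predict(rf.model, Data[, 2:4])
range(pred)    # usually narrower than...
range(Data$y)  # ...the range of the observed y
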
# Reasonable settings for the least partial flattening with few observations:
# 100 samples, nodesize = 5, and ntree = 2000. More trees smooth the
# flattening, so the boundary regions have the best possible fidelity.
rf.model <- randomForest(y ~ a + b + c, data = Data[sample(5000, 100), ], nodesize = 5, ntree = 2000)
plot.partial()

# More observations are great!
rf.model <- randomForest(y ~ a + b + c,
                         data = Data[sample(5000, 5000), ],
                         nodesize = 5, ntree = 2000)
plot.partial()

# Boosted regression tree on the full data set.
gbm.gaus <- gbm(y ~ ., data = Data, distribution = "gaussian")
library(plotmo)   # for the plotres function
plotres(gbm.gaus) # plot the error per number of trees and the residuals

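# (Optional, not in the original paste) gbm's own estimate of the optimal
# number of trees; the OOB method is known to be conservative.
best.iter <- gbm.perf(gbm.gaus, method = "OOB")
print(best.iter)
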
# A deeper, longer boosting run.
gbm.gaus1 <- gbm(y ~ ., data = Data, distribution = "gaussian",
                 n.trees = 5000, interaction.depth = 3)
plotres(gbm.gaus1)

# Partial dependence plots for all single variables and all variable pairs.
library(plotmo)
plotmo(gbm.gaus1, pmethod = "partdep", all1 = TRUE, all2 = TRUE)
plotmo(rf.model, pmethod = "partdep", all1 = TRUE, all2 = TRUE)