Untitled
xrobot1 | Mar 29th, 2018
## xgboost using caret
# library(caret)  # loaded below via pacman
pacman::p_load(doMC)
registerDoMC(cores = 4)   # parallel backend for caret
options(scipen = 999)     # turn off scientific notation
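
# Note: doMC only parallelises caret's resampling loops; xgboost's own
# threading is configured separately, e.g. via an nthread entry in the
# parameter list passed to xgb.train (a standard xgboost parameter)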

pacman::p_load(mlbench)
data("Glass", package = "mlbench")

unique(Glass$Type)

# For a multiclass objective, xgboost expects integer labels starting
# from zero, so recode the factor levels 1,2,3,5,6,7 to 0..5
levels(Glass$Type) <- c(0:5)

Glass$Type <- as.integer(as.character(Glass$Type))
unique(Glass$Type)
colnames(Glass)
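
# A quick sanity check (illustrative, not in the original paste):
# cross-tabulate an untouched copy of the factor against the recoded
# labels to confirm that levels 1,2,3,5,6,7 map onto 0..5
e <- new.env()
data("Glass", package = "mlbench", envir = e)
table(original = e$Glass$Type, recoded = Glass$Type)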

pacman::p_load(xgboost)
pacman::p_load(caret)

set.seed(100)

trainRows <- createDataPartition(Glass$Type, p = .7, list = FALSE)

trainData <- Glass[trainRows, ]
testData  <- Glass[-trainRows, ]

'%ni%' <- Negate('%in%')   # "not in" helper for column selection

# Create the xgb.DMatrix objects used for training and evaluation

trainData_xg <- xgb.DMatrix(data.matrix(trainData[, colnames(trainData) %ni% 'Type']),
                            label = as.numeric(trainData$Type))

testData_xg <- xgb.DMatrix(data.matrix(testData[, colnames(testData) %ni% 'Type']),
                           label = as.numeric(testData$Type))

watchlist <- list(train = trainData_xg, test = testData_xg)

# Define the parameters and cross-validate
param <- list("objective" = "multi:softprob",
              "eval_metric" = "mlogloss",
              num_class = length(unique(trainData$Type)))

cv.nround <- 5
cv.nfold  <- 3

# xgb.cv builds its own folds, so no watchlist is needed here
cvMod <- xgb.cv(param = param,
                data = trainData_xg,
                nfold = cv.nfold,
                nrounds = cv.nround)
cvMod
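
# A minimal sketch (assumes xgboost >= 0.6, where xgb.cv returns an
# evaluation_log): pick the round with the lowest mean test mlogloss
# instead of hard-coding nrounds
bestRound <- which.min(cvMod$evaluation_log$test_mlogloss_mean)
bestRound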

# Build the xgboost models: xgboost() is the convenience wrapper, while
# xgb.train() additionally reports the watchlist metrics every round

nrounds <- 5000

xgMod  <- xgboost(param = param, data = trainData_xg, nrounds = nrounds)
xgMod2 <- xgb.train(param = param, data = trainData_xg, nrounds = nrounds,
                    watchlist = watchlist)
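
# A hedged variant using xgb.train's standard early_stopping_rounds
# argument: stop once the watchlist test mlogloss has not improved for
# 50 rounds, so a large nrounds cannot overfit indefinitely
xgModEs <- xgb.train(param = param, data = trainData_xg, nrounds = nrounds,
                     watchlist = watchlist, early_stopping_rounds = 50)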
# Inspect the fitted models: dump the first few trees as text
model <- xgb.dump(xgMod, with_stats = TRUE)
model[1:10]

model2 <- xgb.dump(xgMod2, with_stats = TRUE)
model2[1:10]

## Get the feature importance
names <- colnames(trainData)[colnames(trainData) %ni% 'Type']

# Compute the feature importance matrix
featureImp <- xgb.importance(names, model = xgMod)
featureImp

featureImp2 <- xgb.importance(names, model = xgMod2)
featureImp2

# Plot the importance
xgb.plot.importance(featureImp)
xgb.plot.importance(featureImp2)
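
# Optional ggplot2 rendering of the same importance table (assumes
# xgb.ggplot.importance, shipped with recent xgboost releases, plus
# ggplot2 and Ckmeans.1d.dp installed)
pacman::p_load(ggplot2, Ckmeans.1d.dp)
xgb.ggplot.importance(featureImp)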
# Plot individual trees (DiagrammeR is needed for rendering)
pacman::p_load(XML)
pacman::p_load(DiagrammeR)

# n_first_tree draws the first two trees; newer xgboost releases
# renamed this argument to `trees`
xgb.plot.tree(feature_names = names,
              model = xgMod, n_first_tree = 2)
xgb.plot.tree(feature_names = names,
              model = xgMod2, n_first_tree = 2)
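
# A compact alternative sketch: xgb.plot.multi.trees (also in the
# xgboost R package) projects all trees onto one summarised tree
# instead of drawing the first few separately
xgb.plot.multi.trees(model = xgMod, feature_names = names, features_keep = 3)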

# Predict: with "multi:softprob" predict() returns one probability per
# class per row, so reshape into an n x num_class matrix and take the
# most probable class (0-based) for each test observation
numClass <- length(unique(trainData$Type))

pred  <- predict(xgMod, testData_xg)
pred2 <- predict(xgMod2, testData_xg)

predLabel  <- max.col(matrix(pred,  ncol = numClass, byrow = TRUE)) - 1
predLabel2 <- max.col(matrix(pred2, ncol = numClass, byrow = TRUE)) - 1

# Confusion matrices against the test labels
tab  <- table(predLabel,  testData$Type)
tab2 <- table(predLabel2, testData$Type)

caret::confusionMatrix(tab)
caret::confusionMatrix(tab2)
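
# Alternative sketch (an assumption, not in the original paste): with
# the "multi:softmax" objective predict() returns class labels
# directly, so no reshaping step is needed
paramSm <- modifyList(param, list(objective = "multi:softmax"))
xgModSm <- xgb.train(param = paramSm, data = trainData_xg, nrounds = 50,
                     watchlist = watchlist)
predSm <- predict(xgModSm, testData_xg)   # already labels in 0..5
table(predSm, testData$Type)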