Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
## xgboost multiclass classification on the Glass dataset (mlbench),
## partitioned with caret. Parallel backend for any caret resampling.
pacman::p_load(doMC)
registerDoMC(cores = 4)
options(scipen = 999)  # suppress scientific notation in printed output

pacman::p_load(mlbench)
data("Glass", package = "mlbench")
unique(Glass$Type)
levels(Glass$Type) <- c(0:5)  # proper zero-based sequence
# For a multiclass xgboost objective the response must be integers
# starting from zero, hence the relabel-then-convert dance above.
Glass$Type <- as.integer(as.character(Glass$Type))
unique(Glass$Type)
colnames(Glass)

pacman::p_load(xgboost)
pacman::p_load(caret)
set.seed(100)  # reproducible partition
trainRows <- createDataPartition(Glass$Type, p = 0.7, list = FALSE)
trainData <- Glass[trainRows, ]
testData  <- Glass[-trainRows, ]

# "not in" helper, used to drop the response column below
'%ni%' <- Negate('%in%')

# xgboost's native matrix format: predictors only, label supplied separately
trainData_xg <- xgb.DMatrix(
  data.matrix(trainData[, colnames(trainData) %ni% 'Type']),
  label = as.numeric(trainData$Type)
)
testData_xg <- xgb.DMatrix(
  data.matrix(testData[, colnames(testData) %ni% 'Type']),
  label = as.numeric(testData$Type)
)
# watchlist lets xgb.train report train/test metrics each round
watchlist <- list(train = trainData_xg, test = testData_xg)
# Define the parameters and cross-validate
param <- list(
  objective   = "multi:softprob",  # per-class probabilities per row
  eval_metric = "mlogloss",
  num_class   = length(unique(trainData$Type))
)
cv.nround <- 5
cv.nfold  <- 3
# NOTE(review): xgb.cv builds its own folds, so a watchlist is only
# meaningful for xgb.train; it is deliberately not passed here.
cvMod <- xgb.cv(
  params  = param,
  data    = trainData_xg,
  nfold   = cv.nfold,
  nrounds = cv.nround
)
cvMod
# Build the XGBoost models: xgboost() is the simple wrapper,
# xgb.train() additionally reports watchlist metrics each round.
nrounds <- 5000
xgMod  <- xgboost(params = param, data = trainData_xg, nrounds = nrounds)
xgMod2 <- xgb.train(params = param, data = trainData_xg, nrounds = nrounds,
                    watchlist = watchlist)

# Inspect the fitted trees as a text dump (first 10 lines each).
# TRUE spelled out: T is an ordinary variable and can be reassigned.
model <- xgb.dump(xgMod, with_stats = TRUE)
model[1:10]
model2 <- xgb.dump(xgMod2, with_stats = TRUE)
model2[1:10]
## Feature importance
# Renamed from `names`, which shadowed base::names()
feature_names <- colnames(trainData)[colnames(trainData) %ni% 'Type']

# Compute the feature importance matrix for each model
featureImp <- xgb.importance(feature_names, model = xgMod)
featureImp
featureImp2 <- xgb.importance(feature_names, model = xgMod2)
featureImp2

# Importance bar charts
xgb.plot.importance(featureImp)
xgb.plot.importance(featureImp2)

# Plot the first two trees of each model (rendering needs DiagrammeR)
pacman::p_load(XML)
pacman::p_load(DiagrammeR)
# NOTE(review): n_first_tree is deprecated in recent xgboost releases in
# favour of trees = 0:1 — confirm against the installed version.
xgb.plot.tree(feature_names = feature_names,
              model = xgMod, n_first_tree = 2)
xgb.plot.tree(feature_names = feature_names,
              model = xgMod2, n_first_tree = 2)
# Predict and evaluate on the held-out test set.
num_class <- length(unique(trainData$Type))

# multi:softprob returns a flat vector of num_class probabilities per test
# row; reshape to a matrix and take the most probable class. max.col() is
# 1-based while the labels are 0-based, hence the - 1.
pred_prob  <- predict(xgMod, testData_xg)
pred_label <- max.col(matrix(pred_prob, ncol = num_class, byrow = TRUE)) - 1

pred_prob2  <- predict(xgMod2, testData_xg)
pred_label2 <- max.col(matrix(pred_prob2, ncol = num_class, byrow = TRUE)) - 1

# Confusion matrices against the TEST labels. (The original tabulated raw
# probabilities against trainData$Type — mismatched lengths and wrong set —
# and overwrote tab so the first model was never evaluated.)
tab  <- table(predicted = pred_label,  actual = testData$Type)
tab2 <- table(predicted = pred_label2, actual = testData$Type)
caret::confusionMatrix(tab)
caret::confusionMatrix(tab2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement