Advertisement
Guest User

Untitled

a guest
Sep 29th, 2016
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.05 KB | None | 0 0
  # Fit a small boosted-tree (gbm) model of Titanic mortality and inspect the
  # first tree. Requires the 'gbm' and 'rpart.plot' packages.
  library("gbm")

  # Note: this is rpart.plot's passenger-level data, not the default
  # data(Titanic) contingency table.
  data("ptitanic", package = "rpart.plot")

  # `survived` is a factor with 2 levels (died = 1, survived = 2), so
  # 2 - code gives died = 1 for deaths and 0 for survivors.
  ptitanic$died <- 2 - as.integer(ptitanic$survived)
  mean(ptitanic$died) # 0.618 death rate
  form <- as.formula("died ~ sex + age + pclass + sibsp + parch")

  set.seed(1)
  m <- gbm(
    form,
    distribution = "bernoulli",
    data = ptitanic,
    interaction.depth = 4,
    n.trees = 50
  )
  summary(m)

  # Sanity check: the mean fitted probability reproduces the base rate.
  mean(predict(m, ptitanic, type = "response", n.trees = 50)) # 0.618 death rate

  # Let's look at the 1st tree. The argument is spelled out as `i.tree`
  # rather than relying on partial matching of `i`.
  # NOTE: `t` masks base::t() for value lookups only; calls such as t(x)
  # still resolve to the base function.
  t <- pretty.gbm.tree(m, i.tree = 1)

  # Show split variable names instead of indices. SplitVar is -1 for terminal
  # nodes and a 0-based index into the model's predictors otherwise, so +2
  # maps -1 -> "Terminal", 0 -> first predictor, 1 -> second predictor, etc.
  t$SplitVar <- c("Terminal", m$var.names)[t$SplitVar + 2]

  # The predictions at nodes look like:
  head(t$Prediction)
  # [1] -2.066845e-05 -1.472631e-03 -2.374948e-03 -4.808952e-04 -1.472631e-03 7.829118e-04
  # These are NOT absolute log-odds. Each value is this tree's (shrunken)
  # increment to the log-odds, added on top of the model intercept m$initF.
  # The tiny magnitudes presumably reflect the shrinkage in effect when the
  # output above was recorded -- confirm with m$shrinkage.

  # Multiplicative change in the odds of death contributed by this tree alone.
  t$OR <- exp(t$Prediction)

  # Probability implied by the intercept plus this single tree. Converting the
  # increment by itself (plogis(t$Prediction)) would give ~0.5 everywhere,
  # because the baseline log-odds m$initF would be left out.
  t$Prob <- plogis(m$initF + t$Prediction)
  head(t$Prob)
  # With the predictions shown above, these sit within about 0.001 of the
  # 0.618 base rate. The full model prediction sums the increments of all
  # n.trees trees before applying plogis().
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement