Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Fit a small boosted (gbm) model of Titanic deaths and inspect its first tree.
library("gbm")

# Note: this is the passenger-level data set shipped with rpart.plot,
# not the default data(Titanic).
data("ptitanic", package = "rpart.plot")

# `survived` is a factor with two levels, died/survived. Code the outcome as
# 1 = died, 0 = survived by comparing against the label, so the coding does
# not depend on the order of the factor levels.
ptitanic$died <- as.integer(ptitanic$survived == "died")
mean(ptitanic$died) # 0.618 death rate

form <- died ~ sex + age + pclass + sibsp + parch

set.seed(1)
m <- gbm(
  form,
  distribution = "bernoulli",
  data = ptitanic,
  interaction.depth = 4,
  n.trees = 50
)
summary(m)
mean(predict(m, ptitanic, type = "response", n.trees = 50)) # 0.618 death rate

# Let's look at the 1st tree.
# (`t` masks base::t as a variable name only; calls to t() still work.)
t <- pretty.gbm.tree(m, i = 1)

# Show split variable names instead of indices.
# The indices are -1 for terminal, 0 for the first predictor, 1 for the
# second, etc. They index the model's own predictor list (m$var.names), so
# use that rather than re-deriving the names from the formula.
t$SplitVar <- c("Terminal", m$var.names)[t$SplitVar + 2]

# The predictions at nodes look like:
head(t$Prediction)
# [1] -2.066845e-05 -1.472631e-03 -2.374948e-03 -4.808952e-04 -1.472631e-03 7.829118e-04

# These values are NOT log-odds of dying. For a bernoulli gbm each tree
# contributes a small adjustment on the log-odds scale, and the adjustments
# are added to the model intercept m$initF. exp(Prediction) is therefore the
# multiplicative change in the odds contributed by this one tree at that node.
t$OR <- exp(t$Prediction)

# To express a node as a probability, add the baseline before applying the
# inverse logit. Converting the increment alone (as if the baseline odds were
# 1:1) yields values of about 0.5 for every node, which is meaningless.
t$Prob <- plogis(m$initF + t$Prediction)
head(t$Prob)
# With increments as small as those shown above, every value stays close to
# the 0.618 baseline death rate: a single tree barely moves the prediction.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement