# Untitled

# Load the passenger-level Titanic data shipped with rpart.plot
# (note: this is not the default dataset loaded by data(Titanic)).
data("ptitanic", package = "rpart.plot")

# `survived` is a factor with 2 levels (died / survived), i.e. integer codes
# 1 and 2; subtracting the code from 2 gives 1 = died, 0 = survived.
ptitanic[["died"]] <- 2 - as.integer(ptitanic[["survived"]])
mean(ptitanic[["died"]]) # 0.618 death rate

# Model formula: death indicator against the passenger covariates.
form <- died ~ sex + age + pclass + sibsp + parch

library('gbm')
# Seed the RNG before fitting.
set.seed(1)

# Boosted model with Bernoulli loss: 50 trees, interaction depth 4.
m <- gbm(
  formula = form,
  distribution = "bernoulli",
  data = ptitanic,
  n.trees = 50,
  interaction.depth = 4
)
summary(m)

# Average predicted probability over the training data, using every tree
# that was fitted (m$n.trees is the 50 requested above).
mean(predict(m, newdata = ptitanic, n.trees = m$n.trees, type = "response")) # 0.618 death rate

# Let's look at the 1st tree.
# NOTE: the result is stored in `t`, which is also the name of base::t();
# the name is kept so any code that refers to `t` continues to work.
t <- pretty.gbm.tree(m, i.tree = 1)

# Show the split variable names instead of indices.
# SplitVar is -1 for a terminal node, 0 for the first predictor, 1 for the
# second, etc., so adding 2 turns it into a 1-based index into
# c("Terminal", <predictor names>). The names are taken from the fitted model
# (m$var.names) because that is the vector the indices refer to.
t$SplitVar <- c("Terminal", m$var.names)[t$SplitVar + 2]

# The predictions at nodes look like:
head(t$Prediction)
# [1] -2.066845e-05 -1.472631e-03 -2.374948e-03 -4.808952e-04 -1.472631e-03  7.829118e-04

# Node predictions are increments on the log-odds (link) scale that the tree
# adds to the model's initial value m$initF; they are not log-odds themselves.
# exp(Prediction) is therefore an odds *ratio* relative to the baseline odds.
t$OR <- exp(t$Prediction)

# Converting the odds ratio alone with OR / (1 + OR) always gives values close
# to 0.5, which is not a probability of death. The probability implied by the
# baseline plus this single tree is the inverse logit of initF + Prediction.
t$Prob <- plogis(m$initF + t$Prediction)
head(t$Prob)
# These values should lie close to the overall death rate (about 0.618),
# since one tree moves the log-odds only slightly away from the baseline.