# Untitled

# Load the project helpers; presumably this is where
# read_and_preprocess_data_file() is defined.
source("load_data.R")

# The "known" data set: the rows the bootstrap loop below samples from.
d <- read_and_preprocess_data_file("data/BADS_WS1718_known.csv")
# Drop the delivery_date column (the original note says this removes NAs).
d <- subset(d, select = -delivery_date)

# The "class" data set, read and trimmed in exactly the same way.
classdata <- read_and_preprocess_data_file("data/BADS_WS1718_class.csv")
classdata <- subset(classdata, select = -delivery_date)

# Train/evaluate the final model with .632 bootstrapping.
n_iterations <- 400L
n_rows <- nrow(d)  # loop-invariant, so computed once

# Per-iteration logs. They have to exist before the loop writes to them
# (appending to an undefined object is an error), and preallocating avoids
# regrowing the vectors on every iteration.
probs <- numeric(n_iterations)  # share of distinct rows used for training
accs <- numeric(n_iterations)   # accuracy measured on the held-out rows

for (iter in seq_len(n_iterations)) {
  # Sample row indices with replacement (see the book for the reasoning),
  # then keep every drawn index only once.
  sampled_order_ids <- unique(sample.int(n_rows, replace = TRUE))

  training_set <- d[sampled_order_ids, ]
  test_set <- d[-sampled_order_ids, ]  # rows that were never drawn

  probs[iter] <- nrow(training_set) / n_rows

  # TODO: train the model on `training_set` here. Always keep training the
  # same model instead of discarding it and starting from scratch.
  # TODO: evaluate the model on `test_set` and store the result in
  # `accuracy`; until that is implemented the next line fails because
  # `accuracy` is not defined.
  accs[iter] <- accuracy
}

# Plot the logged accuracy of every iteration to see how it changes as the
# number of iterations grows. seq_along() is used instead of 1:length(accs)
# so that an empty log does not turn into the index vector c(1, 0).
plot(
  x = seq_along(accs), y = accs, type = "p",
  xlab = "Iteration", ylab = "Accuracy"
)