Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Repeated bootstrap evaluation of a model.
#
# Each iteration draws row indices with replacement, trains on the unique
# rows that were drawn, and tests on the rows that were never drawn. The
# share of rows that ended up in the training set and the accuracy measured
# on the held-out rows are logged per iteration and plotted at the end.
source("load_data.R")

d <- read_and_preprocess_data_file("data/BADS_WS1718_known.csv")
d <- subset(d, select = -c(delivery_date)) # remove NAs

classdata <- read_and_preprocess_data_file("data/BADS_WS1718_class.csv")
classdata <- subset(classdata, select = -c(delivery_date)) # remove NAs

# train the final model with 632 bootstrapping
n_iterations <- 400L
n_rows <- nrow(d)

# sample(0, replace = TRUE) does not fail; it silently yields the index 0,
# which would give empty training and test sets. Stop early instead.
stopifnot(n_rows > 0)

# Preallocate the per-iteration logs. The loop previously appended to `probs`
# and `accs` without creating them first, which fails on the first iteration
# with "object 'probs' not found".
probs <- numeric(n_iterations)
accs <- numeric(n_iterations)

for (iter in seq_len(n_iterations)) {
  # sample with replacement here - to understand why please refer to the book
  # Duplicated draws are collapsed so every training row appears once.
  sampled_order_ids <- unique(sample(n_rows, replace = TRUE))

  training_set <- d[sampled_order_ids, ]
  # Negative indexing keeps exactly the rows that were not drawn.
  test_set <- d[-sampled_order_ids, ]

  # Fraction of the data that landed in the training set this iteration.
  probs[iter] <- nrow(training_set) / n_rows

  # TODO: train the model here with the training set, be sure to always train
  # the same model, and not discard and continuously start at 0.
  # TODO: test the model accuracy with the test set and store it in
  # `accuracy`; this script does not define `accuracy` itself, so the next
  # line fails until the evaluation step is filled in.
  accs[iter] <- accuracy
}

# plot accuracies to see change with higher number of iterations
plot(x = seq_along(accs), y = accs, type = "p")
Add Comment
Please, Sign In to add comment