Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Fit a gradient boosting model (caret method "gbm") that predicts `target`,
# report its RMSE on a 10% hold-out taken from the training file, and write
# predictions for the separate test file to a CSV with columns ID and target.

library(caret)
library(ISLR)
library(caTools)

# Input / output locations ----
train_path <- "C:\\Users\\Yevhen\\Documents\\Courses\\Data Mining\\HW6\\train.csv"
test_path <- "C:\\Users\\Yevhen\\Documents\\Courses\\Data Mining\\HW6\\test.csv"
output_path <- "C:\\Users\\Yevhen\\Documents\\Courses\\Data Mining\\prediction.csv"

train_data <- read.csv(train_path, sep = ",", header = TRUE)
test_data <- read.csv(test_path, sep = ",", header = TRUE)

# Hold-out split ----
# 90% of the rows go to model fitting, 10% are kept back for the RMSE check.
# The split is driven by the `agegroup` column (per the caTools documentation,
# sample.split keeps the label proportions of the vector it is given).
set.seed(101)
in_train <- sample.split(train_data$agegroup, SplitRatio = 0.90)
# Named *_set so the data frames do not shadow caret::train().
train_set <- train_data[in_train, , drop = FALSE]
holdout_set <- train_data[!in_train, , drop = FALSE]

# Model formula ----
# Every column except the response is a predictor. Selecting by name avoids
# relying on `target` being the last column.
# NOTE(review): if train.csv carries an `ID` column like test.csv does, it is
# used as a predictor here as well -- confirm that is intended.
feats <- setdiff(names(train_set), "target")
f <- reformulate(feats, response = "target")

# Training scheme: 5-fold cross-validation repeated 3 times ----
control <- trainControl(method = "repeatedcv", number = 5, repeats = 3)

# Train the GBM model ----
# Fitted once; the same seeded model is used both for the hold-out RMSE and
# for the final predictions, so the reported error describes the model that
# produced the submission.
set.seed(7)
modelGbm <- train(
  f,
  data = train_set,
  method = "gbm",
  trControl = control,
  verbose = FALSE
)

# Hold-out RMSE ----
predvals <- predict(modelGbm, newdata = holdout_set[, feats, drop = FALSE])
rmse_ <- sqrt(mean((holdout_set$target - predvals)^2))
# Explicit print so the value is also shown when the script is source()d.
print(rmse_)

# Predictions for the test file ----
submission <- data.frame(
  ID = test_data$ID,
  target = predict(modelGbm, newdata = test_data)
)
write.csv(submission, file = output_path, row.names = FALSE)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement