Advertisement
Guest User

Untitled

a guest
Nov 16th, 2018
123
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.27 KB | None | 0 0
  1. ---
  2. title: "Inclass 11-08"
  3. author: "Nino & Theo"
  4. date: "8 novembre 2018"
  5. output: html_document
  6. ---
  7.  
  8. ```{r setup, include=FALSE}
  9. knitr::opts_chunk$set(echo = TRUE)
  10. ```
  11.  
  12. ```{r}
  13.  
  14.  
  # Load the survey data; the first row of the CSV holds the column names.
  # read.table() already returns a data.frame, so no extra conversion is needed.
  spor <- read.table("spor.csv", header = TRUE, sep = ",")

  # Recode the numeric education / study-time codes as labelled factors.
  # Labels are attached to the observed codes in ascending order. factor()
  # stops with an error when the number of distinct codes in the data differs
  # from the number of labels, instead of silently shifting the labels the way
  # `levels<-` does when a code happens to be absent from the data.
  edu_labels <- c(
    "none", "4th grade", "5th to 9th grade",
    "secondary education", "higher education"
  )
  study_labels <- c("<2", "2 to 5", "5 to 10", ">10")

  spor$Medu <- factor(spor$Medu, labels = edu_labels)
  spor$Fedu <- factor(spor$Fedu, labels = edu_labels)
  spor$studytime <- factor(spor$studytime, labels = study_labels)

  # Logistic regression (binomial GLM) of `alc` on every other column of `spor`.
  linmod <- glm(alc ~ ., data = spor, family = "binomial")
  summary(linmod)
  26.  
  27. ```
  28.  
  29. ```{r}
  30. library(kernlab)
  31. library(caret)
  32. library(caretEnsemble)
  33. library(forecast)
  34.  
  # Re-read the raw file, replacing the `spor` object recoded in the previous
  # chunk with the columns exactly as stored in the CSV.
  spor <- read.table("spor.csv", header = TRUE, sep = ",")

  # Replace `age` with a 0/1 indicator for age >= 19.
  spor$age2 <- 0
  spor$age2[spor$age >= 19] <- 1
  spor$age <- NULL

  data <- spor

  # Seed the RNG before the random partition so the split is reproducible.
  set.seed(11)

  # p = 0.8 of the rows go to the training set, the rest to the test set.
  intrain <- createDataPartition(y = data$alc, p = 0.8, list = FALSE)
  data_train <- data[intrain, ]
  data_test <- data[-intrain, ]

  # Resampling scheme shared by all models: 10-fold CV repeated 5 times.
  trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 5)

  # Alternative kept for reference: the same linear SVM on all predictors.
  # alc_fit <- train(
  #   as.factor(alc) ~ ., data = data_train, method = "svmLinear",
  #   trControl = trctrl, preProcess = c("center", "scale"), tuneLength = 20
  # )
  # alc_fit

  # Linear SVM on a hand-picked set of predictors and interactions.
  # Predictors are centred and scaled before fitting.
  alc_fit2 <- train(
    as.factor(alc) ~ sex + famsize * famrel + Pstatus * famrel +
      studytime * absences + health + absences * goout,
    data = data_train,
    method = "svmLinear",
    trControl = trctrl,
    preProcess = c("center", "scale"),
    tuneLength = 20
  )
  alc_fit2
  63.  
  64.  
  65.  
  66. ```
  67.  
  68.  
  69. ```{r}
  # Class predictions of the SVM on the held-out rows. predict() on a
  # classification `train` object returns class labels, so no 0.5 threshold
  # has to be applied.
  data_test$svm1 <- predict(alc_fit2, newdata = data_test)

  # confusionMatrix() takes the predictions as `data` and the observed classes
  # as `reference`. Passing them the other way round swaps sensitivity with
  # specificity and the predictive values; accuracy and kappa are symmetric
  # and therefore unaffected.
  confusionMatrix(
    data = as.factor(data_test$svm1),
    reference = as.factor(data_test$alc)
  )
  73. ```
  74. The baseline accuracy and kappa are 65.59% and 25.88%, respectively. We will try to improve on both by ensembling several models.
  75.  
  76. ```{r}
  77. library(xgboost)
  # Model 2: xgbLinear on all predictors, tuned for accuracy with a single
  # candidate per tuning parameter (tuneLength = 1).
  fit2 <- train(
    as.factor(alc) ~ .,
    data = data_train,
    metric = "Accuracy",
    method = "xgbLinear",
    trControl = trctrl,
    preProcess = c("center", "scale"),
    tuneLength = 1
  )
  data_test$fit2 <- predict(fit2, newdata = data_test)

  # Predictions go in `data`, observed classes in `reference`; the reverse
  # order swaps sensitivity/specificity and the predictive values.
  confusionMatrix(
    data = as.factor(data_test$fit2),
    reference = as.factor(data_test$alc)
  )
  81. ```
  82. ```{r}
  # Model 3: xgbTree on all predictors, tuned for accuracy with a single
  # candidate per tuning parameter (tuneLength = 1).
  fit3 <- train(
    as.factor(alc) ~ .,
    data = data_train,
    metric = "Accuracy",
    method = "xgbTree",
    trControl = trctrl,
    preProcess = c("center", "scale"),
    tuneLength = 1
  )
  data_test$fit3 <- predict(fit3, newdata = data_test)

  # Predictions go in `data`, observed classes in `reference`; the reverse
  # order swaps sensitivity/specificity and the predictive values.
  confusionMatrix(
    data = as.factor(data_test$fit3),
    reference = as.factor(data_test$alc)
  )
  86. ```
  87.  
  88. ```{r}
  89.  
  # Model 4: xgbLinear with one fixed hyper-parameter combination. Because an
  # explicit tuneGrid is supplied, tuneLength would be ignored and is omitted.
  fit4 <- train(
    as.factor(alc) ~ .,
    data = data_train,
    metric = "Accuracy",
    method = "xgbLinear",
    trControl = trctrl,
    preProcess = c("center", "scale"),
    tuneGrid = data.frame(nrounds = 45, lambda = 0, alpha = 0, eta = 0.5)
  )
  data_test$fit4 <- predict(fit4, newdata = data_test)

  # Predictions go in `data`, observed classes in `reference`; the reverse
  # order swaps sensitivity/specificity and the predictive values.
  confusionMatrix(
    data = as.factor(data_test$fit4),
    reference = as.factor(data_test$alc)
  )
  93.  
  94. ```
  95.  
  96.  
  97.  
  98.  
  99. ```{r}
  # Turn a factor of "0"/"1" class predictions into numeric 0/1 votes.
  # Going through as.character() yields the labels, not the internal codes.
  as_vote <- function(pred) as.numeric(as.character(pred))

  # Weighted vote: svm1, fit2 and fit3 count once, fit4 counts twice, so the
  # score ranges from 0 to 5.
  data_test$ensemble <- as_vote(data_test$svm1) +
    as_vote(data_test$fit2) +
    as_vote(data_test$fit3) +
    as_vote(data_test$fit4) * 2

  # Predict class 1 when at least 3 of the 5 weighted votes say so. The levels
  # are fixed to 0/1 so the factor keeps both classes even when every row
  # receives the same prediction.
  data_test$ensemble_predicted <- factor(
    ifelse(data_test$ensemble >= 3, 1, 0),
    levels = c(0, 1)
  )

  # Predictions go in `data`, observed classes in `reference`; the reverse
  # order swaps sensitivity/specificity and the predictive values.
  confusionMatrix(
    data = data_test$ensemble_predicted,
    reference = as.factor(data_test$alc)
  )
  106. ```
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement