Advertisement
Nenkham

Untitled

Jun 5th, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 8.98 KB | None | 0 0
  1.  
  2.  
  3.  
  # New feature sets ----
  # Working copies of the engineered feature tables: training, validation
  # and test.
  TREINO <- features_treino
  VALID <- features_validacao
  TESTE <- features_teste

  # Transformations for the decision tree ----
  # Drop column 1 (card) and what is originally column 4 (district). This is
  # the same pair removed by dropping column 1 first and then column 3 of the
  # remaining columns.

  # TREINO
  TREINO <- TREINO[, -c(1, 4)]

  # VALID
  VALID <- VALID[, -c(1, 4)]
  27.  
  28.  
  29.  
  # Transformations for the random forest ----

  # Encode a categorical vector as numeric codes.
  #
  # x:     vector of category values (character or factor).
  # known: character vector of recognised values; the i-th entry maps to i.
  # trim:  if TRUE, leading/trailing whitespace in x is ignored before
  #        matching, so the result does not depend on how the data pads
  #        its strings.
  #
  # Returns a numeric vector the same length as x. Any non-missing value
  # that is not in `known` maps to length(known) + 1; NA stays NA.
  encode_categories <- function(x, known, trim = FALSE) {
    values <- as.character(x)
    if (trim) {
      values <- trimws(values)
    }
    codes <- match(values, known)
    codes[is.na(codes) & !is.na(values)] <- length(known) + 1L
    as.numeric(codes)
  }

  # Region values were previously compared against hand-padded literals whose
  # padding was inconsistent ("SUL" padded to 6 characters, the rest to 7), so
  # a row could silently fall into the catch-all code 6. Matching on trimmed
  # values removes that dependency.
  region_levels <- c("NORTE", "SUL", "CENTRO", "MADEIRA", "AÇORES")
  genero_levels <- c("F", "M", "A")

  # TREINO
  f_TREINO <- TREINO
  # The card and district columns have already been removed from TREINO.
  # The label becomes a factor so that randomForest() runs as a classifier.
  f_TREINO$Comprou <- as.factor(f_TREINO$Comprou)

  # NOTE(review): columns 3 and 4 are presumably region and genero; they are
  # still addressed by position here -- confirm against the feature tables.
  f_TREINO[, 3] <- encode_categories(f_TREINO[, 3], region_levels, trim = TRUE)
  f_TREINO[, 4] <- encode_categories(f_TREINO[, 4], genero_levels)

  # VALID
  f_VALID <- VALID
  # The card and district columns have already been removed from VALID.
  # The validation label is deliberately left untransformed for now.
  f_VALID[, 3] <- encode_categories(f_VALID[, 3], region_levels, trim = TRUE)
  f_VALID[, 4] <- encode_categories(f_VALID[, 4], genero_levels)
  47.                        
  # ---------------------------------------------------------------------------
  # Model formula
  # ---------------------------------------------------------------------------

  # Predictor names used by every model in this script.
  features <- c(
    "age", "region", "genero",
    # names ending in 3
    "total_compras3", "total_AL3", "total_semana3", "media_semana3",
    "total_FDS3", "media_FDS3", "total_online3", "total_AUT3",
    "total_bazar3", "total_bio3", "total_desporto3", "total_lar3",
    "total_pets3", "total_restau3", "total_roupa3", "total_saude3",
    "total_transp3", "total_viagens3",
    # names ending in 6
    "total_compras6", "total_AL6", "total_semana6", "media_semana6",
    "total_FDS6", "media_FDS6", "total_online6", "qte_online6",
    "total_AUT6", "total_bazar6", "total_bio6", "total_desporto6",
    "total_lar6", "total_pets6", "total_restau6", "total_roupa6",
    "total_saude6", "total_transp6", "total_viagens6",
    # names ending in 12
    "total_compras12", "total_AL12", "total_semana12", "media_semana12",
    "total_FDS12", "media_FDS12", "total_online12", "qte_online12",
    "total_AUT12", "total_bazar12", "total_bio12", "total_desporto12",
    "total_lar12", "total_pets12", "total_restau12", "total_roupa12",
    "total_saude12", "total_transp12", "total_viagens12"
  )

  # Name of the response column.
  label <- "Comprou"

  # Formula of the form `Comprou ~ age + region + ...`, built from the names
  # above.
  predictor_terms <- paste(features, collapse = " + ")
  form <- as.formula(sprintf("%s ~ %s", label, predictor_terms))
  # ---> PRECISION AND RECALL
  # Compute precision and recall from a binary confusion matrix.
  #
  # cm: an object whose `table` element is a 2 x 2 contingency table with
  #     predictions in rows and reference labels in columns, the positive
  #     class being the second level of both. The callers in this script
  #     pass the result of confusionMatrix().
  #
  # Returns a numeric vector of length 2: c(precision, recall). A component
  # is NaN when its denominator is zero (no predicted positives, or no
  # actual positives).
  #
  # Stops with an error when the table is not 2 x 2, because the cell
  # positions used below would then refer to the wrong counts.
  metrics <- function(cm) {
    counts <- cm[["table"]]
    if (!identical(dim(counts), c(2L, 2L))) {
      stop("metrics() expects a 2 x 2 confusion table", call. = FALSE)
    }

    # Rows are predictions, columns are reference labels.
    tn <- counts[1, 1]
    fp <- counts[2, 1]
    fn <- counts[1, 2]
    tp <- counts[2, 2]

    c(
      precision = tp / (tp + fp),
      recall = tp / (tp + fn)
    )
  }
  74.  
  75.  
  76.  
  77.  
  78.  
  # ---------------------------------------------------------------------------
  # ---------- MODELS ----------
  # ---------------------------------------------------------------------------

  # ------ DECISION TREE ------

  # -------> decision tree, default settings
  tree_model_0 <- rpart(form, TREINO)
  rpart.plot(tree_model_0)
  # ---> ROC
  preds_tree_model_0 <- predict(tree_model_0, VALID)

  # Hard 0/1 predictions at a 0.5 cut-off; used for the confusion matrix only.
  threshold_preds <- (preds_tree_model_0 > 0.5) * 1

  # The ROC curve is built from the raw scores, as for tree_model_1 below.
  # Thresholded predictions leave a single operating point, so the curve and
  # its AUC would describe one cut-off rather than the model's ranking.
  roc_curve_tree_0 <- roc.curve(
    scores.class0 = preds_tree_model_0,
    weights.class0 = VALID[, label],
    curve = TRUE
  )

  plot(roc_curve_tree_0)
  # ---> confusion matrix
  cm_tm_0 <- confusionMatrix(
    as.factor(threshold_preds),
    as.factor(VALID[, label])
  )
  # ---> precision and recall
  metrics(cm_tm_0)
  # ---------------------------------------------------------------------------


  # --------> tree_model 1: tuned rpart control parameters
  tree_model_1 <- rpart(
    form,
    TREINO,
    control = rpart.control(minsplit = 30, cp = 0.015, maxdepth = 30)
  )
  rpart.plot(tree_model_1)
  # ---> ROC (raw scores)
  preds_tree_model_1 <- predict(tree_model_1, VALID)
  # This model uses a 0.2 cut-off for its hard predictions.
  threshold_preds_1 <- (preds_tree_model_1 > 0.2) * 1
  roc_curve_tree_1 <- roc.curve(
    scores.class0 = preds_tree_model_1,
    weights.class0 = VALID[, label],
    curve = TRUE
  )
  plot(roc_curve_tree_1)
  # ---> confusion matrix
  cm_tm_1 <- confusionMatrix(
    as.factor(threshold_preds_1),
    as.factor(VALID[, label])
  )
  # ---> precision and recall
  metrics(cm_tm_1)
  # ---------------------------------------------------------------------------
  118.  
  119.  
  120.  
  121.  
  # ---------------------------------------------------------------------------
  # ------ RANDOM FOREST ------
  # ---------------------------------------------------------------------------

  # --------> randomForest, default settings
  # type = "prob" is a predict() option, not a fitting argument, so it is
  # passed to predict() only.
  rf_model_0 <- randomForest(form, f_TREINO)
  print(rf_model_0)
  # ---> ROC
  preds_rf_model_0 <- predict(rf_model_0, f_VALID, type = "prob")

  # predict() returns one probability column per class (0 and 1); dropping
  # the first column keeps the class-1 probability.
  preds_rf_model_0 <- preds_rf_model_0[, -c(1)]
  # Hard 0/1 predictions at a 0.5 cut-off; used for the confusion matrix only.
  threshold_preds_rf_0 <- (preds_rf_model_0 > 0.5) * 1

  # The ROC curve uses the class-1 probabilities. Thresholded predictions
  # would reduce it to a single operating point.
  roc_curve_rf_0 <- roc.curve(
    scores.class0 = preds_rf_model_0,
    weights.class0 = f_VALID[, label],
    curve = TRUE
  )
  plot(roc_curve_rf_0)
  # ---> confusion matrix
  cm_rf_0 <- confusionMatrix(
    as.factor(threshold_preds_rf_0),
    as.factor(f_VALID[, label])
  )
  # ---> precision and recall
  metrics(cm_rf_0)
  # ---------------------------------------------------------------------------


  # --------> randomForest_1: 30000 trees, 4 candidate variables per split
  # Results noted by the authors: AUC=0.670   Pre=0.675   Recall=0.432
  # NOTE(review): that AUC was presumably measured on the thresholded
  # predictions; expect a different value now that the ROC uses
  # probabilities.
  rf_model_1 <- randomForest(form, f_TREINO, ntree = 30000, mtry = 4)
  print(rf_model_1)
  # ---> ROC
  preds_rf_model_1 <- predict(rf_model_1, f_VALID, type = "prob")

  # Keep the class-1 probability column (see rf_model_0).
  preds_rf_model_1 <- preds_rf_model_1[, -c(1)]
  threshold_preds_rf_1 <- (preds_rf_model_1 > 0.5) * 1

  roc_curve_rf_1 <- roc.curve(
    scores.class0 = preds_rf_model_1,
    weights.class0 = f_VALID[, label],
    curve = TRUE
  )
  plot(roc_curve_rf_1)
  # ---> confusion matrix
  cm_rf_1 <- confusionMatrix(
    as.factor(threshold_preds_rf_1),
    as.factor(f_VALID[, label])
  )
  # ---> precision and recall
  metrics(cm_rf_1)
  159.  
  160.  
  161.  
  162.  
  # --------> rpartXse (DMwR2)
  # rpartXse() fits a single rpart tree pruned with the X-SE rule; despite
  # the section's former "randomForest" title, no forest is involved here.

  potato <- rpartXse(form, TREINO)
  prp(potato, type = 4, extra = 101)
  potato_preds <- predict(potato, VALID)

  # Mean absolute error between the predicted scores and the label.
  potato_mae <- mean(abs(potato_preds - VALID[, label]))

  potato_mae

  # Correlation between the predicted scores and the label.
  cor_potato <- cor(potato_preds, VALID[, label])

  cor_potato

  # Cross-validated error rate (3 x 10-fold CV) for three pruning settings.
  #
  # The estimation uses the classification error ("err") and class
  # predictions, so the label must be a factor. The rest of this script
  # treats TREINO's label as numeric, hence the converted copy.
  # PredTask() gets a named object rather than an inline expression.
  # NOTE(review): this assumes PredTask() records the data by name -- confirm.
  potato_cv_data <- TREINO
  potato_cv_data[[label]] <- as.factor(potato_cv_data[[label]])

  # workflowVariants() creates one workflow per value of `se`. Workflow()
  # would instead hand the whole vector c(0, 0.5, 1) to a single rpartXse()
  # call.
  potato_res <- performanceEstimation(
    PredTask(form, potato_cv_data),
    workflowVariants(
      "standardWF",
      learner = "rpartXse",
      learner.pars = list(se = c(0, 0.5, 1)),
      predictor.pars = list(type = "class")
    ),
    EstimationTask(metrics = "err", method = CV(nReps = 3, nFolds = 10))
  )

  summary(potato_res)
  plot(potato_res)
  190.  
  191.  
  192.  
  193.  
  194.  
  195. # --------------------------------------------------------------------------------------------------------------------------#
  196.  
  197.  
  198.  
  # ---------------------------------------------------------------------------
  # ----------- SVM (C-classification) -----------
  # ---------------------------------------------------------------------------

  # --------> SVM baseline, radial kernel
  svm_model_R0 <- svm(
    form,
    TREINO,
    type = "C-classification",
    kernel = "radial"
  )
  print(svm_model_R0)
  # ---> ROC
  preds_svm_model_R0 <- predict(svm_model_R0, VALID)
  # For C-classification predict() returns the predicted classes as a
  # factor. roc.curve() works on numeric scores, so the labels are converted
  # to their numeric values (assumes the classes are coded 0/1).
  # NOTE(review): these are hard labels, so the curve has a single operating
  # point; consider scoring with decision values or probabilities instead.
  scores_svm_R0 <- as.numeric(as.character(preds_svm_model_R0))
  roc_curve_svm_R0 <- roc.curve(
    scores.class0 = scores_svm_R0,
    weights.class0 = VALID[, label],
    curve = TRUE
  )
  plot(roc_curve_svm_R0)
  # ---> confusion matrix
  cm_svm_R0 <- confusionMatrix(
    as.factor(preds_svm_model_R0),
    as.factor(VALID[, label])
  )
  # ---> precision and recall
  metrics(cm_svm_R0)
  # ---------------------------------------------------------------------------


  # ---> SVM 1 (radial), cost = 50 and gamma = 1
  svm_model_R1 <- svm(
    form,
    TREINO,
    type = "C-classification",
    kernel = "radial",
    cost = 50,
    gamma = 1
  )
  print(svm_model_R1)
  # ---> ROC
  preds_svm_model_R1 <- predict(svm_model_R1, VALID)
  # Same factor-to-numeric conversion as for the baseline model.
  scores_svm_R1 <- as.numeric(as.character(preds_svm_model_R1))
  roc_curve_svm_R1 <- roc.curve(
    scores.class0 = scores_svm_R1,
    weights.class0 = VALID[, label],
    curve = TRUE
  )
  plot(roc_curve_svm_R1)
  # ---> confusion matrix
  cm_svm_R1 <- confusionMatrix(
    as.factor(preds_svm_model_R1),
    as.factor(VALID[, label])
  )
  # ---> precision and recall
  metrics(cm_svm_R1)
  # ---------------------------------------------------------------------------



  # ---------------------------------------------------------------------------
  # ----------------------------------> END <----------------------------------
  # ---------------------------------------------------------------------------
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement