Advertisement
Guest User

Untitled

a guest
Nov 14th, 2018
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 4.85 KB | None | 0 0
  # Load the APS failure training and test sets. The literal string "na" marks
  # a missing value in both files.
  # stringsAsFactors = TRUE keeps `class` a factor on R >= 4.0, where read.csv
  # no longer converts strings by default; the classifiers and
  # confusionMatrix() calls further down need factor labels.
  trainData <- read.csv(
    "C:/Users/TiagoMonteiro/Downloads/CDados/CDados/dataset1/aps_failure_training_set.csv",
    na.strings = "na",
    stringsAsFactors = TRUE
  )
  testData <- read.csv(
    "C:/Users/TiagoMonteiro/Downloads/CDados/CDados/dataset1/aps_failure_test_set.csv",
    na.strings = "na",
    stringsAsFactors = TRUE
  )
  3.  
  # Bar chart of the class label counts, first for the training set and then
  # for the test set.
  qplot(as.factor(class), data = trainData, xlab = "class")
  qplot(as.factor(class), data = testData, xlab = "class")
  6.  
  7.  
  # Drop every column in which more than 70% of the values are missing.
  # The selection is made on the training set only and then applied to the
  # test set, so both frames keep the same columns.
  # A keep-list is used instead of `-which(...)`: when no column exceeds the
  # threshold, `-which(...)` is `integer(0)` and selects zero columns.
  keep_cols <- names(trainData)[colMeans(is.na(trainData)) <= 0.7]
  trainData <- trainData[, keep_cols, drop = FALSE]
  testData <- testData[, intersect(keep_cols, names(testData)), drop = FALSE]
  10.  
  # k-nearest-neighbour imputation (k = 3) of the test set.
  # NOTE(review): the imputation model is fitted on the test set itself rather
  # than on the training set - confirm this is intended.
  preProcValues <- preProcess(x = testData, method = "knnImpute", k = 3)

  # Apply the fitted pre-processing to fill in the missing test values.
  testData_knnimp <- predict(preProcValues, newdata = testData)
  16.  
  # Fix the RNG seed, then start 8 socket workers and register them as the
  # parallel backend.
  set.seed(seed = 1234)

  cl <- makeCluster(spec = 8, type = "SOCK")
  registerDoSNOW(cl)
  22.  
  23.  
  # Complete-case variant of the training set: rows with any NA are removed.
  trainData_omit <- na.omit(trainData)

  # kNN-imputed variant (k = 3): fit the pre-processing on the training set
  # and apply it straight away to fill in the missing values.
  trainData_imputation <- predict(
    preProcess(trainData, method = "knnImpute", k = 3),
    trainData
  )
  32.  
  # Mean imputation of both data sets with mice (a single imputed set each).
  trainData_mean <- mice(trainData, m = 1, maxit = 5, method = "mean", seed = 500)
  testData_mean <- mice(testData, m = 1, maxit = 5, method = "mean", seed = 500)

  # Mean-imputed data without constant / collinear attributes.
  # The attributes to drop are the ones mice logged as "collinear" or
  # "constant" for the TRAINING set; the same list is applied to the test set
  # so both cleaned frames keep matching columns.
  # `loggedEvents` may be NULL when mice logged nothing, so guard before
  # filtering it.
  train_events <- trainData_mean$loggedEvents
  if (is.null(train_events)) {
    attrToDelete <- NULL
    attrToDeleteNames <- character(0)
  } else {
    flagged <- train_events$meth %in% c("collinear", "constant")
    attrToDelete <- train_events[flagged, , drop = FALSE]
    attrToDeleteNames <- as.character(attrToDelete[, "out"])
  }

  # Replace each mids object by its completed data frame, then drop the
  # flagged attributes.
  trainData_mean <- complete(trainData_mean, 1)
  trainData_mean_clean <- trainData_mean[
    , !(names(trainData_mean) %in% attrToDeleteNames), drop = FALSE
  ]

  testData_mean <- complete(testData_mean, 1)
  testData_mean_clean <- testData_mean[
    , !(names(testData_mean) %in% attrToDeleteNames), drop = FALSE
  ]
  50.  
  51.  
  # Feature selection: gain ratio of every attribute with respect to `class`,
  # computed on the cleaned, mean-imputed training set.
  feature_weights <- gain.ratio(class ~ ., trainData_mean_clean)

  # Keep the scores as a vector NAMED by attribute. The previous
  # transpose-then-index form returned the same scores but dropped the
  # attribute names, so the selection could not be mapped back to columns.
  gain_by_feature <- setNames(feature_weights[[1]], rownames(feature_weights))

  # Keep only attributes with a non-zero score; which() also discards NA/NaN.
  trainData_mean_clean_feature_selection <-
    gain_by_feature[which(gain_by_feature != 0)]
  59.  
  60.  
  61.  
  # k-nearest-neighbour models, tuned over odd k from 3 to 17.
  grid <- expand.grid(k = seq(3, 17, by = 2))

  # `testData1` is not created anywhere in this script, so the unguarded
  # assignment stopped a clean run with "object 'testData1' not found".
  # Only touch it when it exists (e.g. left over from an interactive session).
  # NOTE(review): possibly `testData` was meant - confirm before changing.
  if (exists("testData1")) {
    testData1$as_000 <- NULL
  }
  trainData_omit$cd_000 <- NULL

  # In every call the first column is dropped from the predictors and `class`
  # is passed as the outcome (assumes `class` is column 1 - TODO confirm).

  # Complete-case training data: centred/scaled, then additionally with PCA.
  results.model_knn_omit_pp_norm <- train(
    trainData_omit[-1], trainData_omit$class,
    method = "knn", tuneGrid = grid,
    preProcess = c("center", "scale")
  )
  results.model_knn_omit_pp_norm_pca <- train(
    trainData_omit[-1], trainData_omit$class,
    method = "knn", tuneGrid = grid,
    preProcess = c("center", "scale", "pca")
  )

  # Mean-imputed, cleaned training data: same two pre-processing variants.
  results.model_knn_clean_pp_norm <- train(
    trainData_mean_clean[-1], trainData_mean_clean$class,
    method = "knn", tuneGrid = grid,
    preProcess = c("center", "scale")
  )
  results.model_knn_clean_pp_norm_pca <- train(
    trainData_mean_clean[-1], trainData_mean_clean$class,
    method = "knn", tuneGrid = grid,
    preProcess = c("center", "scale", "pca")
  )
  74.  
  75.  
  # Naive Bayes models. The tuning grid has two rows: (fL, adjust) = (0.5, 0.5)
  # and (1.0, 1.0), both with usekernel = TRUE.
  grid <- data.frame(fL = c(0.5, 1.0), usekernel = TRUE, adjust = c(0.5, 1.0))

  # Complete-case training data.
  results.model_nb_omit_pp_norm <- train(
    x = trainData_omit[-1],
    y = trainData_omit$class,
    method = "nb",
    tuneGrid = grid,
    preProcess = c("center", "scale")
  )
  results.model_nb_omit_pp_norm_pca <- train(
    x = trainData_omit[-1],
    y = trainData_omit$class,
    method = "nb",
    tuneGrid = grid,
    preProcess = c("center", "scale", "pca")
  )

  # Mean-imputed training data (all attributes kept).
  results.model_nb_mean_pp_norm <- train(
    x = trainData_mean[-1],
    y = trainData_mean$class,
    method = "nb",
    tuneGrid = grid,
    preProcess = c("center", "scale")
  )
  results.model_nb_mean_pp_norm_pca <- train(
    x = trainData_mean[-1],
    y = trainData_mean$class,
    method = "nb",
    tuneGrid = grid,
    preProcess = c("center", "scale", "pca")
  )

  # Mean-imputed training data with the flagged attributes removed.
  results.model_nb_clean_pp_norm <- train(
    x = trainData_mean_clean[-1],
    y = trainData_mean_clean$class,
    method = "nb",
    tuneGrid = grid,
    preProcess = c("center", "scale")
  )
  results.model_nb_clean_pp_norm_pca <- train(
    x = trainData_mean_clean[-1],
    y = trainData_mean_clean$class,
    method = "nb",
    tuneGrid = grid,
    preProcess = c("center", "scale", "pca")
  )
  89.  
  90.  
  # Naive Bayes on the column-filtered training set with centring and scaling,
  # evaluated on the test set.
  # `tuneList = NULL` was dropped: it is not a train() argument, so it was
  # forwarded through `...` to the underlying model fit.
  # NOTE(review): trainData / testData have not been imputed at this point;
  # confirm how missing values are meant to be handled for this model.
  model_nb <- train(
    class ~ .,
    data = trainData,
    method = "nb",
    preProcess = c("center", "scale")
  )

  preds <- predict(model_nb, testData)
  confusionMatrix(preds, testData$class)
  95.  
  # KNN on the column-filtered training set with PCA pre-processing, tuned
  # over odd k from 3 to 17.
  grid <- expand.grid(k = seq(from = 3, to = 17, by = 2))
  model_cv <- train(
    class ~ .,
    data = trainData,
    method = "knn",
    tuneGrid = grid,
    preProcess = "pca"
  )
  100.  
  # J48 decision tree (RWeka).
  library("RWeka")

  # The response is written as the bare column name. With
  # `trainData$class ~ .` the `.` still expanded to every column of
  # `trainData`, including `class` itself, so the tree could split on the
  # label it was supposed to predict.
  truck_j48 <- J48(class ~ ., data = trainData)
  plot(truck_j48)

  # 10-fold evaluation with per-class statistics and a fixed seed.
  eval_j48 <- evaluate_Weka_classifier(
    truck_j48,
    numFolds = 10, complexity = FALSE,
    seed = 1, class = TRUE
  )
  eval_j48

  # Evaluation on the test set.
  preds <- predict(truck_j48, newdata = testData)
  confusionMatrix(preds, testData$class)
  111.  
  # Random forest.
  library("randomForest")
  set.seed(1234)

  # Two fixes compared with the previous call:
  #  * the response is the bare column name; with `trainData$class ~ .` the
  #    `.` also included the `class` column as a predictor;
  #  * ntree / mtry are passed to randomForest() directly. They used to be
  #    wrapped in `controls = cforest_unbiased(...)`, which is an argument of
  #    party::cforest, so randomForest() never applied them.
  # NOTE(review): trainData has not been imputed at this point; confirm how
  # missing values are meant to be handled for this model.
  random_forest <- randomForest(
    class ~ .,
    data = trainData,
    ntree = 2000,
    mtry = 3
  )

  # Evaluation on the test set.
  preds <- predict(random_forest, newdata = testData)
  confusionMatrix(preds, testData$class)

  # Shut down the worker processes started with makeCluster() earlier in the
  # script; they were previously left running.
  stopCluster(cl)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement