Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the packages whose functions this script calls: caret provides
# createDataPartition(), trainControl() and train(); doSNOW provides
# registerDoSNOW() and attaches snow (makeCluster()/stopCluster()).
library(caret)
library(doSNOW)

# Fix the RNG seed so the random partitions below are repeatable.
set.seed(1234)

# Import datasets. The directory is named once so it can be changed in a
# single place; the resulting file paths are the same as before.
data_dir <- "C:/Users/tiago/Desktop/CDados/dataset2"
green <- read.csv(file.path(data_dir, "green.csv"))
hinselmann <- read.csv(file.path(data_dir, "hinselmann.csv"))
schiller <- read.csv(file.path(data_dir, "schiller.csv"))

# Stack the three data sets row-wise into one combined data set.
group <- rbind(green, hinselmann, schiller)

# Placeholder; not referenced anywhere else in the visible script.
nb <- NULL
# Turn the label columns into factors.
#
# Args:
#   dataset: a data frame that contains the columns experts..0 through
#            experts..5 and consensus.
#
# Returns:
#   `dataset` with those seven columns converted by as.factor(); every other
#   column is returned unchanged.
#
# Raises:
#   An error naming the missing column(s) when any of the seven is absent.
factorize <- function(dataset) {
  label_cols <- c(paste0("experts..", 0:5), "consensus")

  # Fail early with a readable message instead of a cryptic
  # "replacement has 0 rows" error from the assignment.
  missing_cols <- setdiff(label_cols, names(dataset))
  if (length(missing_cols) > 0) {
    stop(
      "factorize(): missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  dataset[label_cols] <- lapply(dataset[label_cols], as.factor)
  dataset
}
# Prepare each data set in turn: convert its label columns to factors, then
# drop cervix_area. The column is removed because, per the original author,
# it causes problems with preProcess in the computations further down.

# combined data set
group <- factorize(group)
group[["cervix_area"]] <- NULL

# green
green <- factorize(green)
green[["cervix_area"]] <- NULL

# hinselmann
hinselmann <- factorize(hinselmann)
hinselmann[["cervix_area"]] <- NULL

# schiller
schiller <- factorize(schiller)
schiller[["cervix_area"]] <- NULL
# Randomly split every data set into 70% training rows and 30% test rows,
# sampling on the consensus label. The four createDataPartition() calls are
# kept in this order because they consume the same random-number stream.

# group
indexC_group <- createDataPartition(
  y = group[["consensus"]], p = 0.7, list = FALSE
)
trainDataC_group <- group[indexC_group, ]
testDataC_group <- group[-indexC_group, ]

# green
indexC_green <- createDataPartition(
  y = green[["consensus"]], p = 0.7, list = FALSE
)
trainDataC_green <- green[indexC_green, ]
testDataC_green <- green[-indexC_green, ]

# hinselmann
indexC_hinselmann <- createDataPartition(
  y = hinselmann[["consensus"]], p = 0.7, list = FALSE
)
trainDataC_hinselmann <- hinselmann[indexC_hinselmann, ]
testDataC_hinselmann <- hinselmann[-indexC_hinselmann, ]

# schiller
indexC_schiller <- createDataPartition(
  y = schiller[["consensus"]], p = 0.7, list = FALSE
)
trainDataC_schiller <- schiller[indexC_schiller, ]
testDataC_schiller <- schiller[-indexC_schiller, ]
# Clustering: start 8 socket workers and register them as the parallel
# backend used while the models are trained.
cl <- makeCluster(8, type = "SOCK")
registerDoSNOW(cl)

# Tuning grid for the "nb" method.
# NOTE(review): data.frame() pairs the vectors element-wise, so this grid has
# two rows (fL = 0.5 / adjust = 0.5 and fL = 1.0 / adjust = 1.0), not all four
# combinations. Use expand.grid() if the full grid was intended.
grid <- data.frame(fL = c(0.5, 1.0), usekernel = TRUE, adjust = c(0.5, 1.0))

# Define training control: 10-fold repeated cross-validation.
# NOTE(review): `repeats` is not set, so caret's default applies - confirm
# that this is the intended number of repetitions.
train_control <- trainControl(method = "repeatedcv", number = 10)

# Fit one naive Bayes model that predicts `consensus`.
#
# Args:
#   train_data:  training data frame; every column except the last 8 is used
#                as a predictor.
#   pre_process: optional character vector forwarded to train()'s preProcess
#                argument; NULL (the default) requests no pre-processing.
#
# Returns:
#   The object returned by train().
#
# NOTE(review): factorize() handles 7 label columns, yet 8 trailing columns
# are excluded here - confirm the 8th one is meant to be left out.
fit_nb <- function(train_data, pre_process = NULL) {
  # seq_len() avoids the 1:0 trap when the frame has exactly 8 columns.
  predictors <- train_data[seq_len(length(train_data) - 8)]
  train(
    predictors, train_data$consensus,
    method = "nb", trControl = train_control, tuneGrid = grid,
    preProcess = pre_process
  )
}

pp_center_scale <- c("center", "scale")
pp_pca <- c("center", "scale", "pca")

# Train on consensus without pre-processing, with centering/scaling, and with
# centering/scaling/PCA. The finally clause shuts the workers down even when
# one of the fits fails, so no worker processes are left behind.
tryCatch(
  {
    nb.model_nbC_group_cv <- fit_nb(trainDataC_group)
    nb.model_nbC_group_pp_c_S_cv <- fit_nb(trainDataC_group, pp_center_scale)
    nb.model_nbC_group_pp_pca_cv <- fit_nb(trainDataC_group, pp_pca)

    nb.model_nbC_green_cv <- fit_nb(trainDataC_green)
    nb.model_nbC_green_pp_c_S_cv <- fit_nb(trainDataC_green, pp_center_scale)
    nb.model_nbC_green_pp_pca_cv <- fit_nb(trainDataC_green, pp_pca)

    nb.model_nbC_hinselmann_cv <- fit_nb(trainDataC_hinselmann)
    nb.model_nbC_hinselmann_pp_c_S_cv <-
      fit_nb(trainDataC_hinselmann, pp_center_scale)
    nb.model_nbC_hinselmann_pp_pca_cv <- fit_nb(trainDataC_hinselmann, pp_pca)

    nb.model_nbC_schiller_cv <- fit_nb(trainDataC_schiller)
    nb.model_nbC_schiller_pp_c_S_cv <-
      fit_nb(trainDataC_schiller, pp_center_scale)
    nb.model_nbC_schiller_pp_pca_cv <- fit_nb(trainDataC_schiller, pp_pca)
  },
  finally = stopCluster(cl)
)
# Show the resampled accuracy of every fitted model, one value per row of the
# tuning grid. These stay as top-level expressions so they are auto-printed.

# group
nb.model_nbC_group_cv[["results"]][["Accuracy"]]
nb.model_nbC_group_pp_c_S_cv[["results"]][["Accuracy"]]
nb.model_nbC_group_pp_pca_cv[["results"]][["Accuracy"]]

# green
nb.model_nbC_green_cv[["results"]][["Accuracy"]]
nb.model_nbC_green_pp_c_S_cv[["results"]][["Accuracy"]]
nb.model_nbC_green_pp_pca_cv[["results"]][["Accuracy"]]

# hinselmann
nb.model_nbC_hinselmann_cv[["results"]][["Accuracy"]]
nb.model_nbC_hinselmann_pp_c_S_cv[["results"]][["Accuracy"]]
nb.model_nbC_hinselmann_pp_pca_cv[["results"]][["Accuracy"]]

# schiller
nb.model_nbC_schiller_cv[["results"]][["Accuracy"]]
nb.model_nbC_schiller_pp_c_S_cv[["results"]][["Accuracy"]]
nb.model_nbC_schiller_pp_pca_cv[["results"]][["Accuracy"]]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement