Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- rm(list=ls())
- source('dataPartitionUtil.R')
- source('GAUtil.R')
- load("top_univariate_features.RData")
- library(GA)
- library(reshape2)
- library(ggplot2)
- library(e1071)
- library(foreach)
- library(doMC)
- library(parallel)
- library(cvTools)
- registerDoMC(cores = detectCores())
# Run one complete feature-selection experiment:
#   1. draw a feature-selection sample (`fs.data`) and a disjoint hold-out
#      sample (`test.data`) from `breast.cancer.data`;
#   2. search for a feature subset with a binary genetic algorithm whose
#      fitness is the 10-fold cross-validated SVM accuracy on `fs.data`;
#   3. train an SVM on `fs.data` restricted to the chosen features and score
#      it on `test.data`.
#
# Relies on objects defined outside this function: `breast.cancer.data`,
# `top.features.vector`, `down.sample` and `best.first`
# (NOTE(review): presumably provided by the sourced utility files / the
# loaded .RData -- confirm).
#
# Returns a list with
#   top.features - names of the features selected by the GA
#   predictions  - named numeric vector: accuracy on class 1, on class 0,
#                  and overall, measured on the hold-out sample
predict.SVM <- function() {
  n.folds <- 10

  # Fitness function handed to ga(): `string` is a 0/1 vector with one
  # position per entry of `top.features.vector`. Returns the mean accuracy
  # over the cross-validation folds.
  fitness <- function(string) {
    print(paste("Executando função de ajuste...", eval.count))
    # The counter lives in predict.SVM's frame, hence the super-assignment.
    eval.count <<- eval.count + 1

    features.subset <- top.features.vector[which(string == 1)]
    # A chromosome with no selected feature gives the SVM nothing to fit
    # on; score it as the worst possible accuracy rather than letting the
    # model fit fail.
    if (length(features.subset) == 0) {
      return(0)
    }

    # Train on the rows `fs.train`, predict the rows `fs.test`, and return
    # the fraction of correctly predicted labels.
    predict.with.SVM <- function(fs.train, fs.test) {
      # drop = FALSE keeps a data frame even when one feature is selected.
      train.fs.data <- fs.data[fs.train, features.subset, drop = FALSE]
      train.fs.data <- transform(train.fs.data,
                                 Label = fs.data$Label[fs.train])
      test.fs.data <- fs.data[fs.test, features.subset, drop = FALSE]
      test.fs.data <- transform(test.fs.data,
                                Label = fs.data$Label[fs.test])
      model <- svm(Label ~ ., data = train.fs.data)
      result <- predict(model, newdata = test.fs.data)
      mean(result == test.fs.data$Label)
    }

    folds <- cvFolds(nrow(fs.data), K = n.folds)
    # The loop variable is named `fold` so it cannot be confused with the
    # evaluation counter above.
    prediction <- foreach(fold = seq_len(n.folds),
                          .combine = "cbind",
                          .packages = "e1071",
                          .export = c("top.features.vector",
                                      "fs.data", "down.sample")) %dopar% {
      predict.with.SVM(folds$subsets[folds$which != fold],
                       folds$subsets[folds$which == fold])
    }
    mean(prediction)
  }

  # The result of down.sample() is used as row indices below.
  # NOTE(review): the third argument (30 / 10) looks like a sample size --
  # confirm against down.sample's definition.
  train <- down.sample(breast.cancer.data,
                       breast.cancer.data$Label,
                       30)
  fs.data <- breast.cancer.data[train, ]
  remaining.data <- breast.cancer.data[-train, ]
  test <- down.sample(remaining.data,
                      remaining.data$Label,
                      10)
  test.data <- remaining.data[test, ]

  # Number of fitness evaluations so far; updated from inside fitness().
  eval.count <- 0
  ga.result <- ga("binary",
                  fitness = fitness,
                  nBits = length(top.features.vector),
                  names = top.features.vector,
                  monitor = plot,
                  maxiter = 300,
                  population = best.first,
                  popSize = 100)
  plot(ga.result)

  # Only the first row of the solution matrix is used.
  solution.SVM <- top.features.vector[ga.result@solution[1, ] == 1]

  # Fit the final SVM on the feature-selection sample and evaluate it on
  # the hold-out sample; returns c(class-1 acc, class-0 acc, total acc).
  predict.breast.cancer <- function() {
    train.data <- fs.data[, solution.SVM, drop = FALSE]
    train.data <- transform(train.data, Label = fs.data$Label)
    vld.data <- test.data[, solution.SVM, drop = FALSE]
    # Build on the feature-restricted frame; the original rebuilt it from
    # the full `test.data`, discarding the column selection above.
    vld.data <- transform(vld.data, Label = test.data$Label)

    model <- svm(Label ~ ., data = train.data)
    result <- predict(model, newdata = vld.data)

    c1.test.indexes <- vld.data$Label == 1
    c2.test.indexes <- vld.data$Label == 0
    c1.acc <- mean(result[c1.test.indexes] == vld.data$Label[c1.test.indexes])
    c2.acc <- mean(result[c2.test.indexes] == vld.data$Label[c2.test.indexes])
    total.acc <- mean(result == vld.data$Label)
    c(c1.acc, c2.acc, total.acc)
  }

  predictions <- predict.breast.cancer()
  names(predictions) <- c("Classe 1 (y = 1)", "Classe 2 (y = 0)", "Total")
  list(top.features = solution.SVM,
       predictions = predictions)
}
# Repeat the full GA + SVM experiment 10 times; each element of
# `final.result` is the list returned by predict.SVM().
final.result <- lapply(seq_len(10), function(run) {
  predict.SVM()
})

# One row per run, one column per accuracy measure. The column names come
# from the names of each run's `predictions` vector. Binding everything in
# a single call avoids growing the matrix row by row.
acc <- do.call(rbind,
               lapply(final.result, function(run) run$predictions))

# Long format: Var1 = run index, Var2 = accuracy measure, value = accuracy.
melted <- melt(acc)

# One box plot per accuracy measure, each in its own facet.
boxplot <- ggplot(melted, aes(x = Var2, y = value)) +
  # `scales` is spelled out in full; the original `scale =` only worked
  # through partial argument matching.
  facet_wrap(~Var2, scales = "free_x") +
  geom_boxplot() +
  ggtitle("Performance using SVM applied to best subset of features according to GA with SVM") +
  xlab("Classes") +
  ylab("Accuracy") +
  theme(title = element_text(size = 20))
print(boxplot)

ggsave(plot = boxplot,
       filename = "GA_SVM_rene_no_pls.pdf",
       height = 10,
       width = 20)

# Persist the whole workspace (results, plot object, loaded data).
save.image("SVM_rene.RData")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement