Guest User

Untitled

a guest
May 21st, 2018
157
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.30 KB | None | 0 0
  # k-Fold Cross Validation
  # Used to get a more reliable evaluation of classifier performance than a
  # single train/test split provides.
  #
  # NOTE: the workspace is intentionally not cleared here. `rm(list = ls())`
  # silently deletes every object in the caller's global environment; start a
  # fresh R session instead if a clean slate is needed.

  # Importing the dataset
  # The CSV is expected under Data/ in the current working directory.
  dataset <- read.csv(file.path(getwd(), "Data/Social_Network_Ads.csv"))
  # Keep only columns 3 to 5 of the file; the rest of the script treats the
  # first two retained columns as features and the third as the label.
  dataset <- dataset[3:5]

  # Encoding the target feature as factor
  # Only 0 and 1 are declared as levels, so any other value becomes NA.
  dataset$Purchased <- factor(dataset$Purchased, levels = c(0, 1))

  # Splitting the dataset into the Training set and Test set
  # install.packages("caTools")
  library(caTools)
  # Fixed seed so the split (and later random steps) can be reproduced.
  set.seed(123)
  # `split` is a logical vector: TRUE rows go to training, FALSE rows to test.
  split <- sample.split(dataset$Purchased, SplitRatio = 0.75)
  training_set <- subset(dataset, split == TRUE)
  test_set <- subset(dataset, split == FALSE)
  # Feature Scaling
  # Standardise every column except column 3 (the label).
  # The centring and scaling constants are estimated on the training set only
  # and then reused for the test set. Scaling the test set with its own
  # mean/sd would put the two sets on different scales and let test-set
  # statistics leak into preprocessing.
  scaled_training <- scale(training_set[-3])
  training_set[-3] <- scaled_training
  test_set[-3] <- scale(
    test_set[-3],
    center = attr(scaled_training, "scaled:center"),
    scale = attr(scaled_training, "scaled:scale")
  )
  # Fitting Kernel SVM to the Training set
  # install.packages("e1071")
  library(e1071)
  # C-classification SVM with a radial kernel, using every non-label column
  # of the training set as a predictor.
  classifier <- svm(
    formula = Purchased ~ .,
    data = training_set,
    type = "C-classification",
    kernel = "radial"
  )

  # Predicting the Test set results
  # Column 3 (the label) is dropped so only features are passed in.
  y_pred <- predict(classifier, newdata = test_set[-3])

  # Making the Confusion Matrix
  # Rows are the actual labels, columns the predicted ones.
  cm <- as.matrix(
    table(Actual = test_set[[3]], Predicted = y_pred)
  )

  # Calculating accuracy
  # Share of test rows whose prediction matches the label; the same figure
  # can be read off the confusion matrix as sum(diag(cm)) / sum(cm).
  # The outer parentheses print the value as well as storing it.
  (accuracy <- mean(test_set$Purchased == y_pred))
  # Model Evaluation
  # Applying k-Fold Cross Validation to evaluate the classifier performance:
  # the training set is cut into 10 folds, and each fold in turn is held out
  # while a fresh SVM is fitted on the remaining rows.
  # install.packages("caret")
  library(caret)
  folds <- createFolds(training_set$Purchased, k = 10)
  cv <- lapply(folds, function(held_out_rows) {
    fit_rows <- training_set[-held_out_rows, ]
    eval_rows <- training_set[held_out_rows, ]
    # Same model specification as the main classifier, fitted per fold.
    fold_model <- svm(
      formula = Purchased ~ .,
      data = fit_rows,
      type = "C-classification",
      kernel = "radial"
    )
    fold_pred <- predict(fold_model, newdata = eval_rows[-3])
    fold_cm <- table(eval_rows[[3]], fold_pred)
    # Correct predictions sit on the diagonal of the confusion table.
    sum(diag(fold_cm)) / sum(fold_cm)
  })
  # Average the per-fold accuracies into a single estimate.
  accuracy_k_folds <- mean(unlist(cv, use.names = FALSE))
  # Visualising the Training set results
  # NOTE(review): nothing in this section appears to call an ElemStatLearn
  # function -- confirm whether this dependency is still needed.
  library(ElemStatLearn)
  set <- training_set
  # Build a dense grid over both feature axes, padded by 1 on each side and
  # sampled every 0.01 units.
  X1 <- seq(from = min(set[[1]]) - 1, to = max(set[[1]]) + 1, by = 0.01)
  X2 <- seq(from = min(set[[2]]) - 1, to = max(set[[2]]) + 1, by = 0.01)
  grid_set <- expand.grid(X1, X2)
  # The grid columns are given the feature names used in the plot labels so
  # the classifier can be applied to them.
  names(grid_set) <- c("Age", "EstimatedSalary")
  # Classify every grid point to colour the background by predicted class.
  y_grid <- predict(classifier, newdata = grid_set)
  plot(
    set[-3],
    main = "Kernel SVM (Training set)",
    xlab = "Age",
    ylab = "Estimated Salary",
    xlim = range(X1),
    ylim = range(X2)
  )
  # Overlay contour lines computed from the grid predictions, reshaped to a
  # length(X1) x length(X2) matrix.
  contour(
    X1, X2,
    matrix(as.numeric(y_grid), nrow = length(X1), ncol = length(X2)),
    add = TRUE
  )
  # Background: one dot per grid point, coloured by predicted class.
  points(
    grid_set,
    pch = ".",
    col = ifelse(y_grid == 1, "springgreen3", "tomato")
  )
  # Foreground: the actual observations, filled by their true label.
  points(
    set,
    pch = 21,
    bg = ifelse(set[[3]] == 1, "green4", "red3")
  )
  # Visualising the Test set results
  # NOTE(review): nothing in this section appears to call an ElemStatLearn
  # function -- confirm whether this dependency is still needed.
  library(ElemStatLearn)
  set <- test_set
  # Build a dense grid over both feature axes, padded by 1 on each side and
  # sampled every 0.01 units.
  X1 <- seq(from = min(set[[1]]) - 1, to = max(set[[1]]) + 1, by = 0.01)
  X2 <- seq(from = min(set[[2]]) - 1, to = max(set[[2]]) + 1, by = 0.01)
  grid_set <- expand.grid(X1, X2)
  # The grid columns are given the feature names used in the plot labels so
  # the classifier can be applied to them.
  names(grid_set) <- c("Age", "EstimatedSalary")
  # Classify every grid point to colour the background by predicted class.
  y_grid <- predict(classifier, newdata = grid_set)
  plot(
    set[-3],
    main = "Kernel SVM (Test set)",
    xlab = "Age",
    ylab = "Estimated Salary",
    xlim = range(X1),
    ylim = range(X2)
  )
  # Overlay contour lines computed from the grid predictions, reshaped to a
  # length(X1) x length(X2) matrix.
  contour(
    X1, X2,
    matrix(as.numeric(y_grid), nrow = length(X1), ncol = length(X2)),
    add = TRUE
  )
  # Background: one dot per grid point, coloured by predicted class.
  points(
    grid_set,
    pch = ".",
    col = ifelse(y_grid == 1, "springgreen3", "tomato")
  )
  # Foreground: the actual observations, filled by their true label.
  points(
    set,
    pch = 21,
    bg = ifelse(set[[3]] == 1, "green4", "red3")
  )
Add Comment
Please, Sign In to add comment