# Source: Pastebin paste (site sign-up chrome removed so the file parses as R).
# Support Vector Machine (SVM) ----
# Classifies the Social_Network_Ads data (Age, EstimatedSalary -> Purchased)
# with a linear-kernel SVM.

# NOTE(review): dropped `rm(list = ls())` — wiping the caller's global
# environment is a script anti-pattern; run the script in a fresh session
# instead.

# Importing the dataset ----
# `read.csv` resolves relative paths against the working directory already,
# so the original `file.path(getwd(), ...)` wrapper was redundant.
dataset <- read.csv("Data/Social_Network_Ads.csv")
dataset <- dataset[3:5]  # keep Age, EstimatedSalary, Purchased

# Encoding the target feature as factor ----
dataset$Purchased <- factor(dataset$Purchased, levels = c(0, 1))
# Splitting the dataset into the Training set and Test set ----
# install.packages('caTools')
library(caTools)

set.seed(123)  # reproducible split
# sample.split returns a logical mask that preserves the class ratio.
split <- sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set <- dataset[split, ]
test_set <- dataset[!split, ]
# Feature Scaling ----
# Fix: the original standardized the test set with its OWN mean/sd, so the
# training and test features were not on the same scale. Standardize the test
# set with the centering/scaling parameters learned on the training set, as
# required for a model fitted on the training scale.
training_scaled <- scale(training_set[-3])
training_set[-3] <- training_scaled
test_set[-3] <- scale(test_set[-3],
                      center = attr(training_scaled, "scaled:center"),
                      scale  = attr(training_scaled, "scaled:scale"))
# Fitting SVM to the Training set ----
# install.packages('e1071')
library(e1071)

# Linear-kernel C-classification SVM on the two scaled features.
classifier <- svm(
  formula = Purchased ~ .,
  data = training_set,
  type = 'C-classification',
  kernel = 'linear'
)

# Predicting the Test set results ----
# Drop column 3 (the target) so only the predictors reach predict().
y_pred <- predict(classifier, newdata = test_set[-3])
# Making the Confusion Matrix ----
cm <- as.matrix(table(Actual = test_set[, 3], Predicted = y_pred))

# Calculating accuracy ----
# Trace of the confusion matrix over its total: identical to
# mean(y_pred == test_set$Purchased).
accuracy <- sum(diag(cm)) / sum(cm)
print(accuracy)
# Model Evaluation ----
# Applying k-Fold Cross Validation ----
# install.packages('caret')
library(caret)

# Stratified fold indices over the training targets.
folds <- createFolds(training_set$Purchased, k = 10)

# For each fold: train on the other 9 folds, score accuracy on the held-out
# rows. `cv` stays a list (as in the original) so as.numeric() below works.
cv <- lapply(folds, function(fold_idx) {
  training_fold <- training_set[-fold_idx, ]
  test_fold <- training_set[fold_idx, ]
  fold_model <- svm(
    formula = Purchased ~ .,
    data = training_fold,
    type = 'C-classification',
    kernel = 'linear'
  )
  fold_pred <- predict(fold_model, newdata = test_fold[-3])
  fold_cm <- table(test_fold[, 3], fold_pred)
  # Fold accuracy: correct predictions over the fold size.
  sum(diag(fold_cm)) / sum(fold_cm)
})
accuracy_k_folds <- mean(as.numeric(cv))
# Applying Grid Search to find the best parameters ----
# install.packages('caret')
library(caret)

# NOTE: this rebinds `classifier`, so the visualisations below use the
# caret-tuned model rather than the e1071 one fitted earlier.
classifier <- train(form = Purchased ~ ., data = training_set,
                    method = 'svmLinear')
print(classifier)            # model summary (auto-printed in the original)
print(classifier$bestTune)   # best tuning parameters found
# Visualising the Training set results ----
# NOTE(review): ElemStatLearn has been archived from CRAN; nothing below
# actually uses it — confirm it can be dropped.
library(ElemStatLearn)

set <- training_set

# Dense grid over the (scaled) feature space, padded by 1 on each side.
X1 <- seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 <- seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set <- expand.grid(Age = X1, EstimatedSalary = X2)

# Predict a class for every grid point to paint the decision regions.
y_grid <- predict(classifier, newdata = grid_set)

plot(set[, -3],
     main = 'SVM (Training set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
# Decision boundary: contour of the predicted class over the grid.
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
# Region shading, then the actual observations coloured by true class.
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
# Visualising the Test set results ----
# NOTE(review): ElemStatLearn is archived on CRAN and unused here — verify.
library(ElemStatLearn)

set <- test_set

# Dense grid over the (scaled) feature space, padded by 1 on each side.
X1 <- seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 <- seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set <- expand.grid(Age = X1, EstimatedSalary = X2)

# Class prediction at every grid point paints the decision regions.
y_grid <- predict(classifier, newdata = grid_set)

plot(set[, -3], main = 'SVM (Test set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
# Decision boundary, region shading, then true test observations.
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
# End of script (Pastebin comment-form footer removed).