Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# ROC
# using Logistic Regression as an example
#
# Data preparation: load the ads dataset, keep the modelling columns, encode
# the target, split into training and test sets, and standardise the features.
#
# NOTE: the original `rm(list = ls())` was dropped so that running this script
# no longer wipes unrelated objects from the caller's workspace.

# install.packages("caTools")
library(caTools)

# Importing the dataset (relative paths resolve against the working directory)
dataset <- read.csv(file.path("Data", "Social_Network_Ads.csv"))
# Keep columns 3 to 5; the rest of the script treats the third remaining
# column as the target (`Purchased`) and the first two as features.
dataset <- dataset[3:5]

# Encoding the target feature as factor
dataset$Purchased <- factor(dataset$Purchased, levels = c(0, 1))

# Splitting the dataset into the Training set and Test set
set.seed(123) # fixed seed so the split is reproducible
split <- sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

# Feature Scaling
# Standardise the features with the training set's mean and standard
# deviation, then reuse those same parameters for the test set. Scaling the
# test set with its own statistics would put it on a different scale from the
# one the model is fitted on.
train_scaled <- scale(training_set[-3])
training_set[-3] <- train_scaled
test_set[-3] <- scale(
  test_set[-3],
  center = attr(train_scaled, "scaled:center"),
  scale = attr(train_scaled, "scaled:scale")
)
# Fitting Logistic Regression to the Training set
# (binomial GLM of `Purchased` on every other column of the training set)
classifier <- glm(Purchased ~ ., data = training_set, family = binomial)

# Predicting the Test set results
# Predicted probabilities for the test features, thresholded at 0.5 and stored
# as a factor with levels "0" and "1".
prob_pred <- predict(classifier, newdata = test_set[-3], type = "response")
y_pred <- factor(prob_pred > 0.5, levels = c(FALSE, TRUE), labels = c(0, 1))

# Making the Confusion Matrix
# Earlier variants, kept for reference:
#   cm = table(test_set[, 3], y_pred)
#   cm = table(test_set[, 3] > 0.5, y_pred > 0.5)
#   (the second one cannot work: `>` is not usable on factors)
cm <- as.matrix(table(Actual = test_set[[3]], Predicted = y_pred))

# Share of test rows whose predicted class equals the actual class; the outer
# parentheses also print the value when run at the console.
(accuracy <- mean(test_set$Purchased == y_pred))
# Model Evaluation
# Using Receiver Operating Characteristic (ROC) Curve
# install.packages("ROCR")
library(ROCR)

# Pair the predicted probabilities with the actual labels, then compute the
# true-positive rate against the false-positive rate and plot it.
roc_pred <- prediction(predictions = prob_pred, labels = test_set[3])
roc_eval <- performance(roc_pred, measure = "tpr", x.measure = "fpr")
plot(roc_eval, colorize = TRUE)

# To add another curve:
#   plot(perf2, add = TRUE, colorize = TRUE)
# and then, instead of colorize = TRUE, use col = as.list(1:10)

# Add labels to each curve
# Click on the existing graph where the label should go, then click Finish.
text(locator(), labels = "Logistic Regression", cex = 0.7)
# Alternatives:
#   coor <- locator()   # get the x and y coordinates of the clicked points
#   text(x = 0.5, y = 0.5, labels = c("Logistic Regression"))
# or
#   text(coor$x, coor$y, labels = c("Logistic Regression"))

# Reference diagonal (intercept 0, slope 1)
abline(0, 1)
# Area Under the Curve (AUC)
# Each step gets its own name. The original extraction and rounding steps read
# an object named `auc` that this script never defines, instead of the value
# just computed.
auc_perf <- performance(roc_pred, "auc")
auc_value <- round(unlist(slot(auc_perf, "y.values")), 3)
# Legend entry: model name followed by its AUC rounded to three decimals
lr_auc <- paste0("Logistic Regression ", auc_value)
# dt_auc <- paste0("Decision Tree ", dt_auc)

# Other legend layouts that were tried:
# legend(.6, .4, lr_auc, title = "AUC", cex = 0.5)
# legend("bottomright", lr_auc, col = c("green"), pch = c(3), cex = 0.5)
# legend(0.6, 0.2, title = "AUC", c(lr_auc), col = c("green"), pch = c(3),
#        cex = 0.5, box.col = "white", border = "white")
legend(
  0.6, 0.2,
  legend = lr_auc,
  title = "AUC",
  col = "green",
  cex = 0.6,
  box.col = "white",
  border = "white"
)
Add Comment
Please, Sign In to add comment