Guest User

Untitled

a guest
May 21st, 2018
128
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.64 KB | None | 0 0
  # ROC curve and AUC, using logistic regression as the example classifier.
  #
  # Run this in a fresh R session whose working directory is the project root:
  # the data file is read from Data/Social_Network_Ads.csv relative to it.
  # The workspace is deliberately NOT wiped with rm(list = ls()), so sourcing
  # this script never destroys unrelated objects.

  # install.packages(c("caTools", "ROCR"))
  library(caTools)
  library(ROCR)

  # Importing the dataset ----
  # A relative path is resolved against the working directory by read.csv().
  dataset <- read.csv(file.path("Data", "Social_Network_Ads.csv"))
  dataset <- dataset[3:5]

  # Every retained column except the target is a predictor. Addressing the
  # target by name replaces the positional [-3] / [, 3] / [3] indexing.
  feature_cols <- setdiff(names(dataset), "Purchased")

  # Encoding the target feature as factor ----
  dataset$Purchased <- factor(
    dataset$Purchased,
    levels = c(0, 1),
    labels = c(0, 1)
  )

  # Splitting the dataset into the Training set and Test set ----
  set.seed(123)
  is_train <- sample.split(dataset$Purchased, SplitRatio = 0.75)
  training_set <- dataset[is_train, ]
  test_set <- dataset[!is_train, ]

  # Feature Scaling ----
  # The test set must go through the SAME transformation as the training set,
  # so it is standardised with the training means and standard deviations
  # (scaling it with its own statistics would feed the model differently
  # transformed features than the ones it was fitted on).
  train_scaled <- scale(training_set[feature_cols])
  test_set[feature_cols] <- scale(
    test_set[feature_cols],
    center = attr(train_scaled, "scaled:center"),
    scale = attr(train_scaled, "scaled:scale")
  )
  training_set[feature_cols] <- train_scaled

  # Fitting Logistic Regression to the Training set ----
  classifier <- glm(
    formula = Purchased ~ .,
    family = binomial,
    data = training_set
  )

  # Predicting the Test set results ----
  # type = "response" yields probabilities; they are cut at 0.5 to get classes.
  prob_pred <- predict(
    classifier,
    newdata = test_set[feature_cols],
    type = "response"
  )
  y_pred <- factor(
    ifelse(prob_pred > 0.5, 1, 0),
    levels = c(0, 1),
    labels = c(0, 1)
  )

  # Making the Confusion Matrix ----
  # Both vectors are factors, so they are cross-tabulated directly; comparison
  # operators such as `>` cannot be applied to factors.
  cm <- as.matrix(table(Actual = test_set$Purchased, Predicted = y_pred))

  (accuracy <- mean(y_pred == test_set$Purchased))

  # Model Evaluation ----
  # Using the Receiver Operating Characteristic (ROC) curve
  roc_pred <- prediction(prob_pred, test_set$Purchased)
  roc_eval <- performance(roc_pred, "tpr", "fpr")
  plot(roc_eval, colorize = TRUE)

  # To add another curve:
  #   plot(perf2, add = TRUE, colorize = TRUE)
  # and, instead of colorize = TRUE, col = as.list(1:10) can be used.

  # Add a label to the curve ----
  # locator() only works on an interactive screen device. There, one click
  # picks the label position; otherwise (or if the click is cancelled) the
  # label falls back to fixed coordinates so the script still runs.
  label_pos <- if (dev.interactive()) locator(n = 1L) else NULL
  if (is.null(label_pos)) {
    label_pos <- list(x = 0.5, y = 0.5)
  }
  text(label_pos, labels = "Logistic Regression", cex = 0.7)

  # Diagonal reference line (intercept 0, slope 1).
  abline(a = 0, b = 1)

  # Area Under the Curve (AUC) ----
  # performance(..., "auc") stores the value in the y.values slot as a list.
  auc_perf <- performance(roc_pred, "auc")
  auc_value <- round(unlist(slot(auc_perf, "y.values")), 3)
  lr_auc <- paste0("Logistic Regression ", auc_value)
  # dt_auc <- paste0("Decision Tree ", dt_auc)

  # Alternative legends:
  #   legend("bottomright", lr_auc, col = "green", pch = 3, cex = 0.5)
  #   legend(0.6, 0.2, title = "AUC", lr_auc, col = "green", pch = 3,
  #          cex = 0.5, box.col = "white", border = "white")
  legend(
    x = 0.6,
    y = 0.2,
    legend = lr_auc,
    title = "AUC",
    col = "green",
    cex = 0.6,
    box.col = "white",
    border = "white"
  )
Add Comment
Please, Sign In to add comment