Advertisement
Guest User

Untitled

a guest
Apr 23rd, 2018
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.67 KB | None | 0 0
  1. > spam <- read_csv("~/R_Projects/spam_data.csv")
  2. Parsed with column specification:
  3. cols(
  4. .default = col_integer()
  5. )
  6. See spec(...) for full column specifications.
  7. |===========================================================================================================| 100% 423 MB
  8. > mutate(spam, label=as.factor(spam$label))
  9. # A tibble: 75,419 x 2,941
  10. label a2638888 abbott abby ability able about above acc0 accelerated accept accepted access accessed accompany
  11. <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
  12. 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  13. 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
  14. 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  15. 4 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0
  16. 5 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0
  17. 6 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  18. 7 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  19. 8 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  20. 9 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  21. 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  22. # ... with 75,409 more rows, and 2,926 more variables: accounting <int>, accuweather <int>, acquired <int>,
  23. # acquires <int>, act <int>, acting <int>, action <int>, actions <int>, activated <int>, activatian <int>,
  24. # activation <int>, activities <int>, activity <int>, ad <int>, add <int>, adding <int>, additional <int>, adf <int>,
  25. # administrator <int>, admitted <int>, adquirir <int>, adt <int>, advance <int>, advanced <int>, advantage <int>,
  26. # advertise <int>, advertisement <int>, advertisements <int>, advertisers <int>, advertising <int>, advice <int>,
  27. # advisor <int>, advisory <int>, ae <int>, aerial <int>, affected <int>, affiliate <int>, affiliated <int>, ag <int>,
  28. # again <int>, age <int>, agent <int>, aggresive <int>, ago <int>, agree <int>, agreed <int>, agreement <int>,
  29. # aids <int>, aim <int>, airs <int>, aktie <int>, aktien <int>, al <int>, album <int>, alek <int>, alert <int>,
  30. # alerts <int>, alfonso <int>, alice <int>, align <int>, alle <int>, allen <int>, alles <int>, allison <int>,
  31. # allocate <int>, almost <int>, alone <int>, already <int>, also <int>, alt <int>, alternate <int>, alternative <int>,
  32. # although <int>, always <int>, am9 <int>, amateure <int>, amazing <int>, ambien <int>, amd64 <int>, americans <int>,
  33. # amigo <int>, amount <int>, amp <int>, an <int>, analysis <int>, anatrim <int>, and <int>, andrea <int>, andrew <int>,
  34. # andy <int>, angela <int>, angeles <int>, animation <int>, anleger <int>, annie <int>, announced <int>,
  35. # announcement <int>, announcements <int>, annual <int>, anonymous <int>, ...
  36. > spam <- mutate(spam, label=as.factor(spam$label))
  37. > head(spam)
  38. # A tibble: 6 x 2,941
  39. label a2638888 abbott abby ability able about above acc0 accelerated accept accepted access accessed accompany
  40. <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
  41. 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  42. 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
  43. 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  44. 4 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0
  45. 5 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0
  46. 6 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  47. # ... with 2,926 more variables: accounting <int>, accuweather <int>, acquired <int>, acquires <int>, act <int>,
  48. # acting <int>, action <int>, actions <int>, activated <int>, activatian <int>, activation <int>, activities <int>,
  49. # activity <int>, ad <int>, add <int>, adding <int>, additional <int>, adf <int>, administrator <int>, admitted <int>,
  50. # adquirir <int>, adt <int>, advance <int>, advanced <int>, advantage <int>, advertise <int>, advertisement <int>,
  51. # advertisements <int>, advertisers <int>, advertising <int>, advice <int>, advisor <int>, advisory <int>, ae <int>,
  52. # aerial <int>, affected <int>, affiliate <int>, affiliated <int>, ag <int>, again <int>, age <int>, agent <int>,
  53. # aggresive <int>, ago <int>, agree <int>, agreed <int>, agreement <int>, aids <int>, aim <int>, airs <int>,
  54. # aktie <int>, aktien <int>, al <int>, album <int>, alek <int>, alert <int>, alerts <int>, alfonso <int>, alice <int>,
  55. # align <int>, alle <int>, allen <int>, alles <int>, allison <int>, allocate <int>, almost <int>, alone <int>,
  56. # already <int>, also <int>, alt <int>, alternate <int>, alternative <int>, although <int>, always <int>, am9 <int>,
  57. # amateure <int>, amazing <int>, ambien <int>, amd64 <int>, americans <int>, amigo <int>, amount <int>, amp <int>,
  58. # an <int>, analysis <int>, anatrim <int>, and <int>, andrea <int>, andrew <int>, andy <int>, angela <int>,
  59. # angeles <int>, animation <int>, anleger <int>, annie <int>, announced <int>, announcement <int>, announcements <int>,
  60. # annual <int>, anonymous <int>, ...
  61. > trainIndex <- createDataPartition(spam$label, p=0.8, list=FALSE, times=1)
  62. > set.seed(721)
  63. > trainIndex <- createDataPartition(spam$label, p=0.8, list=FALSE, times=1)
  64. > spamTrain <- spam[trainIndex,]
  65. > spamTest <- spam[-trainIndex,]
  66. > lr <- glmnet(label ~ ., data=spamTrain, family="binomial", na.action = na.omit)
  67. > predictions <- predict (lr, spamTest, type="class", na.action=na.pass, s=0.01)
  68. > confusionMatrix(predictions, spam$label)
  69. Error in table(data, reference, dnn = dnn, ...) :
  70. all arguments must have the same length
  71. > head(predictions)
  72. 1
  73. [1,] "1"
  74. [2,] "0"
  75. [3,] "1"
  76. [4,] "1"
  77. [5,] "1"
  78. [6,] "0"
  79. > predictions <- as.factor(predictions)
  80. > head(predictions)
  81. [1] 1 0 1 1 1 0
  82. Levels: 0 1
  83. > confusionMatrix(predictions, spam$label)
  84. Error in table(data, reference, dnn = dnn, ...) :
  85. all arguments must have the same length
Advertisement
Add Comment
Please sign in to add a comment
Advertisement