Advertisement
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
- > spam <- read_csv("~/R_Projects/spam_data.csv")
- Parsed with column specification:
- cols(
- .default = col_integer()
- )
- See spec(...) for full column specifications.
- |===========================================================================================================| 100% 423 MB
- > mutate(spam, label=as.factor(spam$label))
- # A tibble: 75,419 x 2,941
- label a2638888 abbott abby ability able about above acc0 accelerated accept accepted access accessed accompany
- <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
- 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
- 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 4 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0
- 5 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0
- 6 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 7 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 8 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 9 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- # ... with 75,409 more rows, and 2,926 more variables: accounting <int>, accuweather <int>, acquired <int>,
- # acquires <int>, act <int>, acting <int>, action <int>, actions <int>, activated <int>, activatian <int>,
- # activation <int>, activities <int>, activity <int>, ad <int>, add <int>, adding <int>, additional <int>, adf <int>,
- # administrator <int>, admitted <int>, adquirir <int>, adt <int>, advance <int>, advanced <int>, advantage <int>,
- # advertise <int>, advertisement <int>, advertisements <int>, advertisers <int>, advertising <int>, advice <int>,
- # advisor <int>, advisory <int>, ae <int>, aerial <int>, affected <int>, affiliate <int>, affiliated <int>, ag <int>,
- # again <int>, age <int>, agent <int>, aggresive <int>, ago <int>, agree <int>, agreed <int>, agreement <int>,
- # aids <int>, aim <int>, airs <int>, aktie <int>, aktien <int>, al <int>, album <int>, alek <int>, alert <int>,
- # alerts <int>, alfonso <int>, alice <int>, align <int>, alle <int>, allen <int>, alles <int>, allison <int>,
- # allocate <int>, almost <int>, alone <int>, already <int>, also <int>, alt <int>, alternate <int>, alternative <int>,
- # although <int>, always <int>, am9 <int>, amateure <int>, amazing <int>, ambien <int>, amd64 <int>, americans <int>,
- # amigo <int>, amount <int>, amp <int>, an <int>, analysis <int>, anatrim <int>, and <int>, andrea <int>, andrew <int>,
- # andy <int>, angela <int>, angeles <int>, animation <int>, anleger <int>, annie <int>, announced <int>,
- # announcement <int>, announcements <int>, annual <int>, anonymous <int>, ...
- > spam <- mutate(spam, label=as.factor(spam$label))
- > head(spam)
- # A tibble: 6 x 2,941
- label a2638888 abbott abby ability able about above acc0 accelerated accept accepted access accessed accompany
- <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
- 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
- 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 4 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0
- 5 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0
- 6 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- # ... with 2,926 more variables: accounting <int>, accuweather <int>, acquired <int>, acquires <int>, act <int>,
- # acting <int>, action <int>, actions <int>, activated <int>, activatian <int>, activation <int>, activities <int>,
- # activity <int>, ad <int>, add <int>, adding <int>, additional <int>, adf <int>, administrator <int>, admitted <int>,
- # adquirir <int>, adt <int>, advance <int>, advanced <int>, advantage <int>, advertise <int>, advertisement <int>,
- # advertisements <int>, advertisers <int>, advertising <int>, advice <int>, advisor <int>, advisory <int>, ae <int>,
- # aerial <int>, affected <int>, affiliate <int>, affiliated <int>, ag <int>, again <int>, age <int>, agent <int>,
- # aggresive <int>, ago <int>, agree <int>, agreed <int>, agreement <int>, aids <int>, aim <int>, airs <int>,
- # aktie <int>, aktien <int>, al <int>, album <int>, alek <int>, alert <int>, alerts <int>, alfonso <int>, alice <int>,
- # align <int>, alle <int>, allen <int>, alles <int>, allison <int>, allocate <int>, almost <int>, alone <int>,
- # already <int>, also <int>, alt <int>, alternate <int>, alternative <int>, although <int>, always <int>, am9 <int>,
- # amateure <int>, amazing <int>, ambien <int>, amd64 <int>, americans <int>, amigo <int>, amount <int>, amp <int>,
- # an <int>, analysis <int>, anatrim <int>, and <int>, andrea <int>, andrew <int>, andy <int>, angela <int>,
- # angeles <int>, animation <int>, anleger <int>, annie <int>, announced <int>, announcement <int>, announcements <int>,
- # annual <int>, anonymous <int>, ...
- > trainIndex <- createDataPartition(spam$label, p=0.8, list=FALSE, times=1)
- > set.seed(721)
- > trainIndex <- createDataPartition(spam$label, p=0.8, list=FALSE, times=1)
- > spamTrain <- spam[trainIndex,]
- > spamTest <- spam[-trainIndex,]
- > lr <- glmnet(label ~ ., data=spamTrain, family="binomial", na.action = na.omit)
- > predictions <- predict (lr, spamTest, type="class", na.action=na.pass, s=0.01)
- > confusionMatrix(predictions, spam$label)
- Error in table(data, reference, dnn = dnn, ...) :
- all arguments must have the same length
- > head(predictions)
- 1
- [1,] "1"
- [2,] "0"
- [3,] "1"
- [4,] "1"
- [5,] "1"
- [6,] "0"
- > predictions <- as.factor(predictions)
- > head(predictions)
- [1] 1 0 1 1 1 0
- Levels: 0 1
- > confusionMatrix(predictions, spam$label)
- Error in table(data, reference, dnn = dnn, ...) :
- all arguments must have the same length
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement