Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Toy data set: a binary outcome paired with a short free-text string.
outcome <- c(1, 0, 0, 1, 1)
string <- c(
  "I love pasta", "hello world", "1+1 = 2", "pasta madness", "pizza madness"
)

# One row per string. The original line read `df = df=data.frame(...)`, a
# duplicated assignment; a single `<-` builds the same data frame.
df <- data.frame(outcome, string)

print(df)
##   outcome        string
## 1       1  I love pasta
## 2       0   hello world
## 3       0       1+1 = 2
## 4       1 pasta madness
## 5       1 pizza madness
# Estimate how much each word contributes to predicting `outcome` by fitting
# a random forest on dummy-coded tokens and reading its variable importance.
library(dplyr)
library(tidyr)
library(randomForest)

outcome <- c(1, 0, 0, 1, 1)
string <- c(
  "I love pasta", "hello world", "1+1 = 2", "pasta madness", "pizza madness"
)

# `outcome` is stored as a factor so that randomForest() fits a classifier;
# `string` stays character so it can be split into tokens below.
df <- data.frame(
  outcome = factor(outcome, levels = c(0, 1)),
  string,
  stringsAsFactors = FALSE
)

# Split every string on single spaces and expand to one row per token; each
# token row carries the outcome of the string it came from.
# `cols =` is spelled out because tidyr >= 1.0 warns when it is omitted.
inp <- df %>%
  mutate(string = strsplit(string, split = " ", fixed = TRUE)) %>%
  unnest(cols = string)
##    outcome string
## 1        1 I
## 2        1 love
## 3        1 pasta
## 4        0 hello
## 5        0 world
## 6        0 1+1
## 7        0 =
## 8        0 2
## 9        1 pasta
## 10       1 madness
## 11       1 pizza
## 12       1 madness

# Dummy-code the tokens, one indicator column per token.
# NOTE(review): the formula keeps the default intercept, so the first token
# level ("=") is absorbed as the reference level and gets no column, and a
# constant "(Intercept)" column is passed to the forest instead. Use
# `outcome ~ 0 + string` if every token should get its own column.
mm <- model.matrix(outcome ~ string, inp)

# Fix the RNG state so the forest, and therefore the importances, can be
# reproduced from run to run.
set.seed(1)
rf <- randomForest(mm, inp$outcome, importance = TRUE)
imp <- importance(rf)
## Example output recorded from an unseeded run; exact values will differ.
##                      0        1 MeanDecreaseAccuracy MeanDecreaseGini
## (Intercept)   0.000000 0.000000             0.000000        0.0000000
## string1+1     0.000000 0.000000             0.000000        0.3802400
## string2       0.000000 0.000000             0.000000        0.4514319
## stringhello   0.000000 0.000000             0.000000        0.4152465
## stringI       0.000000 0.000000             0.000000        0.2947108
## stringlove    0.000000 0.000000             0.000000        0.2944955
## stringmadness 4.811252 5.449195             5.610477        0.5733814
## stringpasta   4.759957 5.281133             5.368852        0.6651675
## stringpizza   0.000000 0.000000             0.000000        0.3025495
## stringworld   0.000000 0.000000             0.000000        0.4183821
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement