Advertisement
Guest User

Untitled

a guest
Oct 21st, 2016
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.78 KB | None | 0 0
  1. outcome <- c(1,0,0,1,1)
  2. string <- c('I love pasta','hello world', '1+1 = 2','pasta madness', 'pizza madness')
  3. # Toy data: one binary outcome and one short text string per row.
  4. df <- data.frame(outcome, string)
  5.  
  6.  
  7. > df
  8. outcome string
  9. 1 1 I love pasta
  10. 2 0 hello world
  11. 3 0 1+1 = 2
  12. 4 1 pasta madness
  13. 5 1 pizza madness
  14.  
  15. outcome <- c(1,0,0,1,1)
  16. string <- c('I love pasta','hello world', '1+1 = 2','pasta madness', 'pizza madness')
  17. # Build the frame with outcome as a two-level factor (0, 1); string stays character.
  18. df <- data.frame(outcome=factor(outcome,levels=c(0,1)),string, stringsAsFactors=FALSE)
  19. # Split every string on single spaces, then expand to one row per word.
  20. library(dplyr)  # mutate(), %>%
  21. library(tidyr)  # unnest()
  22. inp <- df %>% mutate(string=strsplit(string,split=" ")) %>% unnest(string)
  23. ## outcome string
  24. ##1 1 I
  25. ##2 1 love
  26. ##3 1 pasta
  27. ##4 0 hello
  28. ##5 0 world
  29. ##6 0 1+1
  30. ##7 0 =
  31. ##8 0 2
  32. ##9 1 pasta
  33. ##10 1 madness
  34. ##11 1 pizza
  35. ##12 1 madness
  36. # Rank the words by random-forest variable importance for predicting outcome.
  37. library(randomForest)
  38. mm <- model.matrix(outcome~string,inp)  # dummy columns for the word levels (see row names below), plus (Intercept)
  39. rf <- randomForest(mm, inp$outcome, importance=TRUE)  # NOTE(review): no set.seed() visible, so results presumably vary per run
  40. imp <- importance(rf)  # importance table; sample values are listed below
  41. ## 0 1 MeanDecreaseAccuracy MeanDecreaseGini
  42. ##(Intercept) 0.000000 0.000000 0.000000 0.0000000
  43. ##string1+1 0.000000 0.000000 0.000000 0.3802400
  44. ##string2 0.000000 0.000000 0.000000 0.4514319
  45. ##stringhello 0.000000 0.000000 0.000000 0.4152465
  46. ##stringI 0.000000 0.000000 0.000000 0.2947108
  47. ##stringlove 0.000000 0.000000 0.000000 0.2944955
  48. ##stringmadness 4.811252 5.449195 5.610477 0.5733814
  49. ##stringpasta 4.759957 5.281133 5.368852 0.6651675
  50. ##stringpizza 0.000000 0.000000 0.000000 0.3025495
  51. ##stringworld 0.000000 0.000000 0.000000 0.4183821
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement