Advertisement
Guest User

Untitled

a guest
Sep 21st, 2017
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.11 KB | None | 0 0
  1. # Build a document-term matrix of 1- to 3-word n-grams for one sentence
  2. # and list the resulting terms.
  3. library(tm)     # VCorpus, VectorSource, content, DocumentTermMatrix
  4. library(RWeka)  # NGramTokenizer, Weka_control
  5.  
  6. text <- "the slurry includes: attrition pellet, oxidant, amino acid and water."
  7.  
  8. # Wrap the single string in a one-document corpus and show its raw content.
  9. corpus_text <- VCorpus(VectorSource(text))
  10. content(corpus_text[[1]])
  11.  
  12. # NOTE: despite its name, this tokenizer is configured with min = 1 and
  13. # max = 3, so it emits unigrams, bigrams and trigrams (see output below).
  14. BigramTokenizer <- function(x) {
  15.   NGramTokenizer(x, Weka_control(min = 1, max = 3))
  16. }
  17.  
  18. dtm <- DocumentTermMatrix(
  19.   corpus_text,
  20.   control = list(tokenize = BigramTokenizer)
  21. )
  22. mat <- as.matrix(dtm)
  23. colnames(mat)
  24.  
  25. # Output of colnames(mat), as pasted with the original script:
  26. # [1] "acid" "acid and" "acid and water"
  27. # [4] "amino" "amino acid" "amino acid and"
  28. # [7] "and" "and water" "attrition"
  29. # [10] "attrition pellet" "attrition pellet oxidant" "includes"
  30. # [13] "includes attrition" "includes attrition pellet" "oxidant"
  31. # [16] "oxidant amino" "oxidant amino acid" "pellet"
  32. # [19] "pellet oxidant" "pellet oxidant amino" "slurry"
  33. # [22] "slurry includes" "slurry includes attrition" "the"
  34. # [25] "the slurry" "the slurry includes" "water"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement