Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build a document-term matrix of 1- to 3-word n-grams for a single sentence
# and list the resulting terms.
#
# tm supplies VCorpus(), VectorSource(), content() and DocumentTermMatrix();
# RWeka supplies NGramTokenizer() and Weka_control(). Loading both here keeps
# the snippet runnable in a fresh R session.
library(tm)
library(RWeka)

text <- "the slurry includes: attrition pellet, oxidant, amino acid and water."

# Wrap the sentence in a one-document corpus and print the stored text back
# to confirm it was loaded unchanged.
corpus_text <- VCorpus(VectorSource(text))
content(corpus_text[[1]])

# NOTE: despite its name, this tokenizer emits unigrams, bigrams and trigrams
# (min = 1, max = 3). The name is kept so existing references keep working.
BigramTokenizer <- function(x) {
  NGramTokenizer(x, Weka_control(min = 1, max = 3))
}

# Use the n-gram tokenizer instead of tm's default word tokenizer when
# counting terms.
dtm <- DocumentTermMatrix(
  corpus_text,
  control = list(tokenize = BigramTokenizer)
)
mat <- as.matrix(dtm)

# One column per distinct n-gram found in the sentence.
colnames(mat)
- [1] "acid" "acid and" "acid and water"
- [4] "amino" "amino acid" "amino acid and"
- [7] "and" "and water" "attrition"
- [10] "attrition pellet" "attrition pellet oxidant" "includes"
- [13] "includes attrition" "includes attrition pellet" "oxidant"
- [16] "oxidant amino" "oxidant amino acid" "pellet"
- [19] "pellet oxidant" "pellet oxidant amino" "slurry"
- [22] "slurry includes" "slurry includes attrition" "the"
- [25] "the slurry" "the slurry includes" "water"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement