Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build a bigram term-document matrix from the `crude` corpus, inspect a
# slice of it, and plot correlations between its most frequent bigrams.
library("RWeka")
library("tm")

data("crude")

# Tokenizer that emits only two-word n-grams (min = max = 2).
BigramTokenizer <- function(x) {
  NGramTokenizer(x, Weka_control(min = 2, max = 2))
}

tdm <- TermDocumentMatrix(crude, control = list(tokenize = BigramTokenizer))

# Peek at a small slice of the matrix: rows 340-345, columns 1-10.
inspect(tdm[340:345, 1:10])

# Plot at most 50 of the terms returned by findFreqTerms(lowfreq = 2).
# head() is used instead of `[1:50]` so that no NA entries are passed to
# plot() when fewer than 50 terms qualify.
# NOTE(review): tm's plot method for term-document matrices appears to need
# the Rgraphviz package -- confirm it is installed before running.
frequent_terms <- head(findFreqTerms(tdm, lowfreq = 2), 50)
plot(tdm, terms = frequent_terms, corThreshold = 0.5)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement