Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Draw a word cloud of the most frequent terms in `df$stripped_text`.
#
# Expects a data frame `df` with a text column `stripped_text` to already
# exist in the session; this script does not create it.
# Produces `FreqMat` (columns `word`, `freq`) and renders it with wordcloud2.

library(tm)
library(wordcloud2)
library(dplyr)
library(stringi)

# Negated `%in%`: TRUE for left-hand elements that are absent from the right.
`%nin%` <- Negate(`%in%`)

# A term must occur strictly more than this many times to be drawn.
min_freq <- 50

# Fail early with a clear message: without this, a missing `df` resolves to
# the base function stats::df and `df$stripped_text` errors obscurely.
stopifnot(is.data.frame(df), "stripped_text" %in% names(df))

docs <- Corpus(VectorSource(df$stripped_text))
dtm <- TermDocumentMatrix(docs)

# Total number of occurrences of each term across all documents.
freq <- slam::row_sums(dtm)
# freq <- freq[order(-freq)]  # optional: order terms by descending frequency

# Keep the table in its own object instead of overwriting the input `df`,
# so the script can be re-run in the same session.
word_freq <- data.frame(
  word = names(freq),
  freq = freq,
  row.names = NULL,
  stringsAsFactors = FALSE
)

# Portuguese stop words plus chat abbreviations, with accents removed.
my_stopwords <- c(
  stopwords("pt"),
  "vc", "vcs", "vlw", "flw", "ta", "pra", "q", "n", "ok", "okay"
)
my_stopwords <- stri_trans_general(my_stopwords, "Latin-ASCII")

# Compare terms in the same accent-free form as the stop-word list so that
# accented variants are filtered too; the displayed words are left unchanged.
# NOTE(review): presumably `stripped_text` is already accent-free, in which
# case the folding is a no-op -- confirm against the code that builds `df`.
FreqMat <- word_freq %>%
  filter(
    stri_trans_general(word, "Latin-ASCII") %nin% my_stopwords,
    freq > min_freq
  )

wordcloud2(FreqMat)
Add Comment
Please, Sign In to add comment