Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build a word cloud from the message text of every JSON file found below the
# working directory.
#
# These packages need to be installed: jsonlite, tm, wordcloud.
library(jsonlite)
library(tm)
library(wordcloud)

# `pattern` is a regular expression, not a glob, so match a literal ".json"
# suffix explicitly.
files <- list.files(".", pattern = "\\.json$", recursive = TRUE)
if (length(files) == 0) {
  stop("No .json files found under the working directory", call. = FALSE)
}

# lapply() always yields one parsed object per file; sapply() may collapse
# same-shaped results into a matrix, which breaks the per-file `$text` access
# below. Names are kept so each entry can be traced back to its file.
json <- lapply(files, fromJSON)
names(json) <- files

# Keep only the text of entries without a subtype when the file has a
# `subtype` field; otherwise keep every text.
texts <- lapply(json, function(f) {
  if ("subtype" %in% names(f)) {
    f$text[is.na(f$subtype)]
  } else {
    f$text
  }
})

# Flatten to a single character vector and drop missing texts so they cannot
# end up in the corpus.
flat <- unlist(texts)
flat <- flat[!is.na(flat)]

corpus <- Corpus(VectorSource(flat))
corpus <- tm_map(corpus, stripWhitespace)
# Plain character functions must be wrapped in content_transformer() so the
# corpus keeps its document structure.
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removeWords, stopwords("english"))

# Remove words Slack adds. The corpus is already lower-cased at this point and
# removeWords() matches case-sensitively, so the tokens are lower-cased too.
# NOTE(review): these look like Slack user IDs with their digits stripped by
# removeNumbers() above - confirm against the export.
slack_words <- tolower(c("UZDP", "UULQF", "UKZS", "UCNJ", "UUJX"))
corpus1 <- tm_map(corpus, removeWords, slack_words)

wordcloud(
  corpus1,
  scale = c(2.5, 0.5),
  max.words = 1000,
  random.order = FALSE,
  rot.per = 0.35,
  use.r.layout = FALSE,
  colors = brewer.pal(8, "Dark2")
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement