Advertisement
Guest User

Untitled

a guest
Jul 28th, 2017
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.79 KB | None | 0 0
  # Draw a word cloud from the `text` field of every JSON file found under the
  # current working directory (presumably a Slack export -- confirm the layout
  # of your files before relying on the `subtype` filter below).

  # Obviously these need to be installed!
  library(jsonlite)
  library(tm)
  library(wordcloud)

  # `pattern` is a regular expression, not a shell glob: anchor it on the
  # extension so only paths that end in ".json" are selected.
  files <- list.files(".", pattern = "\\.json$", recursive = TRUE)
  if (length(files) == 0L) {
    stop("No .json files found under ", getwd(), call. = FALSE)
  }

  # lapply() always yields one list element per file. sapply() may collapse
  # same-shaped parse results into a matrix, which breaks the `$text` lookup.
  json <- lapply(files, fromJSON)
  names(json) <- files

  # Per file: when a `subtype` field exists keep only the texts whose subtype
  # is missing; otherwise keep every text.
  texts <- lapply(json, function(f) {
    if ("subtype" %in% names(f)) {
      f$text[is.na(f$subtype)]
    } else {
      f$text
    }
  })

  flat <- unlist(texts)
  # Entries without any text contribute nothing to the word counts.
  flat <- flat[!is.na(flat)]

  corpus <- Corpus(VectorSource(flat))
  corpus <- tm_map(corpus, stripWhitespace)
  # Non-tm functions must be wrapped so the corpus structure is preserved.
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, removeNumbers)
  corpus <- tm_map(corpus, removeWords, stopwords("english"))

  # remove words slack adds
  # removeWords() is case-sensitive and the corpus is already lower-cased at
  # this point, so the tokens are lower-cased (and de-duplicated) to match.
  slack_words <- unique(tolower(c("UZDP", "UULQF", "UKZS", "UCNJ", "UUJX")))
  corpus1 <- tm_map(corpus, removeWords, slack_words)

  wordcloud(
    corpus1,
    scale = c(2.5, 0.5),
    max.words = 1000,
    random.order = FALSE,
    rot.per = 0.35,
    use.r.layout = FALSE,
    colors = brewer.pal(8, "Dark2")
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement