Guest User

Untitled

a guest
Mar 22nd, 2018
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.35 KB | None | 0 0
  # Text-mining walkthrough: read every file in one directory into a corpus,
  # clean the text, build a document-term matrix, then report term
  # frequencies as a CSV file, a bar chart, term associations and a word cloud.

  #install.packages("tm")
  #install.packages("ggplot2")
  #install.packages("wordcloud")
  library(tm)
  library(ggplot2)
  library(wordcloud)

  # Load the corpus ----
  cname <- file.path("/home/dos/Desktop/blog/R")
  # Fail early with a clear message when the corpus directory is missing.
  if (!dir.exists(cname)) {
    stop("Corpus directory not found: ", cname, call. = FALSE)
  }
  docs <- VCorpus(DirSource(cname))

  # Clean the text ----
  docs <- tm_map(docs, removePunctuation)
  docs <- tm_map(docs, removeNumbers)
  # tolower() is a plain character function, not a tm transformation. Wrapping
  # it in content_transformer() keeps every corpus element a text document, so
  # no later PlainTextDocument conversion is needed to repair the corpus.
  docs <- tm_map(docs, content_transformer(tolower))
  docs <- tm_map(docs, removeWords, stopwords("english"))
  docs <- tm_map(docs, removeWords, c("shreyas", "waghmare"))
  docs <- tm_map(docs, stripWhitespace)

  # Build the matrices ----
  dtm <- DocumentTermMatrix(docs)
  # tdm is not used further down; kept so it stays available for inspection.
  tdm <- TermDocumentMatrix(docs)

  # Dense copy of dtm, computed once and reused for every frequency count.
  m <- as.matrix(dtm)

  # Term frequencies over the full matrix ----
  freq <- colSums(m)
  length(freq)
  # ord is not used further down; kept so it stays available for inspection.
  ord <- order(freq)
  dim(m)
  write.csv(m, file = "DocumentTermMatrix.csv")

  # Distribution of frequencies: how many terms occur once, twice, ...
  freq_counts <- table(freq)
  head(freq_counts, 20)
  tail(freq_counts, 20)
  freq

  # Term frequencies after dropping sparse terms (sparsity threshold 0.1) ----
  dtms <- removeSparseTerms(dtm, 0.1)
  freq <- colSums(as.matrix(dtms))
  freq

  # Bar chart of terms occurring more than 50 times ----
  wf <- data.frame(word = names(freq), freq = freq)
  p <- ggplot(subset(wf, freq > 50), aes(word, freq)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  p

  # Term associations ----
  # NOTE(review): "shreyas" and "waghmare" are removed from the corpus during
  # cleaning, so they are presumably not terms of dtm and this lookup has
  # nothing to report -- confirm whether other terms were intended.
  findAssocs(dtm, c("shreyas", "waghmare"), corlimit = 0.85)
  findAssocs(dtms, "think", corlimit = 0.70)

  # Word cloud ----
  dtms <- removeSparseTerms(dtm, 0.15) # Prepare the data (max 15% empty space)
  # NOTE(review): the cloud is drawn from the full dtm, not from the dtms
  # computed on the previous line -- confirm which one was intended.
  freq <- colSums(m)
  dark2 <- brewer.pal(6, "Dark2")
  wordcloud(names(freq), freq, max.words = 20, rot.per = 0.5, colors = dark2)
Add Comment
Please, Sign In to add comment