Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Text-mining walkthrough: build a corpus from a directory of text files,
# clean it, tabulate term frequencies, then draw a bar chart and a word cloud.
#
# One-time setup (uncomment to install the packages used below):
# install.packages(c("tm", "ggplot2", "wordcloud"))
library(tm)
library(ggplot2)
library(wordcloud)

# Corpus ----

# Directory holding the documents to analyse (one file per document).
cname <- file.path("/home/dos/Desktop/blog/R")
docs <- VCorpus(DirSource(cname))

# Cleaning ----

docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, removeNumbers)
# tolower() is a plain character function, so it is wrapped with
# content_transformer() to keep every element a text document. This replaces
# the old `tm_map(docs, PlainTextDocument)` repair step, which is no longer
# needed once the documents keep their class.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- tm_map(docs, removeWords, c("shreyas", "waghmare"))
docs <- tm_map(docs, stripWhitespace)

# Term matrices ----

dtm <- DocumentTermMatrix(docs)
# Transposed view (terms as rows); not used further in this script.
tdm <- TermDocumentMatrix(docs)

# Dense copy of the document-term matrix, computed once and reused.
m <- as.matrix(dtm)

# Total count of each term across all documents.
freq <- colSums(m)
length(freq)
# Term indices sorted by ascending frequency; not used further in this script.
ord <- order(freq)

dim(m)
write.csv(m, file = "DocumentTermMatrix.csv")

# Frequency overview ----

# Keep only terms whose sparsity is at most 10%.
dtms <- removeSparseTerms(dtm, 0.1)

# Distribution of frequencies: how many terms occur once, twice, ...
head(table(freq), 20)
tail(table(freq), 20)
freq

# Frequencies restricted to the reduced (less sparse) matrix.
freq <- colSums(as.matrix(dtms))
freq

# Bar chart of frequent terms ----

wf <- data.frame(word = names(freq), freq = freq)
p <- ggplot(subset(wf, freq > 50), aes(word, freq)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p

# Term associations ----

# NOTE(review): "shreyas" and "waghmare" are removed from the corpus during
# cleaning, so this lookup is expected to come back empty - confirm whether it
# should run against an uncleaned matrix instead.
findAssocs(dtm, c("shreyas", "waghmare"), corlimit = 0.85)
findAssocs(dtms, "think", corlimit = 0.70)

# Word cloud ----

# Prepare the data (max 15% empty space).
dtms <- removeSparseTerms(dtm, 0.15)
# Use the reduced matrix prepared on the previous line; the earlier version
# recomputed frequencies from the full `dtm`, leaving `dtms` unused.
freq <- colSums(as.matrix(dtms))
# brewer.pal() is provided by RColorBrewer, which is presumably attached
# together with wordcloud (the script never loads it explicitly).
dark2 <- brewer.pal(6, "Dark2")
wordcloud(names(freq), freq, max.words = 20, rot.per = 0.5, colors = dark2)
Add Comment
Please, Sign In to add comment