Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Word-frequency analysis and word cloud for the Chinese text in demo.csv.
#
# NOTE(review): the functions used below appear to come from the tm,
# Rwordseg (segmentCN), tmcn (stopwordsCN) and wordcloud packages. No
# library() calls are visible in this script, so those packages are assumed
# to be attached already -- confirm before running in a fresh session.

demo <- read.csv("demo.csv")

# Build one document per cell. Passing the data.frame itself to
# VectorSource() would treat every *column* as a single document, because a
# data.frame is a list of columns.
# NOTE(review): all columns are flattened because the name of the text column
# is not visible here; restrict this to the text column if there are others.
texts <- unlist(lapply(demo, as.character), use.names = FALSE)
texts <- texts[!is.na(texts)]
corpus <- Corpus(VectorSource(texts))

# Custom functions must be wrapped in content_transformer() so that tm_map()
# edits the document content and keeps the document objects intact. Passing a
# bare function replaces each document with whatever the function returns.
strip_ascii_alnum <- content_transformer(function(text) {
  gsub("[A-Za-z0-9]", "", text)
})

corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, stripWhitespace)
corpus <- tm_map(corpus, strip_ascii_alnum)

# Segment the Chinese text into words. returnType = "tm" asks segmentCN() for
# space-separated strings, which is the form the tm tokenizer expects.
# The original passed nature = TRUE (part-of-speech tags); nothing downstream
# used the tags, so they are no longer requested.
corpus <- tm_map(corpus, content_transformer(segmentCN), returnType = "tm")

# Named stop_words so that tm's stopwords() function is not shadowed.
# The original list also contained "\n" and "", apparently to clean up junk
# tokens; those entries are dropped because whitespace is already stripped
# above and an empty pattern removes nothing.
stop_words <- c(
  stopwordsCN(c("問題", "請問", "你好", "您好", "妳好")),
  stopwords("english")
)
corpus <- tm_map(corpus, removeWords, stop_words)

# Spot-check one processed document.
if (length(corpus) >= 3L) {
  print(strwrap(as.character(corpus[[3]]), width = 100))
}

# The documents are still proper text documents at this point, so the extra
# conversion to PlainTextDocument that the original needed is not required.
tdm <- TermDocumentMatrix(corpus, control = list(wordLengths = c(2, Inf)))
m1 <- as.matrix(tdm)

# Total frequency of each term across all documents, most frequent first.
v <- sort(rowSums(m1), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

wordcloud(
  d$word, d$freq,
  min.freq = 2,
  random.order = FALSE,
  ordered.colors = FALSE,
  colors = rainbow(nrow(m1))
)
head(v, 10)

# Output recorded from the ORIGINAL version of this script. Note the junk
# terms containing a literal "\n" (for example "嗎\n" and "嗎\n謝謝"); the
# changes above are intended to eliminate them -- re-run to confirm.
# 嗎\n 現貨 謝謝 購買 嗎\n謝謝 顆\n 全新 老闆 硬碟 還\n
# 15 10 8 8 4 4 3 3 3 3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement