Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Place three toy documents on a 2-D map: build a document-term count matrix,
# standardise each term column, take pairwise cosine distances between the
# documents, reduce them to two dimensions with classical MDS, and plot.
library(tidytext)
library(dplyr)
library(tidyr)
library(stylo)
library(ggplot2)

# Toy corpus: one string per document, named by document id.
txt1 <- c(
  "this is a sample document about Computer Science",
  "Not all sample documents are about science, they can be about art too",
  "and sometimes they are just useless"
)
names(txt1) <- c("Doc1", "Doc2", "Doc3")

df1 <- data.frame(doc = names(txt1), texts = txt1, stringsAsFactors = FALSE)

a1 <- df1 %>%
  # One row per (document, token).
  unnest_tokens(output = word, input = texts) %>%
  # Occurrences of each word within each document.
  count(doc, word, name = "totals") %>%
  # Wide document-term table; words absent from a document get 0.
  # names_sort = TRUE keeps the term columns in alphabetical order.
  pivot_wider(
    names_from = word,
    values_from = totals,
    values_fill = 0,
    names_sort = TRUE
  ) %>%
  # Drop the id column so only numeric term counts reach as.matrix().
  select(-doc) %>%
  as.matrix() %>%
  # NOTE(review): a word with identical counts in every document has zero
  # variance, and scale() would turn that column into NaN. None occur in
  # this corpus; filter such columns first if the corpus changes.
  scale(center = TRUE, scale = TRUE) %>%
  dist.cosine() %>%
  cmdscale() %>%
  # data.frame() (not as.data.frame()) so the coordinate columns are named
  # X1 and X2, which the plot below relies on.
  data.frame()

# Label each point with its row index; seq_len() stays correct (empty) even
# if the result has zero rows, unlike 1:nrow().
a1$name <- seq_len(nrow(a1))

ggplot(data = a1, aes(x = X1, y = X2)) +
  geom_text(aes(label = name))
Add Comment
Please, Sign In to add comment