Guest User

Untitled

a guest
Dec 15th, 2018
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.74 KB | None | 0 0
  library(tidytext)
  library(dplyr)
  library(tidyr)
  library(stylo)
  library(ggplot2)

  # Toy corpus: three short documents keyed by document id.
  txt1 <- c(
    Doc1 = "this is a sample document about Computer Science",
    Doc2 = "Not all sample documents are about science, they can be about art too",
    Doc3 = "and sometimes they are just useless"
  )
  df1 <- data.frame(doc = names(txt1), texts = txt1, stringsAsFactors = FALSE)

  # Document-term counts in wide form: one row per document, one column per
  # token, 0L where a token does not occur in a document.
  doc_term <- df1 %>%
    unnest_tokens(output = word, input = texts) %>%
    count(doc, word, name = "totals") %>%
    pivot_wider(names_from = word, values_from = totals, values_fill = 0L)

  term_matrix <- doc_term %>%
    select(-doc) %>%
    as.matrix()

  # A token with the same count in every document has zero variance, so
  # scale() would turn its column into NaN and break the distance / MDS steps.
  # Such a column cannot help tell documents apart, so drop it before scaling.
  varies <- apply(term_matrix, 2, function(counts) length(unique(counts)) > 1L)
  term_matrix <- term_matrix[, varies, drop = FALSE]

  # Standardise each token column, take pairwise cosine distances between
  # documents (stylo), and project them to two dimensions with classical MDS.
  # data.frame() names the two unnamed coordinate columns X1 and X2.
  a1 <- term_matrix %>%
    scale(center = TRUE, scale = TRUE) %>%
    dist.cosine() %>%
    cmdscale() %>%
    data.frame()

  # Label each point with its document id rather than a bare row index: row
  # order follows doc_term, so the ids line up with the MDS coordinates even
  # when the sorted id order differs from the input order.
  a1$name <- doc_term$doc

  ggplot(data = a1, aes(x = X1, y = X2)) +
    geom_text(aes(label = name))
Add Comment
Please, Sign In to add comment