Advertisement
Guest User

Untitled

a guest
May 27th, 2018
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.04 KB | None | 0 0
  # Package setup ----
  # Install only the packages that are not already available, so re-running
  # the script does not download and reinstall every dependency each time.
  required_packages <- c(
    "janeaustenr", "dplyr", "stringr", "tidytext",
    "tidyr", "ggplot2", "wordcloud", "reshape2"
  )
  is_installed <- vapply(
    required_packages, requireNamespace, logical(1),
    quietly = TRUE
  )
  missing_packages <- required_packages[!is_installed]
  if (length(missing_packages) > 0) {
    install.packages(missing_packages)
  }
  9.  
  10. library(janeaustenr)
  11. library(dplyr)
  12. library(stringr)
  13. library(tidytext)
  14. library(tidyr)
  15. library(ggplot2)
  16. library(wordcloud)
  17. library(reshape2)
  18.  
  # Annotate each line of every novel ----
  # `line` is the row number within its book; `chapter` is a running count of
  # lines matching the chapter-heading pattern, so lines before the first
  # heading get chapter 0.
  original_books <- austen_books() %>%
    group_by(book) %>%
    mutate(
      line = row_number(),
      chapter = cumsum(
        str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
      )
    ) %>%
    ungroup()

  original_books
  27.  
  # Tokenise: split the `text` column into one `word` per row.
  tidy_books <- unnest_tokens(original_books, word, text)

  tidy_books
  32.  
  # Drop every row whose word appears in the stop-word list.
  cleaned_books <- anti_join(tidy_books, get_stopwords())

  # Word frequencies after stop-word removal, most frequent first.
  count(cleaned_books, word, sort = TRUE)
  38.  
  # Entries of the "nrc" lexicon tagged with the "joy" sentiment.
  nrcjoy <- filter(get_sentiments("nrc"), sentiment == "joy")

  # Joy words found in "Emma", counted and sorted by frequency.
  tidy_books %>%
    filter(book == "Emma") %>%
    semi_join(nrcjoy) %>%
    count(word, sort = TRUE)
  46.  
  bing <- get_sentiments("bing")

  # Net sentiment per book in consecutive 80-line sections:
  # `index` is the integer-divided line number, and `sentiment` ends up as
  # the count of positive words minus the count of negative words.
  janeaustensentiment <- inner_join(tidy_books, bing) %>%
    count(book, index = line %/% 80, sentiment) %>%
    spread(sentiment, n, fill = 0) %>%
    mutate(sentiment = positive - negative)
  54.  
  # Plot: net sentiment across each novel ----

  # One facet per book, each with its own x range; bars are coloured by book
  # and the legend is hidden.
  ggplot(
    data = janeaustensentiment,
    mapping = aes(x = index, y = sentiment, fill = book)
  ) +
    geom_bar(stat = "identity", show.legend = FALSE) +
    facet_wrap(~book, ncol = 2, scales = "free_x")
  60.  
  61.  
  # Word counts by sentiment ----

  # How often each word occurs under each bing sentiment, most frequent first.
  bing_word_counts <- inner_join(tidy_books, bing) %>%
    count(word, sentiment, sort = TRUE) %>%
    ungroup()

  bing_word_counts
  70.  
  # Plot: words contributing most to sentiment ----

  # Keep words seen more than 150 times, flip the sign of negative counts so
  # they extend in the opposite direction, and order the words by that signed
  # count before drawing horizontal bars.
  bing_word_counts %>%
    filter(n > 150) %>%
    mutate(
      n = ifelse(sentiment == "negative", -n, n),
      word = reorder(word, n)
    ) %>%
    ggplot(aes(word, n, fill = sentiment)) +
    geom_col() +
    coord_flip() +
    labs(y = "Contribution to sentiment")
  81.  
  # Word cloud of the stop-word-free text ----

  # Draws at most 100 words, sized by their counts.
  with(
    count(cleaned_books, word),
    wordcloud(word, n, max.words = 100)
  )
  89.  
  # Comparison cloud: sentiment columns side by side ----

  # Cast the word-by-sentiment counts to a matrix (missing combinations
  # filled with 0) and draw at most 100 words.
  inner_join(tidy_books, bing) %>%
    count(word, sentiment, sort = TRUE) %>%
    acast(word ~ sentiment, value.var = "n", fill = 0) %>%
    comparison.cloud(
      colors = c("#F8766D", "#00BFC4"),
      max.words = 100
    )
  98.  
  99.  
  # Sentence-level tokens ----

  # Tokenise Pride & Prejudice into sentences instead of words.
  # `tibble()` replaces the deprecated `data_frame()` constructor; dplyr
  # re-exports it, so no additional package needs to be attached.
  PandP_sentences <- tibble(text = prideprejudice) %>%
    unnest_tokens(sentence, text, token = "sentences")

  # Look at the second sentence as a quick sanity check.
  PandP_sentences$sentence[2]
  107.  
  # Split each book into chunks at every match of the chapter regex, giving
  # one row per chunk in the `chapter` column.
  austen_chapters <- austen_books() %>%
    group_by(book) %>%
    unnest_tokens(
      chapter, text,
      token = "regex",
      pattern = "Chapter|CHAPTER [\\dIVXLC]"
    ) %>%
    ungroup()

  # Number of chunks obtained for each book.
  austen_chapters %>%
    group_by(book) %>%
    summarise(chapters = n())
  116.  
  117.  
  # Entries of the bing lexicon labelled "negative".
  bingnegative <- filter(get_sentiments("bing"), sentiment == "negative")

  # Total number of words in every (book, chapter) pair.
  wordcounts <- tidy_books %>%
    group_by(book, chapter) %>%
    summarize(words = n())
  124.  
  # For each book, find the chapter with the highest share of negative words.
  # Chapter 0 (text before the first chapter heading) is excluded.
  tidy_books %>%
    # Explicit join key instead of relying on shared column names.
    semi_join(bingnegative, by = "word") %>%
    group_by(book, chapter) %>%
    summarize(negativewords = n()) %>%
    left_join(wordcounts, by = c("book", "chapter")) %>%
    mutate(ratio = negativewords / words) %>%
    filter(chapter != 0) %>%
    # Rank on `ratio` explicitly rather than letting top_n() pick the last
    # column, then drop the leftover per-book grouping.
    top_n(1, ratio) %>%
    ungroup()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement