SHARE
TWEET

Untitled

a guest Aug 19th, 2019 69 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  library(qdap)

  # Read one State of the Union transcript, clean it, and split it into
  # sentences.
  #
  # Arguments:
  #   year          Year of the speech (number or string). The transcript is
  #                 read from "SOU<year>.txt" in the working directory.
  #   drop_pattern  Optional regular expression; every match is deleted from
  #                 the raw text before the qdap cleaning steps run.
  #                 NULL (the default) deletes nothing.
  #
  # Returns:
  #   The data frame produced by sentSplit() on a single `speech` column,
  #   with an added character column `year`.
  prep_speech <- function(year, drop_pattern = NULL) {
    path <- paste0("SOU", year, ".txt")
    if (!file.exists(path)) {
      stop("Speech file not found: ", path, call. = FALSE)
    }

    # Collapse the file into one long string so sentence splitting is not
    # affected by where the lines happen to break.
    text <- paste(readLines(path), collapse = " ")

    # sub = "" drops every character that cannot be represented in ASCII.
    text <- iconv(text, "latin1", "ASCII", sub = "")

    if (!is.null(drop_pattern)) {
      text <- gsub(drop_pattern, "", text)
    }

    text <- qprep(text)
    text <- replace_contraction(text)
    text <- rm_stopwords(text, Top100Words, separate = FALSE)
    # Keep "?" and "." so sentSplit() still has sentence boundaries to find.
    text <- strip(text, char.keep = c("?", "."))

    sentences_df <- sentSplit(data.frame(speech = text), "speech")
    sentences_df$year <- as.character(year)
    sentences_df
  }

  # One sentence-level data frame per speech ----
  sent14 <- prep_speech(2014)
  sent15 <- prep_speech(2015)
  sent16 <- prep_speech(2016)
  sent17 <- prep_speech(2017)
  sent18 <- prep_speech(2018)
  # NOTE(review): the parentheses are a regex group, so this removes every
  # occurrence of "Honourable", not the literal text "(Honourable)" --
  # confirm that this is the intent.
  sent19 <- prep_speech(2019, drop_pattern = "(Honourable)")

  # Concatenate the sentences of all speeches into one data frame.
  sentences <- data.frame(
    rbind(sent14, sent15, sent16, sent17, sent18, sent19)
  )

  # Plot the frequency of words across all speeches.
  plot(freq_terms(sentences$speech))
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top