Guest User

Untitled

a guest
Aug 19th, 2019
71
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  library(qdap)

  # Read one State-of-the-Union transcript and return it as a sentence-level
  # data frame tagged with its year.
  #
  # Args:
  #   path:         Path to the plain-text transcript (read with readLines()).
  #   year:         Label stored in the `year` column of the result.
  #   drop_pattern: Optional regular expression; every match is deleted from
  #                 the raw text before qdap preprocessing. NULL (the default)
  #                 skips this step.
  #
  # Returns:
  #   The data frame produced by qdap::sentSplit() on a one-column `speech`
  #   data frame, with an added `year` column.
  split_speech <- function(path, year, drop_pattern = NULL) {
    raw_text <- paste(readLines(path), collapse = " ")
    # sub = "" drops any character that cannot be converted to ASCII.
    raw_text <- iconv(raw_text, "latin1", "ASCII", "")

    if (!is.null(drop_pattern)) {
      raw_text <- gsub(drop_pattern, "", raw_text)
    }

    prepped <- qprep(raw_text)
    prepped <- replace_contraction(prepped)
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    prepped <- rm_stopwords(prepped, Top100Words, separate = FALSE)
    # "?" and "." are kept -- presumably so sentSplit() can still find
    # sentence boundaries; TODO confirm.
    prepped <- strip(prepped, char.keep = c("?", "."))

    # Split the speech into sentences
    by_sentence <- sentSplit(data.frame(speech = prepped), "speech")
    by_sentence$year <- year
    by_sentence
  }

  sent14 <- split_speech("SOU2014.txt", "2014")
  sent15 <- split_speech("SOU2015.txt", "2015")
  sent16 <- split_speech("SOU2016.txt", "2016")
  sent17 <- split_speech("SOU2017.txt", "2017")
  sent18 <- split_speech("SOU2018.txt", "2018")
  # NOTE(review): as a regex, "(Honourable)" is a capture group, so it removes
  # the bare word "Honourable", not the literal text "(Honourable)". Kept
  # exactly as in the original script -- confirm which was intended.
  sent19 <- split_speech("SOU2019.txt", "2019", drop_pattern = "(Honourable)")

  # concatenate sentences
  sentences <- data.frame(
    rbind(sent14, sent15, sent16, sent17, sent18, sent19)
  )

  # Plots frequency of words
  plot(freq_terms(sentences$speech))
RAW Paste Data