# Untitled

library(qdap)
# Word-frequency analysis of the yearly SOU speech transcripts, 2014-2019.
#
# Every year goes through the same three steps (read, clean, split into
# sentences), so the steps live in small helpers instead of being repeated
# for each year. The per-year objects (speech_20YY, prepYY, sentYY) are still
# created under their original names.

# Read a transcript and return it as one ASCII string.
#
# path: path to a plain-text file.
#
# The lines of the file are joined with single spaces. The text is then
# converted from latin1 to ASCII; sub = "" drops any character that has no
# ASCII equivalent instead of turning the whole string into NA.
read_speech <- function(path) {
  text <- paste(readLines(path), collapse = " ")
  iconv(text, from = "latin1", to = "ASCII", sub = "")
}

# Run the qdap cleaning steps on a raw speech string.
#
# text: character string, e.g. the value returned by read_speech().
#
# Returns the cleaned text. "?" and "." are kept by strip(), presumably so
# that sentSplit() still has end marks to split on afterwards.
clean_speech <- function(text) {
  cleaned <- qprep(text)
  cleaned <- replace_contraction(cleaned)
  # separate = FALSE keeps running text rather than a vector of single words.
  # FALSE is spelled out because `F` is an ordinary variable that can be
  # reassigned.
  cleaned <- rm_stopwords(cleaned, Top100Words, separate = FALSE)
  strip(cleaned, char.keep = c("?", "."))
}

# Split cleaned speech text into sentences and tag them with the year.
#
# text: cleaned speech text, e.g. the value returned by clean_speech().
# year: label stored in the `year` column of every row.
#
# Returns the data frame produced by sentSplit() with a `year` column added.
split_sentences <- function(text, year) {
  sentences <- data.frame(speech = text)
  sentences <- sentSplit(sentences, "speech")
  sentences$year <- year
  sentences
}

# 2014 speech
speech_2014 <- read_speech("SOU2014.txt")
prep14 <- clean_speech(speech_2014)
sent14 <- split_sentences(prep14, "2014")

# 2015 speech
speech_2015 <- read_speech("SOU2015.txt")
prep15 <- clean_speech(speech_2015)
sent15 <- split_sentences(prep15, "2015")

# 2016 speech
speech_2016 <- read_speech("SOU2016.txt")
prep16 <- clean_speech(speech_2016)
sent16 <- split_sentences(prep16, "2016")

# 2017 speech
speech_2017 <- read_speech("SOU2017.txt")
prep17 <- clean_speech(speech_2017)
sent17 <- split_sentences(prep17, "2017")

# 2018 speech
speech_2018 <- read_speech("SOU2018.txt")
prep18 <- clean_speech(speech_2018)
sent18 <- split_sentences(prep18, "2018")

# 2019 speech
speech_2019 <- read_speech("SOU2019.txt")
# Only the 2019 transcript gets this extra step. The parentheses form a regex
# group, not literal brackets, so every occurrence of the word "Honourable"
# is removed.
speech_2019 <- gsub("(Honourable)", "", speech_2019)
prep19 <- clean_speech(speech_2019)
sent19 <- split_sentences(prep19, "2019")

# Stack the sentences of all six speeches into one data frame.
sentences <- data.frame(
  rbind(sent14, sent15, sent16, sent17, sent18, sent19)
)

# Plot the frequency of words across all speeches.
plot(freq_terms(sentences$speech))