Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
library(qdap)

# Build a sentence-level data frame for one State of the Union speech.
#
# Reads "SOU<year>.txt" from the working directory, cleans the text with
# qdap and splits it into sentences.
#
# Args:
#   year:         Speech year (e.g. 2014). Selects the input file and is
#                 stored, as a character string, in the `year` column.
#   drop_pattern: Optional regular expression; every match is deleted from
#                 the text before qdap preprocessing. NULL (the default)
#                 skips this step.
#
# Returns:
#   The result of sentSplit() on the cleaned speech, with a `year` column
#   added.
prepare_speech <- function(year, drop_pattern = NULL) {
  year <- as.character(year)

  # Collapse the file into one string so the speech is processed as a whole.
  raw_text <- paste(readLines(paste0("SOU", year, ".txt")), collapse = " ")

  # Convert latin1 to ASCII; sub = "" drops anything that cannot be converted.
  raw_text <- iconv(raw_text, "latin1", "ASCII", "")

  if (!is.null(drop_pattern)) {
    raw_text <- gsub(drop_pattern, "", raw_text)
  }

  cleaned <- qprep(raw_text)
  cleaned <- replace_contraction(cleaned)
  cleaned <- rm_stopwords(cleaned, Top100Words, separate = FALSE)
  # "?" and "." are kept, presumably so sentSplit() below still has
  # sentence-ending marks to split on.
  cleaned <- strip(cleaned, char.keep = c("?", "."))

  # Split the speech into sentences.
  sentence_df <- data.frame(speech = cleaned)
  sentence_df <- sentSplit(sentence_df, "speech")
  sentence_df$year <- year
  sentence_df
}

sent14 <- prepare_speech(2014)
sent15 <- prepare_speech(2015)
sent16 <- prepare_speech(2016)
sent17 <- prepare_speech(2017)
sent18 <- prepare_speech(2018)
# The parentheses form a regex group, so this removes every occurrence of
# the word "Honourable" from the 2019 speech.
# NOTE(review): if the literal text "(Honourable)" was meant, gsub() would
# need fixed = TRUE - confirm against SOU2019.txt.
sent19 <- prepare_speech(2019, drop_pattern = "(Honourable)")

# Concatenate the sentences of all speeches. The data.frame() wrapper is
# kept from the original so the result is rebuilt as an ordinary data frame.
sentences <- data.frame(
  rbind(sent14, sent15, sent16, sent17, sent18, sent19)
)

# Plot the frequency of the most common terms across all speeches.
plot(freq_terms(sentences$speech))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement