Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Bootstrap the `install.load` helper, then use it to install (when missing)
# and attach every package the script relies on.
# requireNamespace() only checks availability; library() is then called
# unconditionally so a failed installation surfaces as an error here rather
# than as "could not find function" further down.
if (!requireNamespace("install.load", quietly = TRUE)) {
  install.packages("install.load")
}
library(install.load)

# Order is kept from the original list (plyr before tidyverse, so dplyr's
# verbs mask plyr's).
install_load(
  "twitteR", "devtools", "rjson", "bit64", "httr", "stringr", "plyr",
  "RCurl", "shiny", "shinydashboard", "tidyverse", "tidytext", "glue"
)
# Establish twitter connection ----
#
# Credentials are read from environment variables instead of being embedded
# in the source, so they are never committed or pasted along with the script.
# Set them in ~/.Renviron (or the shell) before running:
#   TWITTER_API_KEY, TWITTER_API_SECRET,
#   TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET
twitterCredentials <- Sys.getenv(
  c(
    "TWITTER_API_KEY", "TWITTER_API_SECRET",
    "TWITTER_ACCESS_TOKEN", "TWITTER_ACCESS_SECRET"
  )
)
missingCredentials <- names(twitterCredentials)[!nzchar(twitterCredentials)]
if (length(missingCredentials) > 0) {
  stop(
    "Missing Twitter credentials; set environment variable(s): ",
    paste(missingCredentials, collapse = ", "),
    call. = FALSE
  )
}

APIkey <- twitterCredentials[["TWITTER_API_KEY"]]
APIsecret <- twitterCredentials[["TWITTER_API_SECRET"]]
Accesstoken <- twitterCredentials[["TWITTER_ACCESS_TOKEN"]]
Accesssecret <- twitterCredentials[["TWITTER_ACCESS_SECRET"]]

# Temporarily enable httr's OAuth cache option while authenticating
# (presumably to avoid the interactive caching prompt -- TODO confirm).
# options() returns the previous values as a list; passing that list back to
# options() restores them exactly, including an originally unset option.
origop <- options(httr_oauth_cache = TRUE)
tryCatch(
  setup_twitter_oauth(APIkey, APIsecret, Accesstoken, Accesssecret),
  finally = options(origop) # restore even when authentication fails
)
# Collect tweets per topic ----
#
# Produces two data frames and writes each to CSV:
#   forPython : one row per tweet            (columns: topic, text)
#   topicsDf  : one row per topic, in the same order as `topics`, holding all
#               of that topic's tweets joined and cleaned (columns: topic, text)
topics <- c("information technology", "education", "politics", "sport", "weather")

# Empty frame that fixes the column layout of the per-tweet output.
forPython <- data.frame(
  topic = character(),
  text = character(),
  stringsAsFactors = FALSE
)

# Per-topic results are stored in pre-allocated containers and combined once
# after the loop, instead of growing a data frame row by row.
tweetRows <- vector("list", length(topics))
topicText <- character(length(topics))

for (index in seq_along(topics)) {
  cat("doing topic", topics[index], "\n")

  # Get tweets and strip retweets
  tweets <- searchTwitter(topics[index], n = 100, lang = "en")
  tweets <- strip_retweets(tweets, strip_manual = TRUE, strip_mt = TRUE)

  if (length(tweets) == 0) {
    # Nothing left for this topic: keep its row (with empty text) so that
    # topicsDf stays aligned with `topics`.
    tweetText <- character(0)
  } else {
    tweetsDf <- twListToDF(tweets)
    tweetText <- as.character(tweetsDf[["text"]])
  }

  # One (topic, text) row per tweet; zero rows when there were no tweets.
  tweetRows[[index]] <- data.frame(
    topic = rep(topics[index], length(tweetText)),
    text = tweetText,
    stringsAsFactors = FALSE
  )

  # Join all tweets of the topic and keep only letters and whitespace
  # (this also drops digits, punctuation and symbols such as "$").
  dataString <- paste(tweetText, collapse = " ")
  topicText[index] <- gsub("[^[:alpha:][:space:]]", "", dataString)
}

forPython <- do.call(rbind, c(list(forPython), tweetRows))
topicsDf <- data.frame(
  topic = topics,
  text = topicText,
  stringsAsFactors = FALSE
)

# Save the per-topic text and the per-tweet table
write.csv(topicsDf, file = "Sentiment.csv", row.names = FALSE, fileEncoding = "UTF-8")
write.csv(forPython, file = "Tweets.csv", row.names = FALSE, fileEncoding = "UTF-8")
# Score sentiment per topic ----
#
# For every row of topicsDf, tokenise the text, keep the words found in the
# Bing lexicon, and record which polarity dominates and its share (in whole
# percent) of all matched words. A tie is labelled "negative", as before.
# Topics with no lexicon words at all keep NA in both columns.
bingLexicon <- get_sentiments("bing") # loaded once, reused for every topic

sentimentDf <- data.frame(
  sentiment_label = rep(NA_character_, nrow(topicsDf)),
  sentiment_perc = rep(NA_real_, nrow(topicsDf)),
  stringsAsFactors = FALSE
)

for (index in seq_len(nrow(topicsDf))) {
  # tokenize: one row per word
  tokens <- tibble(text = as.character(topicsDf[index, "text"])) %>%
    unnest_tokens(word, text)

  # pull out only sentiment words
  matched <- inner_join(tokens, bingLexicon, by = "word")

  # Count each polarity directly so that a polarity with no matches is 0
  # rather than a missing column.
  nPositive <- sum(matched$sentiment == "positive")
  nNegative <- sum(matched$sentiment == "negative")
  nTotal <- nPositive + nNegative

  if (nTotal == 0) {
    warning(
      "No sentiment words found for topic: ", topicsDf[index, "topic"],
      call. = FALSE
    )
    next # leave the NA placeholders for this topic
  }

  # get how much positive/negative sentiment there is and its label
  if (nPositive > nNegative) {
    sentimentLbl <- "positive"
    sentimentPerc <- round(nPositive / nTotal * 100, 0)
  } else {
    sentimentLbl <- "negative"
    sentimentPerc <- round(nNegative / nTotal * 100, 0)
  }

  # save sentiment for current topic
  sentimentDf[index, "sentiment_label"] <- sentimentLbl
  sentimentDf[index, "sentiment_perc"] <- sentimentPerc
}
# Visualize results ----
#
# Draws one pie chart per topic on a 2 x 3 grid. The first slice is the
# dominant polarity stored in sentimentDf, the second is the remainder; each
# slice is labelled with its polarity and percentage so the two can be told
# apart. Topics without a sentiment score (NA) are skipped.
oldPar <- par(mfrow = c(2, 3)) # par() returns the previous settings
tryCatch(
  {
    for (index in seq_len(nrow(sentimentDf))) {
      dominantLabel <- sentimentDf[index, "sentiment_label"]
      dominantPerc <- sentimentDf[index, "sentiment_perc"]
      if (is.na(dominantLabel) || is.na(dominantPerc)) {
        next
      }

      # the complementary slice
      otherLabel <- if (dominantLabel == "positive") "negative" else "positive"
      otherPerc <- 100 - dominantPerc

      # create pie chart
      pie(
        c(dominantPerc, otherPerc),
        labels = paste0(
          c(dominantLabel, otherLabel), " ",
          c(dominantPerc, otherPerc), "%"
        ),
        main = paste0("Sentiment for: ", topicsDf[index, "topic"])
      )
    }
  },
  finally = par(oldPar) # put the graphics layout back, even after an error
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement