Advertisement
Guest User

Untitled

a guest
Dec 16th, 2018
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.38 KB | None | 0 0
  # Bootstrap the 'install.load' helper package.
  # requireNamespace() tests for availability without attaching the package and
  # without the warning require() emits when the package is missing.
  if (!requireNamespace("install.load", quietly = TRUE)) {
    install.packages("install.load")
  }
  library(install.load)

  # Hand the script's package list to install_load().
  # (The duplicated "bit64" entry was dropped.)
  install_load(
    "twitteR", "devtools", "rjson", "bit64", "httr", "stringr", "plyr",
    "RCurl", "shiny", "shinydashboard", "tidyverse", "tidytext", "glue"
  )
  7.  
  8. #getData <- function() {
  # Establish twitter connection
  #
  # Credentials are read from environment variables rather than being stored in
  # the script. Define TWITTER_API_KEY, TWITTER_API_SECRET,
  # TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_SECRET (for example in ~/.Renviron)
  # before running.
  # NOTE(review): the literal keys that used to live here were posted publicly
  # and should be revoked and regenerated.

  # Return the value of environment variable `var_name`; fail loudly if it is
  # unset or empty so a missing credential is caught before any API call.
  read_credential <- function(var_name) {
    value <- Sys.getenv(var_name, unset = "")
    if (!nzchar(value)) {
      stop("Environment variable ", var_name, " is not set", call. = FALSE)
    }
    value
  }

  APIkey <- read_credential("TWITTER_API_KEY")
  APIsecret <- read_credential("TWITTER_API_SECRET")
  Accesstoken <- read_credential("TWITTER_ACCESS_TOKEN")
  Accesssecret <- read_credential("TWITTER_ACCESS_SECRET")
  14.  
  # Authenticate with httr's OAuth cache option switched on, then put the
  # option back exactly as it was.
  # options(name = value) returns the previous setting as a named list; passing
  # that list back to options() restores it. (Assigning the list to the option
  # itself, as before, left the option holding a list instead of its old value.)
  origop <- options(httr_oauth_cache = TRUE)
  tryCatch(
    setup_twitter_oauth(APIkey, APIsecret, Accesstoken, Accesssecret),
    # restore even when authentication fails
    finally = options(origop)
  )
  19.  
  # Search terms used for the tweet queries.
  topics <- c(
    "information technology",
    "education",
    "politics",
    "sport",
    "weather"
  )

  # Empty result tables, each with the columns "topic" and "text".
  # topicsDf starts as a 0-row, 2-column frame and is filled row by row later;
  # forPython starts as a typed (character) 0-row frame.
  topicsDf <- data.frame(matrix(nrow = 0, ncol = 2))
  names(topicsDf) <- c("topic", "text")

  forPython <- data.frame(
    topic = character(),
    text = character(),
    stringsAsFactors = FALSE
  )
  # Fill the two result tables from the Twitter search results:
  #   forPython - one row per tweet: (topic, text)
  #   topicsDf  - one row per topic: (topic, all tweet texts joined and cleaned)
  for (index in seq_along(topics)) {
    topic <- topics[index]
    cat("doing topic", topic, "\n")

    # Get tweets and strip retweets
    tweets <- searchTwitter(topic, n = 100, lang = "en")
    if (length(tweets) > 0) {
      tweets <- strip_retweets(tweets, strip_manual = TRUE, strip_mt = TRUE)
    }

    # Nothing to convert: keep the topic row (with empty text) so topicsDf
    # still has one row per topic, and move on.
    if (length(tweets) == 0) {
      topicsDf[index, "topic"] <- topic
      topicsDf[index, "text"] <- ""
      next
    }

    tweetsDf <- twListToDF(tweets)
    # The first column of the converted frame is what this script uses as the
    # tweet text.
    tweetTexts <- tweetsDf[, 1]

    # One row per tweet, built in a single vectorised call. The previous
    # row-by-row loop over 2:nrow(tweetsDf) counted backwards (2, 1) when only
    # one tweet was left, appending an NA row and a duplicate.
    toPaste <- data.frame(
      topic = topic,
      text = tweetTexts,
      stringsAsFactors = FALSE
    )
    forPython <- rbind(forPython, toPaste)

    # Join all tweets of the topic and keep only letters and whitespace.
    # (This already removes "$", so no separate dollar-sign pass is needed.)
    dataString <- paste(tweetTexts, collapse = " ")
    dataString <- gsub("[^[:alpha:][:space:]]", "", dataString)

    # set name of the topic and text to columns of topicsDf
    topicsDf[index, "topic"] <- topic
    topicsDf[index, "text"] <- dataString
  }
  57.  
  # Write both tables to disk as UTF-8 CSV files without row names:
  # the per-topic table to "Sentiment.csv", the per-tweet table to "Tweets.csv".
  csvTargets <- list("Sentiment.csv" = topicsDf, "Tweets.csv" = forPython)
  for (csvPath in names(csvTargets)) {
    write.csv(
      csvTargets[[csvPath]],
      file = csvPath,
      row.names = FALSE,
      fileEncoding = "UTF-8"
    )
  }
  61.  
  62. # a = as.data.frame(dataString)
  63. #
  64. # write.csv(a, file = "tweet.csv", row.names = FALSE)
  65. #
  66.  
  # Per-topic sentiment summary: the dominant label ("positive"/"negative") and
  # the share of that label, in whole percent, among all lexicon words found in
  # the topic's text.
  sentimentDf <- setNames(
    data.frame(matrix(ncol = 2, nrow = 0)),
    c("sentiment_label", "sentiment_perc")
  )

  # Load the lexicon once instead of once per topic.
  bingLexicon <- get_sentiments("bing")

  # add sentiment to dataframe sentiment
  for (index in seq_len(nrow(topicsDf))) {
    # tokenize: one row per word
    tokens <- tibble(text = as.character(topicsDf[index, "text"])) %>%
      unnest_tokens(word, text)

    # pull out only sentiment words
    matched <- inner_join(tokens, bingLexicon, by = "word")

    # Count directly rather than reshaping to wide columns: a topic with words
    # of only one polarity would otherwise have no column for the other one,
    # and the comparison below would fail on a zero-length condition.
    positiveCount <- sum(matched$sentiment == "positive")
    negativeCount <- sum(matched$sentiment == "negative")
    totalCount <- positiveCount + negativeCount

    # get how much positive/negative sentiment is and its label
    if (totalCount == 0) {
      # No lexicon words at all: treat as a 0-0 tie. Ties fall into the
      # "negative" branch at 50%, same as any other tie, and avoid 0/0.
      warning(
        "No sentiment words found for topic: ", topicsDf[index, "topic"],
        call. = FALSE
      )
      sentimentPerc <- 50
      sentimentLbl <- "negative"
    } else if (positiveCount > negativeCount) {
      sentimentPerc <- round(positiveCount / totalCount * 100, 0)
      sentimentLbl <- "positive"
    } else {
      sentimentPerc <- round(negativeCount / totalCount * 100, 0)
      sentimentLbl <- "negative"
    }

    # save sentiment for current topic
    sentimentDf[index, "sentiment_label"] <- sentimentLbl
    sentimentDf[index, "sentiment_perc"] <- sentimentPerc
  }
  94.  
  # params: lay the charts out on a 2 x 3 grid. par() returns the previous
  # settings, which are restored after the last chart.
  oldPar <- par(mfrow = c(2, 3))

  # visualize results: one pie chart per topic
  for (index in seq_len(nrow(sentimentDf))) {
    # dominant label and its complement
    mainLabel <- sentimentDf[index, "sentiment_label"]
    otherLabel <- if (mainLabel == "positive") "negative" else "positive"
    lbls <- c(mainLabel, otherLabel)

    # dominant percentage and its complement
    mainPerc <- sentimentDf[index, "sentiment_perc"]
    otherPerc <- 100 - mainPerc
    percs <- c(mainPerc, otherPerc)

    # create pie chart; each slice is labelled with its sentiment and share
    # (the labels were computed before but never shown, leaving bare numbers)
    pie(
      percs,
      labels = paste0(lbls, " ", percs, "%"),
      main = paste0("Sentiment for: ", topicsDf[index, "topic"])
    )
  }

  par(oldPar)
  117.  
  118. #}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement