Advertisement
Guest User

Untitled

a guest
Nov 13th, 2018
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.01 KB | None | 0 0
  1. ---
  2. title: "warmup07-vincent-chen"
  3. author: "Vincent Chen"
  4. date: "November 13, 2018"
  5. output: html_document
  6. ---
  7.  
  8. ```{r setup, include=FALSE}
  9. knitr::opts_chunk$set(echo = TRUE)  # default chunk option: show each chunk's source code in the knitted output
  10. ```
  11.  
  12. ## 1) Number of characters per tweet
  13. ```{r}
  14. data <- read.csv(file = "text_emotion.csv", header = TRUE, sep = ",")  # tweet data set reused by the chunks below
  15. num_char <- nchar(as.character(data$content))  # as.character(): nchar() rejects factor input
  16. summary(num_char)
  17.  
  18. # Create a histogram with bins of width 5 (hist() has no `w` argument; the bin edges go through `breaks`).
  19. hist(num_char, breaks = seq(0, max(num_char, na.rm = TRUE) + 5, by = 5), xlab = "Number of Characters", main = "Frequency of Characters per Tweet")
  20.  
  21. # Are there any tweets with 0 characters?
  22. sum(num_char == 0)
  23.  
  24. # Are there any tweets with one character?
  25. # 1. How many?
  26. sum(num_char == 1)
  27.  
  28. # 2. Display its content
  29. data$content[which(num_char == 1)]
  30.  
  31. # 3. What is its location (i.e. index or position)?
  32. which(num_char == 1)
  33. ```
  34.  
  35. ## 2) Sentiment
  36. ```{r}
  37. # What are the different types of sentiments (i.e. categories)?
  38. levels(factor(data$sentiment))  # factor(): also works when read.csv() left the column as character (default since R 4.0)
  39.  
  40. # Compute the frequencies (counts) of each sentiment (and display these frequencies)
  41. table(data$sentiment)
  42.  
  43. # Graph the relative frequencies with a horizontal barplot in decreasing order, including names of sentiment types
  44. sentiment_count <- table(data$sentiment)
  45. sentiment_count <- sentiment_count[order(sentiment_count)]  # ascending: barplot(horiz = TRUE) draws the first bar at the bottom, so the largest ends up on top
  46. sentiment_count <- sentiment_count / sum(sentiment_count)
  47. barplot(sentiment_count, horiz = TRUE, names.arg = names(sentiment_count), las = 1, xlab = "Relative Frequency", main = "Relative Frequency of Sentiment Types")
  48.  
  49. # Sentiment and length of tweets: compute a table with the average number of characters per sentiment (i.e. avg # of characters for neutral tweets, happy tweets) and display this table.
  50. avg_sentiment <- aggregate(num_char, by = list(data$sentiment), FUN = mean)
  51. names(avg_sentiment) <- c("sentiment", "mean length")
  52. avg_sentiment
  53.  
  54. ```
  55.  
  56.  
  57. ## 3) Author (usernames)
  58. ```{r}
  59. data$author <- as.character(data$author)
  60. # No longer than 15 characters; if longer display them. TRUE here means no name is longer than 15 characters.
  61. !any(nchar(data$author) > 15)
  62.  
  63. # Contain only alphanumeric characters and underscores (if a name contains other symbols, display it)
  64. data$author[!grepl("^[A-Za-z0-9_]+$", data$author, perl = TRUE)]
  65.  
  66. # What is the number of characters of the shortest usernames?
  67. min(nchar(data$author))
  68.  
  69. # What are the names of these authors? Write commands for these questions.
  70. data$author[nchar(data$author) == min(nchar(data$author))]  # compare against the computed minimum instead of a hard-coded 2
  71. ```
  72.  
  73. ## 4) Various Symbols and Strings
  74. ```{r}
  75. # How many tweets contain at least one "^"?
  76. sum(grepl("\\^", data$content))
  77. # How many contain three or more consecutive dollar symbols "$"?
  78. sum(grepl("$$$", data$content, fixed = TRUE))  # fixed = TRUE: match the literal "$$$"; as a regex, an unescaped "$" is an end-of-line anchor
  79.  
  80. # How many tweets do NOT contain the characters "a" or "A"?
  81.  
  82. # Display the first 10 elements of the tweets that do NOT contain the characters "a" or "A"
  83.  
  84. # Number of exclamation symbols "!": compute a vector with the number of exclamation symbols in each tweet, and display its summary()
  85.  
  86. # What's the tweet (content) with the largest number of exclamation symbols? Display its content.
  87.  
  88. # How many tweets contain the individual strings "omg" or "OMG"?
  89. ```
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement