Advertisement
Guest User

Untitled

a guest
Aug 17th, 2017
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.64 KB | None | 0 0
  1. # most of the script written by Jessica Peterka-Bonetta
  2. # check it out on https://github.com/today-is-a-good-day/emojis/blob/master/emoji_analysis.R
  3. options(stringsAsFactors = FALSE)
  4. library(dplyr)
  5. library(stringr)
  6. library(rvest)
  7. library(Unicode)
  8. library(tm)
  9. library(base64enc)
  10. library(RMySQL)
  11. library(parallel)
  12. library(magrittr)
  13.  
  14. ## This is written for a system with at least 5 cores.
  15. # If you have fewer than five cores, edit the mc.cores values or delete them (to fall back to the default)
  16. detectCores()
  17.  
  18. #Sys.setlocale(category = "LC_ALL", locale = "de_DE.UTF-8")
  19.  
  20.  
  21. ## ---- utility functions ----
  22. # this function outputs the emojis found in a string as well as their occurrences
  23. count_matches %
  24. mclapply(count_matches, matchto = matchto, description = description, sentiment = sentiment, mc.cores = 1) %>%
  25. bind_rows
  26.  
  27. }
  28.  
  29. # fill in your username, password…
  30. # I stored the comments in an SQL database and extracted them from there
  31. con %
  32. select(EN, ftu8, native, unicode) %>%
  33. rename(description = EN, r.encoding = ftu8)
  34.  
  35.  
  36. # plain skin tones
  37. skin_tones %
  38. # remove plain skin tones emojis
  39. filter(!description %in% skin_tones) %>%
  40. # remove emojis carrying skin tone info, e.g. drop "woman: light skin tone" and
  41. # keep only "woman"
  42. filter(!grepl(":", description)) %>%
  43. mutate(description = tolower(description))
  44. # mutate(unicode = as.u_char(unicode))
  45. # all emojis with more than one unicode codepoint become NA
  46.  
  47. matchto %
  48. group_by(!!description) %>%
  49. summarise(n = sum(count)) %>%
  50. arrange(-n)
  51.  
  52. head(rank, 20)
  53.  
  54. total <- merge(rank, emDict, by="description")
  55.  
  56. # Write CSV in R
  57. write.csv(total, file = ".../rank.csv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement