Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # most of the script written by Jessica Peterka-Bonetta
- # check it out on https://github.com/today-is-a-good-day/emojis/blob/master/emoji_analysis.R
- options(stringsAsFactors = FALSE)
- library(dplyr)
- library(stringr)
- library(rvest)
- library(Unicode)
- library(tm)
- library(base64enc)
- library(RMySQL)
- library(parallel)
- library(magrittr)
- ## This script was written for a system with at least 5 cores.
- # If you have fewer than five cores, edit the mc.cores values or delete them (falling back to the default).
- detectCores()
- #Sys.setlocale(category = "LC_ALL", locale = "de_DE.UTF-8")
- ## ---- utility functions ----
- # this function outputs the emojis found in a string as well as their occurrences
- count_matches %
- mclapply(count_matches, matchto = matchto, description = description, sentiment = sentiment, mc.cores = 1) %>%
- bind_rows
- }
- # fill in your username, password, etc.
- # I stored the comments in an SQL database and extracted them from there
- con %
- select(EN, ftu8, native, unicode) %>%
- rename(description = EN, r.encoding = ftu8)
- # plain skin tones
- skin_tones %
- # remove plain skin tones emojis
- filter(!description %in% skin_tones) %>%
- # remove emojis that carry skin-tone info, e.g. drop "woman: light skin tone" and
- # keep only "woman"
- filter(!grepl(":", description)) %>%
- mutate(description = tolower(description))
- # mutate(unicode = as.u_char(unicode))
- # all emojis with more than one unicode codepoint become NA
- matchto %
- group_by(!!description) %>%
- summarise(n = sum(count)) %>%
- arrange(-n)
- head(rank, 20)  # preview the first 20 rows of `rank` (presumably the most frequent emojis, given arrange(-n) above -- pipeline head not visible; confirm)
- total <- merge(rank, emDict, by="description")  # join `rank` with `emDict` on the shared `description` column; base merge() keeps only rows matched in both by default
- # Write the merged table to a CSV file. NOTE(review): ".../rank.csv" looks like a placeholder path -- replace it with a real output location before running.
- write.csv(total, file = ".../rank.csv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement