Untitled

# most of the script written by Jessica Peterka-Bonetta
# check it out on https://github.com/today-is-a-good-day/emojis/blob/master/emoji_analysis.R
options(stringsAsFactors = FALSE)
library(dplyr)
library(stringr)
library(rvest)
library(Unicode)
library(tm)
library(base64enc)
library(RMySQL)
library(parallel)
library(magrittr)

## This was written for a system with at least 5 cores.
# If you have fewer than five cores, edit the mc.cores values or delete them
# (which falls back to the default).
# Report how many CPU cores are detected on this machine.
detectCores()

#Sys.setlocale(category = "LC_ALL", locale = "de_DE.UTF-8")


## ---- utility functions ----
# this function outputs the emojis found in a string as well as their occurrences
# NOTE(review): this statement looks truncated — `count_matches %` is followed
# directly by what appears to be the tail of a `%>%` pipeline and a stray
# closing brace, so the head of the definition(s) is not present here.
# Restore the missing text from the upstream script before running.
count_matches %
    # Visible remainder: apply `count_matches` to each element of the incoming
    # object with mclapply (mc.cores = 1), forwarding matchto, description and
    # sentiment to every call ...
    mclapply(count_matches, matchto = matchto, description = description, sentiment = sentiment, mc.cores = 1) %>%
    # ... then combine the per-element results row-wise.
    bind_rows

}

# fill in your username, password…
# I stored the comments in a SQL database and extracted them from there
# NOTE(review): this statement looks truncated — the text between `con` and
# the `select()` call (presumably the connection setup and the start of a
# `%>%` pipeline) is missing. Restore it before running.
con %
  # Visible remainder: keep only the columns EN, ftu8, native and unicode ...
  select(EN, ftu8, native, unicode) %>%
  # ... and rename EN to `description` and ftu8 to `r.encoding`.
  rename(description = EN, r.encoding = ftu8)


# plain skin tones
# NOTE(review): this statement looks truncated — the text between `skin_tones`
# and the first `filter()` (presumably the skin-tone values and the start of a
# `%>%` pipeline) is missing. Restore it before running.
skin_tones %
  # remove plain skin tone emojis
  filter(!description %in% skin_tones) %>%
  # remove emojis that carry skin tone info, e.g. drop "woman: light skin tone"
  # and keep only "woman" (any description containing ":" is dropped)
  filter(!grepl(":", description)) %>%
  # lower-case the descriptions
  mutate(description = tolower(description))
#  mutate(unicode = as.u_char(unicode))
# all emojis with more than one unicode codepoint become NA

# NOTE(review): this statement looks truncated — the text between `matchto`
# and `group_by()` (presumably the start of a `%>%` pipeline) is missing.
# Restore it before running.
matchto %
  # NOTE(review): `!!description` injects the value of a variable named
  # `description` from the calling environment rather than naming a column.
  # The merge further down joins on a "description" column, so confirm that
  # plain `description` was not intended here.
  group_by(!!description) %>%
  # sum the `count` column within each group
  summarise(n = sum(count)) %>%
  # sort by n, largest first
  arrange(-n)

# Preview the first 20 rows of the ranking.
rank %>% head(n = 20)

# Attach the emoji dictionary columns to the ranking, joining on the shared
# "description" column.
total <- rank %>% merge(y = emDict, by = "description")

# Export the merged table as CSV.
# NOTE(review): ".../rank.csv" looks like a placeholder — point it at a real
# output directory before running.
write.csv(x = total, file = ".../rank.csv")