# ANW (Tidy Tuesday 2020-12-15)

###################################################################################################
# American Ninja Warrior - Most common obstacles.
# Created by: reddit.com/user/brianhaas19

# Link to data
# https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-12-15

# Setup
library(tidyverse)
library(scales)
library(patchwork)
# Make the black-and-white theme the default for every ggplot built below.
theme_set(theme_bw())

# Load data
# Read the Tidy Tuesday `ninja_warrior.csv` file directly from GitHub.
# The rest of the script uses its `season` and `obstacle_name` columns.
df <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-12-15/ninja_warrior.csv"
)

# Clean data

### Pick the 24 obstacles that appear most often overall (a top 25 has too many ties).
### `count(sort = TRUE)` tallies the rows per obstacle and orders them by descending count;
### `head(24)` keeps at most 24 names and, unlike `[1:24]`, never pads the result with NA
### when fewer than 24 distinct obstacles are present.
top_24_obstacles <- df %>%
  count(obstacle_name, sort = TRUE) %>%
  pull(obstacle_name) %>%
  head(24)

### Turn the names into an ordered factor so that the order is preserved when the
### vector is later used in expand_grid().
### The order is reversed (least common first) so that the most common obstacle
### appears on top of plots.
top_24_obstacles <- rev(top_24_obstacles)
top_24_obstacles <- factor(top_24_obstacles, levels = top_24_obstacles, ordered = TRUE)

### Keep only the rows whose obstacle is one of the top 24 ...
df <- filter(df, obstacle_name %in% top_24_obstacles)

### ... and re-encode `obstacle_name` as an ordered factor that uses the same
### level order as `top_24_obstacles`:
df <- mutate(df, obstacle_name = ordered(obstacle_name, levels = top_24_obstacles))

### Build the data frame for plotting: one row per (obstacle, season) pair.
### A pair that never occurs in `df` finds no match in the counts, so the left
### join leaves its `n` as NA (i.e. the obstacle did not appear in that season).
data <- expand_grid(
  obstacle_name = top_24_obstacles,
  season = unique(df$season)
) %>%
  left_join(
    count(df, season, obstacle_name),
    by = c("season", "obstacle_name")
  )

### Compute the total number of appearances of each obstacle across all seasons.
### The totals are stored under the stand-in season 11, which keeps the x-axis
### numeric and therefore easy to order; 11 is relabelled "Total" later.
### NA counts (obstacle absent from a season) are ignored by the sum.
totals <- data %>%
  group_by(obstacle_name) %>%
  summarise(season = 11, n = sum(n, na.rm = TRUE)) %>%
  arrange(desc(n))

### Append the totals rows to the per-season rows:
data <- rbind(data, totals)

# Visualize data
# Suggested R Markdown chunk options: fig.height=8, fig.width=6
# Stand-in season number under which the totals rows are stored in `data`.
total_col <- 11

# Split `data` once into the three subsets drawn by the layers below:
plot_counts <- filter(data, !is.na(n), season != total_col) # obstacle appeared in the season
plot_zeros <- filter(data, is.na(n)) # obstacle did not appear (NA count)
plot_totals <- filter(data, season == total_col) # per-obstacle totals

# Each cell of the season x obstacle grid is drawn as a circle with a number in it.
p <- ggplot() +
  # Counts (colored points, colored by the count itself):
  geom_point(
    data = plot_counts,
    aes(season, obstacle_name, color = n, fill = n),
    size = 6,
    shape = 21,
    stroke = 1,
    alpha = 0.2,
    show.legend = FALSE
  ) +
  geom_text(
    data = plot_counts,
    aes(season, obstacle_name, color = n, label = n),
    size = 4,
    show.legend = FALSE
  ) +
  # Zeros (these are the NAs), drawn in grey with a literal "0" label:
  geom_point(
    data = plot_zeros,
    aes(season, obstacle_name),
    color = "grey",
    fill = "grey",
    size = 6,
    shape = 21,
    stroke = 1,
    alpha = 0.2,
    show.legend = FALSE
  ) +
  geom_text(
    data = plot_zeros,
    aes(season, obstacle_name),
    color = "grey50",
    label = "0",
    size = 4,
    show.legend = FALSE
  ) +
  # Totals (unfilled black circles in the last column):
  geom_point(
    data = plot_totals,
    aes(season, obstacle_name),
    color = "black",
    fill = NA,
    size = 6,
    shape = 21,
    stroke = 1,
    alpha = 0.3,
    show.legend = FALSE
  ) +
  geom_text(
    data = plot_totals,
    aes(season, obstacle_name, label = n),
    color = "black",
    size = 3.5,
    show.legend = FALSE
  ) +
  # One break per season plus the totals column, which is relabelled "Total":
  scale_x_continuous(
    breaks = seq_len(total_col),
    labels = c(seq_len(total_col - 1), "Total")
  ) +
  scale_color_viridis_c(option = "C", end = 0.75) +
  scale_fill_viridis_c(option = "C", end = 0.75) +
  labs(
    x = "Season",
    y = "Obstacle",
    title = expression(paste(bold("American Ninja Warrior"), " - Most common obstacles.")),
    subtitle = str_wrap("The numbers indicate how many times the obstacle appeared in each season of American Ninja Warrior.", 60),
    caption = expression(paste("Created by: ", italic("reddit.com/user/brianhaas19")))
  ) +
  theme(
    # NOTE(review): `size` for line elements is deprecated in favour of `linewidth`
    # in ggplot2 >= 3.4.0; kept here so the script still runs on older versions.
    panel.grid = element_line(linetype = "dashed", color = "grey80", size = 0.2),
    plot.title = element_text(hjust = 0),
    plot.subtitle = element_text(hjust = 0),
    plot.caption = element_text(color = "grey50", hjust = 1, vjust = -1)
  )

# The plot already carries a "Created by" caption, so the data-source credit is
# attached as a second caption through `patchwork`'s plot_annotation():
source_caption <- expression(
  paste("Data source: ", italic("https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-12-15"))
)
p <- p + plot_annotation(
  caption = source_caption,
  theme = theme(plot.caption = element_text(color = "grey50"))
)

# Display the finished plot:
p

# Uncomment to save to disk:
#ggsave(filename = str_c(getwd(), "/ANW.png"), plot = p, width = 6, height = 8)
###################################################################################################