Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ###################################################################################################
- # American Ninja Warrior - Most common obstacles.
- # Created by: reddit.com/user/brianhaas19
- # Link to data
- # https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-12-15
# Setup ----
library(tidyverse)
library(scales)
library(patchwork)

# Make the black-and-white theme the default for every plot in this script.
theme_set(theme_bw())

# Load data ----
# TidyTuesday 2020-12-15: one row per obstacle appearance.
ninja_warrior_url <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-12-15/ninja_warrior.csv"
df <- read_csv(ninja_warrior_url)
# Clean data ----

# Rank obstacles by their total number of appearances and keep the 24 most
# common (per the original author, a top 25 has too many ties).
# `count(sort = TRUE)` orders by descending `n`; `head()` simply returns fewer
# names, rather than padding with NA, if there are fewer than 24 obstacles.
top_24_obstacles <- df %>%
  count(obstacle_name, sort = TRUE) %>%
  pull(obstacle_name) %>%
  head(24)

# Turn the names into an ordered factor whose levels run from least to most
# common. expand_grid() below preserves this order, and the reversal puts the
# most common obstacle at the top of the plot's y-axis.
top_24_obstacles <- factor(
  rev(top_24_obstacles),
  levels = rev(top_24_obstacles),
  ordered = TRUE
)

# Keep only the rows for the top obstacles and give the column the same
# ordered levels as `top_24_obstacles`.
df <- df %>%
  filter(obstacle_name %in% top_24_obstacles) %>%
  mutate(
    obstacle_name = factor(
      obstacle_name,
      levels = levels(top_24_obstacles),
      ordered = TRUE
    )
  )

# Build the plotting data: one row per obstacle/season combination.
# The left join leaves `n` as NA wherever an obstacle did not appear in a
# season; those NAs are drawn as zeros later.
data <- expand_grid(
  obstacle_name = top_24_obstacles,
  season = unique(df$season)
) %>%
  left_join(
    count(df, season, obstacle_name),
    by = c("season", "obstacle_name")
  )

# Totals per obstacle, stored under a numeric pseudo-season one past the last
# real season so they sit at the right-hand end of a continuous x-axis.
# This evaluates to 11 for seasons 1-10; it is relabelled "Total" when plotted.
total_season <- max(df$season, na.rm = TRUE) + 1
totals <- data %>%
  group_by(obstacle_name) %>%
  summarise(n = sum(n, na.rm = TRUE)) %>%
  mutate(season = total_season) %>%
  select(obstacle_name, season, n) %>%
  arrange(desc(n))

# Append the totals to the per-season counts.
data <- bind_rows(data, totals)
# Visualize data ----
# Suggested figure size (e.g. R Markdown chunk options): fig.height=8, fig.width=6

# The totals are stored under the largest `season` value in `data`; every
# smaller value is a real season. Deriving it here avoids repeating a magic
# number in each filter and in the axis breaks/labels.
total_season <- max(data$season, na.rm = TRUE)

# Split the data once into the three groups that are drawn differently.
season_counts <- filter(data, !is.na(n), season != total_season)
season_zeros <- filter(data, is.na(n)) # NA means "did not appear" -> drawn as 0
season_totals <- filter(data, season == total_season)

p <- ggplot() +
  # Counts (coloured points with the count printed on top):
  geom_point(
    data = season_counts,
    aes(season, obstacle_name, color = n, fill = n),
    size = 6,
    shape = 21,
    stroke = 1,
    alpha = 0.2,
    show.legend = FALSE
  ) +
  geom_text(
    data = season_counts,
    aes(season, obstacle_name, color = n, label = n),
    size = 4,
    show.legend = FALSE
  ) +
  # Zeros (these are the NAs):
  geom_point(
    data = season_zeros,
    aes(season, obstacle_name),
    color = "grey",
    fill = "grey",
    size = 6,
    shape = 21,
    stroke = 1,
    alpha = 0.2,
    show.legend = FALSE
  ) +
  geom_text(
    data = season_zeros,
    aes(season, obstacle_name),
    color = "grey50",
    label = "0",
    size = 4,
    show.legend = FALSE
  ) +
  # Totals (hollow black circles in the last column):
  geom_point(
    data = season_totals,
    aes(season, obstacle_name),
    color = "black",
    fill = NA,
    size = 6,
    shape = 21,
    stroke = 1,
    alpha = 0.3,
    show.legend = FALSE
  ) +
  geom_text(
    data = season_totals,
    aes(season, obstacle_name, label = n),
    color = "black",
    size = 3.5,
    show.legend = FALSE
  ) +
  # One tick per season plus a final tick relabelled "Total".
  scale_x_continuous(
    breaks = seq_len(total_season),
    labels = c(as.character(seq_len(total_season - 1)), "Total")
  ) +
  scale_color_viridis_c(option = "C", end = 0.75) +
  scale_fill_viridis_c(option = "C", end = 0.75) +
  labs(
    x = "Season",
    y = "Obstacle",
    title = expression(paste(bold("American Ninja Warrior"), " - Most common obstacles.")),
    subtitle = str_wrap("The numbers indicate how many times the obstacle appeared in each season of American Ninja Warrior.", 60),
    caption = expression(paste("Created by: ", italic("reddit.com/user/brianhaas19")))
  ) +
  theme(
    # NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines; `size`
    # is kept so the script also runs on older ggplot2 versions.
    panel.grid = element_line(linetype = "dashed", color = "grey80", size = 0.2),
    plot.title = element_text(hjust = 0),
    plot.subtitle = element_text(hjust = 0),
    plot.caption = element_text(color = "grey50", hjust = 1, vjust = -1)
  )

# Use `patchwork` to add a second caption below the plot's own caption:
p <- p +
  plot_annotation(
    caption = expression(paste("Data source: ", italic("https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-12-15"))),
    theme = theme(plot.caption = element_text(color = "grey50"))
  )
p

# Uncomment to save to disk (relative paths resolve against the working directory):
# ggsave(filename = "ANW.png", plot = p, width = 6, height = 8)
- ###################################################################################################
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement