brianhaas19

ANW (Tidy Tuesday 2020-12-15)

Dec 23rd, 2020
953
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ###################################################################################################
  2. # American Ninja Warrior - Most common obstacles.
  3. # Created by: reddit.com/user/brianhaas19
  4.  
  5. # Link to data
  6. # https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-12-15
  7.  
  8. # Setup
  9. library(tidyverse)
  10. library(scales)
  11. library(patchwork)
  # Make the black-and-white theme the default for every plot drawn below.
  theme_set(new = theme_bw())

  # Load data ----
  # Read the Tidy Tuesday (2020-12-15) `ninja_warrior.csv` file directly from GitHub.
  df <- read_csv(
    file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-12-15/ninja_warrior.csv"
  )
  16.  
  17. # Clean data
  18.  
  ### Find the 24 obstacles that appear most often overall (top 25 has too many ties).
  ### `count(sort = TRUE)` returns an ungrouped tibble ordered by descending `n`;
  ### ties keep their alphabetical order. `head()` returns at most 24 names and
  ### never pads the result with NA when fewer than 24 obstacles are present.
  top_24_obstacles <- df %>%
    count(obstacle_name, sort = TRUE) %>%
    pull(obstacle_name) %>%
    head(24)

  ### Create an ordered factor here so that when it's used in expand_grid() the
  ### order is preserved there also. The order is reversed so that the most common
  ### obstacle is the highest level and therefore appears on top of the plot.
  top_24_obstacles <- factor(
    rev(top_24_obstacles),
    levels = rev(top_24_obstacles),
    ordered = TRUE
  )
  30.  
  ### Keep only the rows whose obstacle is one of the top 24, then recode the
  ### obstacle name as an ordered factor with the same level order as
  ### `top_24_obstacles`:
  df <- df %>%
    filter(is.element(obstacle_name, top_24_obstacles)) %>%
    mutate(
      obstacle_name = ordered(obstacle_name, levels = levels(top_24_obstacles))
    )
  35.  
  ### Build the data frame for plotting: one row for every combination of
  ### top-24 obstacle and season, with the number of appearances in `n`.
  ### Combinations that never occur in `df` find no match in the counts,
  ### so their `n` is NA.
  data <- expand_grid(
    obstacle_name = top_24_obstacles,
    season = unique(df$season)
  ) %>%
    left_join(
      count(df, season, obstacle_name),
      by = c("season", "obstacle_name")
    )
  45.  
  ### Get the total number of appearances of each obstacle across all seasons.
  ### The totals are stored under the stand-in season number 11 so that they sort
  ### after the real seasons on the numeric x-axis; the axis label for 11 is
  ### changed to "Total" when the plot is built.
  ### NA counts (obstacle absent from a season) are ignored by `na.rm = TRUE`.
  totals <- data %>%
    group_by(obstacle_name) %>%
    summarise(season = 11, n = sum(n, na.rm = TRUE)) %>%
    arrange(desc(n))

  ### Append the totals to the per-season rows. `bind_rows()` matches the
  ### columns (obstacle_name, season, n) by name.
  data <- bind_rows(data, totals)
  57.  
  # Visualize data
  # Suggested figure size: fig.height=8, fig.width=6
  # (presumably meant as R Markdown chunk options; the commented-out ggsave()
  # call at the end of the script uses the same 6 x 8 size).
  #
  # The plot is a grid of circles, one per (season, obstacle) pair, in three layers:
  #   * counts - seasons where the obstacle appeared, colored by the count;
  #   * zeros  - seasons where it did not appear (n is NA), drawn in grey as "0";
  #   * totals - the stand-in season 11, drawn in black.
  # Each layer is a geom_point() circle with a geom_text() label on top of it.
  p <- ggplot() +
    # Counts (colored points):
    geom_point(data = filter(data, !is.na(n), (season != 11)),
               aes(season, obstacle_name, color = n, fill = n),
               size = 6,
               shape = 21,
               stroke = 1,
               alpha = 0.2,
               show.legend = FALSE) +
    geom_text(data = filter(data, !is.na(n), (season != 11)),
              aes(season, obstacle_name, color = n, label = n),
              size = 4,
              show.legend = FALSE) +
    # Zeros (these are the NAs):
    geom_point(data = filter(data, is.na(n)),
               aes(season, obstacle_name),
               color = "grey",
               fill = "grey",
               size = 6,
               shape = 21,
               stroke = 1,
               alpha = 0.2,
               show.legend = FALSE) +
    geom_text(data = filter(data, is.na(n)),
              aes(season, obstacle_name),
              color = "grey50",
              label = "0",
              size = 4,
              show.legend = FALSE) +
    # Totals:
    geom_point(data = filter(data, (season == 11)),
               aes(season, obstacle_name),
               color = "black",
               fill = NA,
               size = 6,
               shape = 21,
               stroke = 1,
               alpha = 0.3,
               show.legend = FALSE) +
    geom_text(data = filter(data, (season == 11)),
              aes(season, obstacle_name, label = n),
              color = "black",
              size = 3.5,
              show.legend = FALSE) +
    # Label the stand-in season 11 as "Total" on the x-axis:
    scale_x_continuous(breaks = 1:11, labels = c(paste(1:10), "Total")) +
    # Same viridis palette for the circle outline/label (color) and interior (fill):
    scale_color_viridis_c(option = 'C', end = 0.75) +
    scale_fill_viridis_c(option = 'C', end = 0.75) +
    # expression() is used so that parts of the title/caption can be bold/italic:
    labs(x = "Season", y = "Obstacle",
         title = expression(paste(bold("American Ninja Warrior"), " - Most common obstacles.")),
         subtitle = str_wrap("The numbers indicate how many times the obstacle appeared in each season of American Ninja Warrior.", 60),
         caption = expression(paste("Created by: ", italic("reddit.com/user/brianhaas19")))) +
    # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for line
    # elements; `size` is kept here for the version this script was written for.
    theme(panel.grid = element_line(linetype = "dashed", color = "grey80", size = 0.2),
          plot.title = element_text(hjust = 0),
          plot.subtitle = element_text(hjust = 0),
          plot.caption = element_text(color = "grey50", hjust = 1, vjust = -1))
  115.  
  # The plot already has a caption of its own, so the data-source credit is
  # added as a second caption through `patchwork`'s plot_annotation():
  p <- p +
    plot_annotation(
      caption = expression(
        paste(
          "Data source: ",
          italic("https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-12-15")
        )
      ),
      theme = theme(plot.caption = element_text(colour = "grey50"))
    )

  # Draw the finished plot:
  p
  120.  
  121. # Uncomment to save to disk:
  122. #ggsave(filename = str_c(getwd(), "/ANW.png"), plot = p, width = 6, height = 8)
  123. ###################################################################################################
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound or popup ads; we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×