Advertisement
brianhaas19

ANW (Tidy Tuesday 2020-12-15)

Dec 23rd, 2020
1,497
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 4.92 KB | None | 0 0
  1. ###################################################################################################
  2. # American Ninja Warrior - Most common obstacles.
  3. # Created by: reddit.com/user/brianhaas19
  4.  
  5. # Link to data
  6. # https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-12-15
  7.  
  8. # Setup
  9. library(tidyverse)
  10. library(scales)
  11. library(patchwork)
  12. theme_set(theme_bw())
  13.  
  # Load data ----
  # Read the Ninja Warrior CSV directly from the TidyTuesday repository.
  # Later steps use its `season` and `obstacle_name` columns.
  df <- read_csv(
    file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-12-15/ninja_warrior.csv"
  )
  16.  
  17. # Clean data
  18.  
  ### Names of the 24 obstacles that appear most often overall, most common
  ### first (a top 25 has too many ties at the cut-off).
  ### `head()` is used rather than `[1:24]` so that a data set with fewer than
  ### 24 distinct obstacles yields a shorter vector instead of one padded
  ### with NA.
  top_24_obstacles <- df %>%
    count(obstacle_name) %>%
    arrange(desc(n)) %>%
    pull(obstacle_name) %>%
    head(24)
  26.  
  ### Convert the names to an ordered factor so that expand_grid() keeps this
  ### ordering. The order is reversed first so that the most common obstacle
  ### ends up at the top of the plot.
  top_24_obstacles <- top_24_obstacles %>%
    rev() %>%
    ordered(., levels = .)
  30.  
  ### Keep only the rows for the top 24 obstacles, then store the obstacle
  ### name as an ordered factor sharing the levels of `top_24_obstacles`:
  df <- filter(df, obstacle_name %in% top_24_obstacles)
  df <- mutate(
    df,
    obstacle_name = ordered(obstacle_name, levels = top_24_obstacles)
  )
  35.  
  ### Build the data frame for plotting.
  ### Start from the full obstacle x season grid and attach the per-season
  ### appearance counts; `n` is NA wherever an obstacle does not appear in a
  ### season.
  data <- expand_grid(
    obstacle_name = top_24_obstacles,
    season = unique(df$season)
  ) %>%
    left_join(
      count(df, season, obstacle_name),
      by = c("season", "obstacle_name")
    )
  45.  
  ### Total appearances of each obstacle across all seasons, largest first.
  ### Season 11 is a numeric stand-in for "Total" (relabelled on the plot's
  ### x-axis later); keeping it numeric makes the axis order easy to control.
  totals <- data %>%
    group_by(obstacle_name) %>%
    summarise(
      season = 11, # will convert this to "Total" later
      n = sum(n, na.rm = TRUE)
    ) %>%
    arrange(desc(n))
  54.  
  ### Append the totals rows underneath the per-season rows:
  data <- bind_rows(data, totals)
  57.  
  # Visualize data ----
  # Suggested parameters for `R`: fig.height=8, fig.width=6
  #
  # Every (season, obstacle) cell is drawn as a circle with a number in it:
  #   * cells with a count (n not NA, season != 11): coloured by the count;
  #   * cells with no appearance (n is NA): grey and labelled "0";
  #   * the totals column (season == 11): black outline with the total.
  # Each subset is filtered once and shared by its point and text layers.
  seen_counts <- filter(data, !is.na(n), season != 11)
  unseen_counts <- filter(data, is.na(n))
  total_counts <- filter(data, season == 11)

  p <- ggplot() +
    # Counts (colored points):
    geom_point(
      data = seen_counts,
      aes(season, obstacle_name, color = n, fill = n),
      size = 6,
      shape = 21,
      stroke = 1,
      alpha = 0.2,
      show.legend = FALSE
    ) +
    geom_text(
      data = seen_counts,
      aes(season, obstacle_name, color = n, label = n),
      size = 4,
      show.legend = FALSE
    ) +
    # Zeros (these are the NAs):
    geom_point(
      data = unseen_counts,
      aes(season, obstacle_name),
      color = "grey",
      fill = "grey",
      size = 6,
      shape = 21,
      stroke = 1,
      alpha = 0.2,
      show.legend = FALSE
    ) +
    geom_text(
      data = unseen_counts,
      aes(season, obstacle_name),
      color = "grey50",
      label = "0",
      size = 4,
      show.legend = FALSE
    ) +
    # Totals:
    geom_point(
      data = total_counts,
      aes(season, obstacle_name),
      color = "black",
      fill = NA,
      size = 6,
      shape = 21,
      stroke = 1,
      alpha = 0.3,
      show.legend = FALSE
    ) +
    geom_text(
      data = total_counts,
      aes(season, obstacle_name, label = n),
      color = "black",
      size = 3.5,
      show.legend = FALSE
    ) +
    # Label seasons 1-10 as themselves and the stand-in 11 as "Total":
    scale_x_continuous(breaks = 1:11, labels = c(paste(1:10), "Total")) +
    scale_color_viridis_c(option = "C", end = 0.75) +
    scale_fill_viridis_c(option = "C", end = 0.75) +
    labs(
      x = "Season",
      y = "Obstacle",
      title = expression(paste(bold("American Ninja Warrior"), " - Most common obstacles.")),
      subtitle = str_wrap("The numbers indicate how many times the obstacle appeared in each season of American Ninja Warrior.", 60),
      caption = expression(paste("Created by: ", italic("reddit.com/user/brianhaas19")))
    ) +
    theme(
      # NOTE(review): newer ggplot2 releases deprecate `size` for line
      # elements in favour of `linewidth`; `size` is kept here so the script
      # still runs on older versions - confirm the target ggplot2 version.
      panel.grid = element_line(linetype = "dashed", color = "grey80", size = 0.2),
      plot.title = element_text(hjust = 0),
      plot.subtitle = element_text(hjust = 0),
      plot.caption = element_text(color = "grey50", hjust = 1, vjust = -1)
    )
  115.  
  # Add a second caption (the data source) with `patchwork`, styled in the
  # same grey as the first caption, then display the finished plot:
  p <- p +
    plot_annotation(
      theme = theme(plot.caption = element_text(color = "grey50")),
      caption = expression(paste("Data source: ", italic("https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-12-15")))
    )
  p
  120.  
  121. # Uncomment to save to disk:
  122. #ggsave(filename = str_c(getwd(), "/ANW.png"), plot = p, width = 6, height = 8)
  123. ###################################################################################################
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement