brianhaas19

Washington Trails

Nov 28th, 2020
841
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ###################################################################################################
  2. # Washington Trails - Length and elevation gain of 1,958 hiking trails in Washington State
  3. # Created by: reddit.com/user/brianhaas19
  4.  
  5. ### Link to data
  6. # https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-11-24
  7.  
  8. ### Setup
  9. library(tidyverse)
  10. library(tidytuesdayR)
  11. library(scales)
  12. library(gganimate)
  13. theme_set(theme_bw())
  14.  
  15. ### Load data
  16. tt <- tt_load("2020-11-24")
  17. hike <- tt$hike_data
  18.  
  19. ### Clean data
  20. # The `length` variable needs to be separated into 2 variables; one numerical variable for the length in miles and
  21. # one categorical variable for type of trip. The `gain`, `highpoint` and `rating` variables simply need to be converted
  22. # to numeric variables. The post includes a script for cleaning the data:
  23. hike <- hike %>%
  24.   mutate(
  25.     trip = case_when(
  26.       grepl("roundtrip",length) ~ "roundtrip",
  27.       grepl("one-way",length) ~ "one-way",
  28.       grepl("of trails",length) ~ "trails"),
  29.    
  30.     length_total = as.numeric(gsub("(\\d+[.]\\d+).*","\\1", length)) * ((trip == "one-way") + 1),
  31.    
  32.     gain = as.numeric(gain),
  33.     highpoint = as.numeric(highpoint),
  34.     rating = as.numeric(rating),
  35.    
  36.     location_general = gsub("(.*)\\s[-][-].*","\\1",location)
  37.   )
  38.  
  39. ### Wrangle data
  40. # The hikes will be plotted as triangles using `geom_poly`. The triangles will be have the origin (0, 0) as one point,
  41. # (`length_total`, 0) as the second point, and (`length_total`, `gain`) as the third point. The data be wrangled into
  42. # the appropriate format using `pivot_longer()`. In addition to one triangle to represent each hike, there will be a
  43. # triangle representing the average hike. The data will be facetted by `location_general` and the average hike will be
  44. # added to each facet. Using `free` scales looks the best but makes comparing across facets hard. The average hike being
  45. # included on every facet will help with this.
  46. hike_points <- hike %>%
  47.   select(location_general, length_total, gain, highpoint) %>%
  48.   mutate(
  49.     id = row_number(),
  50.     x1 = 0,
  51.     y1 = 0,
  52.     x2 = length_total,
  53.     y2 = 0,
  54.     x3 = length_total,
  55.     y3 = gain
  56.   ) %>%
  57.   select(id, location_general, x1:y3)
  58.  
  59. # Pivot the table to long format once each for the x and y coordinates:
  60. hike_x <- hike_points %>%
  61.   pivot_longer(cols = c(x1, x2, x3), names_to = "x.position", values_to = "x") %>%
  62.   select(id, location_general, x)
  63. hike_y <- hike_points %>%
  64.   pivot_longer(cols = c(y1, y2, y3), names_to = "y.position", values_to = "y") %>%
  65.   select(id, y)
  66.  
  67. # Combine into one table of x-y coordinates:
  68. hike_xy <- cbind(hike_x, select(hike_y, -id))
  69.  
  70. # Get the average for all of the data:
  71. average_peak <- hike %>%
  72.   summarise(mean_length_total = mean(length_total),
  73.             mean_gain = mean(gain))
  74. average_peak_points <- data.frame(
  75.   id = c('1', '1', '1'),
  76.   x = c(0, average_peak$mean_length_total, average_peak$mean_length_total),
  77.   y = c(0, 0, average_peak$mean_gain)
  78. )
  79.  
  80. ### Visualize
  81. # Colors (taken from the WTA website):
  82. green <- "#4A7628"
  83. grey <- "#231F20"
  84. lightgrey <- "#F5F5F5"
  85.  
  86. # Plot
  87. # Recommended parameters for R notebook: fig.height=9, fig.width=12
  88. p <- ggplot(hike_xy, aes(x = x, y = y)) +
  89.   geom_polygon(aes(group = id), color = green, fill = green, alpha = 0.1, size = 0.1) +
  90.   geom_polygon(data = average_peak_points, aes(x, y, group = id), fill = grey, alpha = 0.4) +
  91.   scale_y_continuous(labels = comma) +
  92.   facet_wrap(~location_general, scales = "free") +
  93.   labs(x = "Hike Length (miles)", y = "Hike Elevation Gain (ft)",
  94.        title = expression(paste(bold("Washington Trails"), " - Length and elevation gain of 1,958 hiking trails in Washington State")),
  95.        subtitle = "The grey shaded area shows what the overall average hike looks like compared to the hikes in that region.",
  96.        caption = expression(paste("Created by: ", italic("reddit.com/user/brianhaas19"), "\tData source: ", italic("https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-11-24")))) +
  97.   theme(panel.grid = element_blank(),
  98.         plot.background = element_rect(fill = lightgrey),
  99.         strip.background = element_rect(fill = grey),
  100.         strip.text = element_text(color = "white"),
  101.         plot.caption = element_text(hjust = 0))
  102. p
  103.  
  104. # Uncomment this line to save to disk:
  105. ggsave(str_c(getwd(), "/washington_hiking.png"),
  106.        plot = p, height = 9, width = 12)
  107. ###################################################################################################
RAW Paste Data