brianhaas19

Washington Trails

Nov 28th, 2020
1,033
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ###################################################################################################
  2. # Washington Trails - Length and elevation gain of 1,958 hiking trails in Washington State
  3. # Created by: reddit.com/user/brianhaas19
  4.  
  5. ### Link to data
  6. # https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-11-24
  7.  
  8. ### Setup
  9. library(tidyverse)
  10. library(tidytuesdayR)
  11. library(scales)
  12. library(gganimate)
  13. theme_set(theme_bw())
  14.  
  15. ### Load data
  16. tt <- tt_load("2020-11-24")
  17. hike <- tt$hike_data
  18.  
  19. ### Clean data
  20. # The `length` variable needs to be separated into 2 variables; one numerical variable for the length in miles and
  21. # one categorical variable for type of trip. The `gain`, `highpoint` and `rating` variables simply need to be converted
  22. # to numeric variables. The post includes a script for cleaning the data:
  23. hike <- hike %>%
  24.   mutate(
  25.     trip = case_when(
  26.       grepl("roundtrip",length) ~ "roundtrip",
  27.       grepl("one-way",length) ~ "one-way",
  28.       grepl("of trails",length) ~ "trails"),
  29.    
  30.     length_total = as.numeric(gsub("(\\d+[.]\\d+).*","\\1", length)) * ((trip == "one-way") + 1),
  31.    
  32.     gain = as.numeric(gain),
  33.     highpoint = as.numeric(highpoint),
  34.     rating = as.numeric(rating),
  35.    
  36.     location_general = gsub("(.*)\\s[-][-].*","\\1",location)
  37.   )
  38.  
  39. ### Wrangle data
  40. # The hikes will be plotted as triangles using `geom_poly`. The triangles will be have the origin (0, 0) as one point,
  41. # (`length_total`, 0) as the second point, and (`length_total`, `gain`) as the third point. The data be wrangled into
  42. # the appropriate format using `pivot_longer()`. In addition to one triangle to represent each hike, there will be a
  43. # triangle representing the average hike. The data will be facetted by `location_general` and the average hike will be
  44. # added to each facet. Using `free` scales looks the best but makes comparing across facets hard. The average hike being
  45. # included on every facet will help with this.
  46. hike_points <- hike %>%
  47.   select(location_general, length_total, gain, highpoint) %>%
  48.   mutate(
  49.     id = row_number(),
  50.     x1 = 0,
  51.     y1 = 0,
  52.     x2 = length_total,
  53.     y2 = 0,
  54.     x3 = length_total,
  55.     y3 = gain
  56.   ) %>%
  57.   select(id, location_general, x1:y3)
  58.  
  59. # Pivot the table to long format once each for the x and y coordinates:
  60. hike_x <- hike_points %>%
  61.   pivot_longer(cols = c(x1, x2, x3), names_to = "x.position", values_to = "x") %>%
  62.   select(id, location_general, x)
  63. hike_y <- hike_points %>%
  64.   pivot_longer(cols = c(y1, y2, y3), names_to = "y.position", values_to = "y") %>%
  65.   select(id, y)
  66.  
  67. # Combine into one table of x-y coordinates:
  68. hike_xy <- cbind(hike_x, select(hike_y, -id))
  69.  
  70. # Get the average for all of the data:
  71. average_peak <- hike %>%
  72.   summarise(mean_length_total = mean(length_total),
  73.             mean_gain = mean(gain))
  74. average_peak_points <- data.frame(
  75.   id = c('1', '1', '1'),
  76.   x = c(0, average_peak$mean_length_total, average_peak$mean_length_total),
  77.   y = c(0, 0, average_peak$mean_gain)
  78. )
  79.  
  80. ### Visualize
  81. # Colors (taken from the WTA website):
  82. green <- "#4A7628"
  83. grey <- "#231F20"
  84. lightgrey <- "#F5F5F5"
  85.  
  86. # Plot
  87. # Recommended parameters for R notebook: fig.height=9, fig.width=12
  88. p <- ggplot(hike_xy, aes(x = x, y = y)) +
  89.   geom_polygon(aes(group = id), color = green, fill = green, alpha = 0.1, size = 0.1) +
  90.   geom_polygon(data = average_peak_points, aes(x, y, group = id), fill = grey, alpha = 0.4) +
  91.   scale_y_continuous(labels = comma) +
  92.   facet_wrap(~location_general, scales = "free") +
  93.   labs(x = "Hike Length (miles)", y = "Hike Elevation Gain (ft)",
  94.        title = expression(paste(bold("Washington Trails"), " - Length and elevation gain of 1,958 hiking trails in Washington State")),
  95.        subtitle = "The grey shaded area shows what the overall average hike looks like compared to the hikes in that region.",
  96.        caption = expression(paste("Created by: ", italic("reddit.com/user/brianhaas19"), "\tData source: ", italic("https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-11-24")))) +
  97.   theme(panel.grid = element_blank(),
  98.         plot.background = element_rect(fill = lightgrey),
  99.         strip.background = element_rect(fill = grey),
  100.         strip.text = element_text(color = "white"),
  101.         plot.caption = element_text(hjust = 0))
  102. p
  103.  
  104. # Uncomment this line to save to disk:
  105. ggsave(str_c(getwd(), "/washington_hiking.png"),
  106.        plot = p, height = 9, width = 12)
  107. ###################################################################################################
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×