Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ###################################################################################################
- # Washington Trails - Length and elevation gain of 1,958 hiking trails in Washington State
- # Created by: reddit.com/user/brianhaas19
- ### Link to data
- # https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-11-24
- ### Setup
- library(tidyverse)
- library(tidytuesdayR)
- library(scales)
- library(gganimate)
# Make the black-and-white theme the default for every ggplot in this script.
ggplot2::theme_set(ggplot2::theme_bw())

### Load data
# Load the TidyTuesday data set dated 2020-11-24 and keep its `hike_data`
# table as the working data frame.
tt <- tidytuesdayR::tt_load("2020-11-24")
hike <- tt$hike_data
### Clean data
# `length` is a text column that holds both a distance and a trip type, so it
# is split into a categorical `trip` and a numeric `length_total` (miles).
# `gain`, `highpoint` and `rating` only need converting to numeric.
hike <- hike %>%
  mutate(
    # Trip type is whichever keyword occurs in `length`; rows matching none of
    # the three keywords get NA (and therefore an NA `length_total` below).
    trip = case_when(
      grepl("roundtrip", length) ~ "roundtrip",
      grepl("one-way", length) ~ "one-way",
      grepl("of trails", length) ~ "trails"
    ),
    # parse_number() pulls the first number out of the text. Unlike a pattern
    # that insists on a decimal point, it also handles whole-number distances
    # ("5 miles") instead of turning them into NA.
    # One-way distances are doubled; every other trip type is left as is.
    length_total = parse_number(length) * if_else(trip == "one-way", 2, 1),
    gain = as.numeric(gain),
    highpoint = as.numeric(highpoint),
    rating = as.numeric(rating),
    # Keep only the part of `location` that precedes the " --" separator.
    location_general = gsub("(.*)\\s[-][-].*", "\\1", location)
  )
### Wrangle data
# Every hike is drawn as a right triangle with `geom_polygon()`. Its three
# vertices are the origin (0, 0), (`length_total`, 0) on the x axis, and the
# peak (`length_total`, `gain`). The vertices are stored wide here (x1..y3) and
# reshaped to long format afterwards with `pivot_longer()`.
#
# The plot is facetted by `location_general` with free scales, which looks best
# but makes facets hard to compare; a triangle for the overall average hike is
# therefore added to every facet as a common reference.
hike_points <- hike %>%
  transmute(
    id = row_number(),       # one id per hike, used to group its vertices
    location_general,
    x1 = 0,
    y1 = 0,
    x2 = length_total,
    y2 = 0,
    x3 = length_total,
    y3 = gain
  )
# Reshape to long format, one row per triangle vertex. The x and y coordinates
# are pivoted separately because they come from two different sets of columns.
hike_x <- hike_points %>%
  pivot_longer(
    cols = starts_with("x"),
    names_to = "x.position",
    values_to = "x"
  ) %>%
  select(id, location_general, x)

hike_y <- hike_points %>%
  pivot_longer(
    cols = starts_with("y"),
    names_to = "y.position",
    values_to = "y"
  ) %>%
  select(id, y)

# Both tables come from the same `hike_points` rows and list the vertices in
# the same order, so the y column is attached to the x table by position.
hike_xy <- cbind(hike_x, hike_y["y"])
# Average length and elevation gain over the whole data set.
# `na.rm = TRUE` keeps a single missing length or gain from turning the
# average into NA, which would silently drop the reference triangle.
average_peak <- hike %>%
  summarise(
    mean_length_total = mean(length_total, na.rm = TRUE),
    mean_gain = mean(gain, na.rm = TRUE)
  )

# Vertices of the "average hike" triangle, laid out like a single hike:
# origin, (mean length, 0), (mean length, mean gain). All three rows share one
# id so they are drawn as a single polygon.
average_peak_points <- data.frame(
  id = rep("1", 3),
  x = c(0, average_peak$mean_length_total, average_peak$mean_length_total),
  y = c(0, 0, average_peak$mean_gain)
)
### Visualize
# Palette (colors taken from the WTA website):
green <- "#4A7628"
grey <- "#231F20"
lightgrey <- "#F5F5F5"

# Plot
# Recommended parameters for R notebook: fig.height=9, fig.width=12
p <- ggplot(data = hike_xy, mapping = aes(x = x, y = y)) +
  # One faint green triangle per hike.
  geom_polygon(
    mapping = aes(group = id),
    colour = green,
    fill = green,
    alpha = 0.1,
    size = 0.1
  ) +
  # The overall average hike, repeated in every facet as a reference.
  geom_polygon(
    data = average_peak_points,
    mapping = aes(x = x, y = y, group = id),
    fill = grey,
    alpha = 0.4
  ) +
  scale_y_continuous(labels = scales::comma) +
  facet_wrap(vars(location_general), scales = "free") +
  xlab("Hike Length (miles)") +
  ylab("Hike Elevation Gain (ft)") +
  ggtitle(
    label = expression(paste(bold("Washington Trails"), " - Length and elevation gain of 1,958 hiking trails in Washington State")),
    subtitle = "The grey shaded area shows what the overall average hike looks like compared to the hikes in that region."
  ) +
  labs(
    caption = expression(paste("Created by: ", italic("reddit.com/user/brianhaas19"), "\tData source: ", italic("https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-11-24")))
  ) +
  theme(
    plot.background = element_rect(fill = lightgrey),
    plot.caption = element_text(hjust = 0),  # left-align the caption
    panel.grid = element_blank(),
    strip.background = element_rect(fill = grey),
    strip.text = element_text(color = "white")
  )
p
# Save the plot as a PNG in the current working directory, using the same
# 12 x 9 size recommended for the notebook figure.
# Comment this call out to skip writing the file.
ggsave(
  file.path(getwd(), "washington_hiking.png"),
  plot = p,
  height = 9,
  width = 12
)
- ###################################################################################################
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement