Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ###################################################################################################
- # Great American Beer Festival - Cumulative distribution of gold medal winning breweries from 1987 to 2020
- # Created by: reddit.com/user/brianhaas19
- # Link to data
- # https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-10-20/readme.md
- # Setup
- library(tidyverse)
- library(tidytuesdayR)
- library(scales)
- library(gganimate)
- library(transformr)
- theme_set(theme_bw())
- # Load data
- ### Load clean data set of GABF gold medal winners from CSV:
- gold_medal_map <- read_csv("https://pastebin.com/raw/TLpKkMnG")
- # Load additional data
- ### State ID's:
- state_ids <- structure(list(state = c("alabama", "alaska", "arizona", "arkansas",
- "california", "colorado", "connecticut", "delaware", "district of columbia",
- "florida", "georgia", "hawaii", "idaho", "illinois", "indiana",
- "iowa", "kansas", "kentucky", "louisiana", "maine", "maryland",
- "massachusetts", "michigan", "minnesota", "mississippi", "missouri",
- "montana", "nebraska", "nevada", "new hampshire", "new jersey",
- "new mexico", "new york", "north carolina", "north dakota", "ohio",
- "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina",
- "south dakota", "tennessee", "texas", "utah", "vermont", "virginia",
- "washington", "west virginia", "wisconsin", "wyoming"), id = c("AL",
- "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL", "GA", "HI",
- "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI",
- "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC",
- "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT",
- "VT", "VA", "WA", "WV", "WI", "WY")), class = c("spec_tbl_df",
- "tbl_df", "tbl", "data.frame"), row.names = c(NA, -51L), spec = structure(list(
- cols = list(state = structure(list(), class = c("collector_character",
- "collector")), id = structure(list(), class = c("collector_character",
- "collector"))), default = structure(list(), class = c("collector_guess",
- "collector")), skip = 1L), class = "col_spec"))
- ### Get map data from the `maps` package:
- states <- map_data("state")
- names(states) <- names(states) %>% str_replace("region", "state")
- # Colors and Themes
- colors <- c("GABF_blue" = "#005F85",
- "GABF_gold" = "#C69F32")
- map_theme <- theme(
- axis.text = element_blank(),
- axis.line = element_blank(),
- axis.ticks = element_blank(),
- panel.border = element_blank(),
- panel.grid = element_blank(),
- axis.title = element_blank(),
- plot.title = element_text(color = colors["GABF_blue"]),
- plot.subtitle = element_text(color = colors["GABF_blue"], size = 18),
- plot.caption = element_text(color = colors["GABF_blue"], hjust = 0)
- )
- # Wrangle data
- ### Create a data frame showing the cumulative number of gold medals won over time:
- cum_medals <- gold_medal_map %>%
- group_by(state_id, year) %>%
- mutate(sum_medal = sum(n)) %>%
- distinct(year, state_id, sum_medal) %>%
- ungroup() %>%
- group_by(state_id) %>%
- mutate(cum_medal = cumsum(sum_medal)) %>%
- select(year, state_id, cum_medal)
- ### Build a separate data frame with one row for each year from 1987 to 2020 and state.
- ### Even though not every state won a medal every year they will need to have a row in this table.
- ### Then fill in the missing years with the most recent total:
- ### Build the data frame skeleton:
- cum_medals_df <- expand_grid(
- year = 1987:2020,
- state_id = filter(state_ids, !(id %in% c("AK", "HI", "DC")))$id
- ) %>%
- arrange(state_id, year) # this step is crucial so that dplyr::fill() works later
- ### Add the cumulative medal totals:
- cum_medals_df <- cum_medals_df %>%
- left_join(cum_medals, by = c("year", "state_id"))
- ### Enter a zero for any states who did not win a medal in 1987 so that we can use `fill(direction = "down")`:
- cum_medals_df[(cum_medals_df$year == 1987) & is.na(cum_medals_df$cum_medal), "cum_medal"] <- 0
- ### Fill the missing values with the most recent total:
- cum_medals_df <- cum_medals_df %>%
- fill(cum_medal)
- cum_medals_df
- ### Now that the table has been filled go back and replace all zeroes with NA so that they appear as gray on the map.
- ### This has to be done after using `fill()`, otherwise the filling process doesn't work.
- ### For example it starts grabbing values from other states and years to fill the `NA`s for 1987.
- cum_medals_df$cum_medal[cum_medals_df$cum_medal == 0] <- NA
- ### Finally, build a data frame of all years and cumulative medal totals combined with the map data:
- ### (large data frame, ~500k rows)
- ### Ref: https://stackoverflow.com/a/28506694
- repeat_data_frame <- function(d, n) {
- if ("data.table" %in% class(d)) return(d[rep(seq_len(nrow(d)), n)])
- return(d[rep(seq_len(nrow(d)), n), ])
- }
- states_repeat <- repeat_data_frame(states, 34)
- years <- rep(1987:2020, each = nrow(states))
- states_repeat$year <- years
- states_cum_medals <- states_repeat %>%
- left_join(state_ids, by = "state") %>%
- rename(state_id = id) %>%
- left_join(cum_medals_df, by = c("year", "state_id"))
- states_cum_medals
- # Visualize
- ### Static plot with all medals and states filled to 2020 levels (cumulative totals):
- ### Recommended parameters for R notebook: fig.height=6, fig.width=12
- g <- ggplot(data = filter(states_cum_medals, year == 2020)) +
- geom_polygon(aes(x = long, y = lat, group = group, fill = cum_medal),
- size = 0.2,
- color = colors["GABF_blue"]) +
- geom_jitter(data = gold_medal_map,
- aes(long, lat, size = n),
- color = colors["GABF_gold"],
- alpha = 0.3,
- show.legend = FALSE) +
- scale_size_continuous(range = c(3, 8)) +
- scale_fill_gradient("Total", breaks = seq(50, 250, by = 50),
- low = "white", high = colors["GABF_blue"], limits = c(0, 282)) +
- guides(fill = guide_colourbar(barheight = unit(3.5, "in"), # https://github.com/tidyverse/ggplot2/pull/2541
- ticks.colour = "black", frame.colour = "black")) +
- labs(title = expression(paste(bold("Great American Beer Festival"), " - Cumulative distribution of gold medal winning breweries from 1987 to 2020")),
- caption = expression(paste("Created by: ", italic("reddit.com/u/brianhaas19"), "\tData source: ", italic("https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-10-20/readme.md")))) +
- coord_quickmap() +
- map_theme
- g
- ### ggsave(str_c(getwd(), "/GABF_cum_total.png"), plot = g, width = 12, height = 6) # uncomment to save to disk
- ### Animated plot with annual medals awarede and cumulative medal totals:
- num_years <- n_distinct(gold_medal_map$year)
- p <- ggplot() +
- geom_polygon(data = states_cum_medals,
- aes(x = long, y = lat, group = group, fill = cum_medal),
- size = 0.2,
- color = colors["GABF_blue"]) +
- geom_point(data = gold_medal_map,
- aes(long, lat, size = n),
- color = colors["GABF_gold"],
- alpha = 0.7,
- show.legend = FALSE) +
- ggrepel::geom_label_repel(data = filter(gold_medal_map, n > 1),
- label.size = 0,
- fill = alpha(c("white"), 0.6),
- aes(long, lat, label = label),
- color = colors["GABF_blue"],
- seed = 1) +
- scale_size_continuous(range = c(3, 8)) +
- scale_fill_gradient("Total", breaks = seq(50, 250, by = 50),
- low = "white", high = colors["GABF_blue"], limits = c(0, 282)) +
- guides(fill = guide_colourbar(barheight = unit(3.5, "in"), # https://github.com/tidyverse/ggplot2/pull/2541
- ticks.colour = "black", frame.colour = "black")) +
- coord_quickmap() +
- map_theme +
- labs(title = expression(paste(bold("Great American Beer Festival"), " - Cumulative distribution of gold medal winning breweries from 1987 to 2020")),
- subtitle = "Year: {next_state}",
- caption = expression(paste("Created by: ", italic("reddit.com/user/brianhaas19"), "\tData source: ", italic("https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-10-20/readme.md")))) +
- transition_states(year, transition_length = 1, state_length = 1)
- animate(p,
- nframes = num_years * 2, duration = num_years*2,
- height = 6, width = 12, units = "in", res = 150)
- #anim_save(str_c(getwd(), "/GABF_cum_label.gif"), plot = p) # uncomment to save to disk
- ###################################################################################################
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement