binkleym

Excess Deaths in the United States

Aug 25th, 2020
208
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.80 KB | None | 0 0
  1. # Pull mortality data from the CDC and plot excess deaths - Mat Binkley
  2.  
  3. library(dplyr)
  4. library(tidyr)
  5. library(ggplot2)
  6. library(janitor)
  7. library(cowplot)
  8.  
  9. ### Data on finalized (2014-2018) and provisional (2019-2020) all-causes death counts
  10. finalized_ss   <- readr::read_csv("https://data.cdc.gov/api/views/3yf8-kanr/rows.csv?accessType=DOWNLOAD")
  11. provisional_ss <- readr::read_csv("https://data.cdc.gov/api/views/muzy-jte6/rows.csv?accessType=DOWNLOAD")
  12.  
  13. ### Take the data, clean the names up a bit, and pick the columns we want
  14. finalized <-
  15.   finalized_ss %>%
  16.   janitor::clean_names() %>%
  17.   select("jurisdiction_of_occurrence", "mmwr_year", "mmwr_week", "all_cause")
  18.  
  19. provisional <-
  20.   provisional_ss %>%
  21.   janitor::clean_names() %>%
  22.   select("jurisdiction_of_occurrence", "mmwr_year", "mmwr_week", "all_cause")
  23.  
  24. ### Take the finalized and provisional data, stack them together,
  25. ### and rename the rows to shorter names
  26. deaths <-
  27.   finalized %>%
  28.   bind_rows(provisional) %>%
  29.   rename(year = mmwr_year,
  30.          week = mmwr_week,
  31.          deaths = all_cause,
  32.          state = jurisdiction_of_occurrence) %>%
  33.   filter(state %in% c("United States")) %>%  
  34.   mutate(thisyear = (year == 2020)) %>%
  35.   group_by(state, year) %>%
  36.  
  37.   ### Only look at data from 2015 onward
  38.   filter(!(year == 2014 & week == 1)) %>%
  39.  
  40.   ### Also filter out the most recent two weeks worth of data as the provisional
  41.   ### data it is based on as:
  42.   ###
  43.   ### "Last available weeks (not just lastone) are incomplete, and therefore,
  44.   ###  the user need to be extremely cautious about it. The share of
  45.   ###  incompleteness changes slightly every week. "
  46.   ###
  47.   ### https://www.mortality.org/Public/STMF_DOC/STMFmetadata.pdf
  48.   filter(!(year == 2020 & week %in% seq(max(week) - 2, max(week))))
  49.  
  50.  
  51. ### Create a graph of weekly deaths by state
  52. g_weekly_deaths <-
  53.   ggplot(data = deaths, aes(x = week, y = deaths, group = year)) +
  54.   theme_bw() +
  55.   geom_line(aes(col = thisyear)) +
  56.   facet_wrap(~ state, scales = "free_y") +
  57.   scale_color_manual(values = c("FALSE" = "gray", "TRUE" = "red")) +
  58.   guides(col = FALSE) +
  59.   ggtitle("Weekly deaths") +
  60.   labs(x = "Week", y = "") +
  61.   geom_hline(yintercept = 0, col = "gray") +
  62.   scale_y_continuous(labels = scales::comma, limits = c(0, 80000))
  63. print(g_weekly_deaths)
  64.  
  65.  
  66. ### How current is the data?   Shows the lag in weeks
  67. deaths %>%
  68.   filter(year == 2020) %>%
  69.   group_by(state) %>%
  70.   summarise(last_week = max(week)) %>%
  71.   mutate(
  72.     current_week = lubridate::week(Sys.Date()),
  73.     lag = current_week - last_week
  74.   ) %>%
  75.   data.frame()
  76.  
  77. ### Estimate excess deaths
  78. recent_deaths <- deaths %>%
  79.   filter(year >= 2015 & year <= 2019) %>%
  80.   group_by(state, week) %>%
  81.   summarise(median_deaths = median(deaths)) %>%
  82.   ungroup()
  83. excess_deaths <- deaths %>%
  84.   filter(year >= 2015) %>%
  85.   left_join(recent_deaths) %>%
  86.   mutate(excess = deaths - median_deaths) %>%
  87.   mutate(thisyear = (year == 2020))
  88.  
  89. ### Create a graph of weekly excess deaths by state
  90. g_excess_deaths <-
  91.   ggplot(data = excess_deaths, aes(x = week, y = excess, group = year)) +
  92.   theme_bw() +
  93.   geom_hline(yintercept = 0, col = "gray") +
  94.   geom_line(aes(col = thisyear)) +
  95.   facet_wrap(~ state, scales = "free_y") +
  96.   scale_color_manual(values = c("FALSE" = "gray", "TRUE" = "red")) +
  97.   guides(col = FALSE) +
  98.   ggtitle("Excess deaths") +
  99.   labs(x = "Week", y = "") +
  100.   scale_y_continuous(labels = scales::comma)
  101. print(g_excess_deaths)
  102.  
  103. ### Summarize excess deaths
  104. excess_deaths %>%
  105.   filter(year == 2020) %>%
  106.   group_by(state) %>%
  107.   summarise(
  108.     excess = sum(excess),
  109.     last_week = max(week),
  110.     as_at = as.Date("2020-01-01") + 7 * (last_week - 1)
  111.   ) %>%
  112.   select(state, excess, as_at) %>%
  113.   data.frame()
  114.  
  115. plot_grid(g_weekly_deaths,
  116.           g_excess_deaths,
  117.           nrow = 1, ncol = 2, align = "hv")
Add Comment
Please, Sign In to add comment