MetricT

Excess deaths by state in US

Jul 12th, 2020
2,217
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Largely based on technique from:  https://robjhyndman.com/hyndsight/excess-deaths/
  2.  
  3. library(dplyr)
  4. library(tidyr)
  5. library(ggplot2)
  6. library(janitor)
  7. library(cowplot)
  8.  
  9. ### Data on finalized (2014-2018) and provisional (2019-2020) all-causes death counts
  10. finalized_ss   <- readr::read_csv("https://data.cdc.gov/api/views/3yf8-kanr/rows.csv?accessType=DOWNLOAD")
  11. provisional_ss <- readr::read_csv("https://data.cdc.gov/api/views/muzy-jte6/rows.csv?accessType=DOWNLOAD")
  12.  
  13. ### Take the data, clean the names up a bit, and pick the colums we want to use
  14. finalized <-
  15.   finalized_ss %>%
  16.   janitor::clean_names() %>%
  17.   select("jurisdiction_of_occurrence", "mmwr_year", "mmwr_week", "all_cause")
  18.  
  19. provisional <-
  20.   provisional_ss %>%
  21.   janitor::clean_names() %>%
  22.   select("jurisdiction_of_occurrence", "mmwr_year", "mmwr_week", "all_cause")
  23.  
  24. ### Take the finalized and provisional data, stack them together,
  25. ### and rename the rows to shorter names
  26. deaths <-
  27.   finalized %>%
  28.   bind_rows(provisional) %>%
  29.   rename(year = mmwr_year,
  30.          week = mmwr_week,
  31.          deaths = all_cause,
  32.          state = jurisdiction_of_occurrence) %>%
  33.  
  34.   ### Comment out the line below to get graphs for all 50 states
  35.   filter(state %in% c("Tennessee", "United States")) %>%
  36.  
  37.   mutate(thisyear = (year == 2020)) %>%
  38.   group_by(state, year) %>%
  39.  
  40.   ### Filter out the first week in 2014, as it appears to be a huge outlier
  41.   filter(!(year == 2014 & week == 1)) %>%
  42.  
  43.   ### Also filter out the most recent two weeks worth of data as the provisional
  44.   ### data it is based on as:
  45.   ###
  46.   ### "Last available weeks (not just lastone) are incomplete, and therefore,
  47.   ###  the user need to be extremely cautious about it. The share of
  48.   ###  incompleteness changes slightly every week. "
  49.   ###
  50.   ### https://www.mortality.org/Public/STMF_DOC/STMFmetadata.pdf
  51.   filter(!(year == 2020 & week == max(week))) %>%
  52.   filter(!(year == 2020 & week == max(week)))
  53.  
  54. ### Create a graph of weekly deaths by state
  55. g_weekly_deaths <-
  56.   ggplot(data = deaths, aes(x = week, y = deaths, group = year)) +
  57.   theme_bw() +
  58.   geom_line(aes(col = thisyear)) +
  59.   facet_wrap(~ state, scales = "free_y") +
  60.   scale_color_manual(values = c("FALSE" = "gray", "TRUE" = "red")) +
  61.   guides(col = FALSE) +
  62.   ggtitle("Weekly deaths") +
  63.   labs(x = "Week", y = "") +
  64.   scale_y_continuous(labels = scales::comma)
  65. print(g_weekly_deaths)
  66.  
  67. ### How current is the data?   Shows the lag in weeks
  68. deaths %>%
  69.   filter(year == 2020) %>%
  70.   group_by(state) %>%
  71.   summarise(last_week = max(week)) %>%
  72.   mutate(
  73.     current_week = lubridate::week(Sys.Date()),
  74.     lag = current_week - last_week
  75.   ) %>%
  76.   data.frame()
  77.  
  78. ### Estimate excess deaths
  79. recent_deaths <- deaths %>%
  80.   filter(year >= 2015 & year <= 2019) %>%
  81.   group_by(state, week) %>%
  82.   summarise(median_deaths = median(deaths)) %>%
  83.   ungroup()
  84. excess_deaths <- deaths %>%
  85.   filter(year >= 2015) %>%
  86.   left_join(recent_deaths) %>%
  87.   mutate(excess = deaths - median_deaths) %>%
  88.   mutate(thisyear = (year == 2020))
  89.  
  90. ### Create a graph of weekly excess deaths by state
  91. g_excess_deaths <-
  92.   ggplot(data = excess_deaths, aes(x = week, y = excess, group = year)) +
  93.   theme_bw() +
  94.   geom_hline(yintercept = 0, col = "gray") +
  95.   geom_line(aes(col = thisyear)) +
  96.   facet_wrap(~ state, scales = "free_y") +
  97.   scale_color_manual(values = c("FALSE" = "gray", "TRUE" = "red")) +
  98.   guides(col = FALSE) +
  99.   ggtitle("Excess deaths") +
  100.   labs(x = "Week", y = "") +
  101.   scale_y_continuous(labels = scales::comma)
  102. print(g_excess_deaths)
  103.  
  104. ### Summarize excess deaths
  105. excess_deaths %>%
  106.   filter(year == 2020) %>%
  107.   group_by(state) %>%
  108.   summarise(
  109.     excess = sum(excess),
  110.     last_week = max(week),
  111.     as_at = as.Date("2020-01-01") + 7 * (last_week - 1)
  112.   ) %>%
  113.   select(state, excess, as_at) %>%
  114.   data.frame()
  115.  
  116. plot_grid(g_weekly_deaths,
  117.           g_excess_deaths,
  118.           nrow = 2, ncol = 1, align = "hv")
RAW Paste Data