Advertisement
MetricT

TN Excess deaths vs Official COVID deaths

Sep 4th, 2021
5,735
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ################################################################################
  2. ### Pull mortality data from the CDC and graph excess death - /u/MetricT
  3. ################################################################################
  4.  
  5. library(tidyverse)
  6. library(janitor)
  7. library(cowplot)
  8. library(lubridate)
  9. library(tsibble)
  10. library(geofacet)
  11. library(cowplot)
  12. library(zoo)
  13. library(forecast)
  14. library(broom)
  15.  
  16. # Load data on finalized (2014-2018) and provisional (2019-2020) mortality
  17. finalized_ss   <- read_csv("https://data.cdc.gov/api/views/3yf8-kanr/rows.csv?accessType=DOWNLOAD")
  18. provisional_ss <- read_csv("https://data.cdc.gov/api/views/muzy-jte6/rows.csv?accessType=DOWNLOAD")
  19.  
  20.  
  21. # Take the data and clean it up a bit
  22. finalized   <- finalized_ss   %>% clean_names() %>% select("jurisdiction_of_occurrence", "mmwr_year", "mmwr_week", "all_cause")
  23. provisional <- provisional_ss %>% clean_names() %>% select("jurisdiction_of_occurrence", "mmwr_year", "mmwr_week", "all_cause")
  24.  
  25.  
  26. # Combine both datasets and filter the subset of states we want to look at
  27. deaths <-
  28.   finalized %>%
  29.   bind_rows(provisional) %>%
  30.   rename(year = mmwr_year,
  31.          week = mmwr_week,
  32.          deaths = all_cause,
  33.          state = jurisdiction_of_occurrence) %>%
  34.  
  35.   filter(state %in% c("Tennessee")) %>%
  36.  
  37.   # Quick hack to remove "week 53" in 2014 because yearweek() can't handle it
  38.   # This wouldn't be necessary if the spreadsheet included dates, it's an aliasing problem
  39.   filter(week != 53) %>%
  40.  
  41.   # Turn the year and week into a yearweek() datatype
  42.   mutate(yearweek = yearweek(date_decimal(year + (week - 1) / 52))) %>%
  43.   select(yearweek, deaths) %>%
  44.   arrange(yearweek) %>%
  45.  
  46.   # Remove the last week's worth of data since TN lags in sending mortality data to CDC
  47.   filter(row_number() <= n() - 1)
  48.  
  49. # Use a feasts STL() model to seasonally-adjust the deaths data
  50. deaths_mod <-
  51.   deaths %>%
  52.   as_tsibble(index = "yearweek") %>%
  53.   fill_gaps() %>%
  54.   mutate_if(is.numeric, ~ na.locf(.)) %>%
  55.   model(STL(deaths ~ trend() + season(window = "periodic"))) %>%
  56.   components()
  57.  
  58.  
  59. # Add the STL data values to our original death tibble
  60. deaths <-
  61.   deaths %>%
  62.   left_join(deaths_mod %>% as_tibble() %>% select(-.model, -deaths), by = "yearweek") %>%
  63.  
  64.   # Ditch the "yearweek" at this point by converting it into a regular date.
  65.   # "yearweek" was necessary for the tsibble model, but it's a weird way of
  66.   # tracking date that causes weird glitches.  For example, week("2014 W01") = 52.
  67.   mutate(date = as.Date(yearweek)) %>%
  68.  
  69.   # Add a decimal date to make linear regression in the next step easier
  70.   mutate(decimal_date = decimal_date(date))
  71.  
  72.  
  73. # Compute a linear regression using data from 2014-2019 (before COVID hit) so
  74. # we can subtract off deaths due to population/population growth.
  75. lin_reg <-
  76.   deaths %>%
  77.   filter(date < as.Date("2020-01-01")) %>%
  78.   lm(season_adjust ~ decimal_date, data = .) %>%
  79.   tidy() %>%
  80.   pull("estimate")
  81.  
  82.  
  83. # Add a "growth" term based on that regression, so we can subtract growth off and get excess deaths
  84. deaths <-
  85.   deaths %>%
  86.   mutate(growth = lin_reg[1] + lin_reg[2] * decimal_date) %>%
  87.   mutate(excess_deaths = season_adjust - growth) %>%
  88.   select(yearweek, date, decimal_date, deaths, trend, season_year, remainder, season_adjust, growth, excess_deaths) %>%
  89.   pivot_longer(-c(yearweek, date, decimal_date), names_to = "series", values_to = "value")
  90.  
  91.  
  92. ### Fetch total/new official deaths from TN DoH so we can graph against excess deaths
  93.  
  94. read_excel_url <- function(url, ...) {
  95.   tf <- tempfile(fileext = ".xlsx")
  96.   curl::curl_download(url, tf)
  97.   return(readxl::read_excel(tf, ...))
  98. }
  99.  
  100. official_deaths <-
  101.   "https://www.tn.gov/content/dam/tn/health/documents/cedep/novel-coronavirus/datasets/Public-Dataset-Daily-Case-Info.XLSX" %>%
  102.   read_excel_url(col_types = NULL) %>%
  103.   select(DATE, NEW_DEATHS, TOTAL_DEATHS) %>%
  104.   rename(date         = DATE,
  105.          new_deaths   = NEW_DEATHS,
  106.          total_deaths = TOTAL_DEATHS) %>%
  107.   mutate(date = as.Date(date),
  108.          yearweek = yearweek(date)) %>%
  109.   select(yearweek, new_deaths, total_deaths) %>%
  110.   group_by(yearweek) %>%
  111.   summarize(new_deaths = sum(new_deaths),
  112.             total_deaths = max(total_deaths)) %>%
  113.   ungroup() %>%
  114.   mutate(date = as.Date(yearweek)) %>%
  115.   select(yearweek, date, new_deaths, total_deaths)
  116.  
  117.  
  118. # Excess deaths since COVID arrived
  119. g_excess_deaths_covid_era <-
  120.   deaths %>%
  121.   filter(series %in% c("excess_deaths")) %>%
  122.   filter(date >= as.Date("2020-01-01"),
  123.          date <  as.Date("2021-07-01")) %>%
  124.   ggplot() +
  125.   theme_bw() +
  126.   geom_line(aes(x = date, y = value)) +
  127.   geom_line(data = official_deaths, aes(x = date, y = new_deaths), color = "darkblue", linetype = "dashed") +  
  128.   geom_hline(yintercept = 0) + #, linetype = "dashed") +
  129.   geom_vline(xintercept = as.Date("2020-03-20"), color = "darkred", linetype = "dotted") +
  130.   annotate("text", size = 4, label = "1st Official TN COVID Death - 2020-03-20", x = as.Date("2020-03-12"), y = 500, angle = "90", color = "darkred") +
  131.   scale_x_date(breaks = pretty_breaks(10)) +
  132.   scale_y_continuous(breaks = pretty_breaks(10), labels = scales::comma) +
  133.   labs(x = "", y = "New Excess Deaths per Week",
  134.        title = "Official Weekly COVID deaths (blue) vs Weekly Excess Deaths in Tennessee, Jan 2020 - Jun 2021 (black)",
  135.        #caption = "Mortality data to compute Excess Deaths from CDC.gov\nOfficial COVID Deaths from Tennessee Dept. of Health"
  136.        )
  137. print(g_excess_deaths_covid_era)
  138.  
  139.  
  140. # Cumulative Excess deaths since COVID arrived
  141. g_tot_excess_deaths_covid_era <-
  142.   deaths %>%
  143.   filter(series %in% c("excess_deaths")) %>%
  144.   filter(date >= as.Date("2020-01-01"),
  145.          date <  as.Date("2021-07-01")) %>%
  146.   ggplot() +
  147.   theme_bw() +
  148.   geom_line(aes(x = date, y = cumsum(value))) +
  149.   geom_line(data = official_deaths, aes(x = date, y = cumsum(new_deaths)), color = "darkblue", linetype = "dashed") +  
  150.   geom_hline(yintercept = 0) +
  151.   geom_vline(xintercept = as.Date("2020-03-20"), color = "darkred", linetype = "dotted") +
  152.   annotate("text", size = 4, label = "1st Official TN COVID Death - 2020-03-20", x = as.Date("2020-03-12"), y = 10000, angle = "90", color = "darkred") +
  153.   scale_x_date(breaks = pretty_breaks(10)) +
  154.   scale_y_continuous(breaks = pretty_breaks(10), labels = scales::comma) +
  155.   labs(x = "", y = "Cumulative Excess Deaths",
  156.        title = "Cumulative Official Weekly COVID deaths (blue) vs Weekly Excess Deaths in Tennessee, Jan 2020 - Jun 2021 (black)",
  157.        caption = "Mortality data to compute Excess Deaths from CDC.gov\nOfficial COVID Deaths from Tennessee Dept. of Health")
  158. print(g_tot_excess_deaths_covid_era)
  159.  
  160. plot_grid(g_excess_deaths_covid_era, g_tot_excess_deaths_covid_era, nrow = 2, ncol = 1, align = "hv")
Advertisement
RAW Paste Data Copied
Advertisement