################################################################################ ### Compute Rt (the reproduction number) for the given locations and populations ################################################################################ ### Load the necessary libraries packages <- c("tidyverse", "lubridate", "tsibble", "EpiEstim", "curl", "forecast", "TTR") new_packages <- packages[!(packages %in% installed.packages()[, "Package"])] if (length(new_packages)) install.packages(new_packages, quiet = TRUE) invisible(lapply(packages, "library", quietly = TRUE, character.only = TRUE, warn.conflicts = FALSE)) ### Choose the counties you want to add to the facet graph. They will also graph ### in this order, so arrange them how you want. my_location <- c("Montgomery", "Robertson", "Sumner", "Cheatham", "Davidson", "Wilson", "Dickson", "Williamson", "Rutherford") start_dates <- tribble( ~county, ~first_day, "Montgomery", "2020-08-31", "Robertson", "2020-08-10", "Sumner", "2020-08-03", "Cheatham", "2020-08-13", "Davidson", "2020-08-04", "Wilson", "2020-08-17", "Dickson", "2020-08-03", "Williamson", "2020-08-07", "Rutherford", "2020-08-13", ) %>% mutate(first_day = as.Date(first_day)) ################################################################################ ### Fetching data. The EpiEstim::estimate_R() function expects to be handed a ### data.frame or tibble with two columns: "dates" (date of confirmed case) ### and "I" (number of new incidents/cases). I add a third column "location" ### so I can make facet graphs of multiple locations at once. ################################################################################ ### The TN Dept of Health maintains a separate spreadsheet of confirmed cases for ### school-age children (aged 5-18 years) for each county. Load that and use ### it as our population. ### Function to read Excel spreadsheet from URL's read_excel_url <- function(url, ...) { tf <- tempfile(fileext = ".xlsx") curl::curl_download(url, tf) return(readxl::read_excel(tf, ...)) } ### Load case data for schools data <- "https://www.tn.gov/content/dam/tn/health/documents/cedep/novel-coronavirus/datasets/Public-Dataset-Daily-County-School.XLSX" %>% read_excel_url() %>% mutate(DATE = as.Date(DATE)) %>% select(DATE, NEW_CASES, COUNTY) %>% rename(dates = DATE, I = NEW_CASES, location = COUNTY) %>% # Filter it to just the locations specified filter(location %in% my_location) %>% # Filter out cases wh#ere "I" is a NA filter(!is.na(I)) %>% # Also, estimate_R can't handle negative numbers, so set negative values to 0. # These are usually due to suspected cases being lumped in and subsequently # found to not be COVID. mutate(I = if_else(I < 0, 0, I)) ################################################################################ ### The model EpiEstim uses requires the "serial interval", ie the lag in time ### between infection and symptoms appearing. I use values from the CDC paper ### below. ### https://wwwnc.cdc.gov/eid/article/26/6/20-0357_article ################################################################################ si_mean <- 3.96 si_std <- 4.75 ### Create a blank tibble to hold our EpiEstim output Rt_tib <- tribble( ~dates, ~mean_r, ~std_r, ~location, ) ### Iterate over all locations and generate the EpiEstim estimate for Rt for (this_location in data %>% pull("location") %>% unique()) { print(this_location) data_subset <- data %>% filter(location == this_location) %>% select(dates, I) estimate <- estimate_R(data_subset, method = "parametric_si", config = make_config(list(mean_si = si_mean, std_si = si_std))) e_dates <- estimate$dates %>% as_tibble() %>% filter(row_number() > 7) e_values <- estimate$R %>% as_tibble() %>% select("Mean(R)", "Std(R)") r_data <- e_dates %>% bind_cols(e_values) %>% janitor::clean_names() %>% rename(dates = value) %>% mutate(location = this_location) Rt_tib <- Rt_tib %>% bind_rows(r_data) } ### Take the Rt_tib estimates and compute the 7-day moving average trend_tib <- Rt_tib %>% select(dates, location, mean_r) %>% mutate(location = paste("values:", location, sep = "")) %>% pivot_wider(id_cols = "dates", names_from = "location", values_from = "mean_r") %>% # Use mstl() %>% trendcyle() mutate(across(starts_with("values:"), .fns = list(trend = ~ (.) %>% ts() %>% mstl() %>% trendcycle()), .names = "{fn}_{col}")) %>% select("dates", starts_with("trend_values")) %>% rename_at(vars(starts_with("trend_values:")), ~ str_replace(., "trend_values:", "")) %>% pivot_longer(-dates, names_to = "location", values_to = "trend") ### Add our trend and mask mandate data to the Rt tibble Rt_tib <- Rt_tib %>% left_join(trend_tib, by = c("dates" = "dates", "location" = "location")) %>% left_join(start_dates, by = c("location" = "county")) ################################################################################ ### Render the graph and done ################################################################################ title <- "Estimated Rt among children ages 5-18" subtitle <- paste("assuming mean(serial interval) = ", si_mean, " days and std(serial interval) = ", si_std, " days", sep = "") ### Order counties in the order given in my_location at the top of the script Rt_tib$location <- factor(Rt_tib$location, levels = my_location) g <- ggplot(data = Rt_tib, aes(x = as.Date(dates))) + theme_linedraw() + theme(strip.text.x = element_text(size = 16)) + geom_point(aes(y = mean_r), size = 1.0) + geom_ribbon(aes(ymin = mean_r - std_r, ymax = mean_r + std_r), color = NA, fill = "darkgrey", alpha = 0.2) + geom_line(aes(y = trend), color = "darkseagreen4", size = 1.2) + geom_hline(yintercept = 1, linetype = "dashed") + geom_vline(aes(xintercept = as.Date(first_day)), linetype = "dotted") + facet_wrap(~location) + scale_y_continuous(limits = c(0, 2)) + labs(title = title, subtitle = subtitle, x = "", y = "Rt") print(g)