binkleym

Rt of virus in TN school-age populations

Aug 10th, 2020 (edited)
426
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ################################################################################
  2. ### Compute Rt (the reproduction number) for the given locations and populations
  3. ################################################################################
  4. ### Load the necessary libraries
  5. packages <-
  6.   c("tidyverse", "lubridate", "tsibble", "EpiEstim", "curl", "forecast", "TTR")
  7.  
  8. new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
  9. if (length(new_packages)) install.packages(new_packages, quiet = TRUE)
  10. invisible(lapply(packages, "library", quietly = TRUE,
  11.                  character.only = TRUE, warn.conflicts = FALSE))
  12.  
  13. ### Choose the counties you want to add to the facet graph.  They will also graph
  14. ### in this order, so arrange them how you want.
  15. my_location <-
  16.   c("Montgomery", "Robertson", "Sumner",
  17.     "Cheatham",   "Davidson",  "Wilson",
  18.     "Dickson",    "Williamson", "Rutherford")
  19.  
  20. start_dates <-
  21.   tribble(
  22.   ~county,       ~first_day,
  23.   "Montgomery",  "2020-08-31",
  24.   "Robertson",   "2020-08-10",
  25.   "Sumner",      "2020-08-03",
  26.   "Cheatham",    "2020-08-13",
  27.   "Davidson",    "2020-08-04",
  28.   "Wilson",      "2020-08-17",
  29.   "Dickson",     "2020-08-03",
  30.   "Williamson",  "2020-08-07",
  31.   "Rutherford",  "2020-08-13",
  32.   ) %>% mutate(first_day = as.Date(first_day))
  33.  
  34. ################################################################################
  35. ### Fetching data.   The EpiEstim::estimate_R() function expects to be handed a
  36. ### data.frame or tibble with two columns:   "dates" (date of confirmed case)
  37. ### and "I" (number of new incidents/cases).   I add a third column "location"
  38. ### so I can make facet graphs of multiple locations at once.
  39. ################################################################################
  40.  
  41. ### The TN Dept of Health maintains a separate spreadsheet of confirmed cases for
  42. ### school-age children (aged 5-18 years) for each county.    Load that and use
  43. ### it as our population.
  44.  
  45. ### Function to read Excel spreadsheet from URL's
  46. read_excel_url <- function(url, ...) {
  47.   tf <- tempfile(fileext = ".xlsx")
  48.   curl::curl_download(url, tf)
  49.   return(readxl::read_excel(tf, ...))
  50. }
  51.  
  52. ### Load case data for schools
  53. data <-
  54.   "https://www.tn.gov/content/dam/tn/health/documents/cedep/novel-coronavirus/datasets/Public-Dataset-Daily-County-School.XLSX" %>%
  55.   read_excel_url() %>%
  56.   mutate(DATE = as.Date(DATE)) %>%
  57.   select(DATE, NEW_CASES, COUNTY) %>%
  58.   rename(dates    = DATE,
  59.          I        = NEW_CASES,
  60.          location = COUNTY) %>%
  61.  
  62.   # Filter it to just the locations specified
  63.   filter(location %in% my_location) %>%
  64.  
  65.   # Filter out cases wh#ere "I" is a NA
  66.   filter(!is.na(I)) %>%
  67.  
  68.   # Also, estimate_R can't handle negative numbers, so set negative values to 0.
  69.   # These are usually due to suspected cases being lumped in and subsequently
  70.   # found to not be COVID.
  71.   mutate(I = if_else(I < 0, 0, I))
  72.  
  73. ################################################################################
  74. ### The model EpiEstim uses requires the "serial interval", ie the lag in time
  75. ### between infection and symptoms appearing.    I use values from the CDC paper
  76. ### below.
  77. ### https://wwwnc.cdc.gov/eid/article/26/6/20-0357_article
  78. ################################################################################
  79.  
  80. si_mean <- 3.96
  81. si_std  <- 4.75
  82.  
  83. ### Create a blank tibble to hold our EpiEstim output
  84. Rt_tib <-
  85.   tribble(
  86.     ~dates, ~mean_r, ~std_r, ~location,
  87.   )
  88.  
  89. ### Iterate over all locations and generate the EpiEstim estimate for Rt
  90. for (this_location in data %>% pull("location") %>% unique()) {
  91.   print(this_location)
  92.   data_subset <-
  93.     data %>%
  94.     filter(location == this_location) %>%
  95.     select(dates, I)
  96.   estimate <-
  97.     estimate_R(data_subset,
  98.                method = "parametric_si",
  99.                config = make_config(list(mean_si = si_mean,
  100.                                           std_si = si_std)))
  101.   e_dates  <- estimate$dates %>% as_tibble() %>% filter(row_number() > 7)
  102.   e_values <- estimate$R %>% as_tibble() %>% select("Mean(R)", "Std(R)")
  103.   r_data <-
  104.     e_dates %>%
  105.     bind_cols(e_values) %>%
  106.     janitor::clean_names() %>%
  107.     rename(dates = value) %>%
  108.     mutate(location = this_location)
  109.   Rt_tib <-
  110.     Rt_tib %>%
  111.     bind_rows(r_data)
  112. }
  113.  
  114. ### Take the Rt_tib estimates and compute the 7-day moving average
  115. trend_tib <-
  116.   Rt_tib %>%
  117.   select(dates, location, mean_r) %>%
  118.   mutate(location = paste("values:", location, sep = "")) %>%
  119.   pivot_wider(id_cols = "dates",
  120.               names_from = "location",
  121.               values_from = "mean_r") %>%
  122.   # Use mstl() %>% trendcyle()
  123.   mutate(across(starts_with("values:"),
  124.                 .fns = list(trend = ~ (.) %>% ts() %>% mstl() %>% trendcycle()),
  125.                 .names = "{fn}_{col}")) %>%
  126.   select("dates", starts_with("trend_values")) %>%
  127.   rename_at(vars(starts_with("trend_values:")),
  128.             ~ str_replace(., "trend_values:", "")) %>%
  129.   pivot_longer(-dates, names_to = "location", values_to = "trend")
  130.  
  131. ### Add our trend and mask mandate data to the Rt tibble
  132. Rt_tib <-
  133.   Rt_tib %>%
  134.   left_join(trend_tib, by = c("dates" = "dates", "location" = "location")) %>%
  135.   left_join(start_dates, by = c("location" = "county"))
  136.  
  137. ################################################################################
  138. ### Render the graph and done
  139. ################################################################################
  140.  
  141. title <- "Estimated Rt among children ages 5-18"
  142.  
  143. subtitle <- paste("assuming mean(serial interval) = ", si_mean,
  144.                   " days and std(serial interval) = ", si_std, " days", sep = "")
  145.  
  146. ### Order counties in the order given in my_location at the top of the script
  147. Rt_tib$location <- factor(Rt_tib$location, levels = my_location)
  148.  
  149. g <-
  150.   ggplot(data = Rt_tib, aes(x = as.Date(dates))) +
  151.   theme_linedraw() +
  152.   theme(strip.text.x = element_text(size = 16)) +
  153.   geom_point(aes(y = mean_r), size = 1.0) +
  154.   geom_ribbon(aes(ymin = mean_r - std_r,
  155.                   ymax = mean_r + std_r),
  156.               color = NA, fill = "darkgrey", alpha = 0.2) +
  157.   geom_line(aes(y = trend), color = "darkseagreen4", size = 1.2) +
  158.   geom_hline(yintercept = 1, linetype = "dashed") +
  159.   geom_vline(aes(xintercept = as.Date(first_day)), linetype = "dotted") +
  160.   facet_wrap(~location) +
  161.   scale_y_continuous(limits = c(0, 2)) +
  162.   labs(title = title, subtitle = subtitle, x = "", y = "Rt")
  163. print(g)
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×