Advertisement
binkleym

Rt of virus in TN school-age populations

Aug 10th, 2020 (edited)
834
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 6.37 KB | None | 0 0
  1. ################################################################################
  2. ### Compute Rt (the reproduction number) for the given locations and populations
  3. ################################################################################
  4. ### Load the necessary libraries
  5. packages <-
  6.   c("tidyverse", "lubridate", "tsibble", "EpiEstim", "curl", "forecast", "TTR")
  7.  
  8. new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
  9. if (length(new_packages)) install.packages(new_packages, quiet = TRUE)
  10. invisible(lapply(packages, "library", quietly = TRUE,
  11.                  character.only = TRUE, warn.conflicts = FALSE))
  12.  
  13. ### Choose the counties you want to add to the facet graph.  They will also graph
  14. ### in this order, so arrange them how you want.
  15. my_location <-
  16.   c("Montgomery", "Robertson", "Sumner",
  17.     "Cheatham",   "Davidson",  "Wilson",
  18.     "Dickson",    "Williamson", "Rutherford")
  19.  
  20. start_dates <-
  21.   tribble(
  22.   ~county,       ~first_day,
  23.   "Montgomery",  "2020-08-31",
  24.   "Robertson",   "2020-08-10",
  25.   "Sumner",      "2020-08-03",
  26.   "Cheatham",    "2020-08-13",
  27.   "Davidson",    "2020-08-04",
  28.   "Wilson",      "2020-08-17",
  29.   "Dickson",     "2020-08-03",
  30.   "Williamson",  "2020-08-07",
  31.   "Rutherford",  "2020-08-13",
  32.   ) %>% mutate(first_day = as.Date(first_day))
  33.  
  34. ################################################################################
  35. ### Fetching data.   The EpiEstim::estimate_R() function expects to be handed a
  36. ### data.frame or tibble with two columns:   "dates" (date of confirmed case)
  37. ### and "I" (number of new incidents/cases).   I add a third column "location"
  38. ### so I can make facet graphs of multiple locations at once.
  39. ################################################################################
  40.  
  41. ### The TN Dept of Health maintains a separate spreadsheet of confirmed cases for
  42. ### school-age children (aged 5-18 years) for each county.    Load that and use
  43. ### it as our population.
  44.  
  45. ### Function to read Excel spreadsheet from URL's
  46. read_excel_url <- function(url, ...) {
  47.   tf <- tempfile(fileext = ".xlsx")
  48.   curl::curl_download(url, tf)
  49.   return(readxl::read_excel(tf, ...))
  50. }
  51.  
  52. ### Load case data for schools
  53. data <-
  54.   "https://www.tn.gov/content/dam/tn/health/documents/cedep/novel-coronavirus/datasets/Public-Dataset-Daily-County-School.XLSX" %>%
  55.   read_excel_url() %>%
  56.   mutate(DATE = as.Date(DATE)) %>%
  57.   select(DATE, NEW_CASES, COUNTY) %>%
  58.   rename(dates    = DATE,
  59.          I        = NEW_CASES,
  60.          location = COUNTY) %>%
  61.  
  62.   # Filter it to just the locations specified
  63.   filter(location %in% my_location) %>%
  64.  
  65.   # Filter out cases wh#ere "I" is a NA
  66.   filter(!is.na(I)) %>%
  67.  
  68.   # Also, estimate_R can't handle negative numbers, so set negative values to 0.
  69.   # These are usually due to suspected cases being lumped in and subsequently
  70.   # found to not be COVID.
  71.   mutate(I = if_else(I < 0, 0, I))
  72.  
  73. ################################################################################
  74. ### The model EpiEstim uses requires the "serial interval", ie the lag in time
  75. ### between infection and symptoms appearing.    I use values from the CDC paper
  76. ### below.
  77. ### https://wwwnc.cdc.gov/eid/article/26/6/20-0357_article
  78. ################################################################################
  79.  
  80. si_mean <- 3.96
  81. si_std  <- 4.75
  82.  
  83. ### Create a blank tibble to hold our EpiEstim output
  84. Rt_tib <-
  85.   tribble(
  86.     ~dates, ~mean_r, ~std_r, ~location,
  87.   )
  88.  
  89. ### Iterate over all locations and generate the EpiEstim estimate for Rt
  90. for (this_location in data %>% pull("location") %>% unique()) {
  91.   print(this_location)
  92.   data_subset <-
  93.     data %>%
  94.     filter(location == this_location) %>%
  95.     select(dates, I)
  96.   estimate <-
  97.     estimate_R(data_subset,
  98.                method = "parametric_si",
  99.                config = make_config(list(mean_si = si_mean,
  100.                                           std_si = si_std)))
  101.   e_dates  <- estimate$dates %>% as_tibble() %>% filter(row_number() > 7)
  102.   e_values <- estimate$R %>% as_tibble() %>% select("Mean(R)", "Std(R)")
  103.   r_data <-
  104.     e_dates %>%
  105.     bind_cols(e_values) %>%
  106.     janitor::clean_names() %>%
  107.     rename(dates = value) %>%
  108.     mutate(location = this_location)
  109.   Rt_tib <-
  110.     Rt_tib %>%
  111.     bind_rows(r_data)
  112. }
  113.  
  114. ### Take the Rt_tib estimates and compute the 7-day moving average
  115. trend_tib <-
  116.   Rt_tib %>%
  117.   select(dates, location, mean_r) %>%
  118.   mutate(location = paste("values:", location, sep = "")) %>%
  119.   pivot_wider(id_cols = "dates",
  120.               names_from = "location",
  121.               values_from = "mean_r") %>%
  122.   # Use mstl() %>% trendcyle()
  123.   mutate(across(starts_with("values:"),
  124.                 .fns = list(trend = ~ (.) %>% ts() %>% mstl() %>% trendcycle()),
  125.                 .names = "{fn}_{col}")) %>%
  126.   select("dates", starts_with("trend_values")) %>%
  127.   rename_at(vars(starts_with("trend_values:")),
  128.             ~ str_replace(., "trend_values:", "")) %>%
  129.   pivot_longer(-dates, names_to = "location", values_to = "trend")
  130.  
  131. ### Add our trend and mask mandate data to the Rt tibble
  132. Rt_tib <-
  133.   Rt_tib %>%
  134.   left_join(trend_tib, by = c("dates" = "dates", "location" = "location")) %>%
  135.   left_join(start_dates, by = c("location" = "county"))
  136.  
  137. ################################################################################
  138. ### Render the graph and done
  139. ################################################################################
  140.  
  141. title <- "Estimated Rt among children ages 5-18"
  142.  
  143. subtitle <- paste("assuming mean(serial interval) = ", si_mean,
  144.                   " days and std(serial interval) = ", si_std, " days", sep = "")
  145.  
  146. ### Order counties in the order given in my_location at the top of the script
  147. Rt_tib$location <- factor(Rt_tib$location, levels = my_location)
  148.  
  149. g <-
  150.   ggplot(data = Rt_tib, aes(x = as.Date(dates))) +
  151.   theme_linedraw() +
  152.   theme(strip.text.x = element_text(size = 16)) +
  153.   geom_point(aes(y = mean_r), size = 1.0) +
  154.   geom_ribbon(aes(ymin = mean_r - std_r,
  155.                   ymax = mean_r + std_r),
  156.               color = NA, fill = "darkgrey", alpha = 0.2) +
  157.   geom_line(aes(y = trend), color = "darkseagreen4", size = 1.2) +
  158.   geom_hline(yintercept = 1, linetype = "dashed") +
  159.   geom_vline(aes(xintercept = as.Date(first_day)), linetype = "dotted") +
  160.   facet_wrap(~location) +
  161.   scale_y_continuous(limits = c(0, 2)) +
  162.   labs(title = title, subtitle = subtitle, x = "", y = "Rt")
  163. print(g)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement