Advertisement
binkleym

New Infected Cases variation over week/month

Jul 15th, 2020
618
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.53 KB | None | 0 0
  1. ### Pull new confirmed cases and analyze data to find days or times of month
  2. ### with relatively lower/higher risk of infection. - MatB
  3.  
  4. library(tidyverse)
  5. library(tsibble)
  6. library(feasts)
  7. library(cowplot)
  8.  
  9. ### What state are we interested in?
  10. my_state <- "Ohio"
  11.  
  12. ### What county are we interested in?
  13. my_county <- "Franklin"
  14.  
  15. ### How many days between being infected and being confirmed.   We have
  16. ### "date of confirmed infection", so if we subtract the lag, it should
  17. ### give us a good estimate about the date the infection occurs.
  18. lag <- 6
  19.  
  20. ### Pull cases data from the NY Times repo below and change the path
  21. ### as needed
  22. ###
  23. ### https://github.com/nytimes/covid-19-data
  24. ###
  25. spreadsheet <- "../nytimes_covid-19-data/us-counties.csv"
  26.  
  27.  
  28. ### Take the data in the spreadsheet, which is total new cases, and convert
  29. ### it to new cases per day
  30. data <-
  31.   read_csv(spreadsheet, col_names = TRUE, col_types = "Dccddd") %>%
  32.   filter(state == my_state) %>%
  33.   filter(county == my_county) %>%
  34.   arrange(date) %>%
  35.   mutate(new_cases  = cases - lag(cases)) %>%
  36.   select(-state, -county, -fips, -deaths, -cases) %>%
  37.   filter(!is.na(new_cases)) %>%
  38.   as_tsibble(index = "date")
  39.  
  40. ### Decompose new cases using the STL() function from the "feasts"
  41. ### package, using a 7 day window for the trend, and seasonal periods of
  42. ### one week and one month.
  43. model_cases <-
  44.   data %>%
  45.   model(
  46.     STL(new_cases ~ trend(window = 7) +
  47.                     season(period = "week") +
  48.                     season(period = "month"), iterations = 100)
  49.   ) %>% components()
  50.  
  51. ### Graph the model
  52. g_stl <- model_cases %>% autoplot()
  53. print(g_stl)
  54.  
  55. ### Create a new tibble using the decomposed components
  56. new_cases_tib <-
  57.   tibble(date      = data %>% pull("date"),
  58.          values    = data %>% pull("new_cases"),
  59.          trend     = model_cases %>% pull("trend"),
  60.          s_week    = model_cases %>% pull("season_week"),
  61.          s_month   = model_cases %>% pull("season_month"),
  62.          s_adjust  = model_cases %>% pull("season_adjust"),
  63.          remainder = model_cases %>% pull("remainder"),
  64.   ) %>% as_tsibble(index = "date")
  65.  
  66. ### Let's plot the weekly pattern
  67. g_week <-
  68.   new_cases_tib %>%
  69.   mutate(date = as.Date(date) - lag) %>%
  70.   gg_season(s_week,  period = "1 week") +
  71.   xlab("Day of Week") +
  72.   ylab("New cases") +
  73.   ggtitle("Weekly Seasonal Component of New Infected")
  74.  
  75. ### Let's plot the monthly pattern
  76. g_month <-
  77.   new_cases_tib %>%
  78.   mutate(date = as.Date(date) - lag) %>%
  79.   gg_season(s_month,  period = "1 month") +
  80.   xlab("Day of Month") +
  81.   ylab("New cases") +
  82.   ggtitle("Monthly Seasonal Component of New Infected")
  83.  
  84. ### Header/footer strings
  85. title_string <- paste("COVID-19 Infection Risk assuming ", lag, " day lag between infection and symptoms\n", my_county, " County, ", my_state,
  86.                       " [", as.Date(data %>% tail(n = 1) %>% pull(date)), "]", sep = "")
  87. footer_string <- "Data Source:  https://github.com/nytimes/covid-19-data"
  88.  
  89. title  <- ggdraw() + draw_label(title_string, fontface = "bold")
  90. footer <- ggdraw() + draw_label(footer_string, size = 10)
  91.  
  92.  
  93. ### Let's smoosh it all together in one graph
  94. p_period <- plot_grid(g_week, g_month, nrow = 1, ncol = 2, align = "hv", axis = "lbrt")
  95. plot_grid(title,
  96.           g_stl,
  97.           p_period,
  98.           footer,
  99.           axis = "lbrt", nrow = 4, ncol = 1, rel_heights = c(0.1, 1, 0.5, 0.05),
  100.           align = "hv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement