Advertisement
MetricT

COVID-19 script 1.2

Mar 18th, 2020
1,530
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 7.19 KB | None | 0 0
  1. # Load libraries we need...
  2. packages <- c("tidyverse", "dplyr", "ggplot2", "scales", "ggthemes")
  3.  
  4. new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
  5. if (length(new_packages)) install.packages(new_packages, quiet = TRUE)
  6. invisible(lapply(packages, "library", quietly = TRUE,
  7.                  character.only = TRUE, warn.conflicts = FALSE))
  8.  
  9. # As our data starts on 2020-03-05, we want to make our reference date the
  10. # day before the first case was announced
  11. ref_date <- as.Date("2020-03-04", tz = "UTC")
  12.  
  13. # A table with info on TN COVID cases
  14. # Data from:  https://www.tn.gov/health/cedep/ncov.html
  15. tn_covid_df <- read.table(textConnection(
  16. "date, totalinfected, totaldeaths
  17. 2020-03-05, 1, 0
  18. 2020-03-06, 1, 0
  19. 2020-03-07, 1, 0
  20. 2020-03-08, 3, 0
  21. 2020-03-09, 4, 0
  22. 2020-03-10, 7, 0
  23. 2020-03-11, 9, 0
  24. 2020-03-12, 18, 0
  25. 2020-03-13, 26, 0
  26. 2020-03-14, 32, 0
  27. 2020-03-15, 39, 0
  28. 2020-03-16, 52, 0
  29. 2020-03-17, 73, 0
  30. 2020-03-18, 98, 0"),
  31.   sep = ",",
  32.   colClasses = c("Date", "integer", "integer"),
  33.   header = TRUE)
  34.  
  35. # Take the difference to get a vector of new daily cases.
  36. tn_covid_df$newinfected <- c(tn_covid_df$totalinfected[1],
  37.                              diff(tn_covid_df$totalinfected))
  38. tn_covid_df$newdeaths   <- c(tn_covid_df$totaldeaths[1],
  39.                              diff(tn_covid_df$totaldeaths))
  40.  
  41. # What base log do you want to use (2, e, 10, 3.14159, 42, or something else).
  42. # This allows you to change the base for the exponential formula in the graph.
  43. # So if you want to know a * 2^b,  a' * 10^b', etc, just change the base here.
  44. log_base <- 2
  45.  
  46. # Compute the number of days since our reference date and the given date
  47. tn_covid_df$numdays <- difftime(as.POSIXct(tn_covid_df$date),
  48.                                 as.POSIXct(ref_date, tz = "UTC"),
  49.                                 units = "days") %>%
  50.                        as.numeric()
  51.  
  52. # Use non-linear fitting to do the exponential fit
  53. fit_totalinfected <- nls(totalinfected ~ I(a * log_base ^ (b * numdays)),
  54.                  data = tn_covid_df, start = list(a = 0.5, b = 0.5))
  55. summary(fit_totalinfected)
  56.  
  57. # Extract the regression coefficents so we can add the regression formula
  58. # to the graph
  59. intercept  <- coef(fit_totalinfected)[1]
  60. d_constant <- coef(fit_totalinfected)[2]
  61.  
  62. # Add the fit to our dataframe
  63. tn_covid_df$fit_totalinfected <- intercept *
  64.                                  log_base ^ (d_constant * tn_covid_df$numdays)
  65.  
  66. # Compute the doubling time
  67. doubling_time <- log(2, log_base) / d_constant
  68.  
  69.  
  70. ################################################################################
  71. ### Compute "doomsday" - the day we run out of hospital beds
  72. ################################################################################
  73.  
  74. # My estimate for the number of hospital beds, made by adding up the number of
  75. # beds each major hospital lists:
  76. # Vanderbilt University Medical - 1,019
  77. # TriStar Centennial            -   741
  78. # St Thomas Midtown             -   683
  79. # TriStar Skyline               -   353
  80. # St Thomas West                -   350
  81. # TriStar Southern Hills        -   126
  82. # Nashville General             -   150
  83. total_beds_nashville <- 3500
  84.  
  85. # What fraction of those beds are currently empty?
  86. # Data from:  https://www.statista.com/statistics/185904/hospital-occupancy-rate-in-the-us-since-2001/
  87. bed_vacancy_rate <- 0.341
  88.  
  89. # What fraction of confirmed infected require hospitalization.  This has been
  90. # ~20% in data from Wuhan, Italy, and NY State
  91. critical_fraction <- 0.2
  92.  
  93. # We will run out of hospital beds once infected cases reaches this level
  94. critical_number <- total_beds_nashville * bed_vacancy_rate / critical_fraction
  95.  
  96. # How many days until we exceed the critical number of cases beds (ie, run out)
  97. doomsdays <- ceiling(log(critical_number / 2^intercept, log_base) / d_constant)
  98. doomsdate <- ref_date + doomsdays
  99.  
  100.  
  101. ################################################################################
  102. ### Graph the number of total confirmed infected cases
  103. ################################################################################
  104.  
  105. # Create a text string with the growth formula based on current regression
  106. caption <- paste("Number of Total Confirmed Cases = ",
  107.                  round(log_base^intercept, 3),
  108.                  " * ", round(log_base, 3), "^(",
  109.                  round(d_constant, 3),
  110.                  " * Days since 2020-03-04)\nDoubling time = ",
  111.                  round(doubling_time, 2),
  112.                  " days\nEstimated date of last available bed: ",
  113.                  doomsdate, sep = "")
  114.  
  115. ylab     <- "Total Confirmed Cases"
  116. xlab     <- "Date"
  117. title    <- "Total Confirmed COVID-19 cases in TN"
  118. subtitle <- caption
  119.  
  120. p_total <- ggplot(data = tn_covid_df) +
  121.    theme_classic() +
  122.    theme(legend.title = element_blank()) +
  123.    theme(legend.position = "none") +
  124.    geom_point(data = tn_covid_df,
  125.               aes(x = date, y = totalinfected), color = "black") +
  126.    geom_line(data = tn_covid_df,
  127.              aes(x = date, y = fit_totalinfected), color = "darkred") +
  128.    scale_y_continuous(breaks = pretty_breaks()) +
  129.    labs(title = title, subtitle = caption, x = xlab, y = ylab)
  130. print(p_total)
  131.  
  132. readline(prompt = "Press [ENTER] to continue...")
  133.  
  134.  
  135. ################################################################################
  136. ### Graph the number of new confirmed infected cases
  137. ################################################################################
  138.  
  139. ### If we assume infected = a * B^(c * days), then
  140. ###
  141. ###  d(infected)/dt = c * log_e(B) * infected
  142.  
  143. # Add the fit to our dataframe
  144. tn_covid_df$fit_newinfected <- tn_covid_df$fit * log(log_base) * d_constant
  145.  
  146. # Create a text string with the growth formula based on current regression
  147. caption <- paste("Number of New Confirmed Cases = ",
  148.                  round(d_constant * log(log_base) * log_base^intercept, 3),
  149.                  " * ", round(log_base, 3), "^(",
  150.                  round(d_constant, 3),
  151.                  " * Days since 2020-03-04)", sep = "")
  152.  
  153. # Graph the number of new confirmed infected cases
  154. ylab     <- "New Confirmed Cases"
  155. xlab     <- "Date"
  156. title    <- "New Confirmed COVID-19 cases in TN"
  157.  
  158. p_new <- ggplot(data = tn_covid_df) +
  159.    theme_classic() +
  160.    theme(legend.title = element_blank()) +
  161.    theme(legend.position = "none") +
  162.    geom_point(data = tn_covid_df,
  163.               aes(x = date, y = newinfected), color = "black") +
  164.    geom_line(data = tn_covid_df,
  165.              aes(x = date, y = fit_newinfected), color = "darkred") +
  166.    scale_y_continuous(breaks = pretty_breaks()) +
  167.    labs(title = title, subtitle = caption, x = xlab, y = ylab)
  168. print(p_new)
  169.  
  170. readline(prompt = "Press [ENTER] to continue...")
  171.  
  172.  
  173. ################################################################################
  174. ### Project expected confirmed cases forward 4 weeks (output to console window)
  175. ################################################################################
  176.  
  177. for (day in seq(1, 28)) {
  178.    this_date <- ref_date + day
  179.    total_cases <- round(intercept * log_base ^ (d_constant * day), 0)
  180.    new_cases   <- round(log(log_base) * d_constant *
  181.                           intercept * log_base ^ (d_constant * day), 0)
  182.    print(paste(this_date, " - ", total_cases, "-", new_cases))
  183. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement