SHARE
TWEET

Untitled

a guest Mar 18th, 2020 89 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. require(DescTools)
  2. require(RCurl)
  3. require(rvest)
  4.  
  5. paths = list()
  6. paths$base  = ""
  7. paths$data  = file.path(paths$base, "Data")
  8. paths$plots = file.path(paths$base, "Plots")
  9.  
  10.  
  11. paths$dat  = file.path(paths$data, "nCoVtests.csv")
  12. paths$wiki = file.path(paths$data, "wikiCases.csv")
  13.  
  14. updateCDC  = T
  15. updateWiki = T
  16.  
  17.  
  18. if(updateCDC){
  19.     dat0 = read_html("https://www.cdc.gov/coronavirus/2019-ncov/cases-updates/testing-in-us.html")
  20.     dat  = html_table(html_nodes(dat0, "table"))[[1]]
  21.     colnames(dat) = c("Date", "CDC", "UPH")
  22.     dat$Incomplete = 0
  23.  
  24.     idx_inc = unique(grep("‡", dat$CDC), grep("‡", dat$UPH))
  25.     dat$Incomplete[idx_inc] = 1
  26.  
  27.     dat$CDC = as.numeric(gsub("‡", "", dat$CDC))
  28.     dat$UPH = as.numeric(gsub("‡", "", dat$UPH))
  29.  
  30.     write.csv(dat, paths$dat, row.names = F)
  31. }
  32.  
  33.  
  34. if(updateWiki){
  35.     htmlwiki = "https://en.wikipedia.org/w/index.php?title=Template:2019%E2%80%9320_coronavirus_pandemic_data/United_States_medical_cases_chart&action=edit"
  36.     wiki0 = getURL(htmlwiki)
  37.  
  38.     wiki = strsplit(wiki0, "Medical cases chart/Row", fixed = T)[[1]]
  39.  
  40.  
  41.     wiki = wiki[-1]
  42.     wiki = wiki[1:which(lapply(wiki, nchar) > 1000)[1]]
  43.  
  44.     wiki[length(wiki)] = strsplit(tail(wiki, 1), "\n|caption='''Cases:'''", fixed = T)[[1]][1]
  45.  
  46.     wiki = wiki[grep("2020", wiki)]
  47.     wiki = sapply(wiki, function(x) strsplit(x, "+", fixed = T)[[1]][1], USE.NAMES = F)
  48.     wiki = sapply(wiki, function(x) strsplit(x, "=", fixed = T)[[1]][1], USE.NAMES = F)
  49.  
  50.     wiki = lapply(wiki, function(x) strsplit(x, "|", fixed = T)[[1]][c(2, 3, 5)])
  51.  
  52.     wiki = as.data.frame(do.call(rbind, wiki), stringsAsFactors = F)
  53.  
  54.     colnames(wiki) = c("Date", "Deaths", "Cases")
  55.  
  56.     wiki$Deaths[wiki$Deaths == ""] = 0
  57.  
  58.     wiki$Date   = as.Date(wiki$Date, format = "%Y-%m-%d")
  59.     wiki$Deaths = as.numeric(wiki$Deaths)
  60.     wiki$Cases  = as.numeric(wiki$Cases)
  61.  
  62.     write.csv(wiki, paths$wiki, row.names = F)
  63.  
  64. }
  65.  
  66.  
  67.  
  68. dat = read.csv(paths$dat, stringsAsFactors = F)
  69.  
  70. dat$Total = dat$CDC + dat$UPH
  71. dat$cTot  = cumsum(dat$Total)
  72. dat       = dat[, c(1:3, 5:6, 4)]
  73.  
  74. dat$Date = paste0(dat$Date, "/2020")
  75. dat$Date = as.Date(dat$Date, format = "%m/%d/%Y")
  76. dat$Incomplete[55] = 0
  77. dat      = dat[!dat$Incomplete, ]
  78.  
  79.  
  80.  
  81.  
  82. wiki      = read.csv(paths$wiki, stringsAsFactors = F)
  83. wiki$Date = as.Date(wiki$Date, format = "%Y-%m-%d")
  84.  
  85.  
  86. dat = merge(dat, wiki, by = "Date", all = T)
  87. dat$Deaths[1:3] = dat$Cases[1:3] = 0
  88.  
  89.  
  90. idx_na  = which(is.na(dat$Deaths))
  91. idx_nna = which(!is.na(dat$Deaths))
  92. for(i in 1:length(idx_na)){
  93.     newvals = dat[idx_nna[max(which(idx_nna < idx_na[i]))], c("Cases", "Deaths")]
  94.  
  95.     dat[idx_na[i], c("Cases", "Deaths")] = newvals
  96. }
  97. dat = dat[dat$Incomplete == 0, ]
  98. dat = dat[!is.na(dat$Date), ]
  99.  
  100.  
  101.  
  102.  
  103. par(mfrow = c(2, 1))
  104. plot(dat$Date,  dat$cTot,
  105.      xlab = "",
  106.      ylab = "Number",
  107.      type = "b",
  108.      col = "Red",
  109.      log = "y")
  110. lines(dat$Date, dat$Cases, type = "b", col = "Blue")
  111. lines(dat$Date, dat$Deaths, type = "b", col = "Black")
  112. abline(h = axTicks(2), v = axTicks.Date(1, dat$Date),
  113.        lty = 3, col = "Light Grey")
  114.  
  115. legend("topleft", legend = c("Tests", "Cases", "Deaths"),
  116.        lwd = 2, bty = "n", col = c("Red", "Blue", "Black"))
  117.  
  118.  
  119.  
  120. plot(dat$cTot, dat$Cases,
  121.      xlab = "# Tests",
  122.      ylab = "# Cases",
  123.      panel.first = grid())
  124. lines(dat$cTot, (.00155*dat$cTot)^2)
  125.  
  126. legend("topleft", legend = "(.00155*nTests)^2", bty = "n", lwd = 2, col = "Black")
  127.  
  128.  
  129. if(FALSE){
  130.     res   = NULL
  131.     nTest = seq(10, 1000, by = 10)
  132.     props = seq(.0001, .1, length = length(nTest))
  133.  
  134.     for(i in 1:length(nTest)){
  135.         p = props[i]
  136.         q = 1-p
  137.         n = sum(sample(0:1, nTest[i], replace = T, prob = c(q, p)))
  138.         res = rbind(res, c(nTest[i], n))
  139.     }
  140.  
  141.     par(mfrow = c(2, 2))
  142.     plot(res[,1], res[,2], xlab = "# Tests", ylab = "# Cases")
  143.     plot(nTest, xlab = "Step", ylab = "# Tests")
  144.     plot(props, xlab = "Step", ylab = "Proportion Positive")
  145.  
  146. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top