Guest User

Untitled

a guest
Mar 18th, 2020
108
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. require(DescTools)
  2. require(RCurl)
  3. require(rvest)
  4.  
  5. paths = list()
  6. paths$base = ""
  7. paths$data = file.path(paths$base, "Data")
  8. paths$plots = file.path(paths$base, "Plots")
  9.  
  10.  
  11. paths$dat = file.path(paths$data, "nCoVtests.csv")
  12. paths$wiki = file.path(paths$data, "wikiCases.csv")
  13.  
  14. updateCDC = T
  15. updateWiki = T
  16.  
  17.  
  18. if(updateCDC){
  19. dat0 = read_html("https://www.cdc.gov/coronavirus/2019-ncov/cases-updates/testing-in-us.html")
  20. dat = html_table(html_nodes(dat0, "table"))[[1]]
  21. colnames(dat) = c("Date", "CDC", "UPH")
  22. dat$Incomplete = 0
  23.  
  24. idx_inc = unique(grep("‡", dat$CDC), grep("‡", dat$UPH))
  25. dat$Incomplete[idx_inc] = 1
  26.  
  27. dat$CDC = as.numeric(gsub("‡", "", dat$CDC))
  28. dat$UPH = as.numeric(gsub("‡", "", dat$UPH))
  29.  
  30. write.csv(dat, paths$dat, row.names = F)
  31. }
  32.  
  33.  
  34. if(updateWiki){
  35. htmlwiki = "https://en.wikipedia.org/w/index.php?title=Template:2019%E2%80%9320_coronavirus_pandemic_data/United_States_medical_cases_chart&action=edit"
  36. wiki0 = getURL(htmlwiki)
  37.  
  38. wiki = strsplit(wiki0, "Medical cases chart/Row", fixed = T)[[1]]
  39.  
  40.  
  41. wiki = wiki[-1]
  42. wiki = wiki[1:which(lapply(wiki, nchar) > 1000)[1]]
  43.  
  44. wiki[length(wiki)] = strsplit(tail(wiki, 1), "\n|caption='''Cases:'''", fixed = T)[[1]][1]
  45.  
  46. wiki = wiki[grep("2020", wiki)]
  47. wiki = sapply(wiki, function(x) strsplit(x, "+", fixed = T)[[1]][1], USE.NAMES = F)
  48. wiki = sapply(wiki, function(x) strsplit(x, "=", fixed = T)[[1]][1], USE.NAMES = F)
  49.  
  50. wiki = lapply(wiki, function(x) strsplit(x, "|", fixed = T)[[1]][c(2, 3, 5)])
  51.  
  52. wiki = as.data.frame(do.call(rbind, wiki), stringsAsFactors = F)
  53.  
  54. colnames(wiki) = c("Date", "Deaths", "Cases")
  55.  
  56. wiki$Deaths[wiki$Deaths == ""] = 0
  57.  
  58. wiki$Date = as.Date(wiki$Date, format = "%Y-%m-%d")
  59. wiki$Deaths = as.numeric(wiki$Deaths)
  60. wiki$Cases = as.numeric(wiki$Cases)
  61.  
  62. write.csv(wiki, paths$wiki, row.names = F)
  63.  
  64. }
  65.  
  66.  
  67.  
  68. dat = read.csv(paths$dat, stringsAsFactors = F)
  69.  
  70. dat$Total = dat$CDC + dat$UPH
  71. dat$cTot = cumsum(dat$Total)
  72. dat = dat[, c(1:3, 5:6, 4)]
  73.  
  74. dat$Date = paste0(dat$Date, "/2020")
  75. dat$Date = as.Date(dat$Date, format = "%m/%d/%Y")
  76. dat$Incomplete[55] = 0
  77. dat = dat[!dat$Incomplete, ]
  78.  
  79.  
  80.  
  81.  
  82. wiki = read.csv(paths$wiki, stringsAsFactors = F)
  83. wiki$Date = as.Date(wiki$Date, format = "%Y-%m-%d")
  84.  
  85.  
  86. dat = merge(dat, wiki, by = "Date", all = T)
  87. dat$Deaths[1:3] = dat$Cases[1:3] = 0
  88.  
  89.  
  90. idx_na = which(is.na(dat$Deaths))
  91. idx_nna = which(!is.na(dat$Deaths))
  92. for(i in 1:length(idx_na)){
  93. newvals = dat[idx_nna[max(which(idx_nna < idx_na[i]))], c("Cases", "Deaths")]
  94.  
  95. dat[idx_na[i], c("Cases", "Deaths")] = newvals
  96. }
  97. dat = dat[dat$Incomplete == 0, ]
  98. dat = dat[!is.na(dat$Date), ]
  99.  
  100.  
  101.  
  102.  
  103. par(mfrow = c(2, 1))
  104. plot(dat$Date, dat$cTot,
  105. xlab = "",
  106. ylab = "Number",
  107. type = "b",
  108. col = "Red",
  109. log = "y")
  110. lines(dat$Date, dat$Cases, type = "b", col = "Blue")
  111. lines(dat$Date, dat$Deaths, type = "b", col = "Black")
  112. abline(h = axTicks(2), v = axTicks.Date(1, dat$Date),
  113. lty = 3, col = "Light Grey")
  114.  
  115. legend("topleft", legend = c("Tests", "Cases", "Deaths"),
  116. lwd = 2, bty = "n", col = c("Red", "Blue", "Black"))
  117.  
  118.  
  119.  
  120. plot(dat$cTot, dat$Cases,
  121. xlab = "# Tests",
  122. ylab = "# Cases",
  123. panel.first = grid())
  124. lines(dat$cTot, (.00155*dat$cTot)^2)
  125.  
  126. legend("topleft", legend = "(.00155*nTests)^2", bty = "n", lwd = 2, col = "Black")
  127.  
  128.  
  129. if(FALSE){
  130. res = NULL
  131. nTest = seq(10, 1000, by = 10)
  132. props = seq(.0001, .1, length = length(nTest))
  133.  
  134. for(i in 1:length(nTest)){
  135. p = props[i]
  136. q = 1-p
  137. n = sum(sample(0:1, nTest[i], replace = T, prob = c(q, p)))
  138. res = rbind(res, c(nTest[i], n))
  139. }
  140.  
  141. par(mfrow = c(2, 2))
  142. plot(res[,1], res[,2], xlab = "# Tests", ylab = "# Cases")
  143. plot(nTest, xlab = "Step", ylab = "# Tests")
  144. plot(props, xlab = "Step", ylab = "Proportion Positive")
  145.  
  146. }
RAW Paste Data