Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require(DescTools)
- require(RCurl)
- require(rvest)
- paths = list()
- paths$base = ""
- paths$data = file.path(paths$base, "Data")
- paths$plots = file.path(paths$base, "Plots")
- paths$dat = file.path(paths$data, "nCoVtests.csv")
- paths$wiki = file.path(paths$data, "wikiCases.csv")
- updateCDC = T
- updateWiki = T
- if(updateCDC){
- dat0 = read_html("https://www.cdc.gov/coronavirus/2019-ncov/cases-updates/testing-in-us.html")
- dat = html_table(html_nodes(dat0, "table"))[[1]]
- colnames(dat) = c("Date", "CDC", "UPH")
- dat$Incomplete = 0
- idx_inc = unique(grep("‡", dat$CDC), grep("‡", dat$UPH))
- dat$Incomplete[idx_inc] = 1
- dat$CDC = as.numeric(gsub("‡", "", dat$CDC))
- dat$UPH = as.numeric(gsub("‡", "", dat$UPH))
- write.csv(dat, paths$dat, row.names = F)
- }
- if(updateWiki){
- htmlwiki = "https://en.wikipedia.org/w/index.php?title=Template:2019%E2%80%9320_coronavirus_pandemic_data/United_States_medical_cases_chart&action=edit"
- wiki0 = getURL(htmlwiki)
- wiki = strsplit(wiki0, "Medical cases chart/Row", fixed = T)[[1]]
- wiki = wiki[-1]
- wiki = wiki[1:which(lapply(wiki, nchar) > 1000)[1]]
- wiki[length(wiki)] = strsplit(tail(wiki, 1), "\n|caption='''Cases:'''", fixed = T)[[1]][1]
- wiki = wiki[grep("2020", wiki)]
- wiki = sapply(wiki, function(x) strsplit(x, "+", fixed = T)[[1]][1], USE.NAMES = F)
- wiki = sapply(wiki, function(x) strsplit(x, "=", fixed = T)[[1]][1], USE.NAMES = F)
- wiki = lapply(wiki, function(x) strsplit(x, "|", fixed = T)[[1]][c(2, 3, 5)])
- wiki = as.data.frame(do.call(rbind, wiki), stringsAsFactors = F)
- colnames(wiki) = c("Date", "Deaths", "Cases")
- wiki$Deaths[wiki$Deaths == ""] = 0
- wiki$Date = as.Date(wiki$Date, format = "%Y-%m-%d")
- wiki$Deaths = as.numeric(wiki$Deaths)
- wiki$Cases = as.numeric(wiki$Cases)
- write.csv(wiki, paths$wiki, row.names = F)
- }
- dat = read.csv(paths$dat, stringsAsFactors = F)
- dat$Total = dat$CDC + dat$UPH
- dat$cTot = cumsum(dat$Total)
- dat = dat[, c(1:3, 5:6, 4)]
- dat$Date = paste0(dat$Date, "/2020")
- dat$Date = as.Date(dat$Date, format = "%m/%d/%Y")
- dat$Incomplete[55] = 0
- dat = dat[!dat$Incomplete, ]
- wiki = read.csv(paths$wiki, stringsAsFactors = F)
- wiki$Date = as.Date(wiki$Date, format = "%Y-%m-%d")
- dat = merge(dat, wiki, by = "Date", all = T)
- dat$Deaths[1:3] = dat$Cases[1:3] = 0
- idx_na = which(is.na(dat$Deaths))
- idx_nna = which(!is.na(dat$Deaths))
- for(i in 1:length(idx_na)){
- newvals = dat[idx_nna[max(which(idx_nna < idx_na[i]))], c("Cases", "Deaths")]
- dat[idx_na[i], c("Cases", "Deaths")] = newvals
- }
- dat = dat[dat$Incomplete == 0, ]
- dat = dat[!is.na(dat$Date), ]
- par(mfrow = c(2, 1))
- plot(dat$Date, dat$cTot,
- xlab = "",
- ylab = "Number",
- type = "b",
- col = "Red",
- log = "y")
- lines(dat$Date, dat$Cases, type = "b", col = "Blue")
- lines(dat$Date, dat$Deaths, type = "b", col = "Black")
- abline(h = axTicks(2), v = axTicks.Date(1, dat$Date),
- lty = 3, col = "Light Grey")
- legend("topleft", legend = c("Tests", "Cases", "Deaths"),
- lwd = 2, bty = "n", col = c("Red", "Blue", "Black"))
- plot(dat$cTot, dat$Cases,
- xlab = "# Tests",
- ylab = "# Cases",
- panel.first = grid())
- lines(dat$cTot, (.00155*dat$cTot)^2)
- legend("topleft", legend = "(.00155*nTests)^2", bty = "n", lwd = 2, col = "Black")
- if(FALSE){
- res = NULL
- nTest = seq(10, 1000, by = 10)
- props = seq(.0001, .1, length = length(nTest))
- for(i in 1:length(nTest)){
- p = props[i]
- q = 1-p
- n = sum(sample(0:1, nTest[i], replace = T, prob = c(q, p)))
- res = rbind(res, c(nTest[i], n))
- }
- par(mfrow = c(2, 2))
- plot(res[,1], res[,2], xlab = "# Tests", ylab = "# Cases")
- plot(nTest, xlab = "Step", ylab = "# Tests")
- plot(props, xlab = "Step", ylab = "Proportion Positive")
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement