Guest User

R-code COVID-19 US daily maps

a guest
Mar 29th, 2020
294
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 8.68 KB | None | 0 0
  1. # Updated 2020-March-29
  2.  
  3. #### DESCRIPTION ####
  4. # 1. Downloads source data: COVID-19 case data (NY Times), state and county shapefiles and county pop. (Census Bureau)
  5. # 2. Merges data sets and does some data cleaning
  6. # 3. Generates and saves maps
  7.  
  8. #### PREAMBLE ####
  9. rm(list = ls()) # Clear workspace
  10. pl = c('data.table', 'sf', 'ggplot2', 'ggspatial') # R packages to load
  11. lapply(pl, require, character.only = TRUE) # Load R packages
  12. mapsdir = "~/Desktop/COVID/maps" ; setwd(mapsdir) # Directory to save data and maps ** SET THIS **
  13.  
  14. #### 1. DOWNLOAD AND READ IN SOURCE DATA ####
  15.  
  16. # NY Times: COVID-19 case data
  17. #   Website: https://www.nytimes.com/article/coronavirus-county-data-us.html
  18. #   Github: https://github.com/nytimes/covid-19-data/blob/master/us-counties.csv
  19. nytimes.covid19.url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
  20. nytimes.covid19.fn = "nytimes-covid19-us-counties.csv"
  21. if(!file.exists(nytimes.covid19.fn)) download.file(nytimes.covid19.url, nytimes.covid19.fn, method = 'curl')
  22. covid19.cases <- fread(nytimes.covid19.fn)
  23.  
  24. # Census Bureau: county population estimates
  25. #   Website: https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-total.html
  26. census.population.url = "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv"
  27. census.population.fn = "co-est2019-alldata.csv"
  28. if(!file.exists(census.population.fn)) download.file(census.population.url, census.population.fn, method = 'curl')
  29. census.pop <- fread(census.population.fn)
  30.  
  31. # Census Bureau: state shapefiles
  32. #   Website: https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.2010.html
  33. state.shapfiles.url = "https://www2.census.gov/geo/tiger/GENZ2010/gz_2010_us_040_00_5m.zip"
  34. state.shapfiles.fn = "gz_2010_us_040_00_5m.zip"
  35. if(!file.exists(state.shapfiles.fn)) download.file(state.shapfiles.url, state.shapfiles.fn, method = 'curl')
  36. unzip(state.shapfiles.fn)
  37. state.sf <- st_read("gz_2010_us_040_00_5m.shp")
  38.  
  39. # Census Bureau: county shapefiles
  40. #   Website: https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.2010.html
  41. county.shapfiles.url = "https://www2.census.gov/geo/tiger/GENZ2010/gz_2010_us_050_00_5m.zip"
  42. county.shapfiles.fn = "gz_2010_us_050_00_5m.zip"
  43. if(!file.exists(county.shapfiles.fn)) download.file(county.shapfiles.url, county.shapfiles.fn, method = 'curl')
  44. unzip(county.shapfiles.fn)
  45. county.sf <- st_read("gz_2010_us_050_00_5m.shp")
  46.  
  47. # Clean up workspace and clean up zip-extracted shapefile data
  48. rm(nytimes.covid19.url, nytimes.covid19.fn, census.population.url, census.population.fn,
  49.    state.shapfiles.url, state.shapfiles.fn, county.shapfiles.url, county.shapfiles.fn)
  50. lapply(c("gz_2010_us_040_00_5m.dbf", "gz_2010_us_040_00_5m.prj", "gz_2010_us_040_00_5m.shp",
  51.          "gz_2010_us_040_00_5m.shx", "gz_2010_us_040_00_5m.xml", "gz_2010_us_050_00_5m.dbf",
  52.          "gz_2010_us_050_00_5m.prj", "gz_2010_us_050_00_5m.shp", "gz_2010_us_050_00_5m.shx",
  53.          "gz_2010_us_050_00_5m.xml"), unlink)
  54.  
  55. #### 2. MERGE AND CLEAN DATA ####
  56.  
  57. # Keep only continental U.S. for maps (National total includes full U.S.)
  58. state.sf <- subset(state.sf, STATE != '02' & STATE != '11' & STATE != '15' & STATE != '72')[1]
  59. county.sf <- subset(county.sf, STATE != '02' & STATE != '11' & STATE != '15' & STATE != '72')[1]
  60.  
  61. # Add state-county standardized FIPS code to COVID data (note that some fips are 'unknown' in the NYT data)
  62. covid19.cases[is.na(fips) & county == 'New York City', fips := 36061]
  63. covid19.cases[is.na(fips) & county == 'Doña Ana', fips := 35013]
  64. covid19.cases[is.na(fips) & county == 'Kansas City', fips := 29095]
  65. covid19.cases[!is.na(fips), scfips := sprintf('%05d', fips)]
  66.  
  67. # Form data.table with a list of available dates in COVID case data and national total cases as of each date
  68. covid19.cases[, date := as.Date(date)]
  69. covid.dates <- covid19.cases[, .(national_cases = sum(cases)), by = date]
  70. setkey(covid.dates, date)
  71.  
  72. # Form base set of continental U.S. counties, with population, to be used in map panel
  73. county.pop <- census.pop[SUMLEV == 50, .(scfips = paste0(sprintf('%02d', STATE), sprintf('%03d', COUNTY)),
  74.                                          state = STATE, state_name = STNAME, county_name = CTYNAME,
  75.                                          county_pop2019 = as.numeric(POPESTIMATE2019))]
  76. county.pop[scfips == '46102', scfips := '46113'] # County name and FIPS changed since 2010 shapefiles
  77. county.pop <- county.pop[state != 2 & state != 11 & state != 15] # Drop Alaska, Hawaii, DC
  78.  
  79. # Add county GEO_ID identifiers to enable merge to shapefiles
  80. county.geoid <- data.table(GEO_ID = as.character(county.sf$GEO_ID))
  81. county.geoid[, scfips := transpose(strsplit(GEO_ID, 'US'))[2]]
  82. setkey(county.geoid, scfips) ; setkey(county.pop, scfips)
  83. county.pop <- merge(county.pop, county.geoid, all.x = TRUE)
  84.  
  85. # Form a balanced panel of county-X-dates for mapping
  86. county.pop[, temp_m := 1] ; covid.dates[, temp_m := 1]
  87. setkey(county.pop, temp_m) ; setkey(covid.dates, temp_m)
  88. map_panel <- merge(county.pop, covid.dates[, .(date, temp_m)], allow.cartesian = TRUE)
  89. map_panel$temp_m <- NULL ; covid.dates$temp_m <- NULL
  90.  
  91. # Merge COVID county-date case counts to the map panel
  92. temp_covid19 <- covid19.cases[!is.na(scfips), .(cases = sum(cases), deaths = sum(deaths)), by = .(scfips, date)]
  93. setkey(map_panel, scfips, date) ; setkey(temp_covid19, scfips, date)
  94. map_panel <- merge(map_panel, temp_covid19, all.x = T)
  95. map_panel[is.na(cases), cases := 0]
  96. map_panel[is.na(deaths), deaths := 0]
  97.  
  98. # Determine county-level cases per 100k pop. by available date
  99. map_panel[, cases_per_100k := cases / county_pop2019 * 100000]
  100.  
  101. # Define the log-scale bins for mapping
  102. map_panel[cases_per_100k == 0, bin_cases := 'B1']
  103. map_panel[cases_per_100k > 0 & cases_per_100k <= 10, bin_cases := 'B2']
  104. map_panel[cases_per_100k > 10 & cases_per_100k <= 100, bin_cases := 'B3']
  105. map_panel[cases_per_100k > 100 & cases_per_100k <= 1000, bin_cases := 'B4']
  106. map_panel[cases_per_100k > 1000, bin_cases := 'B5']
  107. table(map_panel$bin_cases, useNA = 'always')
  108.  
  109. # Clean up data workspace some
  110. rm(census.pop, county.geoid, county.pop, covid19.cases, temp_covid19)
  111.  
  112. #### 3. GENERATE AND SAVE MAPS ####
  113.  
  114. # Loop through dates and generate/save map for each
  115.  
  116. lapply(1:length(covid.dates$date), function(d) {
  117.  
  118.   annotation_text <- paste0('COVID-19 cases as of\n',
  119.                             format(as.Date(covid.dates$date[d]), "%B %d, %Y"),
  120.                             '\nNational total: ',
  121.                             ifelse(covid.dates$national_cases[d] <= 999,
  122.                                    formatC(covid.dates$national_cases[d]),
  123.                                    formatC(covid.dates$national_cases[d], big.mark = ',')))
  124.  
  125.   daily_map_data <- map_panel[date == covid.dates$date[d], .(GEO_ID, date, bin_cases)]
  126.   daily_map_data <- merge(daily_map_data, county.sf, by = 'GEO_ID')
  127.  
  128.   ggplot() +
  129.     geom_sf(data = daily_map_data, aes(fill = bin_cases, color = bin_cases), size = 0.1) +
  130.     geom_sf(data = state.sf, color = 'black', fill = NA, size = 0.1) +
  131.     coord_sf() + theme_bw() + xlab('') + ylab('') +
  132.     theme(panel.grid.major = element_line(color = 'white'), panel.grid.minor = element_blank(), panel.border = element_blank(),
  133.           panel.background = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
  134.           axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.line = element_blank(),
  135.           legend.background = element_rect(color = 'black', fill = 'white'), legend.position = c(0.89, 0.19)) +
  136.     annotate("label", x = -114, y = 26, label = annotation_text) +
  137.     scale_fill_manual(name = 'Cases per 100,000\ncounty population', breaks = c('B1', 'B2', 'B3', 'B4', 'B5'), limits = c('B1', 'B2', 'B3', 'B4', 'B5'),
  138.                       values = c("#FEF0D9", "#FDCC8A", "#FC8D59", "#E34A33", "#B30000"),
  139.                       labels = c('None', '0 - 10', '10 - 100', '100 - 1000', 'More than 1000')) +
  140.     scale_color_manual(name = 'Cases per 100,000\ncounty population', breaks = c('B1', 'B2', 'B3', 'B4', 'B5'), limits = c('B1', 'B2', 'B3', 'B4', 'B5'),
  141.                        values = c("#FEF0D9", "#FDCC8A", "#FC8D59", "#E34A33", "#B30000"),
  142.                        labels = c('None', '0 - 10', '10 - 100', '100 - 1000', 'More than 1000'))
  143.  
  144.   ggsave(paste0("Map_cases_per_capita_", covid.dates$date[d], ".png"), width = 8, height = 5, units = 'in', dpi = 100)  # Low-res PNG for animation GIF
  145.  
  146. })
  147.  
  148.  
  149. ## Then can convert the saved .png maps to animated .gif
  150. ## To process saved maps using a linux system command, run something like:
  151. ##   system('convert -delay 33 -loop 0 Map_cases*.png COVID_map_cases_animation.gif')
Advertisement
Add Comment
Please, Sign In to add comment