Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Updated 2020-March-29
- #### DESCRIPTION ####
- # 1. Downloads source data: COVID-19 case data (NY Times), state and county shapefiles and county pop. (Census Bureau)
- # 2. Merges data sets and does some data cleaning
- # 3. Generates and saves maps
- #### PREAMBLE ####
- rm(list = ls()) # Clear workspace
- pl = c('data.table', 'sf', 'ggplot2', 'ggspatial') # R packages to load
- lapply(pl, require, character.only = TRUE) # Load R packages
- mapsdir = "~/Desktop/COVID/maps" ; setwd(mapsdir) # Directory to save data and maps ** SET THIS **
- #### 1. DOWNLOAD AND READ IN SOURCE DATA ####
- # NY Times: COVID-19 case data
- # Website: https://www.nytimes.com/article/coronavirus-county-data-us.html
- # Github: https://github.com/nytimes/covid-19-data/blob/master/us-counties.csv
- nytimes.covid19.url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
- nytimes.covid19.fn = "nytimes-covid19-us-counties.csv"
- if(!file.exists(nytimes.covid19.fn)) download.file(nytimes.covid19.url, nytimes.covid19.fn, method = 'curl')
- covid19.cases <- fread(nytimes.covid19.fn)
- # Census Bureau: county population estimates
- # Website: https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-total.html
- census.population.url = "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv"
- census.population.fn = "co-est2019-alldata.csv"
- if(!file.exists(census.population.fn)) download.file(census.population.url, census.population.fn, method = 'curl')
- census.pop <- fread(census.population.fn)
- # Census Bureau: state shapefiles
- # Website: https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.2010.html
- state.shapfiles.url = "https://www2.census.gov/geo/tiger/GENZ2010/gz_2010_us_040_00_5m.zip"
- state.shapfiles.fn = "gz_2010_us_040_00_5m.zip"
- if(!file.exists(state.shapfiles.fn)) download.file(state.shapfiles.url, state.shapfiles.fn, method = 'curl')
- unzip(state.shapfiles.fn)
- state.sf <- st_read("gz_2010_us_040_00_5m.shp")
- # Census Bureau: county shapefiles
- # Website: https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.2010.html
- county.shapfiles.url = "https://www2.census.gov/geo/tiger/GENZ2010/gz_2010_us_050_00_5m.zip"
- county.shapfiles.fn = "gz_2010_us_050_00_5m.zip"
- if(!file.exists(county.shapfiles.fn)) download.file(county.shapfiles.url, county.shapfiles.fn, method = 'curl')
- unzip(county.shapfiles.fn)
- county.sf <- st_read("gz_2010_us_050_00_5m.shp")
- # Clean up workspace and clean up zip-extracted shapefile data
- rm(nytimes.covid19.url, nytimes.covid19.fn, census.population.url, census.population.fn,
- state.shapfiles.url, state.shapfiles.fn, county.shapfiles.url, county.shapfiles.fn)
- lapply(c("gz_2010_us_040_00_5m.dbf", "gz_2010_us_040_00_5m.prj", "gz_2010_us_040_00_5m.shp",
- "gz_2010_us_040_00_5m.shx", "gz_2010_us_040_00_5m.xml", "gz_2010_us_050_00_5m.dbf",
- "gz_2010_us_050_00_5m.prj", "gz_2010_us_050_00_5m.shp", "gz_2010_us_050_00_5m.shx",
- "gz_2010_us_050_00_5m.xml"), unlink)
- #### 2. MERGE AND CLEAN DATA ####
- # Keep only continental U.S. for maps (National total includes full U.S.)
- state.sf <- subset(state.sf, STATE != '02' & STATE != '11' & STATE != '15' & STATE != '72')[1]
- county.sf <- subset(county.sf, STATE != '02' & STATE != '11' & STATE != '15' & STATE != '72')[1]
- # Add state-county standardized FIPS code to COVID data (note that some fips are 'unknown' in the NYT data)
- covid19.cases[is.na(fips) & county == 'New York City', fips := 36061]
- covid19.cases[is.na(fips) & county == 'Doña Ana', fips := 35013]
- covid19.cases[is.na(fips) & county == 'Kansas City', fips := 29095]
- covid19.cases[!is.na(fips), scfips := sprintf('%05d', fips)]
- # Form data.table with a list of available dates in COVID case data and national total cases as of each date
- covid19.cases[, date := as.Date(date)]
- covid.dates <- covid19.cases[, .(national_cases = sum(cases)), by = date]
- setkey(covid.dates, date)
- # Form base set of continental U.S. counties, with population, to be used in map panel
- county.pop <- census.pop[SUMLEV == 50, .(scfips = paste0(sprintf('%02d', STATE), sprintf('%03d', COUNTY)),
- state = STATE, state_name = STNAME, county_name = CTYNAME,
- county_pop2019 = as.numeric(POPESTIMATE2019))]
- county.pop[scfips == '46102', scfips := '46113'] # County name and FIPS changed since 2010 shapefiles
- county.pop <- county.pop[state != 2 & state != 11 & state != 15] # Drop Alaska, Hawaii, DC
- # Add county GEO_ID identifiers to enable merge to shapefiles
- county.geoid <- data.table(GEO_ID = as.character(county.sf$GEO_ID))
- county.geoid[, scfips := transpose(strsplit(GEO_ID, 'US'))[2]]
- setkey(county.geoid, scfips) ; setkey(county.pop, scfips)
- county.pop <- merge(county.pop, county.geoid, all.x = TRUE)
- # Form a balanced panel of county-X-dates for mapping
- county.pop[, temp_m := 1] ; covid.dates[, temp_m := 1]
- setkey(county.pop, temp_m) ; setkey(covid.dates, temp_m)
- map_panel <- merge(county.pop, covid.dates[, .(date, temp_m)], allow.cartesian = TRUE)
- map_panel$temp_m <- NULL ; covid.dates$temp_m <- NULL
- # Merge COVID county-date case counts to the map panel
- temp_covid19 <- covid19.cases[!is.na(scfips), .(cases = sum(cases), deaths = sum(deaths)), by = .(scfips, date)]
- setkey(map_panel, scfips, date) ; setkey(temp_covid19, scfips, date)
- map_panel <- merge(map_panel, temp_covid19, all.x = T)
- map_panel[is.na(cases), cases := 0]
- map_panel[is.na(deaths), deaths := 0]
- # Determine county-level cases per 100k pop. by available date
- map_panel[, cases_per_100k := cases / county_pop2019 * 100000]
- # Define the log-scale bins for mapping
- map_panel[cases_per_100k == 0, bin_cases := 'B1']
- map_panel[cases_per_100k > 0 & cases_per_100k <= 10, bin_cases := 'B2']
- map_panel[cases_per_100k > 10 & cases_per_100k <= 100, bin_cases := 'B3']
- map_panel[cases_per_100k > 100 & cases_per_100k <= 1000, bin_cases := 'B4']
- map_panel[cases_per_100k > 1000, bin_cases := 'B5']
- table(map_panel$bin_cases, useNA = 'always')
- # Clean up data workspace some
- rm(census.pop, county.geoid, county.pop, covid19.cases, temp_covid19)
- #### 3. GENERATE AND SAVE MAPS ####
- # Loop through dates and generate/save map for each
- lapply(1:length(covid.dates$date), function(d) {
- annotation_text <- paste0('COVID-19 cases as of\n',
- format(as.Date(covid.dates$date[d]), "%B %d, %Y"),
- '\nNational total: ',
- ifelse(covid.dates$national_cases[d] <= 999,
- formatC(covid.dates$national_cases[d]),
- formatC(covid.dates$national_cases[d], big.mark = ',')))
- daily_map_data <- map_panel[date == covid.dates$date[d], .(GEO_ID, date, bin_cases)]
- daily_map_data <- merge(daily_map_data, county.sf, by = 'GEO_ID')
- ggplot() +
- geom_sf(data = daily_map_data, aes(fill = bin_cases, color = bin_cases), size = 0.1) +
- geom_sf(data = state.sf, color = 'black', fill = NA, size = 0.1) +
- coord_sf() + theme_bw() + xlab('') + ylab('') +
- theme(panel.grid.major = element_line(color = 'white'), panel.grid.minor = element_blank(), panel.border = element_blank(),
- panel.background = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
- axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.line = element_blank(),
- legend.background = element_rect(color = 'black', fill = 'white'), legend.position = c(0.89, 0.19)) +
- annotate("label", x = -114, y = 26, label = annotation_text) +
- scale_fill_manual(name = 'Cases per 100,000\ncounty population', breaks = c('B1', 'B2', 'B3', 'B4', 'B5'), limits = c('B1', 'B2', 'B3', 'B4', 'B5'),
- values = c("#FEF0D9", "#FDCC8A", "#FC8D59", "#E34A33", "#B30000"),
- labels = c('None', '0 - 10', '10 - 100', '100 - 1000', 'More than 1000')) +
- scale_color_manual(name = 'Cases per 100,000\ncounty population', breaks = c('B1', 'B2', 'B3', 'B4', 'B5'), limits = c('B1', 'B2', 'B3', 'B4', 'B5'),
- values = c("#FEF0D9", "#FDCC8A", "#FC8D59", "#E34A33", "#B30000"),
- labels = c('None', '0 - 10', '10 - 100', '100 - 1000', 'More than 1000'))
- ggsave(paste0("Map_cases_per_capita_", covid.dates$date[d], ".png"), width = 8, height = 5, units = 'in', dpi = 100) # Low-res PNG for animation GIF
- })
- ## Then can convert the saved .png maps to animated .gif
- ## To process saved maps using a linux system command, run something like:
- ## system('convert -delay 33 -loop 0 Map_cases*.png COVID_map_cases_animation.gif')
Advertisement
Add Comment
Please, Sign In to add comment