Advertisement
MetricT

New Cases/Deaths binned by political leaning quintile

Sep 24th, 2020
844
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 16.37 KB | None | 0 0
  1. ### Idea from:  https://www.reddit.com/r/dataisbeautiful/comments/i1sedr/oc_united_states_covid19_deaths_curve_with/
  2. ### Implementation by /u/MetricT
  3.  
  4. library(tidyverse)
  5. library(tidycensus)
  6. library(tigris)
  7. library(forecast)
  8. library(naniar)
  9. library(useful)
  10. library(TTR)
  11. library(politicaldata)
  12.  
  13. ### Download this Github repo and add the path to the spreadsheet
  14. ### https://github.com/nytimes/covid-19-data
  15.  
  16. # Load the spreadsheet (which contains total cases/deaths) and compute
  17. # new cases/deaths
  18. spreadsheet <-
  19.   "../Datasets/nytimes/covid-19-data/us-counties.csv" %>%
  20.   read_csv(col_names = TRUE, col_types = "Dcccdd") %>%
  21.   mutate(state_fips = substr(fips, 1, 2)) %>%
  22.   filter(!state_fips %in% c(69, 72, 78)) %>%
  23.   filter(!is.na(state_fips)) %>%
  24.   select(date, fips, cases, deaths) %>%
  25.   arrange(date) %>%
  26.   pivot_wider(id_cols = "date",
  27.               names_from = "fips",
  28.               values_from = c("cases", "deaths")) %>%
  29.   mutate_if(is.numeric, ~ (replace_na(., 0))) %>%
  30.   mutate(across(-date, ~ . - lag(.))) %>%
  31.   mutate_if(is.numeric, ~ (replace_na(., 0))) %>%
  32.   mutate("cases_53061" = if_else(date == as.Date("2020-01-21"), 1, `cases_53061`)) %>%
  33.   pivot_longer(-date, names_to = c("type", "fips"), names_sep = "_", values_to = "values") %>%
  34.   pivot_wider(id_cols = c("date", "fips"), names_from = "type", values_from = "values")
  35.  
  36. current_date <- spreadsheet %>% arrange(date) %>% tail(n = 1) %>% pull("date")
  37.  
  38.  
  39. ### Download this Github repo and add the path to the spreadsheet
  40. ### https://github.com/tonmcg/US_County_Level_Election_Results_08-16
  41.  
  42. pres_2016 <-
  43.   "../Datasets/US_County_Level_Election_Results_08-16/2016_US_County_Level_Presidential_Results.csv" %>%
  44.   read_csv(col_names = TRUE, col_types = "ddddddncccc") %>%
  45.   rename(index = X1,
  46.          fips = combined_fips,
  47.          dem = per_dem,
  48.          rep = per_gop) %>%
  49.   mutate(fips = str_pad(fips, 5, pad = "0"),
  50.          year = 2016,
  51.          other = 1 - dem - rep) %>%
  52.   mutate(fips = if_else(fips == "46113", "46102", fips)) %>%
  53.   mutate(fips = if_else(fips == "02270", "02158", fips)) %>%
  54.   rowwise() %>%
  55.   mutate(winner = max(dem, rep, other)) %>%
  56.   ungroup() %>%
  57.   mutate(winner = case_when(
  58.          winner == dem ~ "Democrat",
  59.          winner == rep ~ "Republican",
  60.          winner == other ~ "Other")) %>%
  61.   mutate(margin = rep - dem) %>%
  62.   select(year, total_votes, dem, rep, other, winner, margin, fips)
  63.  
  64. population <-
  65.   get_acs(geography   = "county",
  66.           variables   = c("B01003_001"),
  67.           year        = 2016,
  68.           geometry    = FALSE,
  69.           cache_table = TRUE) %>%
  70.   rename(population = estimate) %>%
  71.   select(GEOID, population)
  72.  
  73. pres_2016 <-
  74.   pres_2016 %>%
  75.   left_join(population, by = c("fips"= "GEOID")) %>%
  76.   mutate(voter_turnout = total_votes / population)
  77.  
  78. pres_2016 <-
  79.   pres_2016 %>%
  80.   arrange(desc(margin)) %>%
  81.   mutate(cum_population = cumsum(population),
  82.          percent = population / 322087547,
  83.          cum_percent = cum_population / 322087547) %>%
  84.   mutate(subtype = case_when(
  85.     cum_percent < 0.2 ~ "Very\nGOP",
  86.     cum_percent < 0.4 ~ "GOP",
  87.     cum_percent < 0.6 ~ "Swing\nCounties",
  88.     cum_percent < 0.8 ~ "Dem",
  89.     cum_percent <= 1  ~ "Very\nDem"))
  90.  
  91. ################################################################################
  92. ### Take our spreadsheet data, reformat it a bit, and compute a 7-day SMA
  93. ################################################################################
  94. data <-
  95.   spreadsheet %>%
  96.   left_join(pres_2016, by = "fips") %>%
  97.   group_by(date, subtype) %>%
  98.   summarize(cases = sum(cases, na.rm = TRUE),
  99.             deaths = sum(deaths, na.rm = TRUE)) %>%
  100.   ungroup() %>%
  101.   pivot_wider(id_cols = c("date"), names_from = "subtype", values_from = c("cases", "deaths")) %>%
  102.   ### Take a 7-day SMA of values to smooth them out a bit
  103.   mutate(across(-date,
  104.                 .fns = list(sma   = ~ SMA(., n = 7)),
  105.                 .names = "{fn}_{col}"
  106.   )) %>%
  107.   select("date", starts_with("sma_")) %>%
  108.   rename_at(vars(starts_with("sma_")), ~ str_replace(., "sma_", "")) %>%
  109.   filter(date >= as.Date("2020-03-01")) %>%
  110.   pivot_longer(-date, names_to = c("cases_deaths", "subtype"), names_sep = "_", values_to = "values") %>%
  111.   pivot_wider(id_cols = c("date", "subtype"), names_from = "cases_deaths", values_from = "values") %>%
  112.   mutate_if(is.numeric, ~ (replace_na(., 0)))
  113.  
  114. ################################################################################
  115. ### Draw the inset "Regional Curves" graph
  116. ################################################################################
  117. data$subtype <- factor(data$subtype, levels = c("Very\nGOP", "GOP", "Swing\nCounties", "Dem", "Very\nDem"))
  118.  
  119. g_regional_curves_cases <-
  120.   ggplot(data = data, aes(x = as.Date(date), y = cases)) +
  121.   theme_bw() +
  122.   theme(legend.position = "none") +
  123.   theme(strip.background = element_blank()) +
  124.  
  125.   geom_line(color = "black", size = 1) +
  126.   geom_area(aes(fill = as.factor(subtype))) +
  127.  
  128.   labs(title = "Cases by Political Lean", x = "", y = "") +
  129.   scale_y_continuous(labels = scales::comma) +
  130.  
  131.   scale_fill_manual(values = c("Very\nDem"       = "#0015BC",
  132.                                "Dem"             = "#8bb1ff",
  133.                                "Swing\nCounties" = "#FFFFFF",
  134.                                "GOP"             = "#ff8b98",
  135.                                "Very\nGOP"       = "#E9141D")) +
  136.    
  137.   facet_wrap(~ subtype, nrow = 5, ncol = 1, strip.position = "right")
  138. print(g_regional_curves_cases)
  139.  
  140. g_regional_curves_deaths <-
  141.   ggplot(data = data, aes(x = as.Date(date), y = deaths)) +
  142.   theme_bw() +
  143.   theme(legend.position = "none") +
  144.   theme(strip.background = element_blank()) +
  145.  
  146.   geom_line(color = "black", size = 1) +
  147.   geom_area(aes(fill = as.factor(subtype))) +
  148.  
  149.   labs(title = "Deaths by Political Lean", x = "", y = "") +
  150.   scale_y_continuous(labels = scales::comma) +
  151.  
  152.   scale_fill_manual(values = c("Very\nDem"       = "#0015BC",
  153.                                "Dem"             = "#8bb1ff",
  154.                                "Swing\nCounties" = "#FFFFFF",
  155.                                "GOP"             = "#ff8b98",
  156.                                "Very\nGOP"       = "#E9141D")) +
  157.  
  158.   facet_wrap(~ subtype, nrow = 5, ncol = 1, strip.position = "right")
  159. print(g_regional_curves_deaths)
  160.  
  161. ################################################################################
  162. ### Draw the inset USA map
  163. ################################################################################
  164. county_map <-
  165.   county_laea %>%
  166.   rename(fips = GEOID) %>%
  167.   mutate(fips = if_else(fips == "46113", "46102", fips)) %>%
  168.   mutate(fips = if_else(fips == "02270", "02158", fips)) %>%
  169.   left_join(pres_2016, by = "fips")
  170.  
  171. g_map_usa_regions <-
  172.   ggplot(data = county_map) +
  173.   theme_void() +
  174.   theme(legend.position = "none") +
  175.   geom_sf(aes(fill = subtype), color = "black", size = 0) +
  176.  
  177.   scale_fill_manual(values = c("Very\nDem"       = "#0015BC",
  178.                                "Dem"             = "#8bb1ff",
  179.                                "Swing\nCounties" = NA,
  180.                                "GOP"             = "#ff8b98",
  181.                                "Very\nGOP"       = "#E9141D")) +
  182.   geom_sf(data = state_laea, size = 0.1, color = "black", fill = NA)
  183. print(g_map_usa_regions)
  184.  
  185. ################################################################################
  186. ### Draw the main graph (stacked line graph of cases)
  187. ################################################################################
  188. caption <-
  189.   paste("Data Source:  The New York Times\n",
  190.         "Current as of ",
  191.         data %>% tail(n = 1) %>% pull("date") %>% format("%B %d, %Y"),
  192.         sep = "")
  193.  
  194. total_cases  <- spreadsheet %>% filter(date == (spreadsheet %>% tail(n = 1) %>% pull("date"))) %>% pull("cases") %>% sum()
  195. total_deaths <- spreadsheet %>% filter(date == (spreadsheet %>% tail(n = 1) %>% pull("date"))) %>% pull("deaths") %>% sum()
  196.  
  197. cases_growing_at <-
  198.   spreadsheet %>%
  199.   select(date, cases) %>%
  200.   group_by(date) %>%
  201.   summarize(cases = sum(cases)) %>%
  202.   tail(n = 7) %>%
  203.   pull(cases) %>%
  204.   mean() %>%
  205.   round() %>%
  206.   format(big.mark = ",", scientific = FALSE)
  207.  
  208. deaths_growing_at <-
  209.   spreadsheet %>%
  210.   select(date, deaths) %>%
  211.   group_by(date) %>%
  212.   summarize(deaths = sum(deaths)) %>%
  213.   tail(n = 7) %>%
  214.   pull(deaths) %>%
  215.   mean() %>%
  216.   round() %>%
  217.   format(big.mark = ",", scientific = FALSE)
  218.  
  219. cases_up_rate <-
  220.   spreadsheet %>%
  221.   select(date, cases) %>%
  222.   group_by(date) %>%
  223.   summarize(cases = sum(cases)) %>%
  224.   ungroup() %>%
  225.   mutate(new_cases_sma = SMA(cases, n = 7)) %>%
  226.   tail(n = 7) %>%
  227.   arrange(date) %>%
  228.   filter(row_number() %in% c(1, n())) %>%
  229.   pull(new_cases_sma)
  230.  
  231. deaths_up_rate <-
  232.   spreadsheet %>%
  233.   select(date, deaths) %>%
  234.   group_by(date) %>%
  235.   summarize(deaths = sum(deaths)) %>%
  236.   ungroup() %>%
  237.   mutate(new_deaths_sma = SMA(deaths, n = 7)) %>%
  238.   tail(n = 7) %>%
  239.   arrange(date) %>%
  240.   filter(row_number() %in% c(1, n())) %>%
  241.   pull(new_deaths_sma)
  242.  
  243. cases_up_rate <- round(100 * (cases_up_rate[2] - cases_up_rate[1]) / cases_up_rate[1], 2)
  244. cases_up_rate_txt <- "Up"
  245. if (cases_up_rate < 0) { cases_up_rate_txt <- "Down"}
  246.  
  247. deaths_up_rate <- round(100 * (deaths_up_rate[2] - deaths_up_rate[1]) / deaths_up_rate[1], 2)
  248. deaths_up_rate_txt <- "Up"
  249. if (deaths_up_rate < 0) { deaths_up_rate_txt <- "Down"}
  250.  
  251. subtitle_cases <-
  252.   paste(total_cases %>% format(big.mark = ",", scientific = FALSE),
  253.         " New Cases (",
  254.         round(100000 * total_cases / 327533795, 1),
  255.         " per 100,000 people)\n",
  256.         "Average ",cases_growing_at, " new cases/day last 7 days\n",
  257.         cases_up_rate_txt, " ", abs(cases_up_rate), "% from 7 days ago",
  258.         sep = "")
  259.  
  260. subtitle_deaths <-
  261.   paste(total_deaths %>% format(big.mark = ",", scientific = FALSE),
  262.         " New Deaths (",
  263.         round(100000 * total_deaths / 327533795, 1),
  264.         " per 100,000 people)\n",
  265.         "Average ",deaths_growing_at, " new deaths/day last 7 days\n",
  266.         deaths_up_rate_txt, " ", abs(deaths_up_rate), "% from 7 days ago",
  267.         sep = "")
  268.  
  269. g_cases_stacked <-
  270.   ggplot(data = data, aes(x = as.Date(date), y = cases, fill = subtype)) +
  271.   theme_linedraw() +
  272.   theme(legend.title = element_blank()) +
  273.   theme(legend.position = "none") +
  274.   geom_area(color="black", size = 0.4, alpha = 0.8) +
  275.   scale_fill_manual(values = c("Very\nDem"       = "#0015BC",
  276.                                "Dem"             = "#8bb1ff",
  277.                                "Swing\nCounties" = NA, #"goldenrod2",
  278.                                "GOP"             = "#ff8b98",
  279.                                "Very\nGOP"       = "#E9141D")) +
  280.   scale_y_continuous(labels = scales::comma) +
  281.   scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  282.   labs(title = "Daily COVID-19 Cases by winner of 2016 US Presidential Election binned by population quintile",
  283.        subtitle = subtitle_cases,
  284.        x = "Date",
  285.        y = "Daily Cases",
  286.        caption = caption)
  287. print(g_cases_stacked)
  288.  
  289.  
  290. g_deaths_stacked <-
  291.   ggplot(data = data, aes(x = as.Date(date), y = deaths, fill = subtype)) +
  292.   theme_linedraw() +
  293.   theme(legend.title = element_blank()) +
  294.   theme(legend.position = "none") +
  295.   geom_area(color="black", size = 0.4, alpha = 0.8) +
  296.   scale_fill_manual(values = c("Very\nDem"       = "#0015BC",
  297.                                "Dem"             = "#8bb1ff",
  298.                                "Swing\nCounties" = NA, #"goldenrod2",
  299.                                "GOP"             = "#ff8b98",
  300.                                "Very\nGOP"       = "#E9141D")) +
  301.   scale_y_continuous(labels = scales::comma) +
  302.   scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  303.   labs(title = "Daily COVID-19 Deaths by winner of 2016 US Presidential Election binned by population quintile",
  304.        subtitle = subtitle_deaths,
  305.        x = "Date",
  306.        y = "Daily Deaths",
  307.        caption = caption)
  308. print(g_deaths_stacked)
  309.  
  310.  
  311. ################################################################################
  312. ### Draw the inset stacked graph percent chart in the upper-right
  313. ################################################################################
  314.  
  315. per_data <-
  316.   data %>%
  317.   group_by(date, subtype) %>%
  318.   summarise(n_cases  = sum(cases),
  319.             n_deaths = sum(deaths)) %>%
  320.   mutate(per_cases  = n_cases  / sum(n_cases)) %>%
  321.   mutate(per_deaths = n_deaths / sum(n_deaths)) %>%
  322.   select(-n_cases, -n_deaths) %>%
  323.   filter(date >= as.Date("2020-03-16"))
  324.  
  325. g_cases_stacked_per <-
  326.   ggplot(data = per_data, aes(x = as.Date(date), y = per_cases, fill = subtype)) +
  327.   theme_linedraw() +
  328.   theme(legend.title = element_blank(),
  329.         legend.position = "none",
  330.         plot.background = element_rect(fill = "transparent",colour = NA)
  331.   ) +
  332.  
  333.   geom_area(color="black", size = 0.4, alpha = .8) +
  334.   scale_y_continuous(labels = scales::percent) +
  335.   geom_hline(yintercept = 0.5, linetype = "dashed") +
  336.   scale_fill_manual(values = c("Very\nDem"       = "#0015BC",
  337.                                "Dem"             = "#8bb1ff",
  338.                                "Swing\nCounties" = NA,
  339.                                "GOP"             = "#ff8b98",
  340.                                "Very\nGOP"       = "#E9141D")) +
  341.   labs(title = "Proportion of National Daily Cases",
  342.        #subtitle = "Dotted line represents fraction of voters in respective parties",
  343.        x = "", y = "")
  344. print(g_cases_stacked_per)
  345.  
  346.  
  347. g_deaths_stacked_per <-
  348.   ggplot(data = per_data, aes(x = as.Date(date), y = per_deaths, fill = subtype)) +
  349.   theme_linedraw() +
  350.   theme(legend.title = element_blank(),
  351.         legend.position = "none",
  352.         plot.background = element_rect(fill = "transparent",colour = NA)
  353.   ) +
  354.  
  355.   geom_area(color="black", size = 0.4, alpha = .8) +
  356.   scale_y_continuous(labels = scales::percent) +
  357.   geom_hline(yintercept = 0.5, linetype = "dashed") +
  358.  
  359.   scale_fill_manual(values = c("Very\nDem"       = "#0015BC",
  360.                                "Dem"             = "#8bb1ff",
  361.                                "Swing\nCounties" = NA,
  362.                                "GOP"             = "#ff8b98",
  363.                                "Very\nGOP"       = "#E9141D")) +
  364.   labs(title = "Proportion of National Daily Deaths",
  365.        #subtitle = "Dotted line represents fraction of voters in respective parties",
  366.        x = "", y = "")
  367. print(g_deaths_stacked_per)
  368.  
  369. ################################################################################
  370. ### Put it all together
  371. ################################################################################
  372.  
  373. g_final_cases <-
  374.   g_cases_stacked +
  375.   annotation_custom(ggplotGrob(g_cases_stacked_per),
  376.                     xmin = as.Date(data %>% tail(n = 1) %>% pull("date")) - 95 - 60,
  377.                     xmax = as.Date(data %>% tail(n = 1) %>% pull("date")) - 95,
  378.                     ymin = 67000,
  379.                     ymax = 38000) +
  380.   annotation_custom(ggplotGrob(g_map_usa_regions),
  381.                     xmin = as.Date(data %>% head(n = 1) %>% pull("date")) - 5,
  382.                     xmax = as.Date(data %>% head(n = 1) %>% pull("date")) - 5 + 45,
  383.                     ymax = 67000,
  384.                     ymin = 42000) +
  385.  
  386.   annotation_custom(ggplotGrob(g_regional_curves_cases),
  387.                     xmin = as.Date(data %>% head(n = 1) %>% pull("date")) - 10,
  388.                     xmax = as.Date(data %>% head(n = 1) %>% pull("date")) - 10 + 33,
  389.                     ymin = 7000,
  390.                     ymax = 40000)
  391. print(g_final_cases)
  392.  
  393. g_final_deaths <-
  394.   g_deaths_stacked +
  395.   annotation_custom(ggplotGrob(g_deaths_stacked_per),
  396.                     xmin = as.Date(data %>% tail(n = 1) %>% pull("date")) -60 - 60,
  397.                     xmax = as.Date(data %>% tail(n = 1) %>% pull("date")) -60,
  398.                     ymin = 1000,
  399.                     ymax = 1800) +
  400.   annotation_custom(ggplotGrob(g_map_usa_regions),
  401.                     xmin = as.Date(data %>% head(n = 1) %>% pull("date")) - 5,
  402.                     xmax = as.Date(data %>% head(n = 1) %>% pull("date")) - 5 + 45,
  403.                     ymin = 1100,
  404.                     ymax = 1800) +
  405.  
  406.   annotation_custom(ggplotGrob(g_regional_curves_deaths),
  407.                     xmin = as.Date(data %>% head(n = 1) %>% pull("date")) - 10,
  408.                     xmax = as.Date(data %>% head(n = 1) %>% pull("date")) - 10 + 33,
  409.                     ymin = 100,
  410.                     ymax = 1050)
  411. print(g_final_deaths)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement