Demographic vs Housing-to-Income ratio

library(tidycensus)
library(tidyverse)
library(cowplot)
library(viridis)
library(scales)
library(sf)
library(tigris)
library(rgeos)
library(geosphere)
library(gridExtra)

### Request your Census API key here:
# https://api.census.gov/data/key_signup.html
#
api_key_census <- "INSERT_YOUR_CENSUS_API_KEY_HERE"
census_api_key(api_key_census, install = TRUE)

### How to search Census for series (Painful!!!)
# vars <- load_variables(2018, "acs5", cache = TRUE)
# search <- vars %>% filter(grepl("TOTAL POPULATION", concept))

#1 B02001_001 Estimate!!Total                                                                                  RACE
#2 B02001_002 Estimate!!Total!!White alone                                                                     RACE
#3 B02001_003 Estimate!!Total!!Black or African American alone                                                 RACE
#4 B02001_004 Estimate!!Total!!American Indian and Alaska Native alone                                         RACE
#5 B02001_005 Estimate!!Total!!Asian alone                                                                     RACE
#6 B02001_006 Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone                                RACE
#7 B02001_007 Estimate!!Total!!Some other race alone                                                           RACE
#8 B02001_008 Estimate!!Total!!Two or more races                                                               RACE
#9 B02001_009 Estimate!!Total!!Two or more races!!Two races including Some other race                          RACE
#10 B02001_010 Estimate!!Total!!Two or more races!!Two races excluding Some other race, and three or more races RACE

### African-American
my_demographic <- c("B02001_003")
my_demographic_txt <- "African Americans"

### Hispanics
#my_demographic <- c("B03002_012")
#my_demographic_txt <- "Hispanics"

### Non-Hispanic whites
#my_demographic <- c("B03002_003")
#my_demographic_txt <- "Non-Hispanics Whites"

### Native American
#my_demographic <- c("B02001_004")
#my_demographic_txt <- "Native American"

### Asian
#my_demographic <- c("B02001_005")
#my_demographic_txt <- "Asian"

### FIPS code for Syracuse, NY
my_states <- c("36")
my_counties <- c("067")

### Pull tract area so we can compute population density
area_tract <-
  tracts(year = 2018,
         state = my_states,
         cb = TRUE,
         class = "sf",
         progress_bar = FALSE) %>%
  mutate(area = ALAND / 2589988) %>%
  select(GEOID, STATEFP, COUNTYFP, area) %>%
  st_drop_geometry() %>%
  filter(COUNTYFP %in% my_counties) %>%
  select(GEOID, area)

### Pull the demographic population of your chosen subset
subset_pop <-
  get_acs(geography   = "tract",
          variables   = my_demographic,
          state       = my_states,
          county      = my_counties,
          year        = 2018,
          geometry    = TRUE,
          cache_table = TRUE) %>%
  select(GEOID, estimate) %>%
  rename(subset_pop = estimate)

### Pull the total population
total_pop <-
  get_acs(geography   = "tract",
          variables   = c("B02001_001"),
          state       = my_states,
          county      = my_counties,
          year        = 2018,
          geometry    = FALSE,
          cache_table = TRUE) %>%
  select(GEOID, estimate) %>%
  rename(total_pop = estimate) %>%
  left_join(area_tract, by = "GEOID") %>%
  mutate(pop_density = total_pop / area)

### Pull median housing costs per month
housing_costs <-
  get_acs(geography   = "tract",
          variables   = c("B25105_001"),
          state       = my_states,
          county      = my_counties,
          year        = 2018,
          geometry    = TRUE,
          cache_table = TRUE) %>%
  select(GEOID, estimate) %>%
  rename(housing_costs = estimate)

### Pull median income
median_income <-
  get_acs(geography   = "tract",
          variables   = c("B19013_001"),
          state       = my_states,
          county      = my_counties,
          year        = 2018,
          geometry    = FALSE,
          cache_table = TRUE) %>%
  select(GEOID, estimate) %>%
  rename(income = estimate)

### Determine distance of tract from center of city, to see if housing costs depends on distance to center.
### Lat/lon are for Nashville TN.
center_lat <-  36.16587
center_lon <- -86.78434
tract_centers <- housing_costs$geometry %>% as_Spatial() %>% gCentroid(byid = TRUE)
dst <- tibble(tract_centers$x, tract_centers$y) %>% rename(lon = "tract_centers$x", lat = "tract_centers$y")
ctr <- c(center_lon, center_lat)
dist <- distVincentyEllipsoid(ctr, dst, a=6378137, b=6356752.3142, f=1/298.257223563) / 1609.34
housing_costs$distance_to_center <- dist

map_housing_to_income <-
  housing_costs %>%
  left_join(median_income, by = "GEOID") %>%
  mutate(housing_per = (12 * housing_costs) / income) #%>%
  #mutate(housing_per = ifelse(housing_per < 0.28, NA, housing_per))

map_subset_per_pop <-
  subset_pop %>%
  left_join(total_pop, by = "GEOID") %>%
  mutate(subset_per = subset_pop / total_pop)

data_combined <-
  (subset_pop %>% st_drop_geometry()) %>%
  left_join(total_pop, by = "GEOID") %>%
  left_join((housing_costs %>% st_drop_geometry()), by = "GEOID") %>%
  left_join(median_income, by = "GEOID") %>%
  mutate(housing_per = (12 * housing_costs) / income) %>%
  mutate(subset_per = subset_pop / total_pop) %>%
  as_tibble()

areawater <-
  area_water(state = my_states,
             county = my_counties,
             class = "sf")

linearwater <-
  linear_water(state = my_states,
               county = my_counties,
               class = "sf")

roads <-
  roads(state = my_states,
        county = my_counties,
        class = "sf")

interstates <- roads %>% filter(grepl("^I-|State Rte 840|Winfield", FULLNAME))
hwy         <- roads %>% filter(RTTYP %in% c("C", "S", "U")) %>% filter(!grepl("^Old", FULLNAME))
rivers      <- linearwater %>% filter(grepl(" Riv$", FULLNAME))
lakes       <- areawater

color_water      <- "steelblue2"
color_interstate <- "darkred"

### Use this if you want to plot population density on an absolute scale
graph_scale <-
  scale_fill_viridis(direction = -1,
                     option    = "inferno",
                     labels    = percent_format(accuracy = 0.1))

### Graph the counties
p_subset_per_pop <-
  ggplot() +
  theme_void() +
  theme(
    legend.title    = element_blank(),
    legend.position = "right",
    plot.title      = element_text(hjust = 0.5),
  ) +
  labs(title = paste(my_demographic_txt, " as % of Population\n[ ",
                     my_demographic, " / B02001_001 ]", sep = "")) +
  geom_sf(data = map_subset_per_pop, size = 0.05, alpha = 0.5,
          aes(fill = subset_per)) +

  geom_sf(data = areawater, size = 0.2, fill = color_water, color = color_water) +
  geom_sf(data = rivers, size = 0.2, fill = color_water, color = color_water) +
  geom_sf(data = interstates, size = 0.7, color = color_interstate, alpha = 1.0) +
  #geom_sf(data = hwy, size = 0.2, color = color_interstate, alpha = 1.0) +

  graph_scale
print(p_subset_per_pop)

### Graph the counties
p_housing_to_income <-
  ggplot() +
  theme_void() +
  theme(
    legend.title    = element_blank(),
    legend.position = "right",
    plot.title      = element_text(hjust = 0.5),
  ) +
  labs(title = "Median Yearly Housing Costs as a percent of median Houshold Income exceeds 28%\n(12 * [B25105_001] / [B19013_001] > 0.28)") +
  geom_sf(data = map_housing_to_income, size = 0.05, alpha = 0.5,
          aes(fill = housing_per)) +

  geom_sf(data = lakes, size = 0.2, fill = color_water, color = color_water) +
  geom_sf(data = rivers, size = 0.2, fill = color_water, color = color_water) +
  geom_sf(data = interstates, size = 0.7, color = color_interstate, alpha = 1.0) +
  #geom_sf(data = hwy, size = 0.2, color = color_interstate, alpha = 1.0) +

  graph_scale

fit <- lm(housing_per ~ subset_per * pop_density * income * distance_to_center, data = data_combined)
predict_fit <- 0.282728 -0.005900 * data_combined$distance_to_center

p_fit <-
  ggplot(data = data_combined, aes(y = housing_per, x = subset_per)) +
  theme_classic() +
  geom_point(shape = 19, size = 0.5, color = "black") +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_hline(yintercept = 0.28, alpha = 0.5, linetype = "dotted") +
  labs(
    x = paste(my_demographic_txt, " % of Population", sep = ""),
    y = "Median Housing to Income Ratio") +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))

plot_list <- list(p_subset_per_pop, p_housing_to_income, p_fit)

layout_list <- rbind(
  c(1, 2),
  c(1, 2),
  c(1, 2),
  c(3, 3),
  c(3, 3)
)

charts <-
  grid.arrange(grobs         = plot_list,
               layout_matrix = layout_list,
               ncols = 2)