MetricT

Demographic vs Housing-to-Income ratio

Jun 16th, 2020
1,880
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. library(tidycensus)
  2. library(tidyverse)
  3. library(cowplot)
  4. library(viridis)
  5. library(scales)
  6. library(sf)
  7. library(tigris)
  8. library(rgeos)
  9. library(geosphere)
  10. library(gridExtra)
  11.  
  12. ### Request your Census API key here:
  13. # https://api.census.gov/data/key_signup.html
  14. #
  15. api_key_census <- "INSERT_YOUR_CENSUS_API_KEY_HERE"
  16. census_api_key(api_key_census, install = TRUE)
  17.  
  18. ### How to search Census for series (Painful!!!)
  19. # vars <- load_variables(2018, "acs5", cache = TRUE)
  20. # search <- vars %>% filter(grepl("TOTAL POPULATION", concept))
  21.  
  22. #1 B02001_001 Estimate!!Total                                                                                  RACE  
  23. #2 B02001_002 Estimate!!Total!!White alone                                                                     RACE  
  24. #3 B02001_003 Estimate!!Total!!Black or African American alone                                                 RACE  
  25. #4 B02001_004 Estimate!!Total!!American Indian and Alaska Native alone                                         RACE  
  26. #5 B02001_005 Estimate!!Total!!Asian alone                                                                     RACE  
  27. #6 B02001_006 Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone                                RACE  
  28. #7 B02001_007 Estimate!!Total!!Some other race alone                                                           RACE  
  29. #8 B02001_008 Estimate!!Total!!Two or more races                                                               RACE  
  30. #9 B02001_009 Estimate!!Total!!Two or more races!!Two races including Some other race                          RACE  
  31. #10 B02001_010 Estimate!!Total!!Two or more races!!Two races excluding Some other race, and three or more races RACE  
  32.  
  33. ### African-American
  34. my_demographic <- c("B02001_003")
  35. my_demographic_txt <- "African Americans"
  36.  
  37. ### Hispanics
  38. #my_demographic <- c("B03002_012")
  39. #my_demographic_txt <- "Hispanics"
  40.  
  41. ### Non-Hispanic whites
  42. #my_demographic <- c("B03002_003")
  43. #my_demographic_txt <- "Non-Hispanics Whites"
  44.  
  45. ### Native American
  46. #my_demographic <- c("B02001_004")
  47. #my_demographic_txt <- "Native American"
  48.  
  49. ### Asian
  50. #my_demographic <- c("B02001_005")
  51. #my_demographic_txt <- "Asian"
  52.  
  53. ### FIPS code for Syracuse, NY
  54. my_states <- c("36")
  55. my_counties <- c("067")
  56.  
  57. ### Pull tract area so we can compute population density
  58. area_tract <-
  59.   tracts(year = 2018,
  60.          state = my_states,
  61.          cb = TRUE,
  62.          class = "sf",
  63.          progress_bar = FALSE) %>%
  64.   mutate(area = ALAND / 2589988) %>%
  65.   select(GEOID, STATEFP, COUNTYFP, area) %>%
  66.   st_drop_geometry() %>%
  67.   filter(COUNTYFP %in% my_counties) %>%
  68.   select(GEOID, area)
  69.  
  70. ### Pull the demographic population of your chosen subset
  71. subset_pop <-
  72.   get_acs(geography   = "tract",
  73.           variables   = my_demographic,
  74.           state       = my_states,
  75.           county      = my_counties,
  76.           year        = 2018,
  77.           geometry    = TRUE,
  78.           cache_table = TRUE) %>%
  79.   select(GEOID, estimate) %>%
  80.   rename(subset_pop = estimate)
  81.  
  82. ### Pull the total population
  83. total_pop <-
  84.   get_acs(geography   = "tract",
  85.           variables   = c("B02001_001"),
  86.           state       = my_states,
  87.           county      = my_counties,
  88.           year        = 2018,
  89.           geometry    = FALSE,
  90.           cache_table = TRUE) %>%
  91.   select(GEOID, estimate) %>%
  92.   rename(total_pop = estimate) %>%
  93.   left_join(area_tract, by = "GEOID") %>%
  94.   mutate(pop_density = total_pop / area)
  95.  
  96. ### Pull median housing costs per month
  97. housing_costs <-
  98.   get_acs(geography   = "tract",
  99.           variables   = c("B25105_001"),
  100.           state       = my_states,
  101.           county      = my_counties,
  102.           year        = 2018,
  103.           geometry    = TRUE,
  104.           cache_table = TRUE) %>%
  105.   select(GEOID, estimate) %>%
  106.   rename(housing_costs = estimate)
  107.  
  108. ### Pull median income
  109. median_income <-
  110.   get_acs(geography   = "tract",
  111.           variables   = c("B19013_001"),
  112.           state       = my_states,
  113.           county      = my_counties,
  114.           year        = 2018,
  115.           geometry    = FALSE,
  116.           cache_table = TRUE) %>%
  117.   select(GEOID, estimate) %>%
  118.   rename(income = estimate)
  119.  
  120. ### Determine distance of tract from center of city, to see if housing costs depends on distance to center.
  121. ### Lat/lon are for Nashville TN.
  122. center_lat <-  36.16587
  123. center_lon <- -86.78434
  124. tract_centers <- housing_costs$geometry %>% as_Spatial() %>% gCentroid(byid = TRUE)
  125. dst <- tibble(tract_centers$x, tract_centers$y) %>% rename(lon = "tract_centers$x", lat = "tract_centers$y")
  126. ctr <- c(center_lon, center_lat)
  127. dist <- distVincentyEllipsoid(ctr, dst, a=6378137, b=6356752.3142, f=1/298.257223563) / 1609.34
  128. housing_costs$distance_to_center <- dist
  129.  
  130. map_housing_to_income <-
  131.   housing_costs %>%
  132.   left_join(median_income, by = "GEOID") %>%
  133.   mutate(housing_per = (12 * housing_costs) / income) #%>%
  134.   #mutate(housing_per = ifelse(housing_per < 0.28, NA, housing_per))
  135.  
  136. map_subset_per_pop <-
  137.   subset_pop %>%
  138.   left_join(total_pop, by = "GEOID") %>%
  139.   mutate(subset_per = subset_pop / total_pop)
  140.  
  141. data_combined <-
  142.   (subset_pop %>% st_drop_geometry()) %>%
  143.   left_join(total_pop, by = "GEOID") %>%
  144.   left_join((housing_costs %>% st_drop_geometry()), by = "GEOID") %>%
  145.   left_join(median_income, by = "GEOID") %>%
  146.   mutate(housing_per = (12 * housing_costs) / income) %>%
  147.   mutate(subset_per = subset_pop / total_pop) %>%
  148.   as_tibble()
  149.  
  150. areawater <-
  151.   area_water(state = my_states,
  152.              county = my_counties,
  153.              class = "sf")
  154.  
  155. linearwater <-
  156.   linear_water(state = my_states,
  157.                county = my_counties,
  158.                class = "sf")
  159.  
  160. roads <-
  161.   roads(state = my_states,
  162.         county = my_counties,
  163.         class = "sf")
  164.  
  165. interstates <- roads %>% filter(grepl("^I-|State Rte 840|Winfield", FULLNAME))
  166. hwy         <- roads %>% filter(RTTYP %in% c("C", "S", "U")) %>% filter(!grepl("^Old", FULLNAME))
  167. rivers      <- linearwater %>% filter(grepl(" Riv$", FULLNAME))
  168. lakes       <- areawater
  169.  
  170. color_water      <- "steelblue2"
  171. color_interstate <- "darkred"
  172.  
  173. ### Use this if you want to plot population density on an absolute scale
  174. graph_scale <-
  175.   scale_fill_viridis(direction = -1,
  176.                      option    = "inferno",
  177.                      labels    = percent_format(accuracy = 0.1))
  178.  
  179. ### Graph the counties
  180. p_subset_per_pop <-
  181.   ggplot() +
  182.   theme_void() +
  183.   theme(
  184.     legend.title    = element_blank(),
  185.     legend.position = "right",
  186.     plot.title      = element_text(hjust = 0.5),
  187.   ) +
  188.   labs(title = paste(my_demographic_txt, " as % of Population\n[ ",
  189.                      my_demographic, " / B02001_001 ]", sep = "")) +
  190.   geom_sf(data = map_subset_per_pop, size = 0.05, alpha = 0.5,
  191.           aes(fill = subset_per)) +
  192.  
  193.   geom_sf(data = areawater, size = 0.2, fill = color_water, color = color_water) +
  194.   geom_sf(data = rivers, size = 0.2, fill = color_water, color = color_water) +
  195.   geom_sf(data = interstates, size = 0.7, color = color_interstate, alpha = 1.0) +
  196.   #geom_sf(data = hwy, size = 0.2, color = color_interstate, alpha = 1.0) +
  197.  
  198.   graph_scale
  199. print(p_subset_per_pop)
  200.  
  201. ### Graph the counties
  202. p_housing_to_income <-
  203.   ggplot() +
  204.   theme_void() +
  205.   theme(
  206.     legend.title    = element_blank(),
  207.     legend.position = "right",
  208.     plot.title      = element_text(hjust = 0.5),
  209.   ) +
  210.   labs(title = "Median Yearly Housing Costs as a percent of median Houshold Income exceeds 28%\n(12 * [B25105_001] / [B19013_001] > 0.28)") +
  211.   geom_sf(data = map_housing_to_income, size = 0.05, alpha = 0.5,
  212.           aes(fill = housing_per)) +
  213.  
  214.   geom_sf(data = lakes, size = 0.2, fill = color_water, color = color_water) +
  215.   geom_sf(data = rivers, size = 0.2, fill = color_water, color = color_water) +
  216.   geom_sf(data = interstates, size = 0.7, color = color_interstate, alpha = 1.0) +
  217.   #geom_sf(data = hwy, size = 0.2, color = color_interstate, alpha = 1.0) +
  218.  
  219.   graph_scale
  220.  
  221. fit <- lm(housing_per ~ subset_per * pop_density * income * distance_to_center, data = data_combined)
  222. predict_fit <- 0.282728 -0.005900 * data_combined$distance_to_center
  223.  
  224. p_fit <-
  225.   ggplot(data = data_combined, aes(y = housing_per, x = subset_per)) +
  226.   theme_classic() +
  227.   geom_point(shape = 19, size = 0.5, color = "black") +
  228.   geom_smooth(method = "lm", formula = y ~ x) +
  229.   geom_hline(yintercept = 0.28, alpha = 0.5, linetype = "dotted") +
  230.   labs(
  231.     x = paste(my_demographic_txt, " % of Population", sep = ""),
  232.     y = "Median Housing to Income Ratio") +
  233.   scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  234.   scale_y_continuous(labels = scales::percent_format(accuracy = 1))
  235.  
  236. plot_list <- list(p_subset_per_pop, p_housing_to_income, p_fit)
  237.  
  238. layout_list <- rbind(
  239.   c(1, 2),
  240.   c(1, 2),
  241.   c(1, 2),
  242.   c(3, 3),
  243.   c(3, 3)
  244. )
  245.  
  246. charts <-
  247.   grid.arrange(grobs         = plot_list,
  248.                layout_matrix = layout_list,
  249.                ncols = 2)
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×