Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(ipumsr)
- library(ggplot2)
- library(ggridges)
- if(!exists("d")){
- #read in the data
- ddi <- read_ipums_ddi("C:/Users/nikolai/Downloads/usa_00004.xml")
- d <- read_ipums_micro(ddi)
- #subset to married couples with spouse present in household
- dsub <- d[d$MARST == 1, c("SERIAL", "PERNUM", "SPLOC", "SEX", "AGE", "BIRTHYR", "YRMARR")]
- #subsample d to write code more efficiently
- subsamp <- 1:1E5
- ds <- dsub[subsamp,]
- }
- #assign data to work with (for easy switching)
- dat <- ds
- #extract households and ID spouse pairs (splitting households with >2 couples)
- h <- split(dat, dat$SERIAL)
- if(nrow(dat) != nrow(dsub)){h <- h[-length(h)]}
- n_per_household <- sapply(h, nrow)
- hc <- h[n_per_household == 2]
- hc_gr1 <- h[n_per_household > 2]
- hc_gr1 <- unlist(lapply(hc_gr1, function(x){
- spouse_IDs <- x[,c("PERNUM", "SPLOC")]
- spouse_inds <- cbind(1:nrow(x), match(spouse_IDs$SPLOC, spouse_IDs$PERNUM))
- ind_pairs <- t(apply(spouse_inds, 1, sort))
- uniq_ind_pairs <- unique(ind_pairs)
- out <- lapply(1:nrow(uniq_ind_pairs), function(i){
- x[uniq_ind_pairs[i,],]
- })
- return(out)
- }), recursive = F)
- #combine back into h
- h <- c(hc, hc_gr1)
- #process back to tabular form
- m <- data.frame(do.call(rbind, lapply(h, function(x){
- #check that these pairs married each other that year
- if(x$YRMARR[1] != x$YRMARR[2]){
- return(NULL)
- }
- #remove same-sex spouses
- if(x$SEX[1] == x$SEX[2]){
- return(NULL)
- }
- #remove pairs with unknown sex
- if(any(x$SEX == 9)){
- return(NULL)
- }
- male <- x[x$SEX == 1,]
- female <- x[x$SEX == 2,]
- out <- c(age_m = as.numeric(male$AGE),
- age_f = as.numeric(female$AGE),
- byear_m = as.numeric(male$BIRTHYR),
- byear_f = as.numeric(female$BIRTHYR),
- year_married = as.numeric(x$YRMARR[1]))
- return(out)
- })))
- m$diff_age <- m$age_m - m$age_f
- m$age_m_at_marriage <- m$year_married - m$byear_m
- m$age_f_at_marriage <- m$year_married - m$byear_f
- m$mean_age_at_marriage <- (m$age_m_at_marriage + m$age_f_at_marriage) / 2
- # bin the male ages
- m_ages <- min(m$age_m_at_marriage) : max(m$age_m_at_marriage)
- age_breaks <- seq(min(m$age_m_at_marriage) - 0.5, max(m$age_m_at_marriage) + 0.5, by = 1)
- m$age_m_at_marriage_bin <- cut(m$age_m_at_marriage, breaks=age_breaks, right=FALSE)
- m$bin_counts <- ave(m$diff_age, m$age_m_at_marriage_bin, FUN = length)
- m$bin_counts_resc <- m$bin_counts / max(m$bin_counts) * 10
- # Compute mean difference in age for each bin
- mean_diff_age <- tapply(m$diff_age, m$age_m_at_marriage_bin, mean)
- mean_diff_age <- mean_diff_age[!is.na(mean_diff_age)]
- m$mean_diff_age <- ave(m$diff_age, m$age_m_at_marriage_bin, FUN = mean)
- # Create the ridge plot with the vertical line and custom y-axis labels
- ggplot(m, aes(x = diff_age, y = age_m_at_marriage_bin, fill = mean_diff_age)) +
- # geom_density_ridges(aes(scale = bin_counts_resc), rel_min_height = 0.01) +
- geom_density_ridges() +
- geom_vline(xintercept = 0, linetype = "dashed", color = "black", lwd = 1) +
- theme_ridges() +
- scale_y_discrete(labels = function(labels) {
- # Only show every 5th label
- label_indices <- seq(1, length(labels), by = 5)
- labels <- sapply(1:length(labels), function(i) {
- if (i %in% label_indices) {
- return(m_ages[i])
- } else {
- return("")
- }
- })
- labels
- }) +
- scale_fill_gradient2(low = "brown2", mid = "darkviolet", high = "deepskyblue4",
- midpoint = 5, name = "Mean Age\nDifference") +
- labs(title = "Age Differences Between Husbands and Wives on Their Wedding Day",
- x = "Difference in Age between Husband and Wife (Years)",
- y = "Age of Husband at Marriage (Years)") +
- theme(
- plot.title = element_text(vjust = 10, hjust = 0.6),
- axis.title.y = element_text(hjust = 0.5, vjust = 4),
- axis.title.x = element_text(hjust = 0.5, vjust = 0),
- plot.margin = margin(t = 50, r = 20, b = 20, l = 20)
- ) +
- coord_cartesian(clip = "off") +
- annotate("text", x = -30, y = 79, label = "Wife Older Than Husband",
- fontface = "bold", col = "brown2", size = 5) +
- annotate("text", x = 30, y = 79, label = "Husband Older Than Wife",
- fontface = "bold", col = "deepskyblue4", size = 5)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement