Advertisement
Guest User

Untitled

a guest
Jul 31st, 2024
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 4.21 KB | None | 0 0
  1. library(ipumsr)
  2. library(ggplot2)
  3. library(ggridges)
  4.  
  5. if(!exists("d")){
  6.   #read in the data
  7.   ddi <- read_ipums_ddi("C:/Users/nikolai/Downloads/usa_00004.xml")
  8.   d <- read_ipums_micro(ddi)
  9.  
  10.   #subset to married couples with spouse present in household
  11.   dsub <- d[d$MARST == 1, c("SERIAL", "PERNUM", "SPLOC", "SEX", "AGE", "BIRTHYR", "YRMARR")]
  12.  
  13.   #subsample d to write code more efficiently
  14.   subsamp <- 1:1E5
  15.   ds <- dsub[subsamp,]
  16. }
  17.  
  18. #assign data to work with (for easy switching)
  19. dat <- ds
  20.  
  21. #extract households and ID spouse pairs (splitting households with >2 couples)
  22. h <- split(dat, dat$SERIAL)
  23. if(nrow(dat) != nrow(dsub)){h <- h[-length(h)]}
  24. n_per_household <- sapply(h, nrow)
  25. hc <- h[n_per_household == 2]
  26. hc_gr1 <- h[n_per_household > 2]
  27. hc_gr1 <- unlist(lapply(hc_gr1, function(x){
  28.   spouse_IDs <- x[,c("PERNUM", "SPLOC")]
  29.   spouse_inds <- cbind(1:nrow(x), match(spouse_IDs$SPLOC, spouse_IDs$PERNUM))
  30.   ind_pairs <- t(apply(spouse_inds, 1, sort))
  31.   uniq_ind_pairs <- unique(ind_pairs)
  32.   out <- lapply(1:nrow(uniq_ind_pairs), function(i){
  33.     x[uniq_ind_pairs[i,],]
  34.   })
  35.   return(out)
  36. }), recursive = F)
  37.  
  38. #combine back into h
  39. h <- c(hc, hc_gr1)
  40.  
  41. #process back to tabular form
  42. m <- data.frame(do.call(rbind, lapply(h, function(x){
  43.   #check that these pairs married each other that year
  44.   if(x$YRMARR[1] != x$YRMARR[2]){
  45.     return(NULL)
  46.   }
  47.   #remove same-sex spouses
  48.   if(x$SEX[1] == x$SEX[2]){
  49.     return(NULL)
  50.   }
  51.   #remove pairs with unknown sex
  52.   if(any(x$SEX == 9)){
  53.     return(NULL)
  54.   }
  55.   male <- x[x$SEX == 1,]
  56.   female <- x[x$SEX == 2,]
  57.   out <- c(age_m = as.numeric(male$AGE),
  58.            age_f = as.numeric(female$AGE),
  59.            byear_m = as.numeric(male$BIRTHYR),
  60.            byear_f = as.numeric(female$BIRTHYR),
  61.            year_married = as.numeric(x$YRMARR[1]))
  62.   return(out)
  63. })))
  64. m$diff_age <- m$age_m - m$age_f
  65. m$age_m_at_marriage <- m$year_married - m$byear_m
  66. m$age_f_at_marriage <- m$year_married - m$byear_f
  67. m$mean_age_at_marriage <- (m$age_m_at_marriage + m$age_f_at_marriage) / 2
  68.  
  69. # bin the male ages
  70. m_ages <- min(m$age_m_at_marriage) : max(m$age_m_at_marriage)
  71. age_breaks <- seq(min(m$age_m_at_marriage) - 0.5, max(m$age_m_at_marriage) + 0.5, by = 1)
  72. m$age_m_at_marriage_bin <- cut(m$age_m_at_marriage, breaks=age_breaks, right=FALSE)
  73. m$bin_counts <- ave(m$diff_age, m$age_m_at_marriage_bin, FUN = length)
  74. m$bin_counts_resc <- m$bin_counts / max(m$bin_counts) * 10
  75.  
  76. # Compute mean difference in age for each bin
  77. mean_diff_age <- tapply(m$diff_age, m$age_m_at_marriage_bin, mean)
  78. mean_diff_age <- mean_diff_age[!is.na(mean_diff_age)]
  79. m$mean_diff_age <- ave(m$diff_age, m$age_m_at_marriage_bin, FUN = mean)
  80.  
  81. # Create the ridge plot with the vertical line and custom y-axis labels
  82. ggplot(m, aes(x = diff_age, y = age_m_at_marriage_bin, fill = mean_diff_age)) +
  83.   # geom_density_ridges(aes(scale = bin_counts_resc), rel_min_height = 0.01) +
  84.   geom_density_ridges() +
  85.   geom_vline(xintercept = 0, linetype = "dashed", color = "black", lwd = 1) +
  86.   theme_ridges() +
  87.   scale_y_discrete(labels = function(labels) {
  88.     # Only show every 5th label
  89.     label_indices <- seq(1, length(labels), by = 5)
  90.     labels <- sapply(1:length(labels), function(i) {
  91.       if (i %in% label_indices) {
  92.         return(m_ages[i])
  93.       } else {
  94.         return("")
  95.       }
  96.     })
  97.     labels
  98.   }) +
  99.   scale_fill_gradient2(low = "brown2", mid = "darkviolet", high = "deepskyblue4",
  100.                        midpoint = 5, name = "Mean Age\nDifference") +
  101.   labs(title = "Age Differences Between Husbands and Wives on Their Wedding Day",
  102.        x = "Difference in Age between Husband and Wife (Years)",
  103.        y = "Age of Husband at Marriage (Years)") +
  104.   theme(
  105.     plot.title = element_text(vjust = 10, hjust = 0.6),
  106.     axis.title.y = element_text(hjust = 0.5, vjust = 4),
  107.     axis.title.x = element_text(hjust = 0.5, vjust = 0),
  108.     plot.margin = margin(t = 50, r = 20, b = 20, l = 20)
  109.   ) +
  110.   coord_cartesian(clip = "off") +
  111.   annotate("text", x = -30, y = 79, label = "Wife Older Than Husband",
  112.            fontface = "bold", col = "brown2", size = 5) +
  113.   annotate("text", x = 30, y = 79, label = "Husband Older Than Wife",
  114.            fontface = "bold", col = "deepskyblue4", size = 5)
  115.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement