# install.packages("readstata13")
library(readstata13)
# Requires online data tables 1 and 3 (from the 2015 paper section) to be
# downloaded into the working directory before running.
# Read the two CZ-level Stata tables from the working directory and join them
# on their shared "cz" key column (merge() keeps only keys present in both).
tbl1 <- read.dta13("nbhds_online_data_table1.dta")
tbl3 <- read.dta13("nbhds_online_data_table3.dta")
cz <- merge(x = tbl1, y = tbl3, by = "cz")
# install.packages("psych")
library(psych)
# Standardise the mobility outcomes and covariates: scale() with its defaults
# centres each raw column on its mean and divides by its standard deviation.
cz$p25_mobility_scaled <- scale(cz$pct_causal_p25_kr26)
cz$p75_mobility_scaled <- scale(cz$pct_causal_p75_kr26)
cz$pct_black_scaled <- scale(cz$cs_race_bla)
cz$poverty_rate_scaled <- scale(cz$poor_share)
cz$seg_index_scaled <- scale(cz$cs_race_theil_2000)
cz$single_mom_rate_scaled <- scale(cz$cs_fam_wkidsinglemom)
cz$adj_test_scores_scaled <- scale(cz$score_r)

# Base-10 logarithm of the raw cs_race_bla column.
cz$pct_black_log10 <- log(cz$cs_race_bla, base = 10)

# Columns shown in the scatterplot matrix, in display order.
selcols <- c(
  "p25_mobility_scaled",
  "p75_mobility_scaled",
  "poverty_rate_scaled",
  "seg_index_scaled",
  "adj_test_scores_scaled",
  "pct_black_scaled",
  "single_mom_rate_scaled"
)
pairs.panels(
  cz[, selcols, drop = FALSE],
  main = "Chetty 2015 CZ-level 'causal' mobility & covariates"
)
# CZ-level linear models of scaled p25 mobility:
#   czlm1 / czlm2 - single-mother rate alone / percent black alone
#   czlm3         - both predictors, additive
#   czlm4         - both predictors plus their interaction
# The *_w variants fit the same formula with pop2000 as regression weights.
czlm1 <- lm(p25_mobility_scaled ~ single_mom_rate_scaled, data = cz)
czlm2 <- lm(p25_mobility_scaled ~ pct_black_scaled, data = cz)
czlm3 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  data = cz
)
czlm3_w <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  data = cz, weights = pop2000
)
czlm4 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  data = cz
)
czlm4_w <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  data = cz, weights = pop2000
)

# lm() drops rows with missing values by default, so predict() on the fitted
# object alone can return fewer values than cz has rows, and the column
# assignment would then fail. Predicting on cz itself always yields exactly one
# value per row (NA where a predictor is missing).
cz$lm4_pred <- predict(czlm4, newdata = cz)
cz$lm4_w_pred <- predict(czlm4_w, newdata = cz)
# ggplot() is used from here on but ggplot2 was never attached; load it at the
# point of first use, as the script does for its other packages.
# install.packages("ggplot2")
library(ggplot2)

# Observed scaled p25 mobility against the interaction-model prediction.
ggplot(cz, aes(x = lm4_pred, y = p25_mobility_scaled)) +
  geom_point() +
  geom_smooth()

# Pairwise scatterplots of the scaled variables. Points are sized by pop2000,
# a default smoother is overlaid, and reference lines mark zero (the mean of
# each scaled variable) on both axes.
ggplot(cz, aes(x = single_mom_rate_scaled, y = p25_mobility_scaled)) +
  geom_point(aes(size = pop2000), colour = "blue") +
  geom_smooth() +
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty CZ-level 25th percentile mobility by percent single-mother")

ggplot(cz, aes(x = pct_black_scaled, y = p25_mobility_scaled)) +
  geom_point(aes(size = pop2000), colour = "blue") +
  geom_smooth() +
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty CZ-level 25th percentile mobility by percent black")

ggplot(cz, aes(x = pct_black_scaled, y = single_mom_rate_scaled)) +
  geom_point(aes(size = pop2000), colour = "blue") +
  geom_smooth() +
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty CZ-level percent single-mother by percent black")
# One-predictor loess fits of scaled p25 mobility, each followed by a plot of
# how far the fit over-predicts (prediction minus observed, i.e. the negated
# residual) against the predictor that was left out of the fit.
#
# na.action = na.exclude keeps the fit itself unchanged but makes predict()
# pad rows that were dropped for missing values with NA, so the prediction
# vector always has one entry per row of cz and can be stored as a column.
lmloess_sm <- loess(
  p25_mobility_scaled ~ single_mom_rate_scaled,
  data = cz, na.action = na.exclude
)
cz$loess_sm_pred <- predict(lmloess_sm)
cz$loess_sm_overpred <- cz$loess_sm_pred - cz$p25_mobility_scaled

# Red line: linear trend; green line: default smoother.
ggplot(cz, aes(x = pct_black_scaled, y = loess_sm_overpred)) +
  geom_point(aes(size = pop2000), colour = "blue") +
  geom_smooth(method = "lm", color = "red") +
  geom_smooth(color = "green") +
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty CZ-level: single-mother/loess model residuals by percent black")

lmloess_black <- loess(
  p25_mobility_scaled ~ pct_black_scaled,
  data = cz, na.action = na.exclude
)
cz$loess_black_pred <- predict(lmloess_black)
cz$loess_black_overpred <- cz$loess_black_pred - cz$p25_mobility_scaled

ggplot(cz, aes(x = single_mom_rate_scaled, y = loess_black_overpred)) +
  geom_point(aes(size = pop2000), colour = "blue") +
  geom_smooth(method = "lm", color = "red") +
  geom_smooth(color = "green") +
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty CZ-level: black/loess model residuals by percent single-mother")
library(data.table)
# Keep the 100 CZs with the largest pop2000 (sorted in descending order).
cztop <- data.table(cz)[order(-pop2000)][seq_len(100)]

# Re-standardise within this subset so that zero is the top-100 mean and one
# unit is the top-100 standard deviation; gini is scaled here as well.
cztop$p25_mobility_scaled <- scale(cztop$pct_causal_p25_kr26)
cztop$p75_mobility_scaled <- scale(cztop$pct_causal_p75_kr26)
cztop$pct_black_scaled <- scale(cztop$cs_race_bla)
cztop$poverty_rate_scaled <- scale(cztop$poor_share)
cztop$seg_index_scaled <- scale(cztop$cs_race_theil_2000)
cztop$single_mom_rate_scaled <- scale(cztop$cs_fam_wkidsinglemom)
cztop$adj_test_scores_scaled <- scale(cztop$score_r)
cztop$gini_scaled <- scale(cztop$gini)

# Columns shown in the scatterplot matrix, in display order.
selcols <- c(
  "p25_mobility_scaled",
  "p75_mobility_scaled",
  "poverty_rate_scaled",
  "seg_index_scaled",
  "adj_test_scores_scaled",
  "gini_scaled",
  "pct_black_scaled",
  "single_mom_rate_scaled"
)
pairs.panels(
  subset(cztop, select = selcols),
  main = "Chetty 2015 top-100 CZ: 'causal' mobility & covariates"
)

# Additive model (unweighted and pop2000-weighted) and interaction model,
# fitted on the top-100 subset only.
cztoplm3 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  cztop
)
cztoplm3_w <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  cztop,
  weights = pop2000
)
cztoplm4 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  cztop
)
library(gtools)
# Cut the raw cs_race_bla column into five quantile groups.
cz$percent_black_quintile <- quantcut(cz$cs_race_bla, seq(0, 1, by = 0.20))

# Observed scaled p25 mobility against the single-mother loess prediction, with
# one linear fit per quintile. The colour aesthetic must name the column
# created above; the previous `black_quintile` does not exist on cz and made
# the plot fail with "object not found".
ggplot(
  cz,
  aes(x = loess_sm_pred, y = p25_mobility_scaled, color = percent_black_quintile)
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 CZ-level: p25 mobility by single-motherhood/loess model estimate\ngrouped by quintiles of CZ percent black")
#### county level
# Read the two county-level Stata tables from the working directory and join
# them on their shared "cty2000" key column (merge() keeps only keys present
# in both).
tbl2 <- read.dta13("nbhds_online_data_table2.dta")
tbl4 <- read.dta13("nbhds_online_data_table4.dta")
ct <- merge(x = tbl2, y = tbl4, by = "cty2000")
# Standardise the county-level mobility outcomes and covariates: scale() with
# its defaults centres each raw column on its mean and divides by its standard
# deviation.
ct$p25_mobility_scaled <- scale(ct$pct_causal_p25_kr26)
ct$p75_mobility_scaled <- scale(ct$pct_causal_p75_kr26)
ct$pct_black_scaled <- scale(ct$cs_race_bla)
ct$poverty_rate_scaled <- scale(ct$poor_share)
ct$seg_index_scaled <- scale(ct$cs_race_theil_2000)
ct$single_mom_rate_scaled <- scale(ct$cs_fam_wkidsinglemom)
ct$adj_test_scores_scaled <- scale(ct$score_r)

# Columns shown in the scatterplot matrix, in display order.
selcols <- c(
  "p25_mobility_scaled",
  "p75_mobility_scaled",
  "poverty_rate_scaled",
  "seg_index_scaled",
  "adj_test_scores_scaled",
  "pct_black_scaled",
  "single_mom_rate_scaled"
)
pairs.panels(
  ct[, selcols, drop = FALSE],
  main = "Chetty 2015 county-level 'causal' mobility & covariates"
)
# County-level linear models of scaled p25 mobility on single-mother rate and
# percent black: additive (ctlm3) and with interaction (ctlm4). The *_w
# variants fit the same formula with cty_pop2000 as regression weights.
ctlm3 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  ct
)
ctlm3_w <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  ct,
  weights = cty_pop2000
)
ctlm4 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  ct
)
ctlm4_w <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  ct,
  weights = cty_pop2000
)
# One-predictor loess fits of scaled p25 mobility at county level, each
# followed by a plot of how far the fit over-predicts (prediction minus
# observed, i.e. the negated residual) against the predictor left out of it.
#
# na.action = na.exclude keeps the fit itself unchanged but makes predict()
# pad rows that were dropped for missing values with NA, so the prediction
# vector always has one entry per row of ct and can be stored as a column.
ctlmloess_sm <- loess(
  p25_mobility_scaled ~ single_mom_rate_scaled,
  data = ct, na.action = na.exclude
)
ct$loess_sm_pred <- predict(ctlmloess_sm)
ct$loess_sm_overpred <- ct$loess_sm_pred - ct$p25_mobility_scaled

# Red line: linear trend; green line: default smoother.
ggplot(ct, aes(x = pct_black_scaled, y = loess_sm_overpred)) +
  geom_point(aes(size = cty_pop2000), colour = "blue") +
  geom_smooth(method = "lm", color = "red") +
  geom_smooth(color = "green") +
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty County-level: single-mother/loess model residuals by percent black")

ctlmloess_black <- loess(
  p25_mobility_scaled ~ pct_black_scaled,
  data = ct, na.action = na.exclude
)
ct$loess_black_pred <- predict(ctlmloess_black)
ct$loess_black_overpred <- ct$loess_black_pred - ct$p25_mobility_scaled

ggplot(ct, aes(x = single_mom_rate_scaled, y = loess_black_overpred)) +
  geom_point(aes(size = cty_pop2000), colour = "blue") +
  geom_smooth(method = "lm", color = "red") +
  geom_smooth(color = "green") +
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty County-level: black/loess model residuals by percent single mother")
# Loess fit of scaled p25 mobility on both predictors at county level, plus
# its over-prediction (prediction minus observed).
# NOTE(review): loess() is given an lm-style `a * b` formula; confirm it is
# treated as a two-predictor local surface rather than adding a product term.
#
# na.action = na.exclude makes predict() return NA for rows dropped because of
# missing values, so the result has one entry per row of ct; with the default
# na.omit the shorter vector could not be assigned as a column.
ctloess_exp <- loess(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  data = ct, na.action = na.exclude
)
ct$exp_pred <- predict(ctloess_exp)
ct$exp_overpred <- ct$exp_pred - ct$p25_mobility_scaled
library(gtools)
# Quantile groups (fifths and tenths) of the raw percent-black and
# single-mother columns, used as colour groupings in the plots that follow.
ct$black_quintile <- quantcut(ct$cs_race_bla, seq(0, 1, by = 0.20))
ct$black_decile <- quantcut(ct$cs_race_bla, seq(0, 1, by = 0.1))
ct$single_mother_quintile <- quantcut(
  ct$cs_fam_wkidsinglemom, seq(0, 1, by = 0.20)
)
ct$single_mother_decile <- quantcut(
  ct$cs_fam_wkidsinglemom, seq(0, 1, by = 0.1)
)

# Observed scaled p25 mobility against the single-mother loess prediction,
# with a separate linear fit per percent-black group.
ggplot(
  ct,
  aes(x = loess_sm_pred, y = p25_mobility_scaled, color = black_quintile)
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by single-motherhood/loess model estimate\ngrouped by quintiles of county percent black")

ggplot(
  ct,
  aes(x = loess_sm_pred, y = p25_mobility_scaled, color = black_decile)
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by single-motherhood/loess model estimate\ngrouped by deciles of county percent black")
# Prediction of the county interaction model and its over-prediction
# (prediction minus observed). ctlm4 was fitted with lm()'s default handling
# of missing values, which drops incomplete rows; predict(ctlm4) alone would
# therefore return fewer values than ct has rows and the assignment would
# fail. Predicting on ct itself gives one value per row (NA where a predictor
# is missing).
ct$ctlm4_pred <- predict(ctlm4, newdata = ct)
ct$ctlm4_overpred <- ct$ctlm4_pred - ct$p25_mobility_scaled

# Interaction model extended with a quadratic term in the single-mother rate.
ctlm_6 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled +
    I(single_mom_rate_scaled^2),
  data = ct
)
# Observed scaled p25 mobility against the percent-black loess prediction,
# with a separate linear fit per single-mother quintile, then per decile.
ggplot(
  ct,
  aes(
    x = loess_black_pred,
    y = p25_mobility_scaled,
    color = single_mother_quintile
  )
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by loess/black-model prediction\ngrouped by quintiles of county percent single mother")

ggplot(
  ct,
  aes(
    x = loess_black_pred,
    y = p25_mobility_scaled,
    color = single_mother_decile
  )
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by loess/black-model prediction\ngrouped by deciles of county percent single mother")

# Observed scaled p25 mobility against the interaction-model prediction,
# grouped first by single-mother decile and then by percent-black decile.
ggplot(
  ct,
  aes(x = ctlm4_pred, y = p25_mobility_scaled, color = single_mother_decile)
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by interaction model estimate\ngrouped by deciles of county percent single-mother")

ggplot(
  ct,
  aes(x = ctlm4_pred, y = p25_mobility_scaled, color = black_decile)
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by interaction model estimate\ngrouped by deciles of county percent black")