Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# One-time setup, if the package is missing:
#   install.packages("readstata13")
library(readstata13)

# Tables 1 and 3 must first be downloaded from the 2015 paper section.
# Read both Stata files and join them on their shared `cz` column.
tbl1 <- read.dta13("nbhds_online_data_table1.dta")
tbl3 <- read.dta13("nbhds_online_data_table3.dta")
cz <- merge(tbl1, tbl3, by = "cz")
# One-time setup, if the package is missing:
#   install.packages("psych")
library(psych) # used here for pairs.panels()

# Standardised copies of the raw columns, each produced by scale().
cz$p25_mobility_scaled    <- scale(cz$pct_causal_p25_kr26)
cz$p75_mobility_scaled    <- scale(cz$pct_causal_p75_kr26)
cz$pct_black_scaled       <- scale(cz$cs_race_bla)
cz$poverty_rate_scaled    <- scale(cz$poor_share)
cz$seg_index_scaled       <- scale(cz$cs_race_theil_2000)
cz$single_mom_rate_scaled <- scale(cz$cs_fam_wkidsinglemom)
cz$adj_test_scores_scaled <- scale(cz$score_r)

# Base-10 logarithm of the raw cs_race_bla column.
cz$pct_black_log10 <- log(cz$cs_race_bla, 10)

# Columns shown in the pairwise panel plot, in display order.
selcols <- c(
  "p25_mobility_scaled", "p75_mobility_scaled",
  "poverty_rate_scaled", "seg_index_scaled",
  "adj_test_scores_scaled",
  "pct_black_scaled", "single_mom_rate_scaled"
)

pairs.panels(
  subset(cz, select = selcols),
  main = "Chetty 2015 CZ-level 'causal' mobility & covariates"
)
# ggplot2 supplies ggplot(), aes() and the geom_*/scale_* layers used by the
# plots in this script; none of the visible library() calls loads it.
library(ggplot2)

# CZ-level linear models of standardised p25 mobility.
#   czlm1, czlm2 : one predictor each (single-mother rate; percent black)
#   czlm3        : both predictors, additive
#   czlm4        : both predictors plus their interaction
#   *_w          : same formula, weighted by pop2000
czlm1 <- lm(p25_mobility_scaled ~ single_mom_rate_scaled, data = cz)
czlm2 <- lm(p25_mobility_scaled ~ pct_black_scaled, data = cz)
czlm3 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  data = cz
)
czlm3_w <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  data = cz, weights = pop2000
)

# na.exclude makes predict() return one value per row of `cz` (NA for rows
# the fit had to drop), so the predictions below can be assigned back as
# columns even when some rows contain missing values. Coefficients are the
# same as with the default na.omit.
czlm4 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  data = cz, na.action = na.exclude
)
czlm4_w <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  data = cz, weights = pop2000, na.action = na.exclude
)

cz$lm4_pred <- predict(czlm4)
cz$lm4_w_pred <- predict(czlm4_w)

# Observed p25 mobility against the unweighted interaction model's prediction.
ggplot(cz, aes(x = lm4_pred, y = p25_mobility_scaled)) +
  geom_point() +
  geom_smooth()
# Three CZ-level scatterplots on standardised axes. Each one sizes points by
# pop2000, adds the default smoother, puts integer breaks from -3 to 3 on both
# axes and draws reference lines through the origin.

# p25 mobility against single-mother rate.
ggplot(cz, aes(x = single_mom_rate_scaled, y = p25_mobility_scaled)) +
  geom_point(aes(size = pop2000), colour = "blue") +
  geom_smooth() +
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty CZ-level 25th percentile mobility by percent single-mother")

# p25 mobility against percent black.
ggplot(cz, aes(x = pct_black_scaled, y = p25_mobility_scaled)) +
  geom_point(aes(size = pop2000), colour = "blue") +
  geom_smooth() +
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty CZ-level 25th percentile mobility by percent black")

# Single-mother rate against percent black.
ggplot(cz, aes(x = pct_black_scaled, y = single_mom_rate_scaled)) +
  geom_point(aes(size = pop2000), colour = "blue") +
  geom_smooth() +
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty CZ-level percent single-mother by percent black")
# Single-predictor loess fits of p25 mobility, followed by a plot of each
# model's over-prediction (prediction minus observed) against the predictor
# that was left out.
#
# na.action = na.exclude makes predict() return one value per row of `cz`
# (NA where the fit dropped a row), so the column assignments below cannot
# fail on a length mismatch when some rows have missing values.

# Loess on single-mother rate; over-prediction plotted against percent black.
lmloess_sm <- loess(
  p25_mobility_scaled ~ single_mom_rate_scaled,
  data = cz, na.action = na.exclude
)
cz$loess_sm_pred <- predict(lmloess_sm)
cz$loess_sm_overpred <- cz$loess_sm_pred - cz$p25_mobility_scaled

ggplot(cz, aes(x = pct_black_scaled, y = loess_sm_overpred)) +
  geom_point(aes(size = pop2000), colour = "blue") +
  geom_smooth(method = "lm", color = "red") + # straight-line trend
  geom_smooth(color = "green") +              # default smoother
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty CZ-level: single-mother/loess model residuals by percent black")

# Loess on percent black; over-prediction plotted against single-mother rate.
lmloess_black <- loess(
  p25_mobility_scaled ~ pct_black_scaled,
  data = cz, na.action = na.exclude
)
cz$loess_black_pred <- predict(lmloess_black)
cz$loess_black_overpred <- cz$loess_black_pred - cz$p25_mobility_scaled

ggplot(cz, aes(x = single_mom_rate_scaled, y = loess_black_overpred)) +
  geom_point(aes(size = pop2000), colour = "blue") +
  geom_smooth(method = "lm", color = "red") + # straight-line trend
  geom_smooth(color = "green") +              # default smoother
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty CZ-level: black/loess model residuals by percent single-mother")
library(data.table)

# Top-100 subset: sort by pop2000 in descending order and keep rows 1 to 100.
cztop <- data.table(cz)[order(-pop2000)][1:100]

# Re-standardise within the subset. These assignments overwrite the *_scaled
# columns carried over from `cz`; gini_scaled is new here.
cztop$p25_mobility_scaled    <- scale(cztop$pct_causal_p25_kr26)
cztop$p75_mobility_scaled    <- scale(cztop$pct_causal_p75_kr26)
cztop$pct_black_scaled       <- scale(cztop$cs_race_bla)
cztop$poverty_rate_scaled    <- scale(cztop$poor_share)
cztop$seg_index_scaled       <- scale(cztop$cs_race_theil_2000)
cztop$single_mom_rate_scaled <- scale(cztop$cs_fam_wkidsinglemom)
cztop$adj_test_scores_scaled <- scale(cztop$score_r)
cztop$gini_scaled            <- scale(cztop$gini)

# Columns for the pairwise panel plot; this replaces the earlier `selcols`.
selcols <- c(
  "p25_mobility_scaled", "p75_mobility_scaled",
  "poverty_rate_scaled", "seg_index_scaled",
  "adj_test_scores_scaled", "gini_scaled",
  "pct_black_scaled", "single_mom_rate_scaled"
)

pairs.panels(
  subset(cztop, select = selcols),
  main = "Chetty 2015 top-100 CZ: 'causal' mobility & covariates"
)

# Additive (plain and pop2000-weighted) and interaction models on the subset.
cztoplm3 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  cztop
)
cztoplm3_w <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  cztop,
  weights = pop2000
)
cztoplm4 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  cztop
)
library(gtools) # used here for quantcut()

# Bin the raw cs_race_bla column into five quantile groups.
cz$percent_black_quintile <- quantcut(cz$cs_race_bla, seq(0, 1, by = 0.20))

# p25 mobility against the single-mother loess prediction, with one linear
# fit per quintile group. The colour aesthetic must name the column created
# above: `cz` has no `black_quintile` column, so mapping to that name would
# fail when the plot is built.
ggplot(
  cz,
  aes(
    x = loess_sm_pred,
    y = p25_mobility_scaled,
    color = percent_black_quintile
  )
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 CZ-level: p25 mobility by single-motherhood/loess model estimate\ngrouped by quintiles of CZ percent black")
# County level ----

# Read tables 2 and 4 and join them on their shared `cty2000` column.
tbl2 <- read.dta13("nbhds_online_data_table2.dta")
tbl4 <- read.dta13("nbhds_online_data_table4.dta")
ct <- merge(tbl2, tbl4, by = "cty2000")
# Standardised copies of the raw county columns, each produced by scale().
ct$p25_mobility_scaled    <- scale(ct$pct_causal_p25_kr26)
ct$p75_mobility_scaled    <- scale(ct$pct_causal_p75_kr26)
ct$pct_black_scaled       <- scale(ct$cs_race_bla)
ct$poverty_rate_scaled    <- scale(ct$poor_share)
ct$seg_index_scaled       <- scale(ct$cs_race_theil_2000)
ct$single_mom_rate_scaled <- scale(ct$cs_fam_wkidsinglemom)
ct$adj_test_scores_scaled <- scale(ct$score_r)

# Columns for the pairwise panel plot; this replaces the earlier `selcols`.
selcols <- c(
  "p25_mobility_scaled", "p75_mobility_scaled",
  "poverty_rate_scaled", "seg_index_scaled",
  "adj_test_scores_scaled",
  "pct_black_scaled", "single_mom_rate_scaled"
)

pairs.panels(
  subset(ct, select = selcols),
  main = "Chetty 2015 county-level 'causal' mobility & covariates"
)
# County-level linear models of standardised p25 mobility.
#   ctlm3 : single-mother rate + percent black (additive)
#   ctlm4 : the same two predictors plus their interaction
#   *_w   : same formula, weighted by cty_pop2000
ctlm3 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  ct
)
ctlm3_w <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled + pct_black_scaled,
  ct,
  weights = cty_pop2000
)
ctlm4 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  ct
)
ctlm4_w <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  ct,
  weights = cty_pop2000
)
# County-level loess fits of p25 mobility. For each fit the over-prediction
# (prediction minus observed) is stored in `ct`; the two single-predictor
# fits are also plotted against the predictor that was left out.
#
# na.action = na.exclude makes predict() return one value per row of `ct`
# (NA where the fit dropped a row), so the column assignments below cannot
# fail on a length mismatch when some rows have missing values.

# Loess on single-mother rate; over-prediction plotted against percent black.
ctlmloess_sm <- loess(
  p25_mobility_scaled ~ single_mom_rate_scaled,
  data = ct, na.action = na.exclude
)
ct$loess_sm_pred <- predict(ctlmloess_sm)
ct$loess_sm_overpred <- ct$loess_sm_pred - ct$p25_mobility_scaled

ggplot(ct, aes(x = pct_black_scaled, y = loess_sm_overpred)) +
  geom_point(aes(size = cty_pop2000), colour = "blue") +
  geom_smooth(method = "lm", color = "red") + # straight-line trend
  geom_smooth(color = "green") +              # default smoother
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty County-level: single-mother/loess model residuals by percent black")

# Loess on percent black; over-prediction plotted against single-mother rate.
ctlmloess_black <- loess(
  p25_mobility_scaled ~ pct_black_scaled,
  data = ct, na.action = na.exclude
)
ct$loess_black_pred <- predict(ctlmloess_black)
ct$loess_black_overpred <- ct$loess_black_pred - ct$p25_mobility_scaled

ggplot(ct, aes(x = single_mom_rate_scaled, y = loess_black_overpred)) +
  geom_point(aes(size = cty_pop2000), colour = "blue") +
  geom_smooth(method = "lm", color = "red") + # straight-line trend
  geom_smooth(color = "green") +              # default smoother
  scale_x_continuous(breaks = seq(-3, 3)) +
  scale_y_continuous(breaks = seq(-3, 3)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  ggtitle("Chetty County-level: black/loess model residuals by percent single mother")

# Loess on both predictors together; prediction and over-prediction are
# stored but not plotted in this section.
ctloess_exp <- loess(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled,
  data = ct, na.action = na.exclude
)
ct$exp_pred <- predict(ctloess_exp)
ct$exp_overpred <- ct$exp_pred - ct$p25_mobility_scaled
library(gtools) # used here for quantcut()

# Quantile groups (five and ten bins) of the raw percent-black and
# single-mother columns.
ct$black_quintile         <- quantcut(ct$cs_race_bla, seq(0, 1, by = 0.20))
ct$black_decile           <- quantcut(ct$cs_race_bla, seq(0, 1, by = 0.1))
ct$single_mother_quintile <- quantcut(ct$cs_fam_wkidsinglemom, seq(0, 1, by = 0.20))
ct$single_mother_decile   <- quantcut(ct$cs_fam_wkidsinglemom, seq(0, 1, by = 0.1))

# p25 mobility against the single-mother loess prediction, with one linear
# fit per percent-black quintile.
ggplot(
  ct,
  aes(x = loess_sm_pred, y = p25_mobility_scaled, color = black_quintile)
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by single-motherhood/loess model estimate\ngrouped by quintiles of county percent black")

# Same plot, grouped by percent-black decile.
ggplot(
  ct,
  aes(x = loess_sm_pred, y = p25_mobility_scaled, color = black_decile)
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by single-motherhood/loess model estimate\ngrouped by deciles of county percent black")
# Predictions from the county interaction model. Passing newdata = ct makes
# predict() return exactly one value per row of `ct` (NA where a predictor is
# missing), so the assignment cannot fail on a length mismatch if lm() dropped
# incomplete rows when fitting.
ct$ctlm4_pred <- predict(ctlm4, newdata = ct)
ct$ctlm4_overpred <- ct$ctlm4_pred - ct$p25_mobility_scaled

# Interaction model extended with a squared single-mother term.
ctlm_6 <- lm(
  p25_mobility_scaled ~ single_mom_rate_scaled * pct_black_scaled +
    I(single_mom_rate_scaled^2),
  data = ct
)

# p25 mobility against the percent-black loess prediction, with one linear
# fit per single-mother quintile.
ggplot(
  ct,
  aes(
    x = loess_black_pred,
    y = p25_mobility_scaled,
    color = single_mother_quintile
  )
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by loess/black-model prediction\ngrouped by quintiles of county percent single mother")

# Same plot, grouped by single-mother decile.
ggplot(
  ct,
  aes(
    x = loess_black_pred,
    y = p25_mobility_scaled,
    color = single_mother_decile
  )
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by loess/black-model prediction\ngrouped by deciles of county percent single mother")

# p25 mobility against the interaction-model prediction, grouped by
# single-mother decile.
ggplot(
  ct,
  aes(
    x = ctlm4_pred,
    y = p25_mobility_scaled,
    color = single_mother_decile
  )
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by interaction model estimate\ngrouped by deciles of county percent single-mother")

# Same plot, grouped by percent-black decile.
ggplot(
  ct,
  aes(x = ctlm4_pred, y = p25_mobility_scaled, color = black_decile)
) +
  geom_point() +
  geom_smooth(method = lm, size = 3) +
  ggtitle("Chetty 2015 county-level: p25 mobility by interaction model estimate\ngrouped by deciles of county percent black")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement