Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- df1 <- read.csv("/mnt/d/projects/data/credit_count.txt")
- df2 <- df1[which(df1$CARDHLDR == 1), ]
- ca_glm <- function(fml, data, family, nchunk) {
- cls <- parallel::makeCluster(nchunk, type = "PSOCK")
- df1 <- parallel::parLapplyLB(cls, parallel::clusterSplit(cls, seq(nrow(data))),
- function(c_) data[c_,])
- parallel::clusterExport(cl, c("fml", "family", "data"), envir = environment())
- est <- parallel::parLapplyLB(cls, df1,
- function(d_) cbind(coef(summary(glm(fml, data = d_, family = family)))[, 1:2], nrow(d_) / nrow(data)))
- parallel::stopCluster(cls)
- df2 <- Reduce(rbind,
- lapply(est,
- function(e_) data.frame(name = format(rownames(e_), justify = "left"),
- beta = e_[, 1] * e_[, 3],
- var = (e_[, 2] * e_[, 3])^ 2)))
- df3 <- Reduce(rbind,
- lapply(split(df2, df2$name),
- function(d_) data.frame(name = d_$name[1],
- beta = sum(d_$beta),
- stder = sum(d_$var) ^ 0.5)))
- return(cbind(df3, zvalue = df3$beta / df3$stder, pvalue = 2 * pnorm(-abs(df3$beta / df3$stder))))
- }
- y <- "DEFAULT"
- x <- c("MAJORDRG", "MINORDRG", "INCOME")
- f <- as.formula(paste(y, paste(x, collapse = " + "), sep = " ~ "))
- summary(glm(f, data = df2, family = "binomial"))$coef
- # Estimate Std. Error z value Pr(|z|)
- # (Intercept) -1.2215970658 9.076358e-02 -13.459110 2.721743e-41
- # MAJORDRG 0.2030503715 6.921101e-02 2.933787 3.348538e-03
- # MINORDRG 0.1919770456 4.783751e-02 4.013107 5.992472e-05
- # INCOME -0.0004705599 3.918955e-05 -12.007282 3.253645e-33
- ca_glm(f, df2, "binomial", 2)[rank(rownames(summary(glm(f, data = df2, family = "binomial"))$coef)), ]
- # name beta stder zvalue pvalue
- # (Intercept) -1.2001768403 9.161584e-02 -13.100102 3.288167e-39
- # MAJORDRG 0.2024462446 6.936634e-02 2.918508 3.517103e-03
- # MINORDRG 0.1928945270 4.799079e-02 4.019407 5.834476e-05
- # INCOME -0.0004811946 3.974214e-05 -12.107919 9.589651e-34
- ca_glm(f, df2, "binomial", 4)[rank(rownames(summary(glm(f, data = df2, family = "binomial"))$coef)), ]
- # name beta stder zvalue pvalue
- # (Intercept) -1.1891569565 9.257056e-02 -12.845952 9.063064e-38
- # MAJORDRG 0.2008495039 7.084338e-02 2.835120 4.580846e-03
- # MINORDRG 0.2075713235 4.883860e-02 4.250149 2.136283e-05
- # INCOME -0.0004902169 4.032866e-05 -12.155548 5.360122e-34
- y <- "MAJORDRG"
- x <- c("ADEPCNT", "MINORDRG", "INCPER")
- f <- as.formula(paste(y, paste(x, collapse = " + "), sep = " ~ "))
- summary(glm(f, data = df2, family = "poisson"))$coef
- # Estimate Std. Error z value Pr(|z|)
- # (Intercept) -2.875143e+00 6.565395e-02 -43.792384 0.000000e+00
- # ADEPCNT 2.091082e-01 2.137937e-02 9.780844 1.360717e-22
- # MINORDRG 5.249111e-01 1.775197e-02 29.569171 3.723779e-192
- # INCPER 2.018335e-05 1.683187e-06 11.991151 3.953735e-33
- ca_glm(f, df2, "poisson", 2)[rank(rownames(summary(glm(f, data = df2, family = "poisson"))$coef)), ]
- # name beta stder zvalue pvalue
- # (Intercept) -2.876932e+00 6.589413e-02 -43.659914 0.000000e+00
- # ADEPCNT 2.072821e-01 2.151670e-02 9.633546 5.770316e-22
- # MINORDRG 5.269996e-01 1.791464e-02 29.417248 3.304900e-190
- # INCPER 2.015435e-05 1.692556e-06 11.907644 1.079855e-32
- ca_glm(f, df2, "poisson", 4)[rank(rownames(summary(glm(f, data = df2, family = "poisson"))$coef)), ]
- # name beta stder zvalue pvalue
- # (Intercept) -2.890965e+00 6.723771e-02 -42.996187 0.000000e+00
- # ADEPCNT 2.112105e-01 2.179890e-02 9.689044 3.356557e-22
- # MINORDRG 5.334541e-01 1.848846e-02 28.853359 4.598288e-183
- # INCPER 2.012836e-05 1.744654e-06 11.537165 8.570364e-31
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement