Advertisement
Guest User

Untitled

a guest
Jul 18th, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.94 KB | None | 0 0
  1. df1 <- read.csv("/mnt/d/projects/data/credit_count.txt")
  2. df2 <- df1[which(df1$CARDHLDR == 1), ]
  3.  
  4. ca_glm <- function(fml, data, family, nchunk) {
  5. cls <- parallel::makeCluster(nchunk, type = "PSOCK")
  6. df1 <- parallel::parLapplyLB(cls, parallel::clusterSplit(cls, seq(nrow(data))),
  7. function(c_) data[c_,])
  8. parallel::clusterExport(cl, c("fml", "family", "data"), envir = environment())
  9. est <- parallel::parLapplyLB(cls, df1,
  10. function(d_) cbind(coef(summary(glm(fml, data = d_, family = family)))[, 1:2], nrow(d_) / nrow(data)))
  11. parallel::stopCluster(cls)
  12. df2 <- Reduce(rbind,
  13. lapply(est,
  14. function(e_) data.frame(name = format(rownames(e_), justify = "left"),
  15. beta = e_[, 1] * e_[, 3],
  16. var = (e_[, 2] * e_[, 3])^ 2)))
  17. df3 <- Reduce(rbind,
  18. lapply(split(df2, df2$name),
  19. function(d_) data.frame(name = d_$name[1],
  20. beta = sum(d_$beta),
  21. stder = sum(d_$var) ^ 0.5)))
  22. return(cbind(df3, zvalue = df3$beta / df3$stder, pvalue = 2 * pnorm(-abs(df3$beta / df3$stder))))
  23. }
  24.  
  25. y <- "DEFAULT"
  26. x <- c("MAJORDRG", "MINORDRG", "INCOME")
  27. f <- as.formula(paste(y, paste(x, collapse = " + "), sep = " ~ "))
  28. summary(glm(f, data = df2, family = "binomial"))$coef
  29. # Estimate Std. Error z value Pr(|z|)
  30. # (Intercept) -1.2215970658 9.076358e-02 -13.459110 2.721743e-41
  31. # MAJORDRG 0.2030503715 6.921101e-02 2.933787 3.348538e-03
  32. # MINORDRG 0.1919770456 4.783751e-02 4.013107 5.992472e-05
  33. # INCOME -0.0004705599 3.918955e-05 -12.007282 3.253645e-33
  34. ca_glm(f, df2, "binomial", 2)[rank(rownames(summary(glm(f, data = df2, family = "binomial"))$coef)), ]
  35. # name beta stder zvalue pvalue
  36. # (Intercept) -1.2001768403 9.161584e-02 -13.100102 3.288167e-39
  37. # MAJORDRG 0.2024462446 6.936634e-02 2.918508 3.517103e-03
  38. # MINORDRG 0.1928945270 4.799079e-02 4.019407 5.834476e-05
  39. # INCOME -0.0004811946 3.974214e-05 -12.107919 9.589651e-34
  40. ca_glm(f, df2, "binomial", 4)[rank(rownames(summary(glm(f, data = df2, family = "binomial"))$coef)), ]
  41. # name beta stder zvalue pvalue
  42. # (Intercept) -1.1891569565 9.257056e-02 -12.845952 9.063064e-38
  43. # MAJORDRG 0.2008495039 7.084338e-02 2.835120 4.580846e-03
  44. # MINORDRG 0.2075713235 4.883860e-02 4.250149 2.136283e-05
  45. # INCOME -0.0004902169 4.032866e-05 -12.155548 5.360122e-34
  46.  
  47. y <- "MAJORDRG"
  48. x <- c("ADEPCNT", "MINORDRG", "INCPER")
  49. f <- as.formula(paste(y, paste(x, collapse = " + "), sep = " ~ "))
  50. summary(glm(f, data = df2, family = "poisson"))$coef
  51. # Estimate Std. Error z value Pr(|z|)
  52. # (Intercept) -2.875143e+00 6.565395e-02 -43.792384 0.000000e+00
  53. # ADEPCNT 2.091082e-01 2.137937e-02 9.780844 1.360717e-22
  54. # MINORDRG 5.249111e-01 1.775197e-02 29.569171 3.723779e-192
  55. # INCPER 2.018335e-05 1.683187e-06 11.991151 3.953735e-33
  56. ca_glm(f, df2, "poisson", 2)[rank(rownames(summary(glm(f, data = df2, family = "poisson"))$coef)), ]
  57. # name beta stder zvalue pvalue
  58. # (Intercept) -2.876932e+00 6.589413e-02 -43.659914 0.000000e+00
  59. # ADEPCNT 2.072821e-01 2.151670e-02 9.633546 5.770316e-22
  60. # MINORDRG 5.269996e-01 1.791464e-02 29.417248 3.304900e-190
  61. # INCPER 2.015435e-05 1.692556e-06 11.907644 1.079855e-32
  62. ca_glm(f, df2, "poisson", 4)[rank(rownames(summary(glm(f, data = df2, family = "poisson"))$coef)), ]
  63. # name beta stder zvalue pvalue
  64. # (Intercept) -2.890965e+00 6.723771e-02 -42.996187 0.000000e+00
  65. # ADEPCNT 2.112105e-01 2.179890e-02 9.689044 3.356557e-22
  66. # MINORDRG 5.334541e-01 1.848846e-02 28.853359 4.598288e-183
  67. # INCPER 2.012836e-05 1.744654e-06 11.537165 8.570364e-31
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement