Advertisement
Guest User

sjPlot::tab_model with arbitrary P-value thresholds (update)

a guest
Mar 12th, 2024
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 33.88 KB | None | 0 0
  1. ## Replacement 'get_p_stars' function that doesn't hard-code length or symbols
  2. my_get_p_stars <- function(pval, thresholds = NULL) {
  3. if (is.null(thresholds)) thresholds <- c("*"=.05, "**"=.01, "***"=.001)
  4. ord <- order(thresholds)
  5. sym <- names(thresholds)
  6. thresholds <- thresholds[ord]
  7. sym <- sym[ord]
  8.  
  9. vapply(pval, \(p) {
  10. if (is.na(p)) return("")
  11. lt <- vapply(thresholds, \(t) p < t, logical(1))
  12. if (any(lt)) return(sym[which.max(lt)])
  13. return("")
  14. }, character(1))
  15. }
  16.  
  17. ## New function to make footnote accept arbitrary lengths & symbols
  18. my_p_star_footnote <- function(thresholds) {
  19. paste(mapply(\(p, n) {
  20. paste0(n, " p&lt;", p)
  21. }, p=thresholds, n=names(thresholds)), collapse = "&nbsp;&nbsp;&nbsp;")
  22. }
  23.  
  24. ## Custom versions of tab_model and format_p_values that call the above helpers
  25. my_tab_model <- function(
  26. ...,
  27. transform,
  28.  
  29. show.intercept = TRUE,
  30. show.est = TRUE,
  31. show.ci = .95,
  32. show.ci50 = FALSE,
  33. show.se = NULL,
  34. show.std = NULL,
  35. std.response = TRUE,
  36. show.p = TRUE,
  37. show.stat = FALSE,
  38. show.df = FALSE,
  39.  
  40. show.zeroinf = TRUE,
  41. show.r2 = TRUE,
  42. show.icc = TRUE,
  43. show.re.var = TRUE,
  44. show.ngroups = TRUE,
  45. show.fstat = FALSE,
  46. show.aic = FALSE,
  47. show.aicc = FALSE,
  48. show.dev = FALSE,
  49. show.loglik = FALSE,
  50. show.obs = TRUE,
  51. show.reflvl = FALSE,
  52.  
  53. terms = NULL,
  54. rm.terms = NULL,
  55. order.terms = NULL,
  56. keep = NULL,
  57. drop = NULL,
  58.  
  59. title = NULL,
  60. pred.labels = NULL,
  61. dv.labels = NULL,
  62. wrap.labels = 25,
  63.  
  64. bootstrap = FALSE,
  65. iterations = 1000,
  66. seed = NULL,
  67.  
  68. robust = FALSE,
  69. vcov.fun = NULL,
  70. vcov.type = NULL,
  71. vcov.args = NULL,
  72.  
  73. string.pred = "Predictors",
  74. string.est = "Estimate",
  75. string.std = "std. Beta",
  76. string.ci = "CI",
  77. string.se = "std. Error",
  78. string.std_se = "standardized std. Error",
  79. string.std_ci = "standardized CI",
  80. string.p = "p",
  81. string.std.p = "std. p",
  82. string.df = "df",
  83. string.stat = "Statistic",
  84. string.std.stat = "std. Statistic",
  85. string.resp = "Response",
  86. string.intercept = "(Intercept)",
  87. strings = NULL,
  88. ci.hyphen = "&nbsp;&ndash;&nbsp;",
  89. minus.sign = "&#45;",
  90.  
  91. collapse.ci = FALSE,
  92. collapse.se = FALSE,
  93. linebreak = TRUE,
  94.  
  95.  
  96. col.order = c(
  97. "est",
  98. "se",
  99. "std.est",
  100. "std.se",
  101. "ci",
  102. "std.ci",
  103. "ci.inner",
  104. "ci.outer",
  105. "stat",
  106. "std.stat",
  107. "p",
  108. "std.p",
  109. "df.error",
  110. "response.level"
  111. ),
  112.  
  113. digits = 2,
  114. digits.p = 3,
  115. digits.rsq = 3,
  116. digits.re = 2,
  117. emph.p = TRUE,
  118. p.val = NULL,
  119. df.method = NULL,
  120. p.style = c("numeric", "stars", "numeric_stars", "scientific", "scientific_stars"),
  121. p.threshold = c("*"=0.05, "**"=0.01, "***"=0.001),
  122. p.adjust = NULL,
  123.  
  124. case = "parsed",
  125. auto.label = TRUE,
  126. prefix.labels = c("none", "varname", "label"),
  127. bpe = "median",
  128. CSS = sjPlot::css_theme("regression"),
  129. file = NULL,
  130. use.viewer = TRUE,
  131. encoding = "UTF-8"
  132. ) {
  133.  
  134. if (!missing(df.method)) {
  135. p.val <- df.method
  136. }
  137.  
  138. if (!is.null(p.val)) {
  139. p.val <- match.arg(p.val, choices = c("wald", "profile", "kenward", "kr", "satterthwaite", "ml1", "betwithin", "residual", "normal"))
  140. }
  141. p.style <- match.arg(p.style)
  142. prefix.labels <- match.arg(prefix.labels)
  143.  
  144. change_string_est <- !missing(string.est)
  145.  
  146. # if we prefix labels, use different default for case conversion,
  147. # else the separating white spaces after colon are removed.
  148. if (missing(case)) {
  149. if (prefix.labels == "none" && !show.reflvl)
  150. case <- "parsed"
  151. else
  152. case <- NULL
  153. }
  154.  
  155. if (p.style == "stars") show.p <- FALSE
  156.  
  157. # default robust?
  158. if (isTRUE(robust)) {
  159. vcov.fun <- "HC3"
  160. }
  161.  
  162. models <- list(...)
  163.  
  164. if (length(class(models[[1]])) == 1 && inherits(models[[1]], "list"))
  165. models <- lapply(models[[1]], function(x) x)
  166.  
  167. names(models) <- unlist(lapply(
  168. match.call(expand.dots = FALSE)$`...`,
  169. function(.x) deparse(.x, width.cutoff = 500L))
  170. )
  171.  
  172. auto.transform <- missing(transform)
  173. ci.lvl <- ifelse(is.null(show.ci), .95, show.ci)
  174.  
  175. copos <- which("est" == col.order)
  176. if (!sjmisc::is_empty(copos)) col.order[copos] <- "estimate"
  177.  
  178. copos <- which("se" == col.order)
  179. if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.error"
  180.  
  181. copos <- which("ci" == col.order)
  182. if (!sjmisc::is_empty(copos)) col.order[copos] <- "conf.int"
  183.  
  184. copos <- which("std.est" == col.order)
  185. if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.estimate"
  186.  
  187. copos <- which("std.se" == col.order)
  188. if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.se"
  189.  
  190. copos <- which("std.ci" == col.order)
  191. if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.conf.int"
  192.  
  193. copos <- which("p" == col.order)
  194. if (!sjmisc::is_empty(copos)) col.order[copos] <- "p.value"
  195.  
  196. copos <- which("std.p" == col.order)
  197. if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.p.value"
  198.  
  199. copos <- which("stat" == col.order)
  200. if (!sjmisc::is_empty(copos)) col.order[copos] <- "statistic"
  201.  
  202. copos <- which("std.stat" == col.order)
  203. if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.statistic"
  204.  
  205. # match strings, to label the default strings in the table,
  206. # like "Estimate", "CI" etc.
  207. if (!sjmisc::is_empty(strings) && !is.null(names(strings))) {
  208. s.names <- names(strings)
  209. if ("pred" %in% s.names) string.pred <- strings[["pred"]]
  210. if ("est" %in% s.names) string.est <- strings[["est"]]
  211. if ("std" %in% s.names) string.std <- strings[["std"]]
  212. if ("ci" %in% s.names) string.ci <- strings[["ci"]]
  213. if ("se" %in% s.names) string.se <- strings[["se"]]
  214. if ("std_se" %in% s.names) string.std_se <- strings[["std_se"]]
  215. if ("std_ci" %in% s.names) string.std_ci <- strings[["std_ci"]]
  216. if ("p" %in% s.names) string.p <- strings[["p"]]
  217. if ("std.p" %in% s.names) string.std.p <- strings[["std.p"]]
  218. if ("df" %in% s.names) string.df <- strings[["df"]]
  219. if ("stat" %in% s.names) string.stat <- strings[["stat"]]
  220. if ("std.stat" %in% s.names) string.std.stat <- strings[["std.stat"]]
  221. if ("resp" %in% s.names) string.resp <- strings[["resp"]]
  222. if ("intercept" %in% s.names) string.intercept <- strings[["intercept"]]
  223. }
  224.  
  225. model.list <- purrr::map2(
  226. models,
  227. seq_along(models),
  228. function(model, i) {
  229.  
  230. # get info on model family
  231. fam.info <- insight::model_info(model)
  232.  
  233. if (insight::is_multivariate(model))
  234. fam.info <- fam.info[[1]]
  235.  
  236. # check whether estimates should be transformed or not
  237.  
  238. if (auto.transform) {
  239. if (is.null(fam.info) || fam.info$is_linear || identical(fam.info$link_function, "identity"))
  240. transform <- NULL
  241. else
  242. transform <- "exp"
  243. }
  244.  
  245. # get tidy output of summary ----
  246.  
  247. dat <- sjPlot:::tidy_model(
  248. model = model,
  249. ci.lvl = ci.lvl,
  250. tf = transform,
  251. type = "est",
  252. bpe = bpe,
  253. robust = list(vcov.fun = vcov.fun, vcov.type = vcov.type, vcov.args = vcov.args),
  254. facets = FALSE,
  255. show.zeroinf = show.zeroinf,
  256. p.val = p.val,
  257. bootstrap = bootstrap,
  258. iterations = iterations,
  259. seed = seed,
  260. p_adjust = p.adjust,
  261. keep = keep,
  262. drop = drop
  263. )
  264.  
  265.  
  266. # transform estimates
  267.  
  268. if (!sjPlot:::is.stan(model) && !is.null(transform)) {
  269. funtrans <- match.fun(transform)
  270. dat[["estimate"]] <- funtrans(dat[["estimate"]])
  271. dat[["conf.low"]] <- funtrans(dat[["conf.low"]])
  272. dat[["conf.high"]] <- funtrans(dat[["conf.high"]])
  273. dat[["std.error"]] <- dat[["std.error"]] * dat[["estimate"]]
  274. }
  275.  
  276.  
  277. # merge CI columns
  278.  
  279. if (all(c("conf.low", "conf.high") %in% names(dat))) {
  280. dat <- dat |>
  281. dplyr::mutate(conf.int = sprintf(
  282. "%.*f%s%.*f",
  283. digits,
  284. .data$conf.low,
  285. ci.hyphen,
  286. digits,
  287. .data$conf.high
  288. )) |>
  289. dplyr::select(-"conf.low", -"conf.high")
  290. }
  291.  
  292. # get inner probability (i.e. 2nd CI for Stan-models) ----
  293.  
  294. if (sjPlot:::is.stan(model)) {
  295. dat <- dat |>
  296. sjmisc::var_rename(conf.int = "ci.outer") |>
  297. dplyr::mutate(ci.inner = sprintf(
  298. "%.*f%s%.*f",
  299. digits,
  300. .data$conf.low50,
  301. ci.hyphen,
  302. digits,
  303. .data$conf.high50
  304. )) |>
  305. dplyr::select(-"conf.low50", -"conf.high50")
  306. }
  307.  
  308. # tidy output of standardized values ----
  309.  
  310. if (!is.null(show.std) && !sjPlot:::is.stan(model)) {
  311. std_method <- switch(show.std, "std" = "refit", "std2" = "2sd", "")
  312. tmp_dat <- sjPlot:::tidy_model(
  313. model = model,
  314. ci.lvl = ci.lvl,
  315. tf = transform,
  316. type = "est",
  317. bpe = bpe,
  318. robust = list(vcov.fun = vcov.fun, vcov.type = vcov.type, vcov.args = vcov.args),
  319. facets = FALSE,
  320. show.zeroinf = show.zeroinf,
  321. p.val = p.val,
  322. p_adjust = p.adjust,
  323. standardize = std_method,
  324. bootstrap = bootstrap,
  325. iterations = iterations,
  326. seed = seed,
  327. keep = keep,
  328. drop = drop,
  329. std.response = std.response
  330. ) |>
  331. my_format_p_values(p.style, digits.p, emph.p, p.threshold) |>
  332. sjmisc::var_rename(
  333. estimate = "std.estimate",
  334. std.error = "std.se",
  335. conf.low = "std.conf.low",
  336. conf.high = "std.conf.high",
  337. p.value = "std.p.value",
  338. statistic = "std.statistic",
  339. p.stars = "std.p.stars"
  340. ) |>
  341. dplyr::select(-1)
  342.  
  343. # transform estimates
  344.  
  345. if (!sjPlot:::is.stan(model) && !is.null(transform)) {
  346. funtrans <- match.fun(transform)
  347. tmp_dat[["std.estimate"]] <- funtrans(tmp_dat[["std.estimate"]])
  348. tmp_dat[["std.conf.low"]] <- funtrans(tmp_dat[["std.conf.low"]])
  349. tmp_dat[["std.conf.high"]] <- funtrans(tmp_dat[["std.conf.high"]])
  350. tmp_dat[["std.se"]] <- tmp_dat[["std.se"]] * tmp_dat[["std.estimate"]]
  351. }
  352.  
  353. dat <- tmp_dat |>
  354. sjmisc::add_columns(dat) |>
  355. dplyr::mutate(std.conf.int = sprintf(
  356. "%.*f%s%.*f",
  357. digits,
  358. .data$std.conf.low,
  359. ci.hyphen,
  360. digits,
  361. .data$std.conf.high
  362. ))|>
  363. dplyr::select(-"std.conf.low", -"std.conf.high")
  364. # if t-statistic is the same for standardized and unstandardized model
  365. # remove standardized; ignore intercept
  366. if (all(round(dat$statistic[-1], 3) == round(dat$std.statistic[-1], 3))) {
  367. dat <- dat |>
  368. dplyr::select(-"std.statistic", -"std.p.value")
  369. }
  370. }
  371.  
  372. # format p values for unstandardized model
  373. dat <- my_format_p_values(dat, p.style, digits.p, emph.p, p.threshold)
  374.  
  375. # add asterisks to estimates ----
  376.  
  377. if (grepl("stars", p.style)) {
  378. if (sjPlot:::obj_has_name(dat, "estimate"))
  379. dat$estimate <- sprintf("%.*f <sup>%s</sup>", digits, dat$estimate, dat$p.stars)
  380. if (!show.est && sjPlot:::obj_has_name(dat, "std.estimate")) {
  381. dat$std.estimate <- sprintf("%.*f <sup>%s</sup>", digits, dat$std.estimate, dat$std.p.stars)
  382. dat <- dplyr::select(dat, -"std.p.stars")
  383. }
  384. }
  385.  
  386. if ("p.stars" %in% names(dat)) {
  387. dat <- dplyr::select(dat, -"p.stars")
  388. }
  389.  
  390.  
  391. # switch column for p-value and conf. int. ----
  392.  
  393. dat <- dat[, sjPlot:::sort_columns(colnames(dat), sjPlot:::is.stan(model), col.order)]
  394.  
  395.  
  396. # add suffix to column names, so we can distinguish models later
  397.  
  398. cn <- colnames(dat)[2:ncol(dat)]
  399. colnames(dat)[2:ncol(dat)] <- sprintf("%s_%i", cn, i)
  400.  
  401.  
  402. # for HTML, convert numerics to character ----
  403.  
  404. dat <- dat |>
  405. purrr::map_if(is.numeric, ~ sprintf("%.*f", digits, .x)) |>
  406. as.data.frame(stringsAsFactors = FALSE)
  407.  
  408.  
  409. # remove 2nd HDI if requested ----
  410.  
  411. if (!show.ci50)
  412. dat <- dplyr::select(dat, -sjPlot:::string_starts_with("ci.inner", colnames(dat)))
  413.  
  414.  
  415. ## TODO optionally insert linebreak for new-line-CI / SE
  416.  
  417. # merge estimates and CI / SE columns, if requested ----
  418.  
  419. if (collapse.ci) {
  420.  
  421. if (linebreak)
  422. lb <- "<br>"
  423. else
  424. lb <- " "
  425.  
  426. est.cols <- sjPlot:::string_starts_with("estimate", x = colnames(dat))
  427. dat[[est.cols]] <- sprintf("%s%s(%s)", dat[[est.cols]], lb, dat[[est.cols + 2]])
  428.  
  429. # for stan models, we also have 50% HDI
  430. if (!sjmisc::is_empty(string_starts_with("ci", x = colnames(dat)))) {
  431. if (isTRUE(show.ci50)) {
  432. dat <- dplyr::select(dat, -sjPlot:::string_starts_with("ci.inner", x = colnames(dat)))
  433. dat[[est.cols]] <- sprintf("%s%s(%s)", dat[[est.cols]], lb, dat[[est.cols + 2]])
  434. }
  435. dat <- dplyr::select(dat, -sjPlot:::string_starts_with("ci.outer", x = colnames(dat)))
  436. } else {
  437. dat <- dplyr::select(dat, -sjPlot:::string_starts_with("conf.int", x = colnames(dat)))
  438. }
  439.  
  440. std.cols <- sjPlot:::string_starts_with("std.estimate", x = colnames(dat))
  441. if (!sjmisc::is_empty(std.cols)) {
  442. dat[[std.cols]] <- sprintf("%s%s(%s)", dat[[std.cols]], lb, dat[[std.cols + 2]])
  443. dat <- dplyr::select(dat, -sjPlot:::string_starts_with("std.conf.int", x = colnames(dat)))
  444. }
  445. }
  446.  
  447. if (collapse.se) {
  448.  
  449. if (linebreak)
  450. lb <- "<br>"
  451. else
  452. lb <- " "
  453.  
  454. est.cols <- sjPlot:::string_starts_with("estimate", x = colnames(dat))
  455. dat[[est.cols]] <- sprintf("%s%s(%s)", dat[[est.cols]], lb, dat[[est.cols + 1]])
  456. dat <- dplyr::select(dat, -sjPlot:::string_starts_with("std.error", x = colnames(dat)))
  457.  
  458. std.cols <- sjPlot:::string_starts_with("std.estimate", x = colnames(dat))
  459. if (!sjmisc::is_empty(std.cols)) {
  460. dat[[std.cols]] <- sprintf("%s%s(%s)", dat[[std.cols]], lb, dat[[std.cols + 1]])
  461. dat <- dplyr::select(dat, -sjPlot:::string_starts_with("std.se", x = colnames(dat)))
  462. }
  463. }
  464.  
  465.  
  466. # replace minus signs
  467. dat[] <- lapply(dat, function(i) gsub("-(\\d)(.*)", paste0(minus.sign, "\\1\\2"), i))
  468.  
  469.  
  470. # handle zero-inflation part ----
  471.  
  472. zidat <- NULL
  473. wf <- sjPlot:::string_starts_with("wrap.facet", x = colnames(dat))
  474.  
  475. if (!sjmisc::is_empty(wf)) {
  476. zi <- which(dat[[wf]] %in% c("Zero-Inflated Model", "Zero Inflation Model", "zero_inflated", "zi"))
  477.  
  478. if (show.zeroinf && !sjmisc::is_empty(zi)) {
  479. zidat <- dat |>
  480. dplyr::slice(!! zi) |>
  481. dplyr::select(!! -wf)
  482. }
  483.  
  484. if (!sjmisc::is_empty(zi)) dat <- dplyr::slice(dat, !! -zi)
  485. dat <- dplyr::select(dat, !! -wf)
  486. }
  487.  
  488.  
  489. # Add no of observations statistic ----
  490.  
  491. n_obs <- NULL
  492.  
  493. if (show.obs) {
  494. n_obs <- sjPlot:::get_observations(model)
  495. }
  496.  
  497.  
  498. vars <- vars_brms <- NULL
  499.  
  500. # extract variance components ----
  501.  
  502. if ((show.icc || show.re.var || show.r2) && sjPlot:::is_mixed_model(model)) {
  503. if (inherits(model, "brmsfit")) {
  504. vars <- suppressWarnings(insight::get_variance(model))
  505. if (is.null(vars)) {
  506. vars_brms <- tryCatch(
  507. {
  508. performance::variance_decomposition(model)
  509. },
  510. error = function(e) {
  511. NULL
  512. }
  513. )
  514. if (!is.null(vars_brms)) {
  515. vars$var.intercept <- attr(vars_brms, "var_rand_intercept")
  516. vars$var.residual <- attr(vars_brms, "var_residual")
  517. }
  518. }
  519. } else {
  520. vars <- suppressWarnings(insight::get_variance(model))
  521. }
  522. } else {
  523. vars <- NULL
  524. }
  525.  
  526. # sanity check for models currently not supported by "get_variance()"
  527. if (!is.null(vars) && length(vars) == 1 && is.na(vars)) vars <- NULL
  528.  
  529. # Add ICC statistic ----
  530.  
  531. icc <- NULL
  532.  
  533. if (show.icc && sjPlot:::is_mixed_model(model) && !is.null(vars) && !all(is.na(vars))) {
  534. if (inherits(model, "brmsfit") && !is.null(vars_brms)) {
  535. icc <- list(icc.adjusted = vars_brms$ICC_decomposed)
  536. } else {
  537. icc <- list(icc.adjusted = vars$var.random / (vars$var.random + vars$var.residual))
  538. }
  539. }
  540.  
  541. # Add r-squared statistic ----
  542.  
  543. rsq <- NULL
  544.  
  545. if (show.r2 && !insight::is_multivariate(model)) {
  546. # if marginal and conditional r-squared already have been computed
  547. # via adjusted ICC, use these results and avoid time consuming
  548. # multiple computation
  549. if (sjPlot:::is_mixed_model(model)) {
  550. if (inherits(model, "brmsfit")) {
  551. rsqdummy <- tryCatch(suppressWarnings(performance::r2(model)),
  552. error = function(x) NULL)
  553. if (!is.null(rsqdummy)) {
  554. rsq <- list(
  555. `Marginal R2` = rsqdummy$R2_Bayes_marginal,
  556. `Conditional R2` = rsqdummy$R2_Bayes
  557. )
  558. }
  559. } else if (!is.null(vars)) {
  560. if (is.null(vars$var.random)) {
  561. rsq <- list(
  562. `Marginal R2` = vars$var.fixed / (vars$var.fixed + vars$var.residual),
  563. `Conditional R2` = NA
  564. )
  565. } else {
  566. rsq <- list(
  567. `Marginal R2` = vars$var.fixed / (vars$var.fixed + vars$var.random + vars$var.residual),
  568. `Conditional R2` = (vars$var.fixed + vars$var.random) / (vars$var.fixed + vars$var.random + vars$var.residual)
  569. )
  570. }
  571. }
  572. } else {
  573. rsq <- tryCatch(suppressWarnings(performance::r2(model)),
  574. error = function(x) NULL)
  575.  
  576. # fix names of r-squared values
  577.  
  578. if (!is.null(rsq)) {
  579. rnames <- sub("_", " ", names(rsq))
  580. names(rsq) <- rnames
  581. }
  582. }
  583. }
  584.  
  585.  
  586. # Add number of random effect groups ----
  587.  
  588. n_re_grps <- NULL
  589.  
  590. if (show.ngroups && sjPlot:::is_mixed_model(model)) {
  591. rand_eff <- insight::get_data(model, verbose = FALSE)[, insight::find_random(model, split_nested = TRUE, flatten = TRUE), drop = FALSE]
  592. n_re_grps <- sapply(rand_eff, function(.i) length(unique(.i, na.rm = TRUE)))
  593. names(n_re_grps) <- sprintf("ngrps.%s", names(n_re_grps))
  594. }
  595.  
  596.  
  597. # Add deviance and AIC statistic ----
  598.  
  599. dev <- NULL
  600. if (show.dev) dev <- sjPlot:::model_deviance(model)
  601.  
  602. aic <- NULL
  603. if (show.aic) aic <- sjPlot:::model_aic(model)
  604.  
  605. aicc <- NULL
  606. if (show.aicc) aicc <- sjPlot:::model_aicc(model)
  607.  
  608. loglik <- NULL
  609. if (show.loglik) loglik <- sjPlot:::model_loglik(model)
  610.  
  611.  
  612. ## TODO add F-Statistic
  613.  
  614.  
  615. # fix brms coefficient names
  616.  
  617. if (inherits(model, "brmsfit")) {
  618. dat$term <- gsub("^b_", "", dat$term)
  619. if (!is.null(zidat)) zidat$term <- gsub("^b_", "", zidat$term)
  620. }
  621.  
  622.  
  623. # check if Intercept should be renamed...
  624.  
  625. if (string.intercept != "(Intercept)") {
  626. intercepts <- which(dat$term == "(Intercept)")
  627. if (!sjmisc::is_empty(intercepts)) {
  628. dat$term[intercepts] <- string.intercept
  629. }
  630. if (!is.null(zidat)) {
  631. intercepts <- which(zidat$term == "(Intercept)")
  632. if (!sjmisc::is_empty(intercepts)) {
  633. zidat$term[intercepts] <- string.intercept
  634. }
  635. }
  636. }
  637.  
  638.  
  639. list(
  640. dat = dat,
  641. transform = transform,
  642. zeroinf = zidat,
  643. rsq = rsq,
  644. n_obs = n_obs,
  645. icc = icc,
  646. dev = dev,
  647. aic = aic,
  648. variances = vars,
  649. n_re_grps = n_re_grps,
  650. loglik = loglik,
  651. aicc = aicc
  652. )
  653. }
  654. )
  655.  
  656.  
  657. # join all model data frames and convert to character ----
  658.  
  659. na.vals <- c(
  660. "NA",
  661. sprintf("NA%sNA", ci.hyphen),
  662. sprintf("NA (NA%sNA)", ci.hyphen),
  663. sprintf("NA (NA%sNA) (NA)", ci.hyphen)
  664. )
  665.  
  666. # we have data for fixed effects and zero inflation part as
  667. # well as transformation of coefficients in a list, so separate
  668. # them out into own objects
  669.  
  670. model.data <- purrr::map(model.list, ~.x[[1]])
  671. transform.data <- purrr::map(model.list, ~.x[[2]])
  672. zeroinf.data <- purrr::map(model.list, ~.x[[3]])
  673. rsq.data <- purrr::map(model.list, ~.x[[4]])
  674. n_obs.data <- purrr::map(model.list, ~.x[[5]])
  675. icc.data <- purrr::map(model.list, ~.x[[6]])
  676. dev.data <- purrr::map(model.list, ~.x[[7]])
  677. aic.data <- purrr::map(model.list, ~.x[[8]])
  678. variance.data <- purrr::map(model.list, ~.x[[9]])
  679. ngrps.data <- purrr::map(model.list, ~.x[[10]])
  680. loglik.data <- purrr::map(model.list, ~.x[[11]])
  681. aicc.data <- purrr::map(model.list, ~.x[[12]])
  682. is.zeroinf <- purrr::map_lgl(model.list, ~ !is.null(.x[[3]]))
  683.  
  684. zeroinf.data <- purrr::compact(zeroinf.data)
  685.  
  686.  
  687. # make sure we don't have zi-data if not wanted
  688.  
  689. if (!show.zeroinf) zeroinf.data <- NULL
  690.  
  691.  
  692. # sort multivariate response models by response level
  693.  
  694. model.data <- purrr::map(model.data, function(.x) {
  695. resp.col <- sjPlot:::string_starts_with("response.level", x = colnames(.x))
  696. if (!sjmisc::is_empty(resp.col))
  697. .x[order(match(.x[[resp.col]], unique(.x[[resp.col]]))), ]
  698. else
  699. .x
  700. })
  701.  
  702.  
  703. # if only one multivariate response model, split data
  704. # to print models side by side, and update labels of
  705. # dependent variables
  706.  
  707. show.response <- TRUE
  708.  
  709. if (length(model.data) == 1) {
  710. fi <- insight::model_info(models[[1]])
  711.  
  712. if (insight::is_multivariate(models[[1]]))
  713. fi <- fi[[1]]
  714.  
  715. if (!is.null(fi) && (insight::is_multivariate(models[[1]]) || fi$is_categorical)) {
  716.  
  717. show.response <- FALSE
  718.  
  719. if (fi$is_categorical) {
  720. dv.labels <- sprintf(
  721. "%s: %s",
  722. insight::find_response(models[[1]]),
  723. unique(model.data[[1]][["response.level_1"]])
  724. )
  725.  
  726. model.data <- split(model.data[[1]], model.data[[1]]["response.level_1"])
  727. } else {
  728. dv.labels <- insight::find_response(models[[1]])
  729. model.data <- split(model.data[[1]], model.data[[1]]["response.level_1"])
  730. dv.labels <- dv.labels[match(names(dv.labels), names(model.data))]
  731. dv.labels <- sjmisc::word_wrap(dv.labels, wrap = wrap.labels, linesep = "<br>")
  732. }
  733.  
  734. model.data <- purrr::map2(model.data, seq_along(model.data), function(x, y) {
  735. colnames(x) <- gsub(
  736. pattern = "_1",
  737. replacement = sprintf("_%i", y),
  738. x = colnames(x)
  739. )
  740. x
  741. })
  742. }
  743. }
  744.  
  745.  
  746. # Join all models into one data frame, and replace NA by empty strings
  747.  
  748. dat <- model.data |>
  749. purrr::reduce(~ dplyr::full_join(.x, .y, by = "term")) |>
  750. purrr::map_df(~ dplyr::if_else(.x %in% na.vals | is.na(.x), "", .x))
  751.  
  752. # remove unwanted columns and rows ----
  753.  
  754. dat <-
  755. sjPlot:::remove_unwanted(
  756. dat,
  757. show.intercept,
  758. show.est,
  759. show.std,
  760. show.ci,
  761. show.se,
  762. show.stat,
  763. show.p,
  764. show.df,
  765. show.response,
  766. terms,
  767. rm.terms
  768. )
  769.  
  770.  
  771. # same for zero-inflated parts ----
  772.  
  773. zeroinf <- NULL
  774. if (!sjmisc::is_empty(zeroinf.data)) {
  775. zeroinf <- zeroinf.data |>
  776. purrr::reduce(~ dplyr::full_join(.x, .y, by = "term")) |>
  777. purrr::map_df(~ dplyr::if_else(.x %in% na.vals | is.na(.x), "", .x))
  778.  
  779. zeroinf <-
  780. sjPlot:::remove_unwanted(
  781. zeroinf,
  782. show.intercept,
  783. show.est,
  784. show.std,
  785. show.ci,
  786. show.se,
  787. show.stat,
  788. show.p,
  789. show.df,
  790. show.response,
  791. terms,
  792. rm.terms
  793. )
  794. }
  795.  
  796.  
  797. # get default labels for dv and terms ----
  798.  
  799. if (isTRUE(auto.label) && sjmisc::is_empty(pred.labels)) {
  800. if (sjPlot:::.labelled_model_data(models) || any(sapply(models, sjPlot:::is.stan)) || isTRUE(show.reflvl)) {
  801. pred.labels <- sjlabelled::term_labels(models, case = case, mark.cat = TRUE, prefix = prefix.labels)
  802. category.values <- attr(pred.labels, "category.value")
  803.  
  804. # remove random effect labels
  805. re_terms <- unlist(sapply(
  806. models,
  807. insight::find_predictors,
  808. effects = "random",
  809. component = "all",
  810. flatten = TRUE
  811. ))
  812.  
  813. if (!is.null(re_terms)) {
  814. pred.labels.tmp <- sjlabelled::term_labels(models, case = case, mark.cat = TRUE, prefix = "varname")
  815. for (.re in re_terms) {
  816. found <- grepl(paste0("^", .re, ":"), pred.labels.tmp)
  817. if (any(found)) {
  818. pred.labels <- pred.labels[!found]
  819. category.values <- category.values[!found]
  820. pred.labels.tmp <- pred.labels.tmp[!found]
  821. }
  822. }
  823. }
  824.  
  825. no.dupes <- !duplicated(names(pred.labels))
  826. pred.labels <- sjPlot:::prepare.labels(
  827. x = pred.labels[no.dupes],
  828. grp = show.reflvl,
  829. categorical = category.values[no.dupes],
  830. models = models
  831. )
  832. } else {
  833. pred.labels <- NULL
  834. for (pl_counter in seq_along(models)) {
  835. pred.labels <- c(pred.labels, parameters::format_parameters(models[[pl_counter]]))
  836. }
  837. pred.labels <- pred.labels[!duplicated(names(pred.labels))]
  838. show.reflvl <- FALSE
  839. }
  840. } else {
  841. # no automatic grouping of table rows for categorical variables
  842. # when user supplies own labels
  843. show.reflvl <- FALSE
  844. }
  845.  
  846.  
  847. # to insert "header" rows for categorical variables, we need to
  848. # save the original term names first.
  849.  
  850. # remember.terms <- dat$term
  851.  
  852.  
  853. # named vector for predictor labels means we try to match labels
  854. # with model terms
  855.  
  856. if (!sjmisc::is_empty(pred.labels)) {
  857.  
  858. if (!is.null(names(pred.labels))) {
  859. labs <- sjmisc::word_wrap(pred.labels, wrap = wrap.labels, linesep = "<br>")
  860. if (show.reflvl) {
  861. pl <- pred.labels
  862. dupes <- which(pred.labels == names(pred.labels))
  863. if (!sjmisc::is_empty(dupes)) pl <- pl[-dupes]
  864. dat <- merge(dat, data.frame(term = names(pl)), by = "term", all = TRUE)
  865. # resort, in case reference level is alphabetically after other categories
  866. found <- match(names(pl), dat$term)
  867. dat[sort(found), ] <- dat[found, ]
  868. refs <- is.na(dat[, 2])
  869. } else {
  870. refs <- NULL
  871. }
  872. # some labels may not match. in this case, we only need to replace those
  873. # elements in the vector that match a specific label, but
  874. # at the correct position inside "dat$term"
  875. tr <- seq_len(nrow(dat))
  876. find.matches <- match(dat$term, names(pred.labels))
  877. find.na <- which(is.na(find.matches))
  878. if (!sjmisc::is_empty(find.na)) tr <- tr[-find.na]
  879. rp <- as.vector(stats::na.omit(find.matches))
  880.  
  881. dat$term[tr] <- unname(labs[rp])
  882.  
  883. if (!is.null(refs)) {
  884. dat[refs, 2:ncol(dat)] <- ""
  885. est.cols <- if (show.est)
  886. grepl("^estimate", colnames(dat))
  887. else if (show.std)
  888. grepl("^std.estimate", colnames(dat))
  889. else
  890. NULL
  891. if (!is.null(est.cols)) dat[refs, est.cols] <- "<em>Reference</em>"
  892. }
  893.  
  894. # also label zero-inflated part
  895.  
  896. if (!is.null(zeroinf)) {
  897. tr <- seq_len(nrow(zeroinf))
  898. find.matches <- match(zeroinf$term, names(pred.labels))
  899. find.na <- which(is.na(find.matches))
  900. if (!sjmisc::is_empty(find.na)) tr <- tr[-find.na]
  901. rp <- as.vector(stats::na.omit(find.matches))
  902.  
  903. zeroinf$term[tr] <- unname(labs[rp])
  904. }
  905.  
  906. } else {
  907. if (length(pred.labels) == nrow(dat))
  908. dat$term <- pred.labels
  909. else
  910. message("Length of `pred.labels` does not equal number of predictors, no labelling applied.")
  911. }
  912. }
  913.  
  914.  
  915. if (isTRUE(auto.label) && is.null(dv.labels)) {
  916. dv.labels <- sjmisc::word_wrap(
  917. sjlabelled::response_labels(models, case = case),
  918. wrap = wrap.labels,
  919. linesep = "<br>"
  920. )
  921. } else if (is.null(dv.labels)) {
  922. dv.labels <- purrr::map(models, insight::find_response) |> purrr::flatten_chr()
  923. }
  924.  
  925.  
  926. # does user want a specific order for terms?
  927.  
  928. if (!is.null(order.terms)) {
  929. if (length(order.terms) == nrow(dat)) {
  930. dat <- dat[order.terms, ]
  931. } else {
  932. message("Number of values in `order.terms` does not match number of terms. Terms are not sorted.")
  933. }
  934. }
  935.  
  936.  
  937. # get proper column header labels ----
  938.  
  939. col.header <- purrr::map_chr(colnames(dat), function(x) {
  940. pos <- grep("^estimate_", x)
  941.  
  942. if (!sjmisc::is_empty(pos)) {
  943. i <- as.numeric(sub("estimate_", "", x = x, fixed = TRUE))
  944.  
  945. if (insight::is_multivariate(models[[1]]))
  946. mr <- i
  947. else
  948. mr <- NULL
  949.  
  950. if (change_string_est && !sjmisc::is_empty(string.est)) {
  951. x <- string.est
  952. } else if (i <= length(models)) {
  953. x <- sjPlot:::estimate_axis_title(
  954. models[[i]],
  955. axis.title = NULL,
  956. type = "est",
  957. transform = transform.data[[i]],
  958. multi.resp = mr,
  959. include.zeroinf = FALSE
  960. )
  961. } else if (length(models) == 1) {
  962. x <- sjPlot:::estimate_axis_title(
  963. models[[1]],
  964. axis.title = NULL,
  965. type = "est",
  966. transform = transform.data[[1]],
  967. multi.resp = mr,
  968. include.zeroinf = FALSE
  969. )
  970. } else {
  971. x <- string.est
  972. }
  973. }
  974.  
  975.  
  976. pos <- grep("^term", x)
  977. if (!sjmisc::is_empty(pos)) x <- string.pred
  978.  
  979. pos <- grep("^conf.int", x)
  980. if (!sjmisc::is_empty(pos)) x <- string.ci
  981.  
  982. pos <- grep("^std.error", x)
  983. if (!sjmisc::is_empty(pos)) x <- string.se
  984.  
  985. pos <- grep("^std.estimate", x)
  986. if (!sjmisc::is_empty(pos)) x <- string.std
  987.  
  988. pos <- grep("^std.se", x)
  989. if (!sjmisc::is_empty(pos)) x <- string.std_se
  990.  
  991. pos <- grep("^std.conf.int", x)
  992. if (!sjmisc::is_empty(pos)) x <- string.std_ci
  993.  
  994. pos <- grep("^p.value", x)
  995. if (!sjmisc::is_empty(pos)) x <- string.p
  996.  
  997. pos <- grep("^std.p.value", x)
  998. if (!sjmisc::is_empty(pos)) x <- string.std.p
  999.  
  1000. pos <- grep("^df", x)
  1001. if (!sjmisc::is_empty(pos)) x <- string.df
  1002.  
  1003. pos <- grep("^statistic", x)
  1004. if (!sjmisc::is_empty(pos)) x <- string.stat
  1005.  
  1006. pos <- grep("^std.statistic", x)
  1007. if (!sjmisc::is_empty(pos)) x <- string.std.stat
  1008.  
  1009. pos <- grep("^response.level", x)
  1010. if (!sjmisc::is_empty(pos)) x <- string.resp
  1011.  
  1012. pos <- grep("^ci.inner", x)
  1013. if (!sjmisc::is_empty(pos)) x <- "CI (50%)"
  1014.  
  1015. pos <- grep("^ci.outer", x)
  1016. if (!sjmisc::is_empty(pos)) x <- sprintf("CI (%i%%)", round(100 * show.ci))
  1017.  
  1018. x
  1019. })
  1020.  
  1021.  
  1022. if (grepl("stars", p.style))
  1023. ## CHANGED
  1024. footnote <- my_p_star_footnote(p.threshold)
  1025. else
  1026. footnote <- NULL
  1027.  
  1028.  
  1029. sjPlot:::tab_model_df(
  1030. x = dat,
  1031. zeroinf = zeroinf,
  1032. is.zeroinf = is.zeroinf,
  1033. title = title,
  1034. col.header = col.header,
  1035. dv.labels = dv.labels,
  1036. rsq.list = rsq.data,
  1037. n_obs.list = n_obs.data,
  1038. icc.list = icc.data,
  1039. dev.list = dev.data,
  1040. aic.list = aic.data,
  1041. aicc.list = aicc.data,
  1042. variance.list = variance.data,
  1043. ngrps.list = ngrps.data,
  1044. loglik.list = loglik.data,
  1045. n.models = length(model.list),
  1046. show.re.var = show.re.var,
  1047. show.icc = show.icc,
  1048. CSS = CSS,
  1049. file = file,
  1050. use.viewer = use.viewer,
  1051. footnote = footnote,
  1052. digits.rsq = digits.rsq,
  1053. digits.re = digits.re,
  1054. encoding = encoding
  1055. )
  1056. }
  1057.  
  1058. ## The only change in this function is that it now calls 'my_get_p_stars'
  1059. ## instead of the previous 'get_p_stars'
  1060. my_format_p_values <- function(dat, p.style, digits.p, emph.p, p.threshold) {
  1061. # get stars and significance at alpha = 0.05 ----
  1062.  
  1063. if (!"p.value" %in% names(dat)) {
  1064. return(dat)
  1065. }
  1066.  
  1067. dat <- dat |>
  1068. dplyr::mutate(
  1069. p.stars = my_get_p_stars(.data$p.value, p.threshold),
  1070. p.sig = .data$p.value < .05
  1071. )
  1072.  
  1073. # scientific notation ----
  1074.  
  1075. if (grepl("scientific", p.style)) {
  1076. dat$p.value <- formatC(dat$p.value, format = "e", digits = digits.p)
  1077. } else {
  1078. dat$p.value <- sprintf("%.*f", digits.p, dat$p.value)
  1079. }
  1080.  
  1081. # emphasize p-values ----
  1082.  
  1083. if (emph.p && !all(dat$p.value == "NA")) dat$p.value[which(dat$p.sig)] <- sprintf("<strong>%s</strong>", dat$p.value[which(dat$p.sig)])
  1084. dat <- dplyr::select(dat, -"p.sig")
  1085.  
  1086. # indicate p <0.001 ----
  1087.  
  1088. pv <- paste0("0.", paste(rep("0", digits.p), collapse = ""))
  1089. dat$p.value[dat$p.value == pv] <- paste("&lt;", format(10^(-digits.p), scientific = FALSE), sep = "")
  1090.  
  1091. pv <- paste0("<strong>0.", paste(rep("0", digits.p), collapse = ""), "</strong>")
  1092. dat$p.value[dat$p.value == pv] <- paste("<strong>&lt;", format(10^(-digits.p), scientific = FALSE), "</strong>", sep = "")
  1093. dat
  1094. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement