Advertisement
Guest User

Untitled

a guest
Feb 28th, 2020
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.88 KB | None | 0 0
  1. function (x, cuts, m = 150, g, levels.mean = FALSE, digits, minmax = TRUE,
  2. oneval = TRUE, onlycuts = FALSE, formatfun = format, ...)
  3. {
  4. if (inherits(formatfun, "formula")) {
  5. if (!requireNamespace("rlang"))
  6. stop("Package 'rlang' must be installed to use formula notation")
  7. formatfun <- getFromNamespace("as_function", "rlang")(formatfun)
  8. }
  9. method <- 1
  10. x.unique <- sort(unique(c(x[!is.na(x)], if (!missing(cuts)) cuts)))
  11. min.dif <- min(diff(x.unique))/2
  12. min.dif.factor <- 1
  13. if (missing(digits))
  14. digits <- if (levels.mean)
  15. 5
  16. else 3
  17. format.args <- if (any(c("...", "digits") %in%
  18. names(formals(args(formatfun))))) {
  19. c(digits = digits, list(...))
  20. }
  21. else {
  22. list(...)
  23. }
  24. oldopt <- options("digits")
  25. options(digits = digits)
  26. on.exit(options(oldopt))
  27. xlab <- attr(x, "label")
  28. if (missing(cuts)) {
  29. nnm <- sum(!is.na(x))
  30. if (missing(g))
  31. g <- max(1, floor(nnm/m))
  32. if (g < 1)
  33. stop("g must be >=1, m must be positive")
  34. options(digits = 15)
  35. n <- table(x)
  36. xx <- as.double(names(n))
  37. options(digits = digits)
  38. cum <- cumsum(n)
  39. m <- length(xx)
  40. y <- as.integer(ifelse(is.na(x), NA, 1))
  41. labs <- character(g)
  42. cuts <- approx(cum, xx, xout = (1:g) * nnm/g, method = "constant",
  43. rule = 2, f = 1)$y
  44. cuts[length(cuts)] <- max(xx)
  45. lower <- xx[1]
  46. upper <- 1e+45
  47. up <- low <- double(g)
  48. i <- 0
  49. for (j in 1:g) {
  50. cj <- if (method == 1 || j == 1)
  51. cuts[j]
  52. else {
  53. if (i == 0)
  54. stop("program logic error")
  55. s <- if (is.na(lower))
  56. FALSE
  57. else xx >= lower
  58. cum.used <- if (all(s))
  59. 0
  60. else max(cum[!s])
  61. if (j == m)
  62. max(xx)
  63. else if (sum(s) < 2)
  64. max(xx)
  65. else approx(cum[s] - cum.used, xx[s], xout = (nnm -
  66. cum.used)/(g - j + 1), method = "constant",
  67. rule = 2, f = 1)$y
  68. }
  69. if (cj == upper)
  70. next
  71. i <- i + 1
  72. upper <- cj
  73. y[x >= (lower - min.dif.factor * min.dif)] <- i
  74. low[i] <- lower
  75. lower <- if (j == g)
  76. upper
  77. else min(xx[xx > upper])
  78. if (is.na(lower))
  79. lower <- upper
  80. up[i] <- lower
  81. }
  82. low <- low[1:i]
  83. up <- up[1:i]
  84. variation <- logical(i)
  85. for (ii in 1:i) {
  86. r <- range(x[y == ii], na.rm = TRUE)
  87. variation[ii] <- diff(r) > 0
  88. }
  89. if (onlycuts)
  90. return(unique(c(low, max(xx))))
  91. flow <- do.call(formatfun, c(list(low), format.args))
  92. fup <- do.call(formatfun, c(list(up), format.args))
  93. bb <- c(rep(")", i - 1), "]")
  94. labs <- ifelse(low == up | (oneval & !variation), flow,
  95. paste("[", flow, ",", fup, bb, sep = ""))
  96. ss <- y == 0 & !is.na(y)
  97. if (any(ss))
  98. stop(paste("categorization error in cut2. Values of x not appearing in any interval:\n",
  99. paste(format(x[ss], digits = 12), collapse = " "),
  100. "\nLower endpoints:", paste(format(low,
  101. digits = 12), collapse = " "), "\nUpper endpoints:",
  102. paste(format(up, digits = 12), collapse = " ")))
  103. y <- structure(y, class = "factor", levels = labs)
  104. }
  105. else {
  106. if (minmax) {
  107. r <- range(x, na.rm = TRUE)
  108. if (r[1] < cuts[1])
  109. cuts <- c(r[1], cuts)
  110. if (r[2] > max(cuts))
  111. cuts <- c(cuts, r[2])
  112. }
  113. l <- length(cuts)
  114. k2 <- cuts - min.dif
  115. k2[l] <- cuts[l]
  116. y <- cut(x, k2)
  117. if (!levels.mean) {
  118. brack <- rep(")", l - 1)
  119. brack[l - 1] <- "]"
  120. fmt <- do.call(formatfun, c(list(cuts), format.args))
  121. labs <- paste("[", fmt[1:(l - 1)], ",",
  122. fmt[2:l], brack, sep = "")
  123. if (oneval) {
  124. nu <- table(cut(x.unique, k2))
  125. if (length(nu) != length(levels(y)))
  126. stop("program logic error")
  127. levels(y) <- ifelse(nu == 1, c(fmt[1:(l - 2)],
  128. fmt[l]), labs)
  129. }
  130. else levels(y) <- labs
  131. }
  132. }
  133. if (levels.mean) {
  134. means <- tapply(x, y, function(w) mean(w, na.rm = TRUE))
  135. levels(y) <- do.call(formatfun, c(list(means), format.args))
  136. }
  137. attr(y, "class") <- "factor"
  138. if (length(xlab))
  139. label(y) <- xlab
  140. y
  141. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement