Untitled

get_digit <- function(x, d) {
    # digits from the right
    # i.e.: first digit is the ones, second is the tens, etc.
    (x %% 10^d) %/% (10^(d-1))
}

radix_sort <- function(x) {
    # k is maximum number of digits
    k <- max(nchar(x))
    for(i in 1:k) {
        x_digit_i <- get_digit(x, i)
        # split numbers based on their i digit
        x_by_bucket <- split(x, x_digit_i)
        # recombine the vectors, now sorted to the i'th digit
        x <- unlist(x_by_bucket, use.names = FALSE)
    }
    # since each iteration is a stable sort, the final result
    # is a sorted array, yay!
    x
}

> library(microbenchmark)
> x <- sample(100)
> microbenchmark(radix_sort(x), sort(x))
Unit: microseconds
          expr     min      lq      mean   median      uq     max neval cld
 radix_sort(x) 459.378 465.895 485.58322 480.4835 496.779 649.956   100   b
       sort(x)  27.314  29.487  33.05064  31.9710  33.212  73.563   100  a
> x <- sample(10000)
> microbenchmark(radix_sort(x), sort(x))
Unit: microseconds
          expr       min         lq       mean     median        uq       max
 radix_sort(x) 44317.123 44777.8965 46062.3446 45204.3715 45714.807 63838.148
       sort(x)   158.609   165.7485   198.3083   186.6995   206.099   750.832

x <- sample(10000)
Rprof(tmp <- tempfile())
for (i in 1:10) z <- radix_sort(x)
Rprof()
summaryRprof(tmp)$by.total
#                         total.time total.pct self.time self.pct
# "radix_sort"                  8.26     99.76      0.72     8.70
# "split"                       7.34     88.65      0.06     0.72
# "split.default"               7.28     87.92      0.54     6.52
# "as.factor"                   6.74     81.40      0.08     0.97
# "factor"                      6.64     80.19      1.72    20.77
# "as.character"                4.34     52.42      4.34    52.42
# "unique"                      0.42      5.07      0.04     0.48
# "unique.default"              0.38      4.59      0.38     4.59
# "%%"                          0.14      1.69      0.14     1.69
# "get_digit"                   0.14      1.69      0.00     0.00
# "sort.list"                   0.12      1.45      0.02     0.24
# "order"                       0.08      0.97      0.06     0.72
# "unlist"                      0.06      0.72      0.06     0.72
# [...]

z <- outer(x_digit_i, 0:9, "==")

idx <- row(z)[z]

idx <- 1L + (which(z) - 1L) %% length(x)

x <- x[idx]

get_digit <- function(x, d) (x %% as.integer(10^d)) %/% as.integer(10^(d-1))

x <- sample(100)
microbenchmark(radix_sort(x), radix_sort2(x), sort(x))
# Unit: microseconds
#            expr     min       lq       mean   median       uq      max neval
#   radix_sort(x) 964.692 972.3675 1025.35180 984.3775 1012.178 2233.397   100
#  radix_sort2(x) 250.642 256.5720  282.58952 261.2910  282.449 1266.061   100
#         sort(x)  82.270  86.1605   92.22669  88.0230   90.943  223.249   100

x <- sample(10000)
microbenchmark(radix_sort(x), radix_sort2(x), sort(x))
# Unit: microseconds
#            expr       min         lq       mean     median         uq        max neval
#   radix_sort(x) 71939.706 76147.1715 80028.7541 78389.8140 81512.4140 144632.484   100
#  radix_sort2(x) 24218.810 27613.3190 34841.8724 29477.7115 31772.9415 143283.337   100
#         sort(x)   411.691   454.4015   563.4825   492.6165   558.0925   3412.719   100