Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- get_digit <- function(x, d) {
- # digits from the right
- # i.e.: first digit is the ones, second is the tens, etc.
- (x %% 10^d) %/% (10^(d-1))
- }
- radix_sort <- function(x) {
- # k is maximum number of digits
- k <- max(nchar(x))
- for(i in 1:k) {
- x_digit_i <- get_digit(x, i)
- # split numbers based on their i digit
- x_by_bucket <- split(x, x_digit_i)
- # recombine the vectors, now sorted to the i'th digit
- x <- unlist(x_by_bucket, use.names = FALSE)
- }
- # since each iteration is a stable sort, the final result
- # is a sorted array, yay!
- x
- }
- > library(microbenchmark)
- > x <- sample(100)
- > microbenchmark(radix_sort(x), sort(x))
- Unit: microseconds
- expr min lq mean median uq max neval cld
- radix_sort(x) 459.378 465.895 485.58322 480.4835 496.779 649.956 100 b
- sort(x) 27.314 29.487 33.05064 31.9710 33.212 73.563 100 a
- > x <- sample(10000)
- > microbenchmark(radix_sort(x), sort(x))
- Unit: microseconds
- expr min lq mean median uq max
- radix_sort(x) 44317.123 44777.8965 46062.3446 45204.3715 45714.807 63838.148
- sort(x) 158.609 165.7485 198.3083 186.6995 206.099 750.832
- x <- sample(10000)
- Rprof(tmp <- tempfile())
- for (i in 1:10) z <- radix_sort(x)
- Rprof()
- summaryRprof(tmp)$by.total
- # total.time total.pct self.time self.pct
- # "radix_sort" 8.26 99.76 0.72 8.70
- # "split" 7.34 88.65 0.06 0.72
- # "split.default" 7.28 87.92 0.54 6.52
- # "as.factor" 6.74 81.40 0.08 0.97
- # "factor" 6.64 80.19 1.72 20.77
- # "as.character" 4.34 52.42 4.34 52.42
- # "unique" 0.42 5.07 0.04 0.48
- # "unique.default" 0.38 4.59 0.38 4.59
- # "%%" 0.14 1.69 0.14 1.69
- # "get_digit" 0.14 1.69 0.00 0.00
- # "sort.list" 0.12 1.45 0.02 0.24
- # "order" 0.08 0.97 0.06 0.72
- # "unlist" 0.06 0.72 0.06 0.72
- # [...]
- z <- outer(x_digit_i, 0:9, "==")
- idx <- row(z)[z]
- idx <- 1L + (which(z) - 1L) %% length(x)
- x <- x[idx]
- get_digit <- function(x, d) (x %% as.integer(10^d)) %/% as.integer(10^(d-1))
- x <- sample(100)
- microbenchmark(radix_sort(x), radix_sort2(x), sort(x))
- # Unit: microseconds
- # expr min lq mean median uq max neval
- # radix_sort(x) 964.692 972.3675 1025.35180 984.3775 1012.178 2233.397 100
- # radix_sort2(x) 250.642 256.5720 282.58952 261.2910 282.449 1266.061 100
- # sort(x) 82.270 86.1605 92.22669 88.0230 90.943 223.249 100
- x <- sample(10000)
- microbenchmark(radix_sort(x), radix_sort2(x), sort(x))
- # Unit: microseconds
- # expr min lq mean median uq max neval
- # radix_sort(x) 71939.706 76147.1715 80028.7541 78389.8140 81512.4140 144632.484 100
- # radix_sort2(x) 24218.810 27613.3190 34841.8724 29477.7115 31772.9415 143283.337 100
- # sort(x) 411.691 454.4015 563.4825 492.6165 558.0925 3412.719 100
Add Comment
Please, Sign In to add comment