Guest User

Untitled

a guest
Nov 17th, 2018
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.30 KB | None | 0 0
  1. get_digit <- function(x, d) {
  2. # digits from the right
  3. # i.e.: first digit is the ones, second is the tens, etc.
  4. (x %% 10^d) %/% (10^(d-1))
  5. }
  6.  
  7. radix_sort <- function(x) {
  8. # k is maximum number of digits
  9. k <- max(nchar(x))
  10. for(i in 1:k) {
  11. x_digit_i <- get_digit(x, i)
  12. # split numbers based on their i digit
  13. x_by_bucket <- split(x, x_digit_i)
  14. # recombine the vectors, now sorted to the i'th digit
  15. x <- unlist(x_by_bucket, use.names = FALSE)
  16. }
  17. # since each iteration is a stable sort, the final result
  18. # is a sorted array, yay!
  19. x
  20. }
  21.  
  22. > library(microbenchmark)
  23. > x <- sample(100)
  24. > microbenchmark(radix_sort(x), sort(x))
  25. Unit: microseconds
  26. expr min lq mean median uq max neval cld
  27. radix_sort(x) 459.378 465.895 485.58322 480.4835 496.779 649.956 100 b
  28. sort(x) 27.314 29.487 33.05064 31.9710 33.212 73.563 100 a
  29. > x <- sample(10000)
  30. > microbenchmark(radix_sort(x), sort(x))
  31. Unit: microseconds
  32. expr min lq mean median uq max
  33. radix_sort(x) 44317.123 44777.8965 46062.3446 45204.3715 45714.807 63838.148
  34. sort(x) 158.609 165.7485 198.3083 186.6995 206.099 750.832
  35.  
  36. x <- sample(10000)
  37. Rprof(tmp <- tempfile())
  38. for (i in 1:10) z <- radix_sort(x)
  39. Rprof()
  40. summaryRprof(tmp)$by.total
  41. # total.time total.pct self.time self.pct
  42. # "radix_sort" 8.26 99.76 0.72 8.70
  43. # "split" 7.34 88.65 0.06 0.72
  44. # "split.default" 7.28 87.92 0.54 6.52
  45. # "as.factor" 6.74 81.40 0.08 0.97
  46. # "factor" 6.64 80.19 1.72 20.77
  47. # "as.character" 4.34 52.42 4.34 52.42
  48. # "unique" 0.42 5.07 0.04 0.48
  49. # "unique.default" 0.38 4.59 0.38 4.59
  50. # "%%" 0.14 1.69 0.14 1.69
  51. # "get_digit" 0.14 1.69 0.00 0.00
  52. # "sort.list" 0.12 1.45 0.02 0.24
  53. # "order" 0.08 0.97 0.06 0.72
  54. # "unlist" 0.06 0.72 0.06 0.72
  55. # [...]
  56.  
  57. z <- outer(x_digit_i, 0:9, "==")
  58.  
  59. idx <- row(z)[z]
  60.  
  61. idx <- 1L + (which(z) - 1L) %% length(x)
  62.  
  63. x <- x[idx]
  64.  
  65. get_digit <- function(x, d) (x %% as.integer(10^d)) %/% as.integer(10^(d-1))
  66.  
  67. x <- sample(100)
  68. microbenchmark(radix_sort(x), radix_sort2(x), sort(x))
  69. # Unit: microseconds
  70. # expr min lq mean median uq max neval
  71. # radix_sort(x) 964.692 972.3675 1025.35180 984.3775 1012.178 2233.397 100
  72. # radix_sort2(x) 250.642 256.5720 282.58952 261.2910 282.449 1266.061 100
  73. # sort(x) 82.270 86.1605 92.22669 88.0230 90.943 223.249 100
  74.  
  75. x <- sample(10000)
  76. microbenchmark(radix_sort(x), radix_sort2(x), sort(x))
  77. # Unit: microseconds
  78. # expr min lq mean median uq max neval
  79. # radix_sort(x) 71939.706 76147.1715 80028.7541 78389.8140 81512.4140 144632.484 100
  80. # radix_sort2(x) 24218.810 27613.3190 34841.8724 29477.7115 31772.9415 143283.337 100
  81. # sort(x) 411.691 454.4015 563.4825 492.6165 558.0925 3412.719 100
Add Comment
Please, Sign In to add comment