celestialgod

comparison between dplyr, dtplyr, data.table

Nov 29th, 2016
222
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.62 KB | None | 0 0
  1. library(data.table)
  2. library(dtplyr)
  3. library(dplyr)
  4. library(pipeR)
  5. library(microbenchmark)
  6.  # Benchmark input: 1e7 rows; a, b are letters sampled with replacement from LETTERS, c, d are rnorm() draws. No seed is set, so values differ per run.
  7. DT <- data.table(a = sample(LETTERS, 1e7, TRUE), b = sample(LETTERS, 1e7, TRUE), c = rnorm(1e7), d = rnorm(1e7))
  8. DT2 <- copy(DT) %>>% tbl_dt # separate copy of DT piped into tbl_dt(); input for the "dtplyr" runs
  9. DT3 <- copy(DT) %>>% tbl_df # separate copy of DT piped into tbl_df(); input for the "dplyr" runs
  10.  # Add one derived column (e = c * d), 50 timed runs per backend. The mutate() results are not assigned; the table below is the recorded output.
  11. microbenchmark(dt = DT[ , e := c * d], dtplyr = mutate(DT2, e = c * d), dplyr = mutate(DT3, e = c * d), times = 50L) # NOTE: `:=` updates DT by reference, so DT keeps column e after this call
  12. # Unit: milliseconds
  13. #    expr       min        lq      mean    median        uq      max neval
  14. #      dt  24.94179  25.22324  31.17094  25.65127  27.66590 128.2435    50
  15. #  dtplyr 108.21481 110.10539 157.68023 141.65822 197.71106 318.1482    50
  16. #   dplyr  25.24109  25.45467  33.00601  25.59744  26.62377 149.1016    50
  17.  # Add five derived columns (e..i) in a single call, 50 timed runs per backend. `**` is parsed by R as `^`. The table below is the recorded output.
  18. microbenchmark(dt = DT[ , `:=`(e = c*d, f = c + d, g = c - d, h = c**2, i = d**2)],
  19.                dtplyr = mutate(DT2, e = c*d, f = c + d, g = c - d, h = c**2, i = d**2),
  20.                dplyr = mutate(DT3, e = c*d, f = c + d, g = c - d, h = c**2, i = d**2),
  21.                times = 50L)
  22. # Unit: milliseconds
  23. #    expr      min       lq     mean   median       uq      max neval
  24. #      dt 202.7219 242.1649 299.0250 271.9968 349.0535 443.8595    50
  25. #  dtplyr 202.0095 215.1382 303.9602 278.7894 336.6854 500.0714    50
  26. #   dplyr 115.6288 116.4854 165.0618 121.9035 187.2587 361.3428    50
  27.  # Grouped aggregation: mean of c and mean of d per value of a (one grouping column), 50 timed runs per backend. The table below is the recorded output.
  28. microbenchmark(dt = DT[ , .(mean(c), mean(d)), by = .(a)],
  29.                dtplyr = group_by(DT2, a) %>>% summarise(mean(c), mean(d)),
  30.                dplyr = group_by(DT3, a) %>>% summarise(mean(c), mean(d)), times = 50L)
  31. # Unit: milliseconds
  32. #   expr      min       lq     mean   median       uq      max neval
  33. #     dt 153.8275 155.6625 159.4768 156.4928 158.0805 263.9203    50
  34. # dtplyr 240.1420 243.6950 291.4689 248.0736 349.2796 506.5822    50
  35. #  dplyr 600.8775 605.5545 620.2024 611.1551 620.1537 740.4969    50
  36.  # Grouped aggregation: mean of c and mean of d per (a, b) pair (two grouping columns), 50 timed runs per backend. The table below is the recorded output.
  37. microbenchmark(dt = DT[ , .(mean(c), mean(d)), by = .(a, b)],
  38.                dtplyr = group_by(DT2, a, b) %>>% summarise(mean(c), mean(d)),
  39.                dplyr = group_by(DT3, a, b) %>>% summarise(mean(c), mean(d)), times = 50L)
  40. # Unit: milliseconds
  41. #    expr      min       lq      mean   median        uq       max neval
  42. #      dt 272.4823 274.2456  280.2034 276.4716  279.3117  367.5075    50
  43. #  dtplyr 357.5374 362.8934  403.9268 365.9142  440.4850  632.3936    50
  44. #   dplyr 972.4859 976.8619 1003.0153 985.8695 1011.0667 1147.9290    50
  45.  # Profile the same five-column expressions once per backend with profvis; the trailing notes are the author's observations.
  46. library(profvis)
  47. profvis(DT[ , `:=`(e = c*d, f = c + d, g = c - d, h = c**2, i = d**2)]) # data.table `:=`: columns are added without copying DT
  48. profvis(mutate(DT2, e = c*d, f = c + d, g = c - d, h = c**2, i = d**2)) # dtplyr: mutate() still calls data.table::copy
  49. profvis(mutate(DT3, e = c*d, f = c + d, g = c - d, h = c**2, i = d**2)) # dplyr mutate() on the tbl_df copy
Advertisement
Add Comment
Please, Sign In to add comment