Advertisement
Guest User

Untitled

a guest
Jul 22nd, 2019
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.09 KB | None | 0 0
  1. library(data.table)
  2. library(magrittr)
  3. library(ggplot2)
  4. set.seed(42)
  5. do_bench <- function(N) {
  6. dt <- suppressWarnings(data.table(x = rnorm(N),
  7. group = letters[1:3]))
  8.  
  9. pipe <- function() {
  10. dt %>%
  11. .[, mean(x), by = group] %>%
  12. .[group == "a"]
  13. }
  14.  
  15.  
  16. bracket <- function() {
  17. dt[, mean(x), by = group][group == "a"]
  18. }
  19.  
  20.  
  21.  
  22. bm <- bench::mark(pipe, bracket, check = FALSE)
  23.  
  24. as.numeric(bm$time[[2]]/bm$time[[1]])
  25. }
  26.  
  27. benches <- data.table(N = 10^seq(4, 8)) %>%
  28. .[, do_bench(N), by = N]
  29.  
  30.  
  31. ggplot(benches, aes(factor(N), V1)) +
  32. geom_boxplot() +
  33. ggforce::geom_sina(size = 0.1, alpha = 0.1) +
  34. geom_hline(yintercept = 1) +
  35. scale_y_log10("Timing [][] / %>%", breaks = c(1/3, 1/2, seq(1, 3)), limits = c(1/4, 4),
  36. labels = c("1/3", "1/2", seq(1, 3))) +
  37. scale_x_discrete("Number of rows") +
  38. labs(title = "Using pipes with data.table operations has negligible impact on timing",
  39. subtitle = "doing \ndt %>% .[, mean(x), by = group] %>% .[group == 1] vs. \ndt[, mean(x), by = group][group == 1]") +
  40. hrbrthemes::theme_ipsum_rc()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement