SHARE
TWEET

Untitled

a guest Jul 22nd, 2019 56 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. library(data.table)
  2. library(magrittr)
  3. library(ggplot2)
  4. set.seed(42)
  # Benchmark one chained data.table query written two ways: with magrittr
  # pipes (`dt %>% .[...] %>% .[...]`) and with chained brackets
  # (`dt[...][...]`).
  #
  # Args:
  #   N: number of rows of the random test table.
  #
  # Returns:
  #   A numeric vector of per-iteration timing ratios, bracket / pipe.
  #   Values above 1 mean the piped form was the faster one in that iteration.
  do_bench <- function(N) {
    # rep_len() spells out the recycling of the three group labels, so nothing
    # has to be silenced when N is not a multiple of 3.
    dt <- data.table(
      x = rnorm(N),
      group = rep_len(letters[1:3], N)
    )

    # Named run_* so the helper does not mask base::pipe().
    run_pipe <- function() {
      dt %>%
        .[, mean(x), by = group] %>%
        .[group == "a"]
    }

    run_bracket <- function() {
      dt[, mean(x), by = group][group == "a"]
    }

    # bench::mark() times the expressions it receives, so the helpers have to
    # be *called* here; a bare function name would only time a symbol lookup.
    bm <- bench::mark(run_pipe(), run_bracket(), check = FALSE)

    # Rows of `bm` follow the order of the expressions above.
    pipe_time <- as.numeric(bm$time[[1]])
    bracket_time <- as.numeric(bm$time[[2]])

    # The two expressions are not guaranteed to have the same number of timed
    # iterations; keep the common prefix so the ratio never recycles.
    n_iter <- min(length(pipe_time), length(bracket_time))
    bracket_time[seq_len(n_iter)] / pipe_time[seq_len(n_iter)]
  }
  26.  
  # Run the benchmark once per table size (10^4 ... 10^8 rows). Each group
  # contributes one row per timing ratio returned by do_bench(); the unnamed
  # result column is left with data.table's default name.
  benches <- data.table(N = 10^(4:8))[, do_bench(N), by = N]
  29.  
  30.  
  # Plot the timing ratios per table size. The y axis is logarithmic so that
  # "twice as fast" and "twice as slow" sit symmetrically around 1.
  # `V1` is presumably the auto-named ratio column of `benches` -- confirm
  # against the code that builds it.
  ggplot(benches, aes(factor(N), V1)) +
    geom_boxplot() +
    ggforce::geom_sina(size = 0.1, alpha = 0.1) +
    # Reference line: a ratio of 1 means both forms take the same time.
    geom_hline(yintercept = 1) +
    scale_y_log10(
      "Timing [][] / %>%",
      breaks = c(1 / 3, 1 / 2, seq(1, 3)),
      limits = c(1 / 4, 4),
      labels = c("1/3", "1/2", seq(1, 3))
    ) +
    scale_x_discrete("Number of rows") +
    labs(
      title = "Using pipes with data.table operations has negligible impact on timing",
      # The subtitle quotes the benchmarked code, so it filters on the same
      # group label ("a") that the benchmark itself uses.
      subtitle = paste0(
        "doing \n",
        'dt %>% .[, mean(x), by = group] %>% .[group == "a"] vs. \n',
        'dt[, mean(x), by = group][group == "a"]'
      )
    ) +
    hrbrthemes::theme_ipsum_rc()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top