Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(data.table)
- library(magrittr)
- library(ggplot2)
- set.seed(42)
# Benchmark chained data.table brackets against the same operations written
# with magrittr pipes, on a table of N rows.
#
# N: number of rows in the synthetic table (a single positive number).
#
# Returns a numeric vector of timing ratios bracket / pipe, one per paired
# benchmark iteration. Values below 1 mean the bracket form was faster on
# that iteration.
do_bench <- function(N) {
  # Build the group column explicitly at length N. Relying on data.table()
  # to recycle a length-3 vector leaves a remainder whenever N is not a
  # multiple of 3, which previously had to be hidden with suppressWarnings().
  dt <- data.table(
    x = rnorm(N),
    group = rep_len(letters[1:3], N)
  )

  pipe <- function() {
    dt %>%
      .[, mean(x), by = group] %>%
      .[group == "a"]
  }

  bracket <- function() {
    dt[, mean(x), by = group][group == "a"]
  }

  # The expressions handed to bench::mark() must be *calls*. Passing the bare
  # names `pipe` / `bracket` would only time looking up a function object and
  # never run the data.table code.
  bm <- bench::mark(pipe = pipe(), bracket = bracket(), check = FALSE)

  pipe_times <- as.numeric(bm$time[[1]])
  bracket_times <- as.numeric(bm$time[[2]])

  # The two expressions are not guaranteed to be run the same number of
  # times, so pair iterations only up to the shorter run instead of letting
  # `/` recycle the shorter vector.
  n_pairs <- min(length(pipe_times), length(bracket_times))
  idx <- seq_len(n_pairs)
  bracket_times[idx] / pipe_times[idx]
}
# Run the benchmark for table sizes 1e4 ... 1e8 rows. Each N contributes one
# row per timing ratio returned by do_bench(); the ratio column is named
# explicitly rather than relying on data.table's auto-generated `V1`.
benches <- data.table(N = 10^seq(4, 8)) %>%
  .[, .(ratio = do_bench(N)), by = N]

# Distribution of bracket / pipe timing ratios per table size, on a log
# scale so that "twice as fast" and "twice as slow" are symmetric around the
# reference line at 1.
ggplot(benches, aes(factor(N), ratio)) +
  geom_boxplot() +
  ggforce::geom_sina(size = 0.1, alpha = 0.1) +
  geom_hline(yintercept = 1) +
  scale_y_log10(
    "Timing [][] / %>%",
    breaks = c(1 / 3, 1 / 2, seq(1, 3)),
    limits = c(1 / 4, 4),
    labels = c("1/3", "1/2", seq(1, 3))
  ) +
  scale_x_discrete("Number of rows") +
  labs(
    title = paste0(
      "Using pipes with data.table operations ",
      "has negligible impact on timing"
    ),
    # The subtitle shows the code that is actually benchmarked: the filter
    # is on group == "a" (the group column holds letters, not numbers).
    subtitle = paste0(
      "doing \n",
      "dt %>% .[, mean(x), by = group] %>% .[group == \"a\"] vs. \n",
      "dt[, mean(x), by = group][group == \"a\"]"
    )
  ) +
  hrbrthemes::theme_ipsum_rc()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement