Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Benchmark: column creation and grouped means with data.table, dtplyr and
# dplyr on the same randomly generated table.
#
# The table is held in three forms (a data.table, a tbl_dt and a tbl_df). Each
# operation is timed with microbenchmark(), and the multi-column mutate is then
# profiled with profvis(). The commented tables below each call are the
# recorded output from the original run and are kept verbatim.

library(data.table)
library(dtplyr)
library(dplyr)
library(pipeR)
library(microbenchmark)
library(profvis)

# Number of rows in the benchmark table.
n_rows <- 1e7

# Two grouping columns (a, b) of random letters and two numeric columns (c, d).
DT <- data.table(
  a = sample(LETTERS, n_rows, replace = TRUE),
  b = sample(LETTERS, n_rows, replace = TRUE),
  c = rnorm(n_rows),
  d = rnorm(n_rows)
)

# Independent copies, so that the by-reference `:=` updates applied to DT in
# the benchmarks below do not leak into the dtplyr / dplyr inputs.
# NOTE(review): tbl_dt() and tbl_df() come from older dtplyr / dplyr releases;
# confirm they exist in the installed versions before re-running.
DT2 <- copy(DT) %>>% tbl_dt
DT3 <- copy(DT) %>>% tbl_df

# ---- Add a single column ----
# `:=` modifies DT in place, so every iteration after the first overwrites the
# existing column `e` instead of adding a new one.
microbenchmark(
  dt = DT[, e := c * d],
  dtplyr = mutate(DT2, e = c * d),
  dplyr = mutate(DT3, e = c * d),
  times = 50L
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# dt 24.94179 25.22324 31.17094 25.65127 27.66590 128.2435 50
# dtplyr 108.21481 110.10539 157.68023 141.65822 197.71106 318.1482 50
# dplyr 25.24109 25.45467 33.00601 25.59744 26.62377 149.1016 50

# ---- Add five columns at once ----
microbenchmark(
  dt = DT[, `:=`(e = c * d, f = c + d, g = c - d, h = c^2, i = d^2)],
  dtplyr = mutate(DT2, e = c * d, f = c + d, g = c - d, h = c^2, i = d^2),
  dplyr = mutate(DT3, e = c * d, f = c + d, g = c - d, h = c^2, i = d^2),
  times = 50L
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# dt 202.7219 242.1649 299.0250 271.9968 349.0535 443.8595 50
# dtplyr 202.0095 215.1382 303.9602 278.7894 336.6854 500.0714 50
# dplyr 115.6288 116.4854 165.0618 121.9035 187.2587 361.3428 50

# ---- Grouped means, one grouping column ----
microbenchmark(
  dt = DT[, .(mean(c), mean(d)), by = .(a)],
  dtplyr = group_by(DT2, a) %>>% summarise(mean(c), mean(d)),
  dplyr = group_by(DT3, a) %>>% summarise(mean(c), mean(d)),
  times = 50L
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# dt 153.8275 155.6625 159.4768 156.4928 158.0805 263.9203 50
# dtplyr 240.1420 243.6950 291.4689 248.0736 349.2796 506.5822 50
# dplyr 600.8775 605.5545 620.2024 611.1551 620.1537 740.4969 50

# ---- Grouped means, two grouping columns ----
microbenchmark(
  dt = DT[, .(mean(c), mean(d)), by = .(a, b)],
  dtplyr = group_by(DT2, a, b) %>>% summarise(mean(c), mean(d)),
  dplyr = group_by(DT3, a, b) %>>% summarise(mean(c), mean(d)),
  times = 50L
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# dt 272.4823 274.2456 280.2034 276.4716 279.3117 367.5075 50
# dtplyr 357.5374 362.8934 403.9268 365.9142 440.4850 632.3936 50
# dplyr 972.4859 976.8619 1003.0153 985.8695 1011.0667 1147.9290 50

# ---- Profile the five-column mutate ----
# data.table: updates by reference, without a copy.
profvis(DT[, `:=`(e = c * d, f = c + d, g = c - d, h = c^2, i = d^2)])
# dtplyr: per the original author's note, this path still goes through
# data.table::copy.
profvis(mutate(DT2, e = c * d, f = c + d, g = c - d, h = c^2, i = d^2))
# dplyr on a tbl_df.
profvis(mutate(DT3, e = c * d, f = c + d, g = c - d, h = c^2, i = d^2))
Advertisement
Add Comment
Please, Sign In to add comment