SHARE
TWEET

Untitled

a guest Oct 10th, 2019 64 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. library(data.table)
  2. library(disk.frame)
  3. setup_disk.frame()
  4.  
  #' Benchmark a grouped mean in data.table against disk.frame
  #'
  #' Times the mean of `dep_time` grouped by `year`, `month` and `day` in three
  #' ways and draws the three elapsed times as a bar plot:
  #'
  #' 1. directly on the in-memory data.table;
  #' 2. on a disk.frame sharded by `year`, `month` and `day`;
  #' 3. on a disk.frame created without `shardby`, using a two-stage
  #'    aggregation (partial sums and counts first, then combined).
  #'
  #' @param data1 A table with at least the columns `year`, `month`, `day` and
  #'   `dep_time`. It is passed to `setDT()`, so the caller's object is
  #'   converted to a data.table by reference.
  #' @param n Value interpolated into the plot title (how many times the
  #'   flights data was duplicated). Used for labelling only.
  #' @return The value of `barplot()`; called for the plot it draws.
  bench_disk.frame_data.table_group_by <- function(data1, n) {
    setDT(data1)

    a.sharded.df <- as.disk.frame(data1, shardby = c("year", "month", "day"))
    a.not_sharded.df <- as.disk.frame(data1)

    # Element 3 of a system.time() result is the elapsed (wall-clock) time.
    data.table_timing <- system.time(
      data1[
        ,
        .(mean_dep_time = mean(dep_time, na.rm = TRUE)),
        .(year, month, day)
      ]
    )[3]

    # NOTE(review): a plain mean() per chunk is only the overall group mean if
    # sharding keeps every (year, month, day) group inside one chunk - confirm.
    disk.frame_sharded_timing <- system.time(
      a.sharded.df[
        ,
        .(mean_dep_time = mean(dep_time, na.rm = TRUE)),
        .(year, month, day),
        keep = c("year", "month", "day", "dep_time")
      ]
    )[3]

    # Two-stage aggregation: the first step yields partial sums and non-NA
    # counts, the second step combines them into one mean per group.
    disk.frame_not_sharded_timing <- system.time(
      a.not_sharded.df[
        ,
        .(
          sum_dep_time = sum(dep_time, na.rm = TRUE),
          n = sum(!is.na(dep_time))
        ),
        .(year, month, day),
        keep = c("year", "month", "day", "dep_time")
      ][
        ,
        .(mean_dep_time = sum(sum_dep_time) / sum(n)),
        .(year, month, day)
      ]
    )[3]

    # Fixed: the original wrote `glue:glue(...)`, which R parses as the
    # sequence operator `:` rather than the namespace operator `::`.
    plot_title <- glue::glue(
      "flights duplicated {n}  times group-by year, month, day"
    )

    barplot(
      c(
        data.table_timing,
        disk.frame_sharded_timing,
        disk.frame_not_sharded_timing
      ),
      names.arg = c("data.table", "sharded disk.frame", "not sharded disk.frame"),
      main = plot_title,
      ylab = "Seconds"
    )
  }
  41.  
  42.  
  # Stack 100 copies of nycflights13::flights into one data.table;
  # system.time() reports how long the stacking takes.
  system.time(
    flights_100 <- rbindlist(rep(list(nycflights13::flights), 100))
  )

  gc()
  bench_disk.frame_data.table_group_by(flights_100, 100)

  # Ten copies of the 100x table give the 1000x data set.
  system.time(
    flights_1000 <- rbindlist(rep(list(flights_100), 10))
  )
  # Drop the 100x table and collect garbage before the larger benchmark.
  rm(flights_100)
  gc()

  bench_disk.frame_data.table_group_by(flights_1000, 1000)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top