celestialgod

Difference in memory use of filtering operations (data.table vs. data.frame)

Oct 21st, 2017
288
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 0.63 KB | None | 0 0
  # Compare how much memory is allocated when filtering a large table with
  # data.table versus base data.frame subsetting, measured with profmem().
  library(data.table)
  library(profmem)
  library(nycflights13)

  # Stack 100 copies of `flights` to get a table large enough to measure.
  dt <- rbindlist(replicate(100L, flights, simplify = FALSE))
  setkey(dt, month, hour, arr_delay)
  object.size(dt) # 4041516584 bytes ~ 3.76 GB

  # Independent copies so every benchmark starts from the same full table.
  df <- copy(dt)
  dt2 <- copy(dt)
  setDF(df) # turn the copy into a plain data.frame (by reference)

  # data.table: three successive filters
  profmem({
    dt <- dt[month < 5]
    dt <- dt[hour > 6]
    dt <- dt[arr_delay > 0]
  })
  # total 3546226792 bytes ~ 3.3 GB

  # data.frame: three successive filters
  profmem({
    df <- df[df$month < 5, ]
    df <- df[df$hour > 6, ]
    # `arr_delay` contains NA. A logical index with NA makes `[.data.frame`
    # return an all-NA row for each NA, whereas data.table drops those rows.
    # which() discards the NA positions so both benchmarks keep the same rows.
    df <- df[which(df$arr_delay > 0), ]
  })
  # Original run (plain logical index, NA rows kept) reported
  # total 10471614192 bytes ~ 9.75 GB; re-measure after the NA fix above.

  # data.table: single combined filter
  profmem({
    # The condition must be a bare expression. The original wrapped it in
    # eval("..."), which evaluates to the character string itself, so `i` was
    # a string instead of a logical filter and no rows were filtered.
    dt2 <- dt2[month < 5 & hour > 6 & arr_delay > 0]
  })
  # NOTE(review): the 32944 bytes (~ 32.17 KB) originally reported here came
  # from the string-valued `i` above and is not a valid measurement; re-run
  # to obtain the real total.
Advertisement
Add Comment
Please, Sign In to add comment