Advertisement
celestialgod

R read / write

Mar 24th, 2017
274
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.73 KB | None | 0 0
  # Packages used by the benchmarks in this script.
  library(data.table)
  library(feather)
  library(microbenchmark)

  # Benchmark payload: 200,000 copies of `iris` stacked into one data.table.
  testDT <- rbindlist(
    l = replicate(n = 2e5L, expr = iris, simplify = FALSE)
  )
  # In-memory footprint of the payload.
  format(object.size(testDT), units = "Mb") # 1030 Mb ~ 1 Gb

  # Write the payload once in every format so that the on-disk sizes can be
  # compared and the files exist for later reads.
  fwrite(x = testDT, file = "test.csv")
  write_feather(x = testDT, path = "test.feather")
  saveRDS(object = testDT, file = "test.rds")
  saveRDS(object = testDT, file = "test2.rds", compress = FALSE)

  # On-disk size of each file in bytes (author's recorded values alongside).
  file.size("test.csv")     #  761600059 ~  726 Mb
  file.size("test.feather") # 1080000640 ~ 1030 Mb
  file.size("test.rds")     #    8296749 ~ 7.91 Mb
  file.size("test2.rds")    # 1080000557 ~ 1030 Mb
  17.  
  # Time every writer and reader against the same table and files, 20 runs
  # each. Each expression is wrapped in local() so that anything it assigns
  # stays out of the calling environment.
  microbenchmark(
    saveRDS = local({
      saveRDS(object = testDT, file = "test.rds")
    }),
    saveRDS_no_compress = local({
      saveRDS(object = testDT, file = "test2.rds", compress = FALSE)
    }),
    write_feather = local({
      write_feather(x = testDT, path = "test.feather")
    }),
    fwrite = local({
      fwrite(x = testDT, file = "test.csv")
    }),
    readRDS = local({
      from_rds <- readRDS(file = "test.rds")
    }),
    readRDS_no_compress = local({
      from_raw_rds <- readRDS(file = "test2.rds")
    }),
    read_feather = local({
      as.data.frame(read_feather(path = "test.feather"))
    }),
    fread = local({
      fread("test.csv")
    }),
    times = 20L
  )
  # Results recorded by the author:
  # Unit: milliseconds
  #                 expr        min         lq       mean     median         uq       max neval
  #              saveRDS  7516.5394  7602.5767  7770.3196  7686.7767  7935.6723  8158.606    20
  #  saveRDS_no_compress  1417.6231  1446.3086  1756.1208  1514.9383  1729.1255  5045.703    20
  #        write_feather  4571.6872  5459.9026  6384.5605  6115.1136  7116.2653  9478.708    20
  #               fwrite  2271.8399  2382.4617  2525.4811  2474.2916  2527.3236  3416.503    20
  #  readRDS_no_compress  1274.8282  1395.2916  1516.4908  1513.2334  1658.3701  1816.422    20
  #              readRDS  2118.5699  2252.7583  2390.4575  2385.7802  2518.7170  2830.373    20
  #         read_feather   565.3394   612.3607   727.3408   621.3474   900.5532  1136.687    20
  #                fread 57886.2133 58140.8123 59768.2511 58742.0473 60783.4750 65087.893    20
  37.  
  # lz4 is installed from GitHub:
  # devtools::install_github("bwlewis/lz4")
  library(lz4)

  # Rebuild the payload, then do one round trip: serialize -> lz4-compress ->
  # write raw bytes, and the reverse, to confirm the table is recovered.
  testDT <- rbindlist(
    l = replicate(n = 2e5L, expr = iris, simplify = FALSE)
  )
  writeBin(
    object = lzCompress(serialize(object = testDT, connection = NULL)),
    con = "test.lz4"
  )
  testDT2 <- unserialize(
    lzDecompress(
      # Read the whole file: the byte count comes from its size on disk.
      readBin(con = "test.lz4", what = "raw", n = file.size("test.lz4"))
    )
  )
  all.equal(testDT, testDT2) # TRUE

  # Time the lz4 write and read paths, 20 runs each.
  microbenchmark(
    lz4write = local({
      serialized <- serialize(object = testDT, connection = NULL)
      writeBin(object = lzCompress(serialized), con = "test.lz4")
    }),
    lz4read = local({
      compressed <- readBin(
        con = "test.lz4",
        what = "raw",
        n = file.size("test.lz4")
      )
      unserialize(lzDecompress(compressed))
    }),
    times = 20L
  )
  # Results recorded by the author:
  # Unit: seconds
  #     expr      min       lq     mean   median       uq      max neval
  # lz4write 3.425010 3.646188 3.753905 3.733799 3.841660 4.198919    20
  #  lz4read 1.493515 1.566712 1.646142 1.603984 1.746855 1.797484    20
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement