Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(data.table)
- library(feather)
- library(microbenchmark)
- # Stack 200,000 copies of the built-in iris data set into one data.table.
- testDT <- rbindlist(l = replicate(n = 2e5L, expr = iris, simplify = FALSE))
- # In-memory footprint of the test table.
- format(object.size(testDT), units = "Mb") # 1030 Mb ~ 1 Gb
- # Write the same table once in every on-disk format under comparison:
- # CSV, feather, compressed RDS and uncompressed RDS.
- fwrite(x = testDT, file = "test.csv")
- write_feather(x = testDT, path = "test.feather")
- saveRDS(object = testDT, file = "test.rds")
- saveRDS(object = testDT, file = "test2.rds", compress = FALSE)
- # Size of each file in bytes; trailing comments are the values the author
- # recorded.
- file.size("test.csv") # 761600059 ~ 726 Mb
- file.size("test.feather") # 1080000640 ~ 1030 Mb
- file.size("test.rds") # 8296749 ~ 7.91 Mb
- file.size("test2.rds") # 1080000557 ~ 1030 Mb
- # Time every writer and reader 20 times. Each expression runs inside local(),
- # i.e. in a fresh environment, so the objects created by the read expressions
- # never overwrite `testDT` in the workspace.
- microbenchmark(
-   saveRDS = local(saveRDS(testDT, "test.rds")),
-   saveRDS_no_compress = local(
-     saveRDS(testDT, "test2.rds", compress = FALSE)
-   ),
-   write_feather = local(write_feather(testDT, "test.feather")),
-   fwrite = local(fwrite(testDT, "test.csv")),
-   readRDS = local({
-     from_rds <- readRDS("test.rds")
-   }),
-   readRDS_no_compress = local({
-     from_rds_raw <- readRDS("test2.rds")
-   }),
-   # The feather result is converted to a plain data.frame inside the timed
-   # expression, so that conversion is part of the measured time.
-   read_feather = local(as.data.frame(read_feather("test.feather"))),
-   fread = local(fread("test.csv")),
-   times = 20L
- )
- # Unit: milliseconds
- # expr min lq mean median uq max neval
- # saveRDS 7516.5394 7602.5767 7770.3196 7686.7767 7935.6723 8158.606 20
- # saveRDS_no_compress 1417.6231 1446.3086 1756.1208 1514.9383 1729.1255 5045.703 20
- # write_feather 4571.6872 5459.9026 6384.5605 6115.1136 7116.2653 9478.708 20
- # fwrite 2271.8399 2382.4617 2525.4811 2474.2916 2527.3236 3416.503 20
- # readRDS_no_compress 1274.8282 1395.2916 1516.4908 1513.2334 1658.3701 1816.422 20
- # readRDS 2118.5699 2252.7583 2390.4575 2385.7802 2518.7170 2830.373 20
- # read_feather 565.3394 612.3607 727.3408 621.3474 900.5532 1136.687 20
- # fread 57886.2133 58140.8123 59768.2511 58742.0473 60783.4750 65087.893 20
- # devtools::install_github("bwlewis/lz4")
- library(lz4)
- # Recreate the test table: 200,000 stacked copies of iris.
- testDT <- rbindlist(replicate(2e5L, iris, simplify = FALSE))
- # Serialize `x` in memory, LZ4-compress the bytes and write them to `path`.
- write_lz4 <- function(x, path) {
-   invisible(writeBin(lzCompress(serialize(x, NULL)), path))
- }
- # Read all of `path` as raw bytes, LZ4-decompress them and unserialize the
- # result back into an R object.
- read_lz4 <- function(path) {
-   # file.size() gives the byte count readBin() needs to slurp the whole file.
-   unserialize(lzDecompress(readBin(path, "raw", file.size(path))))
- }
- # Round-trip check: what comes back from disk must equal what was written.
- write_lz4(testDT, "test.lz4")
- testDT2 <- read_lz4("test.lz4")
- all.equal(testDT, testDT2) # TRUE
- # Time the lz4 write and read paths 20 times each.
- microbenchmark(
-   lz4write = local(write_lz4(testDT, "test.lz4")),
-   lz4read = local(read_lz4("test.lz4")),
-   times = 20L
- )
- # Unit: seconds
- # expr min lq mean median uq max neval
- # lz4write 3.425010 3.646188 3.753905 3.733799 3.841660 4.198919 20
- # lz4read 1.493515 1.566712 1.646142 1.603984 1.746855 1.797484 20
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement