Advertisement
SteveWeston

Sequential data table benchmark V2

Sep 16th, 2013
296
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 0.61 KB | None | 0 0
  # Sequential data.table benchmark.
  #
  # Generates `n` random values, each tagged with an id, then for every id
  # group computes the mean of all values that are NOT in that group and
  # stores it on the group's rows. Only that step is timed. An MD5 digest of
  # the resulting column is printed alongside the timing.
  # NOTE(review): the digest is presumably there so other implementations of
  # the same benchmark can be checked against this one -- confirm.
  #
  # Usage: Rscript <script>.R [n [m]]
  #   n  number of rows to generate            (default: 1000000)
  #   m  ids are sampled with replacement from 1..m
  #                                            (default: ceiling(n / 100))

  # Load every dependency up front so a missing package aborts the run before
  # the benchmark is executed rather than after it.
  library(data.table)
  library(digest)

  # Return positional argument `pos` parsed as an integer, or `default` when
  # that argument was not supplied. Stops with a descriptive message when the
  # supplied text does not parse; otherwise an NA would leak into the data
  # generation (e.g. an NA `m` yields a single all-NA id group).
  parse_count <- function(args, pos, default, label) {
    if (length(args) < pos) {
      return(default)
    }
    # The coercion warning is replaced by the explicit error below.
    value <- suppressWarnings(as.integer(args[pos]))
    if (is.na(value)) {
      stop(
        sprintf("%s must be an integer, got '%s'", label, args[pos]),
        call. = FALSE
      )
    }
    value
  }

  args <- commandArgs(trailingOnly = TRUE)
  n <- parse_count(args, 1L, 1000000L, "n")
  m <- parse_count(args, 2L, ceiling(n / 100), "m")

  # Fixed seed: the generated data is the same on every run with the same
  # n and m.
  set.seed(107)
  td <- data.table(val = rnorm(n), id = sample(m, n, replace = TRUE))

  # Timed section. Within each `id` group, `.I` holds the row numbers of that
  # group, so `td$val[-.I]` is every value outside the group; its mean is
  # written to all rows of the group. This expression is the benchmark
  # workload and is deliberately left as-is.
  start <- proc.time()[["elapsed"]]
  td[, means := mean(td$val[-.I]), by = id]
  elapsed <- proc.time()[["elapsed"]] - start

  res <- td$means
  cat("Sequential data table version:\n")
  cat(sprintf("Rows: %d, Unique IDs: %d\n", nrow(td), length(unique(td$id))))
  cat(sprintf("Elapsed time: %f\n", elapsed))

  cat(sprintf("MD5 hash: %s\n", digest(res)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement