Advertisement
SteveWeston

foreach/doParallel/PSOCK benchmark V2

Sep 16th, 2013
325
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.06 KB | None | 0 0
  1. suppressMessages(library(doParallel))
  2. library(itertools)
  3.  
  # Benchmark size is taken from the command line:
  #   args[1] = n, number of rows (default 1000000)
  #   args[2] = m, upper bound of the ids drawn by sample() (default n / 100,
  #             rounded up)
  args <- commandArgs(trailingOnly = TRUE)
  n <- if (length(args) > 0) as.integer(args[1]) else 1000000
  m <- if (length(args) > 1) as.integer(args[2]) else ceiling(n / 100)

  # as.integer() turns non-numeric text into NA, which would otherwise surface
  # as an obscure error inside rnorm()/sample(). A value of m below 1 is worse:
  # sample(m, ...) would silently draw from the single value m instead of 1:m.
  # Reject both up front, before any worker process is started.
  if (is.na(n) || n < 1) {
    stop("first argument (n, number of rows) must be a positive integer",
         call. = FALSE)
  }
  if (is.na(m) || m < 1) {
    stop("second argument (m, number of ids) must be a positive integer",
         call. = FALSE)
  }

  # Fixed seed so every run works on the same data set.
  set.seed(107)
  td <- data.frame(val = rnorm(n), id = sample(m, n, replace = TRUE))

  # Start a 4-process PSOCK cluster and register it as the %dopar% backend.
  cl <- makePSOCKcluster(4)
  registerDoParallel(cl)
  workers <- getDoParWorkers()
  13.  
  # Add every extra argument onto `a`, left to right, and return the total.
  #
  # a    Starting value (anything that supports `+`).
  # ...  Zero or more further values to add; with none, `a` is returned as is.
  #
  # Passed by name as the `.combine` function of the foreach() call in this
  # script, where `.multicombine=TRUE` lets it receive several results at once.
  vadd <- function(a, ...) {
    Reduce(`+`, list(...), a)
  }
  19.  
  # Timed benchmark, report and teardown.
  #
  # Everything runs inside tryCatch(..., finally = stopCluster(cl)) so the
  # worker processes are shut down even when the foreach body, the reporting
  # code or the digest package raises an error; the error itself still
  # propagates to the caller.
  tryCatch({
    start <- proc.time()[["elapsed"]]

    # The unique ids are split into one chunk per worker. Each task returns a
    # vector as long as `td` that is zero everywhere except on rows whose id is
    # in its chunk; those rows receive the mean of `val` over all rows with a
    # *different* id. Every row belongs to exactly one id, hence to exactly one
    # chunk, so summing the task vectors with vadd() yields the full result and
    # the sum does not depend on the arrival order allowed by .inorder=FALSE.
    # The loop body is intentionally left as is: it is the workload being timed.
    res <- foreach(ids=isplitVector(unique(td$id), chunks=workers),
                   .combine='vadd',
                   .multicombine=TRUE,
                   .inorder=FALSE) %dopar% {
      r <- rep(0, NROW(td))
      for (i in ids)
        r[td$id == i] <- mean(td$val[td$id != i])
      r
    }
    elapsed <- proc.time()[["elapsed"]] - start

    cat(sprintf("foreach/doParallel/PSOCK with %d workers:\n", workers))
    cat(sprintf("Rows: %d, Unique IDs: %d\n", length(td$id), length(unique(td$id))))
    cat(sprintf("Elapsed time: %f\n", elapsed))

    # Namespaced call instead of attaching digest in the middle of the script.
    cat(sprintf("MD5 hash: %s\n", digest::digest(res)))
  }, finally = stopCluster(cl))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement