SteveWeston

foreach/doParallel/PSOCK benchmark

Sep 13th, 2013
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 0.80 KB | None | 0 0
  # Attach the doParallel backend without its start-up chatter, plus
  # itertools, which supplies isplitVector() for chunking the ids below.
  suppressMessages(library(doParallel))
  library(itertools)

  # Synthetic input: n normally distributed values, each tagged with an id
  # drawn (with replacement) from 1..m. The seed is fixed so that every run
  # works on the same data.
  set.seed(107)
  n <- 1e6
  m <- 1e4
  td <- data.frame(val = rnorm(n), id = sample(m, n, replace = TRUE))

  # Start four PSOCK workers, register them as the %dopar% backend, and
  # record the worker count that foreach reports.
  cl <- makePSOCKcluster(4)
  registerDoParallel(cl)
  workers <- getDoParWorkers()
  12.  
  # Add `a` and every further argument together, left to right.
  #
  # Serves as the `.combine` function of the foreach loop below, which may
  # hand it several partial results in a single call.
  #
  # a    Starting value of the sum.
  # ...  Zero or more further values; each is added with `+`, so the usual
  #      element-wise arithmetic and recycling rules apply.
  #
  # Returns the accumulated sum; `a` itself when no further values are given.
  vadd <- function(a, ...) {
    # Left fold with `a` as the initial value; an empty `...` yields `a`.
    Reduce(`+`, list(...), a)
  }
  18.  
  # Timed section. The distinct ids are split into one chunk per worker.
  # Each task builds a full-length vector that is zero everywhere except at
  # the rows whose id is in its chunk; those rows receive the mean of `val`
  # over all rows with a different id. Every row is filled by exactly one
  # task, so summing the partial vectors with vadd() gives the complete
  # result regardless of the order in which tasks finish.
  start <- proc.time()[["elapsed"]]
  res <- foreach(ids = isplitVector(unique(td$id), chunks = workers),
                 .combine = "vadd",
                 .multicombine = TRUE,
                 .inorder = FALSE) %dopar% {
    r <- rep(0, NROW(td))
    for (i in ids) {
      r[td$id == i] <- mean(td$val[td$id != i])
    }
    r
  }
  elapsed <- proc.time()[["elapsed"]] - start

  # Release the worker processes. This happens after the timing has been
  # captured, so the measurement is unaffected.
  stopCluster(cl)

  # Report the wall-clock time and a hash of the result so that runs can be
  # checked against each other.
  library(digest)
  cat(sprintf("foreach/doParallel/PSOCK with %d workers:\n", workers))
  cat(sprintf("Elapsed time: %f, MD5 hash: %s\n", elapsed, digest(res)))
Advertisement
Add Comment
Please, Sign In to add comment