Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require(doParallel)
- library(foreach)
- require(snowfall)
- library(microbenchmark)
- sfInit(parallel = TRUE, cpus = 6L)
- cl <- sfGetCluster()
- registerDoParallel(cl)
- a <- rnorm(1e3)
- b <- rnorm(1e4)
- d <- 0.5
- e <- rnorm(1e5) # unused variables
- # f is not a good method to get that result, it is just for benchmark
- f <- function(x) {
- sum <- 0
- for (i in seq(1, x)) sum <- sum + (mean(a) - mean(b))*d*i
- return(sum)
- }
- sfExport("a", "b", "d")
- clusterExport(cl, c("a", "b", "d"))
- g1 <- function(x) {
- out1 <- vector("numeric", length = 100)
- for (i in 1:1000) out1[[i]] <- f(i)
- return(out1)
- }
- g2 <- function(x) {
- out2 <- sapply(1:1000, f)
- return(out2)
- }
- g3 <- function(x) {
- out3 <- sfSapply(1:1000, f)
- return(out3)
- }
- g4 <- function(x) {
- out4 <- parSapply(cl, 1:1000, f)
- return(out4)
- }
- g5 <- function(x) {
- out5 <- foreach(i = 1:1000, .combine = c, .export = "f") %dopar% f(i)
- return(out5)
- }
- microbenchmark(g1(), g2(), g3(), g4(), g5(), times = 20L)
- # Unit: seconds
- # expr min lq mean median uq max neval
- # g1() 12.197837 12.289874 12.364563 12.351551 12.460248 12.549038 20
- # g2() 12.149057 12.213601 12.274867 12.262235 12.318187 12.548602 20
- # g3() 3.846017 3.983526 4.065733 4.047937 4.075122 4.530810 20
- # g4() 3.933617 3.973013 4.030316 4.016203 4.074077 4.224589 20
- # g5() 2.764633 2.800048 2.861919 2.845468 2.914351 3.008172 20
- # find the main difference
- library(profvis)
- profvis(g3()) # using clusterApply
- profvis(g5()) # using clusterApplyLB
- # clusterApplyLB is a load balancing version of clusterApply. If the length p of seq is not greater
- # than the number of nodes n, then a job is sent to p nodes. Otherwise the first n jobs are placed
- # in order on the n nodes. When the first job completes, the next job is placed on the node that
- # has become free; this continues until all jobs are complete. Using clusterApplyLB can result in
- # better cluster utilization than using clusterApply, but increased communication can reduce performance.
- # Furthermore, the node that executes a particular job is non-deterministic.
- g6 <- function(x) {
- out6 <- clusterApplyLB(cl, 1:1000, f)
- return(out6)
- }
- g7 <- function(x) {
- out7 <- parSapplyLB(cl, 1:1000, f)
- return(out7)
- }
- library(plyr)
- g8 <- function(x) {
- out8 <- laply(1:1000, f, .parallel = TRUE)
- return(out8)
- }
- microbenchmark(g5(), g6(), g7(), g8(), times = 20L)
- # Unit: seconds
- # expr min lq mean median uq max neval
- # g5() 2.811597 2.840651 2.899999 2.860215 2.925780 3.193207 20
- # g6() 2.627146 2.640597 2.692733 2.680902 2.734413 2.816154 20
- # g7() 3.949628 3.978429 4.045335 4.029595 4.068357 4.384041 20
- # g8() 2.866590 2.883808 2.927684 2.909651 2.957875 3.070483 20
- sfStop()
- rm(cl)
- # without parallel
- f2 <- function(x) rnorm(5)
- o1 <- foreach(i = 1:2, .combine = cbind) %do% f2(i)
- o2 <- sapply(cl, 1:2, f2)
- o3 <- foreach(i = 1, .combine = cbind) %do% f2(i)
- o4 <- sapply(cl, 1, f2)
- class(o1) # "matrix"
- class(o2) # "matrix"
- class(o3) # "numeric"
- class(o4) # "matrix"
Advertisement
Add Comment
Please, Sign In to add comment