celestialgod

vectorization example

Jun 13th, 2018
185
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.10 KB | None | 0 0
  1. library(pipeR)
  2. library(data.table)
  3.  
  4. ## data generation
  5. N <- 5e5
  6. dat <- data.table(paste("sample", 1:N), matrix(rnorm(N*60), ncol = 60)) %>>%
  7.   setnames(c("Name", paste0(rep(c("X", "Y"), each = 30), rep(1:30,2))))
  8. mat <- as.matrix(dat[ , 2:61])
  9.  
  10. ## for loop
  11. # 前一版用data.tabe取值是不好的示範,不要學
  12. st <- proc.time()
  13. corCoef <- vector("numeric", N)
  14. for (i in 1:N)
  15.   corCoef[i] <- cor(mat[i, 1:30], mat[i, 31:60])
  16. proc.time() - st
  17. #  user  system elapsed
  18. # 15.53    0.06   15.66
  19.  
  20. ## parallel version
  21. # 新增平行版本
  22. library(parallel)
  23. cl <- makeCluster(28)
  24. st <- proc.time()
  25. output <- parRapply(cl, mat, function(x) cor(x[1:30], x[31:60]))
  26. proc.time() - st
  27. # user  system elapsed
  28. # 0.41    1.56    3.52
  29. stopCluster(cl)
  30.  
  31. ## melt
  32. # 全面移除dplyr做法
  33. st <- proc.time()
  34. output <- dat %>>% `[`(j = G := substring(variable, 1L, 1L)) %>>%
  35.   `[`(j = .(cor = cor(value[G == "X"], value[G == "Y"])), by = .(Name))
  36. proc.time() - st
  37. #  user   system elapsed
  38. #  18.12    0.92   18.68
  39.  
  40. ## vectorization
  41. library(matrixStats)
  42. st <- proc.time()
  43. mu_X <- rowMeans(mat[ , 1:30])
  44. mu_Y <- rowMeans(mat[ , 31:60])
  45. sigma_X <- rowSds(mat[ , 1:30])
  46. sigma_Y <- rowSds(mat[ , 31:60])
  47. myCorr <- rowSums((mat[ , 1:30]-mu_X)*(mat[ , 31:60]-mu_Y))/((sigma_X*sigma_Y)*(30-1))
  48. proc.time() - st
  49. #  user  system elapsed
  50. #  0.65    0.02    0.67
  51.  
  52. ## Rcpp
  53. library(Rcpp)
  54. library(RcppArmadillo)
  55. sourceCpp(code = '
  56. // [[Rcpp::depends(RcppArmadillo)]]
  57. #include <RcppArmadillo.h>
  58. using namespace Rcpp;
  59. using namespace arma;
  60. // [[Rcpp::export]]
  61.  
  62. NumericVector fastCor(NumericMatrix Xr, NumericMatrix Yr) {
  63.  uword k = Xr.ncol();
  64.  mat X(Xr.begin(), Xr.nrow(), k, false);
  65.  mat Y(Yr.begin(), Xr.nrow(), k, false);
  66.  colvec output(Xr.nrow());
  67.  
  68.  X.each_col() -= mean(X, 1);
  69.  X.each_col() /= stddev(X, 0, 1);
  70.  Y.each_col() -= mean(Y, 1);
  71.  Y.each_col() /= stddev(Y, 0, 1);
  72.  output = sum(X % Y, 1) / ((double) k - 1);
  73.  return wrap(output);
  74. }')      
  75.  
  76. st <- proc.time()
  77. output3 <- fastCor(mat[ , 1:30], mat[ , 31:60])
  78. proc.time() - st
  79. #  user  system elapsed
  80. #  0.29    0.08    0.37
Advertisement
Add Comment
Please, Sign In to add comment