Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(pipeR)
- library(data.table)
- ## data generation
- N <- 5e5
- dat <- data.table(paste("sample", 1:N), matrix(rnorm(N*60), ncol = 60)) %>>%
- setnames(c("Name", paste0(rep(c("X", "Y"), each = 30), rep(1:30,2))))
- mat <- as.matrix(dat[ , 2:61])
- ## for loop
- # 前一版用data.tabe取值是不好的示範,不要學
- st <- proc.time()
- corCoef <- vector("numeric", N)
- for (i in 1:N)
- corCoef[i] <- cor(mat[i, 1:30], mat[i, 31:60])
- proc.time() - st
- # user system elapsed
- # 15.53 0.06 15.66
- ## parallel version
- # 新增平行版本
- library(parallel)
- cl <- makeCluster(28)
- st <- proc.time()
- output <- parRapply(cl, mat, function(x) cor(x[1:30], x[31:60]))
- proc.time() - st
- # user system elapsed
- # 0.41 1.56 3.52
- stopCluster(cl)
- ## melt
- # 全面移除dplyr做法
- st <- proc.time()
- output <- dat %>>% `[`(j = G := substring(variable, 1L, 1L)) %>>%
- `[`(j = .(cor = cor(value[G == "X"], value[G == "Y"])), by = .(Name))
- proc.time() - st
- # user system elapsed
- # 18.12 0.92 18.68
- ## vectorization
- library(matrixStats)
- st <- proc.time()
- mu_X <- rowMeans(mat[ , 1:30])
- mu_Y <- rowMeans(mat[ , 31:60])
- sigma_X <- rowSds(mat[ , 1:30])
- sigma_Y <- rowSds(mat[ , 31:60])
- myCorr <- rowSums((mat[ , 1:30]-mu_X)*(mat[ , 31:60]-mu_Y))/((sigma_X*sigma_Y)*(30-1))
- proc.time() - st
- # user system elapsed
- # 0.65 0.02 0.67
- ## Rcpp
- library(Rcpp)
- library(RcppArmadillo)
- sourceCpp(code = '
- // [[Rcpp::depends(RcppArmadillo)]]
- #include <RcppArmadillo.h>
- using namespace Rcpp;
- using namespace arma;
- // [[Rcpp::export]]
- NumericVector fastCor(NumericMatrix Xr, NumericMatrix Yr) {
- uword k = Xr.ncol();
- mat X(Xr.begin(), Xr.nrow(), k, false);
- mat Y(Yr.begin(), Xr.nrow(), k, false);
- colvec output(Xr.nrow());
- X.each_col() -= mean(X, 1);
- X.each_col() /= stddev(X, 0, 1);
- Y.each_col() -= mean(Y, 1);
- Y.each_col() /= stddev(Y, 0, 1);
- output = sum(X % Y, 1) / ((double) k - 1);
- return wrap(output);
- }')
- st <- proc.time()
- output3 <- fastCor(mat[ , 1:30], mat[ , 31:60])
- proc.time() - st
- # user system elapsed
- # 0.29 0.08 0.37
Advertisement
Add Comment
Please, Sign In to add comment