Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# data generation
# Build a 10 x numData integer matrix: every column is an independent draw of
# 10 distinct values from 1..100 (sampling without replacement).
set.seed(100)
numData <- 1e4
dat <- vapply(
  seq_len(numData),
  function(draw) sample(1:100, 10),
  integer(10)
)
# method 1
# Pure R: for every column i, flag each element of `dat` that also occurs in
# column i, fold the flags back into the shape of `dat`, and count the columns
# holding at least 5 flagged elements. The trailing `- 1` removes the match of
# column i with itself (a column always matches itself once it has >= 5 rows).
st <- proc.time()
n_rows <- nrow(dat) # taken from the data instead of a hard-coded 10
count <- vapply(
  seq_len(ncol(dat)), # seq_len() is safe when `dat` has no columns
  function(i) {
    shared <- colSums(matrix(dat %in% dat[, i], n_rows))
    sum(shared >= 5) - 1
  },
  numeric(1)
)
proc.time() - st
# timing reported in the original post:
# user system elapsed
# 22.84 0.91 23.74
# method 2 by Rcpp
# Brute-force comparison of every pair of columns, compiled with Rcpp.
library(Rcpp)
library(RcppArmadillo)
sourceCpp(code = '
// [[Rcpp::depends(RcppArmadillo)]]
#define ARMA_DONT_USE_CXX11
#include <RcppArmadillo.h>
using namespace Rcpp;
using namespace arma;

// For every column of xr, count the columns (itself included) that have at
// least 5 equal element pairs with it.
// [[Rcpp::export]]
Col<int> count_cpp(IntegerMatrix xr) {
  // View the R matrix through Armadillo without copying its memory.
  Mat<int> x(xr.begin(), xr.nrow(), xr.ncol(), false);
  const uword nCols = x.n_cols;
  const uword nRows = x.n_rows;
  Col<int> out(xr.ncol());
  out.zeros();
  for (uword ref = 0; ref < nCols; ++ref) {
    for (uword other = 0; other < nCols; ++other) {
      // Number of (element of other, element of ref) pairs that are equal.
      int matches = 0;
      for (uword a = 0; a < nRows; ++a) {
        for (uword b = 0; b < nRows; ++b) {
          if (x(a, other) == x(b, ref)) {
            ++matches;
          }
        }
      }
      if (matches >= 5) {
        out(ref) += 1;
      }
    }
  }
  return out;
}')
st <- proc.time()
# Subtract 1 to drop each column matching itself.
count2 <- count_cpp(dat) - 1
proc.time() - st
# user system elapsed
# 7.28 0.01 7.30
# method 3 by Rcpp and RcppParallel
# Build an occurrence table (data column x distinct value) once, then let
# RcppParallel workers count, for each column, the columns sharing at least
# 5 values with it. Values are located in the table through `tableVecr`, so
# the data no longer has to consist of exactly 1..length(tableVecr).
library(Rcpp)
library(RcppArmadillo)
library(RcppParallel)
sourceCpp(code = '
// [[Rcpp::depends(RcppArmadillo, RcppParallel)]]
// [[Rcpp::plugins("cpp11")]]
#include <RcppArmadillo.h>
#include <RcppParallel.h>
#include <unordered_map>
using namespace Rcpp;
using namespace arma;
using namespace RcppParallel;

// Worker that stores in output(i) the number of data columns (column i
// included) sharing at least 5 values with data column i.
struct CountWorker: public Worker {
  const Mat<int>& tableMat;    // occurrences: data column x distinct value
  const Mat<uword>& valueIdx;  // table column of every element of the data
  Col<int>& output;            // one slot per data column, written by index
  CountWorker(const Mat<int>& tableMat, const Mat<uword>& valueIdx, Col<int>& output) :
    tableMat(tableMat), valueIdx(valueIdx), output(output) {}
  void operator()(std::size_t begin, std::size_t end) {
    for (std::size_t i = begin; i < end; i++)
    {
      // Select the table columns of the values held by data column i; each
      // row sum is then the number of matches with one data column.
      uvec picked = valueIdx.col(i);
      uvec tmp = find(sum(tableMat.cols(picked), 1) >= 5);
      output(i) = tmp.n_elem;
    }
  }
};

// xr: integer data matrix, one observation per column.
// tableVecr: the distinct values occurring in xr.
// Returns, per column of xr, the number of columns (itself included) that
// share at least 5 values with it. Stops with an error when xr holds a value
// that is absent from tableVecr.
// [[Rcpp::export]]
Col<int> count_cpp(IntegerMatrix xr, IntegerVector tableVecr) {
  Mat<int> x(xr.begin(), xr.nrow(), xr.ncol(), false);
  Col<int> tableVec(tableVecr.begin(), tableVecr.size(), false);
  // Map each distinct value to its 0-based column in the occurrence table.
  std::unordered_map<int, uword> position;
  for (uword k = 0; k < tableVec.n_elem; k++)
    position[tableVec(k)] = k;
  Mat<uword> valueIdx(x.n_rows, x.n_cols);
  Mat<int> tableMat = zeros< Mat<int> >(x.n_cols, tableVec.n_elem);
  Col<int> output = zeros< Col<int> >(x.n_cols);
  for (uword i = 0; i < x.n_cols; i++)
    for (uword j = 0; j < x.n_rows; j++)
    {
      std::unordered_map<int, uword>::const_iterator hit = position.find(x(j, i));
      if (hit == position.end())
        stop("xr contains a value that is missing from tableVecr");
      valueIdx(j, i) = hit->second;
      tableMat(i, hit->second)++;
    }
  CountWorker countWorker(tableMat, valueIdx, output);
  parallelFor(0, x.n_cols, countWorker);
  return output;
}')
st <- proc.time()
tmp <- unique(sort(dat))
# Subtract 1 to drop each column matching itself.
count3 <- count_cpp(dat, tmp) - 1
proc.time() - st
# timing reported in the original post:
# user system elapsed
# 1.16 0.06 0.28
# method 4 modified version of the codes written by Edster
# Build a column-by-value indicator matrix once, then count shared values per
# column with row sums.
st <- proc.time()
Y <- unique(sort(dat))
# Position of every element of `dat` within Y. Indexing through these
# positions (rather than the raw values) keeps the lookup correct when the
# values are not exactly 1..length(Y).
pos <- match(dat, Y)
dim(pos) <- dim(dat)
# Z[i, k] is 1 when column i of `dat` contains the value Y[k].
Z <- matrix(0, ncol(dat), length(Y))
Z[cbind(as.vector(col(dat)), as.vector(pos))] <- 1
# For column i, the row sums over its own values give the number of values
# each column shares with it; `- 1` drops the match of column i with itself.
count4 <- vapply(
  seq_len(ncol(dat)),
  function(i) sum(rowSums(Z[, pos[, i], drop = FALSE]) >= 5) - 1,
  numeric(1)
)
proc.time() - st
# timing reported in the original post (loop-based version):
# user system elapsed
# 7.10 1.31 8.47
# Check that methods 2-4 reproduce the counts of method 1; each comparison
# prints TRUE or a description of the differences.
for (other in list(count2, count3, count4)) {
  print(all.equal(count, as.vector(other)))
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement