celestialgod

combns_protein

Mar 30th, 2015
324
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.37 KB | None | 0 0
  1. library(Rcpp)
  2. library(RcppArmadillo)
  3. sourceCpp(code = '
  4. // [[Rcpp::depends(RcppArmadillo)]]
  5. #define ARMA_64BIT_WORD
  6. #include <RcppArmadillo.h>
  7. using namespace Rcpp;
  8. using namespace arma;
  9.  
  10. // [[Rcpp::export]]
  11. List combns_f2(IntegerVector comnbs_for_each_loc, IntegerVector exist_k,
  12.  List names_eliminate_k){
  13.  Col<int> X(comnbs_for_each_loc.begin(), comnbs_for_each_loc.size());
  14.  Col<int> Y(exist_k.begin(), exist_k.size(), false);
  15.  uvec loc_Y = find(Y >= 1);
  16.  X(loc_Y) -= 1;
  17.  loc_Y.insert_rows(loc_Y.n_elem, X.n_elem*ones<uvec>(1)-1);
  18.  List result_list(loc_Y.n_elem);
  19.  CharacterVector char_k = CharacterVector::create("K");
  20.  for (int i = 0; i < loc_Y.n_elem; i++)
  21.  {
  22.    uword size_mat = 1;
  23.    for (int k = 0; k <= loc_Y(i); k++)
  24.      size_mat *= (uword) X(k);
  25.    CharacterMatrix tmp_result(size_mat, loc_Y(i)+1);
  26.    for (int j = 0; j < (int) loc_Y(i)+1; j++)
  27.    {
  28.      if ( i < loc_Y.n_elem-1 && j == (int) loc_Y(i))
  29.        tmp_result(_, j) = rep(char_k, size_mat);
  30.      else
  31.        tmp_result(_, j) = rep(as<CharacterVector>(names_eliminate_k[j]),
  32.          size_mat / X(j));
  33.    }
  34.    result_list[i] = tmp_result;
  35.  }
  36.  return result_list;
  37. }
  38.  
  39. // [[Rcpp::export]]
  40. List combns_f(IntegerVector comnbs_for_each_loc, IntegerVector exist_k){
  41.  Col<int> X(comnbs_for_each_loc.begin(), comnbs_for_each_loc.size());
  42.  Col<int> Y(exist_k.begin(), exist_k.size(), false);
  43.  uvec loc_Y = find(Y >= 1);
  44.  X(loc_Y) -= 1;
  45.  loc_Y.insert_rows(loc_Y.n_elem, X.n_elem*ones<uvec>(1)-1);
  46.  List result_list(loc_Y.n_elem);
  47.  for (int i = 0; i < loc_Y.n_elem; i++)
  48.  {
  49.    uword size_mat = 1;
  50.    for (int k = 0; k <= loc_Y(i); k++)
  51.      size_mat *= (uword) X(k);
  52.    Mat<int> tmp_result(size_mat, loc_Y(i)+1);
  53.    for (int j = 0; j < (int) loc_Y(i)+1; j++)
  54.    {
  55.      if ( i < loc_Y.n_elem-1 && j == (int) loc_Y(i))
  56.        tmp_result.col(j) = repmat( Y(loc_Y(i)) *
  57.          ones<Col<int> >(1), size_mat, 1);
  58.      else
  59.        tmp_result.col(j) = repmat(
  60.          linspace<Col<int> >(1, X(j), X(j)), size_mat / X(j), 1);
  61.    }
  62.    result_list[i] = wrap(tmp_result);
  63.  }
  64.  return result_list;
  65. }')
  66.  
  67. # x_test = list("A", c("B1", "B2"), c("C1", "C2"), c("K", "D"), "E",
  68. #   c("F1", "F2"))
  69. set.seed(77)
  70. x = lapply(setdiff(LETTERS[1:14], "K"), function(a) paste0(a, 1:sample(1:5, 1)))
  71. x = lapply(x, function(y) if(runif(1) < 0.4){c(y, "K")} else{y})
  72.  
  73. t1 = proc.time()
  74. size_x = sapply(x, length)
  75. exist_k = as.integer(sapply(x, function(x) which(x=="K")))
  76. exist_k[which(is.na(exist_k))] = 0
  77. result = combns_f(size_x, exist_k)
  78. result_transform = vector('list', length(result))
  79. tmp_x = x
  80. for (j in 1:length(result))
  81. {
  82.   result_transform[[j]] = sapply(1:ncol(result[[j]]),
  83.     function(i) tmp_x[[i]][result[[j]][,i]])
  84.   if (j < length(result))
  85.     tmp_x[[which(exist_k>=1)[j]]] =
  86.       setdiff(tmp_x[[which(exist_k>=1)[j]]], "K")
  87. }
  88. proc.time() - t1
  89. #    user  system elapsed
  90. #    9.31    1.74   11.90
  91. object.size(result) # 704257576 bytes
  92. object.size(result_transform) # 1408520016 bytes
  93.  
  94. t2 = proc.time()
  95. size_x = sapply(x, length)
  96. exist_k = as.integer(sapply(x, function(x) which(x=="K")))
  97. exist_k[which(is.na(exist_k))] = 0
  98. result2 = combns_f2(size_x, exist_k, lapply(x, setdiff, y = "K"))
  99. proc.time() - t2
  100. #    user  system elapsed
  101. #    1.86    0.15    2.03
  102. object.size(result2) # 1408520016 bytes
  103. all.equal(result_transform, result2)
  104. # TRUE
Advertisement
Add Comment
Please, Sign In to add comment