Advertisement
celestialgod

merge with function aggregation with Rcpp

Oct 13th, 2015
137
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.01 KB | None | 0 0
  1. library(data.table)
  2. library(plyr)
  3. library(dplyr)
  4. library(purrr)
  5. library(magrittr)
  6. library(Rcpp)
  7.  
  8. sourceCpp(code = '
  9. #include <Rcpp.h>
  10. #include <string>
  11. #include <vector>
  12. using namespace Rcpp;
  13. using namespace std;
  14.  
  15. NumericVector add_narm(NumericVector x, NumericVector y) {
  16.  NumericVector out(x.size());
  17.  NumericVector::iterator x_it,y_it,out_it;
  18.  for (x_it = x.begin(), y_it=y.begin(), out_it = out.begin();
  19.       x_it != x.end(); ++x_it, ++y_it, ++out_it) {
  20.    if (ISNA(*x_it)) {
  21.      *out_it = *y_it;
  22.    } else if (ISNA(*y_it)) {
  23.      *out_it = *x_it;
  24.    } else {
  25.      *out_it = *x_it + *y_it;
  26.    }
  27.  }
  28.  return out;
  29. }
  30.  
  31. // [[Rcpp::export]]
  32. DataFrame merge_all_cpp(List dfs){
  33.  DataFrame out(dfs[0]);
  34.  vector<string> df_names = out.names();
  35.  for (int i = 1; i < dfs.length(); i++)
  36.  {
  37.    DataFrame tmp(dfs[i]);
  38.    vector<string> tmp_names = tmp.names();
  39.    for (int j = 0; j < tmp.length(); j++)
  40.    {
  41.      if (find(df_names.begin(), df_names.end(), tmp_names[j]) != df_names.end())
  42.      {
  43.        out[tmp_names[j]] = add_narm(
  44.          as<NumericVector>(out[tmp_names[j]]),
  45.          as<NumericVector>(tmp[tmp_names[j]]));
  46.      }
  47.      else
  48.      {
  49.        out.push_back(tmp[tmp_names[j]], tmp_names[j]);
  50.        df_names.push_back(tmp_names[j]);
  51.      }
  52.    }
  53.  }
  54.  return out;
  55. }');
  56.  
  57. t = proc.time()
  58. dfs = list(w,x,y,z)
  59. by = "SP"
  60. overall_keys = list(w,x,y,z) %>% map(~select_(., .dots = by)) %>% rbindlist %>% distinct
  61. dfs %<>% map(~merge(., overall_keys, by = by, all = TRUE))
  62. ## not use purrr
  63. # overall_keys = dfs %>% lapply(function(x) select_(x, .dots = by)) %>% rbindlist %>% distinct
  64. # dfs %<>% lapply(function(x) merge(x, overall_keys, by = by, all = TRUE))
  65. f = function(x) x / length(dfs)
  66. out7 = merge_all_cpp(dfs) %>% tbl_dt(FALSE) %>% mutate_each_(funs(f), by)
  67. proc.time() - t
  68. #   user  system elapsed
  69. #   1.46    0.11    1.57
  70. out7 %<>% select_(.dots = names(out4)) %>%
  71.   arrange_(.dots = by)  %>% mutate_each(funs(as.integer))
  72. all.equal(out4, out7) # TRUE
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement