Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(data.table)
- library(plyr)
- library(dplyr)
- library(purrr)
- library(magrittr)
- library(Rcpp)
- sourceCpp(code = '
- // [[Rcpp::depends(RcppArmadillo)]]
- #include <RcppArmadillo.h>
- // #include <Rcpp.h>
- #include <string>
- #include <vector>
- #include <algorithm>
- using namespace Rcpp;
- using namespace std;
- using namespace arma;
- // [[Rcpp::export]]
- NumericVector add_narm(SEXP xs, SEXP ys){
- NumericVector xr(xs);
- NumericVector yr(ys);
- colvec x(xr.begin(), xr.size(), false);
- colvec y(yr.begin(), yr.size(), false);
- uvec loc_na_x = find_nonfinite(x);
- uvec loc_na_y = find_nonfinite(y);
- colvec z = x + y;
- vector<uword> loc_na_std;
- set_intersection(loc_na_x.begin(), loc_na_x.end(),
- loc_na_y.begin(), loc_na_y.end(),
- back_inserter(loc_na_std));
- uvec loc_na = conv_to<uvec>::from(loc_na_std);
- x.elem(loc_na_x).zeros();
- y.elem(loc_na_y).zeros();
- z = x + y;
- z.elem(loc_na).fill(NA_REAL);
- return wrap(z);
- }
- // [[Rcpp::export]]
- DataFrame merge_all_cpp(List dfs){
- DataFrame out(dfs[0]);
- vector<string> df_names = out.names();
- for (int i = 1; i < dfs.length(); i++)
- {
- DataFrame tmp(dfs[i]);
- vector<string> tmp_names = tmp.names();
- for (int j = 0; j < tmp.length(); j++)
- {
- if (find(df_names.begin(), df_names.end(), tmp_names[j]) != df_names.end())
- out[tmp_names[j]] = add_narm(out[tmp_names[j]], tmp[tmp_names[j]]);
- else
- {
- out.push_back(tmp[tmp_names[j]], tmp_names[j]);
- df_names.push_back(tmp_names[j]);
- }
- }
- }
- return out;
- }');
- t = proc.time()
- dfs = list(w,x,y,z)
- by = "SP"
- overall_keys = dfs %>% map(~select_(., .dots = by)) %>% rbindlist %>% distinct
- dfs %<>% map(~merge(., overall_keys, by = by, all = TRUE))
- ## not use purrr
- # overall_keys = dfs %>% lapply(function(x) select_(x, .dots = by)) %>% rbindlist %>% distinct
- # dfs %<>% lapply(function(x) merge(x, overall_keys, by = by, all = TRUE))
- f = function(x) x / length(dfs)
- f = function(x) x / length(dfs)
- out8 = merge_all_cpp(dfs) %>% tbl_dt(FALSE) %>% mutate_each_(funs(f), by)
- proc.time() - t
- # user system elapsed
- # 0.04 0.03 0.12
- out8 %<>% select_(.dots = names(out4)) %>%
- arrange_(.dots = by) %>% mutate_each(funs(as.integer))
- all.equal(out4, out8) # TRUE
Advertisement
Add Comment
Please, Sign In to add comment