Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(data.table)
- library(dplyr)
- library(magrittr)
- # data generation
- N_patient = 3000
- dat = sample(48, N_patient, replace = TRUE) %>% {
- cbind(rep(1:N_patient, times=.), unlist(sapply(., seq, from = 1)))
- } %>% cbind(replicate(6, sample(0:1, nrow(.), TRUE))) %>%
- tbl_dt() %>% setnames(c("id", "duration", paste0("M_", 1:6))) %>%
- arrange(id, duration)
- st = proc.time()
- transitMatrix_eachTime = dat %>% split(.$id) %>% lapply(function(dd){
- out = lapply(1:47, function(x) matrix(0, 6, 6))
- if (nrow(dd) > 1)
- {
- tmp = dd %>% select(3:8) %>% as.matrix()
- for(i in 2:nrow(dd))
- {
- transitMatrix = matrix(0, 6, 6);
- if (sum(tmp[i-1,]) > 0)
- transitMatrix[tmp[i-1,]==1,] =
- t(replicate(sum(tmp[i-1,]), tmp[i,]))
- out[[i-1]] = transitMatrix
- }
- }
- out
- }) %>%
- Reduce(function(x, y) lapply(1:length(x),
- function(v) x[[v]]+y[[v]]), .) %>%
- lapply(function(x) x / ifelse(rowSums(x)> 0, rowSums(x), 1))
- proc.time() - st
- # user system elapsed
- # 32.10 0.19 33.26
- library(Rcpp)
- library(RcppArmadillo)
- sourceCpp(code = '
- // [[Rcpp::depends(RcppArmadillo)]]
- #include <RcppArmadillo.h>
- using namespace Rcpp;
- using namespace arma;
- // [[Rcpp::export]]
- List transitMatrix_f(umat M, uword maxDuration)
- {
- uword maxDuration_id = M.n_rows;
- List transitMatrixList(maxDuration-1);
- umat transitMatrix(M.n_cols, M.n_cols);
- urowvec previous_M(M.n_cols);
- for (uword i = 1; i < maxDuration; i++)
- {
- transitMatrix.zeros();
- if ( i < maxDuration_id)
- {
- previous_M = M.row(i-1);
- if (any(previous_M==1))
- transitMatrix.rows(find(previous_M==1)) = repmat(M.row(i), sum(previous_M), 1);
- }
- transitMatrixList[i-1] = wrap(transitMatrix);
- }
- return transitMatrixList;
- }')
- library(RcppEigen)
- sourceCpp(code = '
- // [[Rcpp::depends(RcppEigen)]]
- #include <RcppEigen.h>
- using namespace Rcpp;
- using Eigen::Map;
- using Eigen::MatrixXd;
- using Eigen::VectorXd;
- // [[Rcpp::export]]
- void list_sum_f(List Xr, List Yr) {
- for(int i = 0; i < Xr.size(); i++)
- Yr[i] = as< Map<MatrixXd> >(Xr[i]) + as< Map<MatrixXd> >(Yr[i]);
- }
- // [[Rcpp::export]]
- List listAddition(List Xr) {
- int n = Xr.size();
- List list_sum = Xr[0];
- for(int j = 1; j < n; j++)
- list_sum_f(Xr[j], list_sum);
- return list_sum;
- }')
- st = proc.time()
- maxDuration = max(dat$duration)
- transitMatrix_eachTime2 = dat %>% split(.$id) %>% lapply(function(x){
- transitMatrix_f(x %>% select(-id, -duration) %>% as.matrix(),
- maxDuration)
- }) %>% listAddition() %>%
- lapply(function(x) x / ifelse(rowSums(x)> 0, rowSums(x), 1))
- proc.time() - st
- # user system elapsed
- # 14.68 0.20 15.27
- library(reshape2)
- library(Matrix)
- st = proc.time()
- dat_previous = dat %>% group_by(id) %>%
- mutate_(.dots = paste0("c(0, M_", 1:6, "[-length(M_1)])")) %>%
- setnames(old = tail(names(.), 6), new = paste0("M_", 1:6, "p"))
- dat_transform_1 = dat_previous %>%
- melt(id = c("id", "duration"), measure = paste0("M_", 1:6)) %>%
- filter(value == 1, duration > 1) %>% select(-value) %>%
- transform(variable = as.numeric(substr(variable, 3, 3))) %>%
- setnames("variable", "M") %>% setkey(id, duration)
- dat_transform_2 = dat_previous %>%
- melt(id = c("id","duration"), measure = paste0("M_", 1:6, "p")) %>%
- filter(value == 1) %>% select(-value) %>%
- mutate(variable = as.numeric(substr(variable, 3, 3))) %>%
- setnames("variable", "M_p") %>% setkey(id, duration)
- dat_combined = dat_transform_1[dat_transform_2, allow.cartesian=TRUE] %>% filter(!is.na(M), !is.na(M_p))
- transitMatrix_eachTime3 = dat_combined %>% group_by(duration, M, M_p) %>%
- summarise(count = n()) %>% group_by(duration, M_p) %>%
- mutate(transitProb = count / sum(count)) %>% ungroup() %>%
- split(.$duration) %>%
- lapply(function(x) spMatrix(6, 6, x$M_p, x$M, x$transitProb))
- proc.time() - st
- # user system elapsed
- # 2.15 0.31 2.62
- st = proc.time()
- dat_transform = dat %>%
- melt(id = c("id", "duration"), measure = paste0("M_", 1:6)) %>%
- filter(value == 1) %>% select(-value) %>%
- transform(variable = as.numeric(substr(variable, 3, 3))) %>%
- setnames("variable", "M") %>% setkey(id, duration)
- dat_combined = dat_transform %>% filter(duration > 1) %>%
- inner_join(dat_transform %>% transform(duration = duration + 1),
- by = c("id", "duration"))
- transitMatrix_eachTime5 = dat_combined %>%
- group_by(duration, M.x, M.y) %>% summarise(count = n()) %>%
- group_by(duration, M.y) %>%
- mutate(transitProb = count / sum(count)) %>% ungroup() %>%
- split(.$duration) %>%
- lapply(function(x) spMatrix(6, 6, x$M.y, x$M.x, x$transitProb))
- proc.time() - st
- # user system elapsed
- # 1.25 0.19 1.45
- library(tidyr)
- st = proc.time()
- dat_transform = dat %>% gather(M, value, M_1:M_6) %>%
- filter(value == 1) %>% select(-value) %>%
- transform(M = as.numeric(substr(M, 3, 3)))
- dat_combined = dat_transform %>% filter(duration > 1) %>%
- inner_join(dat_transform %>% transform(duration = duration + 1),
- by = c("id", "duration"))
- transitMatrix_eachTime5 = dat_combined %>%
- group_by(duration, M.x, M.y) %>% summarise(count = n()) %>%
- group_by(duration, M.y) %>%
- mutate(transitProb = count / sum(count)) %>% ungroup() %>%
- split(.$duration) %>%
- lapply(function(x) spMatrix(6, 6, x$M.y, x$M.x, x$transitProb))
- proc.time() - st
- # user system elapsed
- # 1.28 0.12 1.40
- all.equal(transitMatrix_eachTime, transitMatrix_eachTime2)
- # TRUE
- all.equal(transitMatrix_eachTime, transitMatrix_eachTime3 %>%
- lapply(as.matrix) %>% set_names(NULL))
- # TRUE
- all.equal(transitMatrix_eachTime, transitMatrix_eachTime4 %>%
- lapply(as.matrix) %>% set_names(NULL))
- # TRUE
- all.equal(transitMatrix_eachTime, transitMatrix_eachTime5 %>%
- lapply(as.matrix) %>% set_names(NULL))
- # TRUE
Advertisement
Add Comment
Please, Sign In to add comment