Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(pipeR)
- library(plyr)
- library(data.table)
- library(dtplyr)
- library(dplyr)
- library(tidyr)
# ---- Data generation ----
# Sizes of the simulation: how many tables to simulate and how many levels
# each of the two key columns has.
num_csv <- 3e3
num_xlvls <- 30
num_ylvls <- 30

# Every combination of the two keys; expand.grid() names the unnamed inputs
# Var1 and Var2.
timePoints <- expand.grid(
  paste0("a", seq_len(num_xlvls)),
  paste0("b", seq_len(num_ylvls)),
  stringsAsFactors = FALSE
)

# One simulated table per element: 400-600 distinct key combinations, each
# repeated 5 times, plus three integer columns Var3..Var5 drawn from 1..100.
csv_files <- lapply(seq_len(num_csv), function(i) {
  nKeys <- sample(400:600, 1)
  keyRows <- rep(sample(seq_len(nrow(timePoints)), nKeys), each = 5)
  keys <- timePoints[keyRows, ]
  valueCols <- setNames(
    lapply(1:3, function(j) {
      sample(1:100, nrow(keys), replace = TRUE)
    }),
    paste0("Var", 3:5)
  )
  tbl_df(modifyList(keys, valueCols))
})

# Indices of the VarN columns that the benchmarks below paste together.
selectedCol <- c(3, 5)
# ---- Approach 1: dplyr verbs on each tibble ----
# For every simulated table: paste the selected columns row by row, collapse
# the pasted strings within each (Var1, Var2) group, split the collapsed
# string into columns V1..Vn and convert those columns to integer. The
# per-table results are stacked with bind_rows().
st <- proc.time()
outRes2 <- bind_rows(lapply(csv_files, function(subdf, pasteCols) {
  # Text of a one-sided formula, e.g. "~paste(Var3,Var5,sep=';')".
  fmlText <- paste0("~paste(", paste0(pasteCols, collapse = ","), ",sep=';')")
  rowFml <- as.formula(fmlText)
  tmpDots <- setNames(
    list(lazyeval::interp(rowFml, .values = environment())),
    "tmp"
  )
  collapsed <- subdf %>>%
    mutate_(.dots = tmpDots) %>>%
    group_by(Var1, Var2) %>>%
    summarise(values = paste(tmp, collapse = ";")) %>>%
    ungroup
  # The first group decides how many V columns are created.
  nValues <- length(strsplit(collapsed$values[1], ";")[[1]])
  collapsed %>>%
    separate(values, paste0("V", 1:nValues)) %>>%
    mutate_each(funs(as.integer(.)), -Var1, -Var2)
}, pasteCols = paste0("Var", selectedCol)))
proc.time() - st
# Timing reported in the original paste:
#    user  system elapsed
#   66.91    0.09   67.75
# ---- Approach 2: data.table ----
# For every simulated table: paste the selected columns row by row, collapse
# the pasted strings within each (Var1, Var2) group, then split the collapsed
# string into integer columns V1..Vn. The per-table results are stacked with
# rbindlist().
st <- proc.time()
outRes3 <- rbindlist(lapply(csv_files, function(subdf, pasteCols) {
  dt <- data.table(subdf)
  # Row-wise paste of the selected columns through .SD/.SDcols, so no R code
  # has to be assembled as a string and run through eval(parse(text = ...)).
  dt[, tmp1 := do.call(paste, c(.SD, sep = ";")), .SDcols = pasteCols]
  collapsed <- dt[, list(tmp = paste(tmp1, collapse = ";")),
                  by = c("Var1", "Var2")]
  # The first group decides how many V columns are created.
  numCols <- length(strsplit(collapsed$tmp[1L], ";", fixed = TRUE)[[1L]])
  valueCols <- paste0("V", seq_len(numCols))
  # Split and convert in a single pass; ";" is a literal separator, so
  # fixed = TRUE skips regex matching.
  collapsed[, (valueCols) := lapply(tstrsplit(tmp, ";", fixed = TRUE),
                                    as.integer)]
  collapsed[, tmp := NULL]
  collapsed
}, pasteCols = paste0("Var", selectedCol)))
proc.time() - st
# Timing reported in the original paste, measured with an
# eval(parse(text = ...)) based row paste; re-run to refresh:
#    user  system elapsed
#   36.70    0.89   37.32
# ---- Approach 3: dplyr verbs on a tbl_dt (dtplyr) ----
# Same steps as the plain dplyr approach, except that each table is first
# converted with tbl_dt(): paste the selected columns row by row, collapse per
# (Var1, Var2) group, split into V1..Vn and convert to integer. The per-table
# results are stacked with bind_rows().
st <- proc.time()
outRes4 <- bind_rows(lapply(csv_files, function(subdf, pasteCols) {
  # Text of a one-sided formula, e.g. "~paste(Var3,Var5,sep=';')".
  fmlText <- paste0("~paste(", paste0(pasteCols, collapse = ","), ",sep=';')")
  rowFml <- as.formula(fmlText)
  tmpDots <- setNames(
    list(lazyeval::interp(rowFml, .values = environment())),
    "tmp"
  )
  collapsed <- subdf %>>%
    tbl_dt %>>%
    mutate_(.dots = tmpDots) %>>%
    group_by(Var1, Var2) %>>%
    summarise(values = paste(tmp, collapse = ";")) %>>%
    ungroup
  # The first group decides how many V columns are created.
  nValues <- length(strsplit(collapsed$values[1], ";")[[1]])
  collapsed %>>%
    separate(values, paste0("V", 1:nValues)) %>>%
    mutate_each(funs(as.integer(.)), -Var1, -Var2)
}, pasteCols = paste0("Var", selectedCol)))
proc.time() - st
# Timing reported in the original paste:
#    user  system elapsed
#   55.94    1.34   57.13
- # Print the versions of all packages used
- # sessionInfo()
- # R version 3.2.5 (2016-04-14)
- # Platform: x86_64-w64-mingw32/x64 (64-bit)
- # Running under: Windows 7 x64 (build 7601) Service Pack 1
- #
- # locale:
- # [1] LC_COLLATE=Chinese (Traditional)_Taiwan.950 LC_CTYPE=Chinese (Traditional)_Taiwan.950
- # [3] LC_MONETARY=Chinese (Traditional)_Taiwan.950 LC_NUMERIC=C
- # [5] LC_TIME=Chinese (Traditional)_Taiwan.950
- #
- # attached base packages:
- # [1] stats graphics grDevices utils datasets methods base
- #
- # other attached packages:
- # [1] tidyr_0.5.1.9000 data.table_1.9.7 dplyr_0.5.0.9000 dtplyr_0.0.1.9000
- # [5] plyr_1.8.4.9000 pipeR_0.6.1.3 RevoUtilsMath_3.2.5
- #
- # loaded via a namespace (and not attached):
- # [1] Rcpp_0.12.5.2 assertthat_0.1.0.99 digest_0.6.9 withr_1.0.1
- # [5] chron_2.3-47 R6_2.1.2 DBI_0.4-1 magrittr_1.5
- # [9] git2r_0.14.0 httr_1.1.0 stringi_1.0-1 curl_0.9.7
- # [13] lazyeval_0.1.10.9000 devtools_1.11.1 tools_3.2.5 memoise_1.0.0
- # [17] knitr_1.12.3 tibble_1.0
Advertisement
Add Comment
Please, Sign In to add comment