Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(pipeR)
- library(plyr)
- library(dplyr)
- library(tidyr)
- # Data generation (csv_files corresponds to your ldf).
- num_csv <- 3e3
- num_xlvls <- 30
- num_ylvls <- 30
- # Every combination of the "a*" and "b*" labels; expand.grid() names the two
- # resulting columns Var1 and Var2.
- timePoints <- expand.grid(
-   paste0("a", 1:num_xlvls),
-   paste0("b", 1:num_ylvls),
-   stringsAsFactors = FALSE
- )
- csv_files <- lapply(1:num_csv, function(i) {
-   # Draw 400-600 distinct rows of the grid and repeat each one five times,
-   # so every selected (Var1, Var2) pair occupies five consecutive rows.
-   n_points <- sample(400:600, 1)
-   picked <- rep(sample(seq_len(nrow(timePoints)), n_points), each = 5)
-   grid_rows <- timePoints[picked, ]
-   # Three value columns (Var3, Var4, Var5) sampled with replacement from 1:100.
-   value_cols <- setNames(
-     lapply(1:3, function(j) {
-       sample(1:100, nrow(grid_rows), TRUE)
-     }),
-     paste0("Var", 3:5)
-   )
-   tbl_df(modifyList(grid_rows, value_cols))
- })
- # Merge + split, approach 1: nest the value columns of every (Var1, Var2)
- # pair, collapse each nested table into a single ";"-joined string, then
- # split that string back into the integer columns V1..V15.
- st <- proc.time()
- outRes1 <- bind_rows(lapply(csv_files, function(subdf) {
-   nested <- nest(subdf, -Var1, -Var2)
-   collapsed <- mutate(nested, data = lapply(data, function(x) {
-     # Transposing before as.vector() emits the values row by row: the
-     # Var3, Var4, Var5 of the first row, then those of the second row, ...
-     # NOTE(review): the gather()/melt() based approaches in this script
-     # paste the values variable by variable instead, so the V1..V15
-     # columns are not in the same order across approaches.
-     paste(as.vector(t(as.matrix(x))), collapse = ";")
-   }))
-   flat <- unnest(collapsed, data)
-   wide <- separate(flat, data, paste0("V", 1:15))
-   mutate_each(wide, funs(as.integer(.)), -Var1, -Var2)
- }))
- proc.time() - st
- # Output recorded from an earlier run of the pipe-based form of this code:
- # user system elapsed
- # 263.93 0.02 265.86
- # Approach 2: reshape each table to long form, paste all values of every
- # (Var1, Var2) group into one ";"-joined string, then split that string
- # into the integer columns V1..V15.
- st <- proc.time()
- outRes2 <- bind_rows(lapply(csv_files, function(subdf) {
-   # gather() stacks the value columns one after another, so within a group
-   # the pasted string holds the Var3 values first, then Var4, then Var5.
-   long <- gather(subdf, vars, values, -Var1, -Var2)
-   grouped <- group_by(long, Var1, Var2)
-   pasted <- summarise(grouped, tmp = paste(values, collapse = ";"))
-   wide <- separate(ungroup(pasted), tmp, paste0("V", 1:15))
-   mutate_each(wide, funs(as.integer(.)), -Var1, -Var2)
- }))
- proc.time() - st
- # Output recorded from an earlier run of the pipe-based form of this code:
- # user system elapsed
- # 70.01 0.00 70.25
- library(data.table)
- # Approach 3: the same melt / paste / split idea written with data.table.
- st <- proc.time()
- outRes3 <- rbindlist(lapply(csv_files, function(subdf) {
-   long <- melt(data.table(subdf), c("Var1", "Var2"))
-   # One ";"-joined string of all values per (Var1, Var2) pair.
-   wide <- long[
-     , list(tmp = paste(value, collapse = ";")),
-     by = c("Var1", "Var2")
-   ]
-   # The := steps below modify `wide` by reference: split the string into
-   # V1..V15, drop the temporary column, then convert V1..V15 to integer.
-   wide[, paste0("V", 1:15) := tstrsplit(tmp, ";")]
-   wide[, tmp := NULL]
-   wide[, paste0("V", 1:15) := lapply(.SD, as.integer), .SDcols = V1:V15]
-   wide
- }))
- proc.time() - st
- # Output recorded from an earlier run of the pipe-based form of this code:
- # user system elapsed
- # 37.35 1.30 38.16
Advertisement
Add Comment
Please, Sign In to add comment