celestialgod

column merge and split

Jul 7th, 2016
256
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.91 KB | None | 0 0
  1. library(pipeR)
  2. library(plyr)
  3. library(dplyr)
  4. library(tidyr)
  5.  
  # Data generation (csv_files stands in for your `ldf`).
  num_csv <- 3e3
  num_xlvls <- 30
  num_ylvls <- 30
  # Full grid of "a*" x "b*" labels; expand.grid names the columns Var1, Var2.
  timePoints <- expand.grid(
    paste0("a", 1:num_xlvls),
    paste0("b", 1:num_ylvls),
    stringsAsFactors = FALSE
  )
  csv_files <- lapply(1:num_csv, function(i) {
    # Draw between 400 and 600 distinct grid rows, then repeat each drawn row
    # 5 times so that every (Var1, Var2) pair present has exactly 5 records.
    n_picked <- sample(400:600, 1)
    row_idx <- rep(sample(1:nrow(timePoints), n_picked), each = 5)
    grid_rows <- timePoints[row_idx, ]
    # Three value columns, each sampled from 1:100 with replacement.
    value_cols <- lapply(1:3, function(j) {
      sample(1:100, nrow(grid_rows), TRUE)
    })
    names(value_cols) <- paste0("Var", 3:5)
    # Attach Var3..Var5 to the grid rows and convert to a tbl_df.
    tbl_df(modifyList(grid_rows, value_cols))
  })
  20.  
  # Merge + split, approach 1 (tidyr nest/unnest).
  # For each data frame: nest the value columns per (Var1, Var2) pair, flatten
  # each nested block into one ";"-joined string, then split that string back
  # into the integer columns V1..V15. The block is transposed before being
  # flattened, so the values are laid out row by row (row 1's Var3, Var4,
  # Var5, then row 2's, ...).
  st <- proc.time()
  outRes1 <- bind_rows(lapply(csv_files, function(subdf) {
    nested <- nest(subdf, -Var1, -Var2)
    collapsed <- mutate(nested, data = lapply(data, function(x) {
      paste(as.vector(t(as.matrix(x))), collapse = ";")
    }))
    # unnest() turns the list of single strings into a plain column.
    flat <- unnest(collapsed, data)
    wide <- separate(flat, data, paste0("V", 1:15))
    # separate() yields character columns; convert all but the keys to integer.
    mutate_each(wide, funs(as.integer(.)), -Var1, -Var2)
  }))
  proc.time() - st
  # Recorded timing (seconds):
  #   user  system elapsed
  # 263.93    0.02  265.86
  33.  
  # Merge + split, approach 2 (gather + grouped summarise).
  # For each data frame: stack the value columns into long form, join the
  # values of every (Var1, Var2) pair into one ";"-separated string, then split
  # that string into the integer columns V1..V15. gather() stacks one source
  # column after another, so the values are expected to be laid out column by
  # column (all of Var3, then Var4, then Var5).
  st <- proc.time()
  outRes2 <- bind_rows(lapply(csv_files, function(subdf) {
    long <- gather(subdf, vars, values, -Var1, -Var2)
    by_pair <- group_by(long, Var1, Var2)
    collapsed <- ungroup(summarise(by_pair, tmp = paste(values, collapse = ";")))
    wide <- separate(collapsed, tmp, paste0("V", 1:15))
    # separate() yields character columns; convert all but the keys to integer.
    mutate_each(wide, funs(as.integer(.)), -Var1, -Var2)
  }))
  proc.time() - st
  # Recorded timing (seconds):
  #  user  system elapsed
  # 70.01    0.00   70.25
  45.  
  46. library(data.table)
  # Merge + split, approach 3 (data.table).
  # For each data frame: melt to long form keyed by (Var1, Var2), join each
  # pair's values into one ";"-separated string, split it into V1..V15 with
  # tstrsplit(), drop the temporary string and convert the new columns to
  # integer. The `:=` steps modify the per-file data.table by reference.
  st <- proc.time()
  outRes3 <- rbindlist(lapply(csv_files, function(subdf) {
    v_names <- paste0("V", 1:15)
    long <- melt(data.table(subdf), id.vars = c("Var1", "Var2"))
    wide <- long[, list(tmp = paste(value, collapse = ";")),
                 by = c("Var1", "Var2")]
    wide[, (v_names) := tstrsplit(tmp, ";")]
    wide[, tmp := NULL]
    # tstrsplit() returns character vectors; cast every V column to integer.
    wide[, (v_names) := lapply(.SD, as.integer), .SDcols = v_names]
  }))
  proc.time() - st
  # Recorded timing (seconds):
  #  user  system elapsed
  # 37.35    1.30   38.16
Advertisement
Add Comment
Please, Sign In to add comment