celestialgod

data.table 整表

Sep 10th, 2016
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.19 KB | None | 0 0
  1. # Loading package
  2. library(plyr)
  3. library(dplyr)
  4. library(dtplyr)
  5. library(data.table)
  6. library(pipeR)
  7.  
  # Fast data.table implementation.
  #
  # Reduces `dat` to one representative row per (Account, Locale, Order_ID)
  # and labels every order with `grp_split`:
  #   1 = no row of the order was opened   -> keep the earliest Send_Date row
  #   2 = opened, but no row has a click   -> keep the earliest opened row
  #   3 = at least one row has a click     -> keep the earliest clicked row,
  #       with IsClick holding the unique click values of the whole order
  # In the result, IsOpened is a logical flag and IsClick is a character
  # column (NA when the kept row has no click).
  load("dat.RData")
  st <- proc.time()

  key_cols <- c("Account", "Locale", "Order_ID")

  dat_dt <- as.data.table(dat)
  # Stash the raw click values first: IsClick is turned into a logical flag on
  # the next line, and collapsing that flag would only give "TRUE,TRUE,...".
  dat_dt[, click_raw := as.character(IsClick)]
  dat_dt[, `:=`(IsOpened = !is.na(IsOpened), IsClick = !is.na(IsClick))]
  # Row score: 0 = neither, 3 = opened only, 5 = clicked only, 8 = both.
  dat_dt[, grp := IsOpened * 3 + IsClick * 5]
  # The condition is a scalar per group, so plain if/else is used (not ifelse).
  dat_dt[, grp_split := if (all(grp == 0)) 1 else if (all(grp %in% c(0, 3))) 2 else 3,
         by = key_cols]

  # Reduce every order of one category (`split_id` is 1, 2 or 3) to one row.
  reduce_group <- function(split_id) {
    rows <- dat_dt[grp_split == split_id]
    if (split_id == 3) {
      rows <- rows[IsClick == TRUE]
      date_col <- "Click_Date"
    } else if (split_id == 2) {
      # Drop unopened rows so that the row kept below is an opened one.
      rows <- rows[IsOpened == TRUE]
      date_col <- "Opened_Date"
    } else {
      date_col <- "Send_Date"
    }
    # setorderv() sorts NA first by default; na.last = TRUE makes the first
    # row of each order the one with the earliest known date.
    setorderv(rows, c(key_cols, date_col), na.last = TRUE)
    if (split_id == 3) {
      # Unique click values of the order, in chronological order of the rows.
      rows[
        ,
        click_raw := paste(
          unique(unlist(strsplit(click_raw, ",", fixed = TRUE))),
          collapse = ","
        ),
        by = key_cols
      ]
    }
    res <- rows[, .SD[1L], by = key_cols]
    # Put the raw click values back in place of the flag; drop the helper.
    res[, IsClick := click_raw][, click_raw := NULL]
    res
  }

  # Only categories that actually occur are processed, so a missing category
  # cannot shift or break positional list indexes.
  present <- sort(unique(dat_dt$grp_split))
  dat_list <- lapply(present, reduce_group)
  names(dat_list) <- present

  dat_res_dt <- rbindlist(dat_list)
  proc.time() - st
  # Timing recorded by the original author for this data.table approach:
  # user  system elapsed
  # 0.05    0.05    0.06

  table(dat_res_dt$grp_split) # 540 416 43
  33.  
  34.  
  35.  
  # Reference implementation (plyr/dplyr): split the data into one data frame
  # per (Account, Locale, Order_ID) and reduce each piece to a single row.
  load('dat.RData')

  # Collapse one order to a single row and tag it with `grp`:
  #   1 = never opened, 2 = opened without a click, 3 = opened and clicked.
  pick_order_row <- function(x) {
    # No opened row at all: keep the row with the earliest Send_Date.
    if (all(is.na(x$IsOpened))) {
      return(mutate(arrange(x, Send_Date)[1, ], grp = 1))
    }

    x <- filter(x, !is.na(IsOpened))

    # Opened, but none of the opened rows has a click: earliest Opened_Date.
    if (all(is.na(x$IsClick))) {
      return(mutate(arrange(x, Opened_Date)[1, ], grp = 2))
    }

    # Clicked: gather the unique comma-separated click values of the order,
    # then keep the row with the earliest Click_Date.
    x <- filter(x, !is.na(IsClick))
    click_values <- purrr::simplify(strsplit(x$IsClick, split = ','))
    clicks <- paste(na.omit(unique(click_values)), collapse = ',')
    x <- mutate(arrange(x, Click_Date)[1, ], grp = 3)
    x$IsClick <- clicks
    x
  }

  st <- proc.time()
  dat_res <- dat %>>%
    mutate(tmp = paste(Account, Locale, Order_ID)) %>>%
    split(.$tmp) %>>%
    llply(pick_order_row) %>>%
    rbindlist
  proc.time() - st
  #  user  system elapsed
  # 12.50   16.52   18.23

  summarise(group_by(dat_res, grp), n()) # 540 416 43
Advertisement
Add Comment
Please, Sign In to add comment