Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Load packages
- library(plyr)
- library(dplyr)
- library(dtplyr)
- library(data.table)
- library(pipeR)
- # ---- Approach 1: data.table + pipeR ----
- # Load the input; the code below expects the file to provide an object named `dat`.
- load('dat.RData')
- # Start of the timed section.
- st <- proc.time()
- # Classify every (Account, Locale, Order_ID) group and split the table by class.
- #   * IsOpened / IsClick are overwritten with logical "value is not NA" flags.
- #   * grp is per row: 0 = neither flag set, 3 = opened only, 5 = clicked only, 8 = both.
- #   * grp_split is per group: 1 = every row has grp 0,
- #                             2 = rows are only 0 or 3 (something opened, nothing clicked),
- #                             3 = anything else (at least one clicked row).
- # all() returns a single value per group, so the class is chosen with scalar
- # if / else instead of the vectorised ifelse().
- # The result is a list of data.tables named by the grp_split values that occur.
- dat_list <- dat %>>% as.data.table %>>%
-   `[`( , `:=`(IsOpened = !is.na(IsOpened), IsClick = !is.na(IsClick))) %>>%
-   `[`( , grp := IsOpened * 3 + IsClick * 5) %>>%
-   `[`( , grp_split := if (all(grp == 0)) 1 else if (all(grp == 3 | grp == 0)) 2 else 3,
-       by = "Account,Locale,Order_ID") %>>%
-   split(.$grp_split)
- # Reduce each class to one row per (Account, Locale, Order_ID).
- # The list elements are addressed by the grp_split value that names them
- # ("1", "2", "3") rather than by position, and a class that does not occur in
- # the data is skipped; positional indexing would pick the wrong element (or
- # fail) as soon as one class is absent.
- # Class 1: keep the row with the smallest Send_Date of each group.
- if (!is.null(dat_list[["1"]])) {
-   dat_list[["1"]] <- dat_list[["1"]] %>>%
-     setorder(Account, Locale, Order_ID, Send_Date) %>>%
-     `[`( , .SD[1], by = "Account,Locale,Order_ID") %>>%
-     `[`( , IsClick := as.character(IsClick))
- }
- # Class 2: keep the row with the smallest Opened_Date of each group.
- if (!is.null(dat_list[["2"]])) {
-   dat_list[["2"]] <- dat_list[["2"]] %>>%
-     setorder(Account, Locale, Order_ID, Opened_Date) %>>%
-     `[`( , .SD[1], by = "Account,Locale,Order_ID") %>>%
-     `[`( , IsClick := as.character(IsClick))
- }
- # Class 3: keep only clicked rows, then the row with the smallest Click_Date of
- # each group; IsClick becomes the comma-joined IsClick values of the group.
- # IsClick2 is a temporary column and is dropped again at the end.
- # NOTE(review): IsClick is already a logical flag at this point, so the joined
- # string is built from flag values, not from the original column content -
- # confirm this is intended.
- if (!is.null(dat_list[["3"]])) {
-   dat_list[["3"]] <- dat_list[["3"]] %>>%
-     `[`(IsClick == 1) %>>%
-     setorder(Account, Locale, Order_ID, Click_Date) %>>%
-     `[`( , IsClick2 := paste(IsClick, collapse = ","), by = "Account,Locale,Order_ID") %>>%
-     `[`( , .SD[1], by = "Account,Locale,Order_ID") %>>%
-     `[`( , IsClick := IsClick2) %>>%
-     `[`( , IsClick2 := NULL)
- }
- # Stack the per-class tables back into a single data.table.
- dat_res_dt <- rbindlist(dat_list)
- # Time elapsed since `st` was taken; the figures below are from the recorded run.
- proc.time() - st
- # user system elapsed
- # 0.05 0.05 0.06
- # Number of result rows per class; the recorded run gave 540 416 43.
- table(dat_res_dt[["grp_split"]])
- # ---- Approach 2: plyr / dplyr ----
- # Reload the input so this run starts from the same `dat` object.
- load('dat.RData')
- # Start of the timed section.
- st <- proc.time()
- # Split the data by a pasted (Account, Locale, Order_ID) key and reduce every
- # piece to a single row:
- #   grp 3 - some opened row has a non-NA IsClick: keep the row with the smallest
- #           Click_Date among those rows and set IsClick to the unique
- #           comma-separated values found in them, joined by ",";
- #   grp 2 - something opened but no click on an opened row: keep the row with
- #           the smallest Opened_Date;
- #   grp 1 - nothing opened: keep the row with the smallest Send_Date.
- # The helper column `tmp` is not removed, so it is part of the result.
- dat_res <- dat %>>%
-   mutate(tmp = paste(Account, Locale, Order_ID)) %>>%
-   split(.$tmp) %>>%
-   llply(function(x) {
-     if (any(!is.na(x$IsOpened))) {
-       x <- x %>>% filter(!is.na(IsOpened))
-       if (any(!is.na(x$IsClick))) {
-         x <- x %>>% filter(!is.na(IsClick))
-         # strsplit() returns a list of character vectors; base unlist() flattens
-         # it, so no extra package is needed for this step.
-         click_values <- x$IsClick %>>%
-           strsplit(split = ',') %>>%
-           unlist %>>%
-           unique %>>%
-           na.omit %>>%
-           paste(collapse = ',')
-         x <- x %>>% arrange(Click_Date) %>>% {.[1, ]} %>>% mutate(grp = 3)
-         x$IsClick <- click_values
-       } else {
-         x <- x %>>% arrange(Opened_Date) %>>% {.[1, ]} %>>% mutate(grp = 2)
-       }
-     } else {
-       x <- x %>>% arrange(Send_Date) %>>% {.[1, ]} %>>% mutate(grp = 1)
-     }
-     x
-   }) %>>%
-   rbindlist
- # Time elapsed since `st` was taken; the figures below are from the recorded run.
- proc.time() - st
- # user system elapsed
- # 12.50 16.52 18.23
- # Number of result rows per grp value; the recorded run gave 540 416 43.
- summarise(group_by(dat_res, grp), n())
Advertisement
Add Comment
Please, Sign In to add comment