Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Label every row of `df` with a payment status derived from
# `First_Payment_date` and `Payment_Date`.
#
# Statuses are collected in a preallocated character vector and written back
# to `df$User_status` in one assignment. Every slot starts as "Error", which
# is the label kept when the two dates cannot be compared.
user_status <- rep("Error", nrow(df))

# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(df) would iterate over c(1, 0).
for (i in seq_len(nrow(df))) {
  first_paid <- df$First_Payment_date[i]
  paid <- df$Payment_Date[i]

  if (is.na(first_paid)) {
    user_status[i] <- "User never paid"
  } else if (is.na(paid)) {
    # Comparing against NA gives NA, which `if` rejects with an error;
    # leave the preallocated "Error" label in place instead.
    next
  } else if (paid >= first_paid) {
    user_status[i] <- "Paying user"
  } else {
    user_status[i] <- "Attempt before first payment"
  }
}

df$User_status <- user_status
# Reproducible benchmark fixture: one million rows of integer day numbers.
# `First_Payment_date` is drawn from NA and 1..100, `Payment_Date` from 1..100.
# The two sample() calls run in the same order as the columns are listed, so
# the seeded random stream is consumed first by `First_Payment_date`.
set.seed(1234)
library(data.table)
df <- data.frame(
  First_Payment_date = sample(c(NA, 1:100), 1000000, replace = TRUE),
  Payment_Date = sample(1:100, 1000000, replace = TRUE)
)
# data.table version of the same data, used by test_dt().
dt <- data.table(df)
library(microbenchmark)
# Benchmark subject: label rows of the file-level `df` using base R
# logical-index assignment.
#
# The first assignment creates a function-local copy of `df`, so the
# file-level data frame is left untouched. Takes no arguments; the value of
# the final assignment is returned invisibly.
test_df <- function() {
  # Default label for every row; the steps below overwrite matching rows.
  df$User_status <- "Error"

  never_paid <- is.na(df$First_Payment_date)
  df$User_status[never_paid] <- "User never paid"

  paying <- df$Payment_Date >= df$First_Payment_date
  df$User_status[paying] <- "Paying user"

  too_early <- df$Payment_Date < df$First_Payment_date
  df$User_status[too_early] <- "Attempt before first payment"
}
# Benchmark subject: label rows of the file-level `dt` using data.table's
# `:=` operator, which updates the `User_status` column by reference.
#
# The four updates are chained; each step receives the table returned by the
# previous one, and the order matches the base R version: default label
# first, then the three conditions.
test_dt <- function() {
  dt[
    , User_status := "Error"
  ][
    is.na(First_Payment_date), User_status := "User never paid"
  ][
    Payment_Date >= First_Payment_date, User_status := "Paying user"
  ][
    Payment_Date < First_Payment_date,
    User_status := "Attempt before first payment"
  ]
}
# Time the base R and data.table implementations, 10 evaluations each.
microbenchmark(
  test_df(),
  test_dt(),
  times = 10
)
- > microbenchmark(test_df(), test_dt(), times=10)
- Unit: milliseconds
- expr min lq median uq max neval
- test_df() 247.29182 256.69067 287.89768 319.34873 330.33915 10
- test_dt() 66.74265 69.42574 70.27826 72.93969 80.89847 10
# Vectorised status labelling for `df`.
#
# The labels are built in a plain character vector of length nrow(df) and
# attached to the data frame in a single assignment. Unlike assigning the
# scalar "Error" straight to `df$User_status`, this also works when `df` has
# zero rows.
user_status <- rep("Error", nrow(df))
user_status[is.na(df$First_Payment_date)] <- "User never paid"

# The comparisons are NA wherever a date is missing; which() keeps only the
# positions that are TRUE, so those rows retain their earlier label.
user_status[which(df$Payment_Date >= df$First_Payment_date)] <-
  "Paying user"
user_status[which(df$Payment_Date < df$First_Payment_date)] <-
  "Attempt before first payment"

df$User_status <- user_status
# Allocate the status column: one "Error" entry per row of `df`.
# The whole column is assigned; indexing a single element with `[i]` here
# would try to store nrow(df) values in one slot.
df$User_status <- rep("Error", nrow(df))
# Row-by-row status labelling with an apply-style loop.
#
# Each call returns exactly one label, and the collected labels are stored in
# `df$User_status`. vapply() with character(1) guarantees a character vector,
# including the empty case, and seq_len() is safe for a zero-row `df`.
df$User_status <- vapply(seq_len(nrow(df)), function(i) {
  first_paid <- df$First_Payment_date[i]
  paid <- df$Payment_Date[i]

  if (is.na(first_paid)) {
    "User never paid"
  } else if (is.na(paid)) {
    # The dates cannot be compared; an NA condition would make `if` fail.
    "Error"
  } else if (paid >= first_paid) {
    "Paying user"
  } else {
    "Attempt before first payment"
  }
}, character(1))
# Vectorised status labelling with nested ifelse().
#
# Missing values are tested explicitly before the dates are compared:
# ifelse() returns NA wherever its test is NA, so without the is.na() checks
# a missing `Payment_Date` would produce NA instead of "Error".
df$User_status <- with(
  df,
  ifelse(
    is.na(First_Payment_date), "User never paid",
    ifelse(
      is.na(Payment_Date), "Error",
      ifelse(
        Payment_Date >= First_Payment_date,
        "Paying user",
        "Attempt before first payment"
      )
    )
  )
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement