Advertisement
Guest User

Untitled

a guest
Apr 19th, 2014
36
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.47 KB | None | 0 0
  # Label every row of `df` with a payment status, one row at a time.
  #
  # Reads:  df$First_Payment_date, df$Payment_Date
  # Writes: df$User_status (character)
  #
  # The column is pre-filled with "Error" so that it exists even for a
  # zero-row frame and so that rows which cannot be classified keep that
  # fallback label.
  df$User_status <- rep("Error", nrow(df))

  # seq_len() yields an empty sequence for a zero-row frame, whereas
  # 1:nrow(df) would iterate over c(1, 0).
  for (i in seq_len(nrow(df))) {
    if (is.na(df$First_Payment_date[i])) {
      df$User_status[i] <- "User never paid"
    } else if (is.na(df$Payment_Date[i])) {
      # Nothing to compare against: `if (NA)` would abort the loop, so
      # leave the "Error" placeholder in place.
      next
    } else if (df$Payment_Date[i] >= df$First_Payment_date[i]) {
      df$User_status[i] <- "Paying user"
    } else {
      df$User_status[i] <- "Attempt before first payment"
    }
  }
  12.  
  # Benchmark fixture: fix the RNG seed, then draw one million rows.
  # First_Payment_date is sampled from NA and 1..100, Payment_Date from
  # 1..100. `dt` is a data.table built from the same frame.
  set.seed(1234)
  library(data.table)
  df <- data.frame(
    First_Payment_date = sample(c(NA, 1:100), 1000000, replace = TRUE),
    Payment_Date = sample(1:100, 1000000, replace = TRUE)
  )
  dt <- data.table(df)
  18.  
  19. library(microbenchmark)
  20.  
  # Benchmark body for the plain data.frame approach.
  #
  # Reads the global `df`, and labels a function-local copy of it through
  # vectorised logical-index assignment; the global `df` is not modified.
  # Returns (invisibly) the value of the final assignment.
  test_df <- function() {
    first_paid <- df$First_Payment_date
    paid_on <- df$Payment_Date

    # Fallback label first, then overwrite the rows each condition selects.
    df$User_status <- "Error"
    df$User_status[is.na(first_paid)] <- "User never paid"
    # NA comparisons (no first payment) select nothing in these two steps.
    df$User_status[paid_on >= first_paid] <- "Paying user"
    df$User_status[paid_on < first_paid] <- "Attempt before first payment"
  }
  27.  
  # Benchmark body for the data.table approach.
  #
  # Applies the same four labelling steps as the data.frame variant, but
  # with `:=`, which updates the global `dt` by reference. The steps are
  # chained; each `[` operates on the table returned by the previous one.
  test_dt <- function() {
    dt[, User_status := "Error"][
      is.na(First_Payment_date), User_status := "User never paid"
    ][
      Payment_Date >= First_Payment_date, User_status := "Paying user"
    ][
      Payment_Date < First_Payment_date,
      User_status := "Attempt before first payment"
    ]
  }
  34.  
  # Time both implementations, ten evaluations each.
  microbenchmark(
    test_df(),
    test_dt(),
    times = 10
  )
  36.  
  37. > microbenchmark(test_df(), test_dt(), times=10)
  38. Unit: milliseconds
  39. expr min lq median uq max neval
  40. test_df() 247.29182 256.69067 287.89768 319.34873 330.33915 10
  41. test_dt() 66.74265 69.42574 70.27826 72.93969 80.89847 10
  42.  
  # Vectorised labelling of `df`: start from the fallback label, then
  # overwrite the rows matched by each condition. which() keeps only the
  # positions where the condition is TRUE, so rows whose comparison is NA
  # (no first payment date) are left untouched by the last two steps.
  df$User_status <- "Error"
  df$User_status[which(is.na(df$First_Payment_date))] <- "User never paid"
  df$User_status[which(df$Payment_Date >= df$First_Payment_date)] <-
    "Paying user"
  df$User_status[which(df$Payment_Date < df$First_Payment_date)] <-
    "Attempt before first payment"
  47.  
  # Allocate the status column and fill it with the fallback label.
  # (Assigning to the whole column, not to element `i`.)
  df$User_status <- rep("Error", nrow(df))

  # Classify row by row and store the result back into the column.
  # vapply() guarantees a character vector with one label per row, also
  # for a zero-row frame. The branches form a single if/else chain so that
  # exactly one label is returned per row and no comparison is evaluated
  # when one of the dates is NA (`if (NA)` is an error).
  df$User_status <- vapply(seq_len(nrow(df)), function(i) {
    first_paid <- df$First_Payment_date[i]
    paid_on <- df$Payment_Date[i]

    if (is.na(first_paid)) {
      "User never paid"
    } else if (is.na(paid_on)) {
      # Cannot be classified: keep the pre-filled fallback label.
      df$User_status[i]
    } else if (paid_on >= first_paid) {
      "Paying user"
    } else {
      "Attempt before first payment"
    }
  }, character(1))
  63.  
  # Label every row of `df` in one vectorised expression.
  #
  # ifelse() propagates NA from its test, so a missing Payment_Date must be
  # checked explicitly; otherwise such rows would end up NA and the "Error"
  # fallback could never be produced. Once both dates are known to be
  # present, the comparison has only two outcomes.
  df$User_status <- with(
    df,
    ifelse(
      is.na(First_Payment_date),
      "User never paid",
      ifelse(
        is.na(Payment_Date),
        "Error",
        ifelse(
          Payment_Date >= First_Payment_date,
          "Paying user",
          "Attempt before first payment"
        )
      )
    )
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement