Advertisement
Guest User

Untitled

a guest
Apr 16th, 2014
130
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.81 KB | None | 0 0
  library(data.table)

  # Directory holding the two input files. Replace the placeholder before
  # running. Paths are built with file.path() so the session's working
  # directory is left untouched.
  data_dir <- "put datapath here"

  # Size of the look-ahead window, in days.
  window_days <- 14

  # Convert "month/day/year hour:minute:second AM/PM" strings to fractional
  # days elapsed since 2000-01-01.
  #
  # x: character vector of timestamps.
  # Returns a numeric vector the same length as x (NA where parsing fails).
  #
  # %I (12-hour clock) is required for %p to be honoured on input; with %H the
  # AM/PM marker is ignored and afternoon times are parsed to the wrong hour.
  # A fixed time zone and explicit units keep the result in days regardless of
  # the local DST rules or the magnitude of the differences.
  # NOTE(review): assumes the files use a 12-hour clock, as the AM/PM field in
  # the original format string implies -- confirm against the data.
  days_since_2000 <- function(x) {
    parsed <- as.POSIXct(x, format = "%m/%d/%Y %I:%M:%S %p", tz = "UTC")
    origin <- as.POSIXct("2000-01-01", tz = "UTC")
    as.numeric(difftime(parsed, origin, units = "days"))
  }

  # For every element of `dates`, count how many elements of the same vector
  # fall strictly after it and at most `window` days later.
  #
  # dates:  numeric vector of day offsets (one customer's records).
  # window: window length in days.
  # Returns an integer vector the same length as `dates`.
  count_following <- function(dates, window) {
    vapply(
      dates,
      function(current) {
        gap <- dates - current
        sum(gap > 0 & gap <= window)
      },
      integer(1)
    )
  }

  # Bring in data
  data <- fread(file.path(data_dir, "realistic_data.txt"))

  # Turn the timestamps into numeric day offsets, then sort
  data[, Record_Date := days_since_2000(Record_Date)]
  data <- data[order(Cust_ID, Record_Date)]

  # Count, per customer, the later records within the window of each record.
  # This is computed directly per group, so no list column holding a copy of
  # every customer's dates on every row is needed.
  data[, Within14 := count_following(Record_Date, window_days), by = Cust_ID]

  # My final output data
  output <- data[, list(Cust_ID, Record_Date, Within14)]
  output <- output[order(Cust_ID, Record_Date)]

  # Your data manipulated to work with mine: the third column is the expected
  # count and the dates get the same conversion as above.
  expected <- fread(file.path(data_dir, "expected_results.txt"))
  setnames(expected, 3L, "Exp")
  expected[, Record_Date := days_since_2000(Record_Date)]

  # Sort expected data
  expected <- expected[order(Cust_ID, Record_Date)]

  # Merge your data onto mine
  output <- merge(output, expected, by = c("Cust_ID", "Record_Date"))

  # Number of disagreeing records per customer. A comparison that evaluates to
  # NA is counted as a disagreement; otherwise the sum becomes NA and the
  # filter below silently drops the whole customer.
  output[
    ,
    Not.Equal := {
      mismatch <- Within14 != Exp
      sum(is.na(mismatch) | mismatch)
    },
    by = "Cust_ID"
  ]

  # Look at wrong data: every record of any customer with a disagreement
  wrong.data <- output[Not.Equal > 0][order(Cust_ID, Record_Date)]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement