Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
library("data.table")

# Purpose: for every record, count how many LATER records of the same customer
# fall within the following 14 days, then compare those counts with a file of
# expected results and collect every customer that has at least one mismatch.
#
# Results left in the session:
#   data       - input records with Record_Date as days since 2000-01-01 and
#                the computed Within14 count
#   output     - Cust_ID / Record_Date / Within14 merged with the expected
#                count (Exp) and a per-customer mismatch total (Not.Equal)
#   wrong.data - all rows of customers that have at least one mismatch

# ---- Configuration ----

# Directory holding the input files. Paths are built with file.path() rather
# than setwd(), so the session's working directory is left untouched.
data_dir <- "put datapath here"

# The AM/PM marker (%p) is only honoured together with the 12-hour field %I.
# Paired with %H, strptime() ignores the marker and reads every PM time as AM.
date_format <- "%m/%d/%Y %I:%M:%S %p"

# Reference instant for turning timestamps into plain numbers. Everything is
# handled in UTC so daylight-saving transitions cannot shorten or lengthen a
# day, and no wall-clock time falls into a non-existent local hour.
origin <- as.POSIXct("2000-01-01", tz = "UTC")

# Size of the look-ahead window, in days.
window_days <- 14

# ---- Helpers ----

# Convert timestamp strings to fractional days elapsed since `origin`.
#
# x: character vector of timestamps laid out as `date_format`.
# Returns a numeric vector the same length as `x`; values that cannot be
# parsed become NA, and a warning reports how many there were.
days_since_origin <- function(x) {
  parsed <- as.POSIXct(strptime(x, format = date_format, tz = "UTC"))
  unparsed <- is.na(parsed) & !is.na(x)
  if (any(unparsed)) {
    warning(
      sum(unparsed), " Record_Date value(s) did not match '", date_format,
      "' and were set to NA",
      call. = FALSE
    )
  }
  # Explicit units: the default ("auto") picks secs/mins/hours/days depending
  # on the data, which would make the 14-day comparison meaningless.
  as.numeric(difftime(parsed, origin, units = "days"))
}

# ---- Bring in data ----

data <- fread(file.path(data_dir, "realistic_data.txt"))
data[, Record_Date := days_since_origin(Record_Date)]
data <- data[order(Cust_ID, Record_Date)]

# ---- Count later records within the window ----

# Inside each customer group, compare every record date with all dates of that
# same customer. Only strictly later records (gap > 0) that are at most
# `window_days` away are counted, so a record never counts itself.
data[, Within14 := vapply(
  Record_Date,
  function(rd) {
    gap <- Record_Date - rd
    sum(gap > 0 & gap <= window_days)
  },
  integer(1)
), by = Cust_ID]

# My final output data (rows are already ordered by Cust_ID, Record_Date)
output <- data[, list(Cust_ID, Record_Date, Within14)]

# ---- Expected results, converted the same way as the input ----

expected <- fread(file.path(data_dir, "expected_results.txt"))
# The third column holds the expected count; give it a fixed name.
setnames(expected, 3L, "Exp")
expected[, Record_Date := days_since_origin(Record_Date)]
expected <- expected[order(Cust_ID, Record_Date)]

# ---- Compare ----

# Inner join: only records present in both tables are compared.
output <- merge(output, expected, by = c("Cust_ID", "Record_Date"))

# Number of mismatching records per customer, repeated on each of its rows.
output[, Not.Equal := sum(Within14 != Exp), by = "Cust_ID"]

# Every row of every customer that has at least one mismatch.
wrong.data <- output[Not.Equal > 0][order(Cust_ID, Record_Date)]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement