Advertisement
Guest User

Untitled

a guest
Oct 21st, 2016
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.56 KB | None | 0 0
  # From: Marcel Curlin
  # Sent: Thursday, October 20, 2016 9:11 AM
  # To: Biostatistics & Design Program
  # Subject: Help with coding problem
  #
  # Hi
  #
  # I have a stats problem and would very much appreciate some help. It is a
  # fairly simple coding task in R. Is there anyone who could help? My problem is
  # described below.

  # Purpose: for every patient, estimate the date of infection as the midpoint
  # between the last negative test and the first positive test that follows it.
  # Objects created: mydf (raw data), mydf2 (sorted, classified rows),
  # mydf2Neg / mydf2Pos (negative rows / earliest positive per patient),
  # mydf3 (negative-before-positive pairs), lookup (smallest gap per patient),
  # mydf4 (final table with dateInfection).

  # Seed the RNG from a fixed date so the simulated data are reproducible.
  set.seed(as.integer(as.Date("2016-10-20")))

  # CREATE SAMPLE DATA - this looks like my dataset ----
  # replicate() draws four columns of patient IDs; only the first survives as
  # the ID column, the other three are overwritten below. The order of the
  # random draws is kept as-is so the simulated values do not change.
  mydf <- data.frame(
    replicate(4, sample(c("ptid1", "ptid2", "ptid3"), 40, replace = TRUE))
  )
  mydf[, 2] <- sample(
    seq(as.Date("1999/01/01"), as.Date("2000/01/01"), by = "day"),
    40
  )
  # Mixing numbers with "" inside c() yields character vectors, so both lab
  # columns start out as text with "" standing in for a missing result.
  mydf[, 3] <- sample(c(1, 0, 0, 0, "", "", "", "", ""), 40, replace = TRUE)
  mydf[, 4] <- sample(
    c(sample(100:30000, 5, replace = TRUE), rep("", 20), rep(0, 10)),
    40,
    replace = TRUE
  )
  colnames(mydf) <- c("ID", "TESTDATE", "ELISA", "HIV_VL")

  # CLEAN UP DATA ----
  # Turn blanks into NA, then convert both lab columns to numeric. Without the
  # conversion, `HIV_VL > 0` and `ELISA == 0` would be string comparisons
  # (the number is coerced to "0" and compared by collation order).
  mydf$ELISA[mydf$ELISA == ""] <- NA
  mydf$HIV_VL[mydf$HIV_VL == ""] <- NA
  mydf$ELISA <- as.numeric(mydf$ELISA)
  mydf$HIV_VL <- as.numeric(mydf$HIV_VL)
  # don't forget to make dates column as.Date in real dataset

  # SORT DATA BY PTID AND DATE ----
  mydf2 <- mydf[order(mydf$ID, mydf$TESTDATE), ]

  # DEFINE GOLD STANDARD AS RESULTS OF ELISA OR HIV_VL ----
  # Start from an explicit all-NA column instead of letting sub-assignment
  # create it. "pos" is assigned last, so a row with any positive result ends
  # up "pos" even if the other test was negative. which() drops the NA entries
  # of the logical masks (rows where a test is missing).
  mydf2$goldstandard <- NA_character_
  is_neg <- mydf2$ELISA == 0 | mydf2$HIV_VL == 0
  is_pos <- mydf2$ELISA == 1 | mydf2$HIV_VL > 0
  mydf2$goldstandard[which(is_neg)] <- "neg"
  mydf2$goldstandard[which(is_pos)] <- "pos"

  # Keep rows where goldstandard is not NA
  mydf2 <- mydf2[!is.na(mydf2$goldstandard), ]
  # Partition off the negative results
  mydf2Neg <- mydf2[mydf2$goldstandard == "neg", c("ID", "TESTDATE")]
  # Partition off the positive results
  mydf2Pos <- mydf2[mydf2$goldstandard == "pos", c("ID", "TESTDATE")]
  # Pick off the earliest positive result per patient
  mydf2Pos <- aggregate(TESTDATE ~ ID, data = mydf2Pos, FUN = min)

  # Pair every negative result with the patient's earliest positive result
  mydf3 <- merge(mydf2Neg, mydf2Pos, by = "ID", suffixes = c("_neg", "_pos"))
  # Gap between each negative result and the earliest positive result
  mydf3$dateDiff <- mydf3$TESTDATE_pos - mydf3$TESTDATE_neg
  # Keep rows where the negative result was before the positive result
  mydf3 <- mydf3[mydf3$dateDiff > 0, ]

  # aggregate() would fail with an opaque message on zero rows; say why.
  if (nrow(mydf3) == 0L) {
    stop(
      "No patient has a negative result before the first positive result.",
      call. = FALSE
    )
  }

  # Smallest gap per patient, i.e. the last negative before the first positive
  lookup <- aggregate(dateDiff ~ ID + TESTDATE_pos, data = mydf3, FUN = min)
  # Merge the lookup back to recover the matching negative test date
  mydf4 <- merge(mydf3, lookup, by = c("ID", "TESTDATE_pos", "dateDiff"))

  # Estimated date of infection: midpoint between the last negative and the
  # first positive result
  mydf4$dateInfection <- mydf4$TESTDATE_neg +
    (mydf4$TESTDATE_pos - mydf4$TESTDATE_neg) / 2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement