Advertisement
Guest User

Untitled

a guest
May 23rd, 2019
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.96 KB | None | 0 0
  #' Algorithm for marking a set of identifiers with a
  #' "chain identifier", which is the HEAD PID in each object's
  #' version chain.
  #'
  #' Input:  `mydf`, which must have a `pid` column and an `obsoletedBy`
  #'         column. `obsoletedBy` is NA for a row with no successor.
  #' Output: `mydf$chain` holds the chain identifier of every row, and the
  #'         environment `chain_id` maps each PID to its chain identifier.
  #'
  #' When a row is obsoleted by a PID that has no row of its own in `mydf`,
  #' that PID is used as the chain identifier, because the walk cannot
  #' continue past it. When the same PID occurs on several rows, the first
  #' such row is the one followed while walking a chain.

  ## Note: `pid` and `obsoletedBy` are compared as character strings, so
  ## factor columns are accepted as well as character columns.

  stopifnot(all(c("pid", "obsoletedBy") %in% names(mydf)))

  pids <- as.character(mydf[["pid"]])
  successors <- as.character(mydf[["obsoletedBy"]])
  n_rows <- length(pids)

  # Resolve every row's successor row once, up front, instead of rescanning
  # the whole `pid` column on each step of each walk. `match()` returns the
  # first matching row; NA means "there is no row to continue the walk from".
  successor_row <- match(successors, pids)
  successor_row[is.na(successors)] <- NA_integer_

  # Use a hash environment, which is R's fast hash implementation
  chain_id <- new.env(hash = TRUE)

  for (i in seq_len(n_rows)) {
    head_id <- pids[[i]]
    row <- i
    steps <- 0L

    # Walk forward to the HEAD PID in this object's chain
    while (!is.na(row) && !is.na(successors[[row]])) {
      head_id <- successors[[row]]
      row <- successor_row[[row]]

      # A walk that never revisits a row follows at most `n_rows` links, so
      # exceeding that bound means the `obsoletedBy` links form a cycle and
      # the walk would otherwise never end.
      steps <- steps + 1L
      if (steps > n_rows) {
        stop(
          "Cycle detected in the version chain starting at PID '",
          pids[[i]], "'",
          call. = FALSE
        )
      }
    }

    # Record the result once per row rather than on every step of the walk
    chain_id[[pids[[i]]]] <- head_id
  }

  # Mark every row with its chain ID. The value is read back from the hash
  # so that rows sharing a PID all receive the same chain ID.
  mydf[["chain"]] <- vapply(
    pids,
    function(pid) chain_id[[pid]],
    character(1),
    USE.NAMES = FALSE
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement