Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#' Algorithm for marking a set of identifiers with a "chain identifier",
#' which is the HEAD PID in each object's version chain.
#'
#' Reads:
#'   mydf       a `data.frame` with a `pid` column and an `obsoletedBy`
#'              column; `obsoletedBy` is NA when nothing supersedes the row.
#' Writes:
#'   chain_id   hashed environment mapping each PID to its HEAD PID
#'   mydf$chain the HEAD PID for every row
#'
#' A successor PID that does not appear in `mydf$pid` ends the walk and is
#' itself used as the HEAD PID.
## Note: mydf needs to be read into R as a `data.frame`

# Work on plain character vectors so that factor columns behave the same as
# character columns (environment keys must be character strings).
pids <- as.character(mydf[["pid"]])
next_pids <- as.character(mydf[["obsoletedBy"]])

# Locate every row's successor row once, up front, instead of rescanning the
# whole `pid` column on each step of each walk. match() yields the first
# matching row, or NA when the successor is not in the table.
next_rows <- match(next_pids, pids)
next_rows[is.na(next_pids)] <- NA_integer_

# Walk forward from `start_row` to the HEAD PID of that row's chain.
find_chain_head <- function(start_row) {
  head_pid <- pids[start_row]
  row <- start_row
  hops <- 0L
  while (!is.na(row) && !is.na(next_pids[row])) {
    head_pid <- next_pids[row]
    row <- next_rows[row]
    hops <- hops + 1L
    # A chain without a cycle can take at most one hop per row, so more
    # hops than rows means the walk is going round in circles.
    if (hops > length(pids)) {
      stop(
        "Cycle detected in the version chain of PID '", pids[start_row], "'",
        call. = FALSE
      )
    }
  }
  head_pid
}

chain_heads <- vapply(seq_along(pids), find_chain_head, character(1))

# Use a hash environment which is R's fast hash implementation.
# Missing or empty PIDs cannot be used as environment keys, so skip them.
chain_id <- new.env(hash = TRUE)
keyed <- !is.na(pids) & nzchar(pids)
for (i in which(keyed)) {
  # When a PID occurs on several rows, the last row wins.
  chain_id[[pids[i]]] <- chain_heads[i]
}

# Mark every row with its chain ID in a single column assignment.
# Rows without a usable PID keep the head found from their own row.
chain <- chain_heads
chain[keyed] <- vapply(
  pids[keyed],
  function(p) chain_id[[p]],
  character(1),
  USE.NAMES = FALSE
)
mydf[["chain"]] <- chain
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement