Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# De-duplicating a data.frame with duplicated() ----
#
# duplicated() returns a logical vector that is TRUE for every row (or
# element) that repeats an earlier one, so the first occurrence is always
# kept. Rows are selected with the negated logical mask rather than with
# dat[-which(mask), ]: when nothing is duplicated, which() returns
# integer(0), and dat[-integer(0), ] selects zero rows instead of all rows.
# drop = FALSE keeps the result a data.frame even if dat has a single column.

# The most common form is to de-duplicate on the entire data.frame.
dat <- readRDS("some_data_frame.rds")
w <- duplicated(dat)
dat_dd <- dat[!w, , drop = FALSE]

# Another form is to eliminate redundancy based on a single column.
w <- duplicated(dat$some_column)
dat_dd <- dat[!w, , drop = FALSE]

# Often the most useful form is de-duplicating on a combination of columns.
w <- duplicated(dat[, c("some_column", "another_column")])
dat_dd <- dat[!w, , drop = FALSE]

# De-duplication also helps when extracting the row that holds the maximum
# (or minimum) of one column for each distinct value of another column.

# Find the maximum: sort so the largest some_column comes first. Using
# decreasing = TRUE instead of negating the column also works for
# non-numeric columns. For the minimum, sort in increasing order instead.
dat <- dat[order(dat$some_column, decreasing = TRUE), ]

# De-duplicate on the grouping column only. Including some_column in the key
# would make the sort irrelevant, because every distinct
# (some_column, another_column) pair would be kept regardless of order.
w <- duplicated(dat$another_column)

# Data set holding the row with the maximum some_column for each unique
# another_column.
dat_dd <- dat[!w, , drop = FALSE]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement