# De-duplicating the rows of a data.frame

# The most common form would be to de-duplicate the entire data.frame.

# Load the data and drop every row that is an exact repeat of an earlier row.
#
# A logical mask is used rather than `dat[-w, ]`: when the data contains no
# duplicates, `which()` returns integer(0), and `dat[-integer(0), ]` selects
# zero rows instead of all of them, silently emptying the result.
# `drop = FALSE` keeps the result a data.frame even if `dat` has one column.
dat <- readRDS("some_data_frame.rds")
is_dup <- duplicated(dat)
w <- which(is_dup) # positions of the dropped rows, kept for inspection
dat_dd <- dat[!is_dup, , drop = FALSE]


# Another form of de-duplication is to eliminate redundancy based on a single column within the data.frame.

# Keep only the first row seen for each distinct value of `some_column`.
#
# The column is checked up front and read with `[[` (exact name matching);
# `dat$some_column` would partially match a longer name, or yield NULL and
# an empty result when the column is absent.
stopifnot("some_column" %in% names(dat))
is_dup <- duplicated(dat[["some_column"]])
w <- which(is_dup) # positions of the dropped rows, kept for inspection

# Index with a logical mask: `dat[-w, ]` would return zero rows whenever
# `w` is empty, i.e. whenever the column has no repeated values.
dat_dd <- dat[!is_dup, , drop = FALSE]


# However, the most effective form is de-duplicating based on several columns

# Keep only the first row seen for each distinct combination of
# `some_column` and `another_column`.
key_cols <- c("some_column", "another_column")
is_dup <- duplicated(dat[, key_cols, drop = FALSE])
w <- which(is_dup) # positions of the dropped rows, kept for inspection

# Index with a logical mask: `dat[-w, ]` would return zero rows whenever
# `w` is empty, i.e. whenever no combination is repeated.
dat_dd <- dat[!is_dup, , drop = FALSE]

# The result was previously stored only as `df_dd`; that name is kept as an
# alias so code referring to it keeps working.
df_dd <- dat_dd


# A tool like this is very useful when looking for the rows that hold the maxima or minima of a given dataset.

# Find the maximum of `some_column` within each value of `another_column`.
stopifnot(c("some_column", "another_column") %in% names(dat))

# Sort descending so the largest `some_column` of every group comes first.
# `decreasing = TRUE` is used instead of negating the column so the sort also
# works for non-numeric columns (character, Date, ...); NAs still sort last.
dat <- dat[order(dat[["some_column"]], decreasing = TRUE), , drop = FALSE]

# Flag repeats of the grouping column only. Including `some_column` in the
# key would keep every distinct (value, group) pair, not just the maximum
# of each group, and the sort above would have no effect on the selection.
is_dup <- duplicated(dat[["another_column"]])
w <- which(is_dup) # positions of the dropped rows, kept for inspection

# Dataset holding the row with the maximum `some_column` for every unique
# `another_column`. A logical mask is used because `dat[-w, ]` would return
# zero rows whenever `w` is empty.
dat_dd <- dat[!is_dup, , drop = FALSE]