Guest User

Untitled

a guest
Nov 9th, 2018
123
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.64 KB | None | 0 0
  find_duplicate_files <- function(x) {
    # Find groups of files with identical content under a folder.
    #
    # Files are compared by their MD5 checksums (tools::md5sum). The folder is
    # searched recursively and hidden files are included.
    #
    # x: character vector of existing folder path(s), e.g. "C:/example_folder".
    # Return: a list with one element per checksum shared by more than one
    #   file. Each element is a character vector holding the full paths of
    #   those files, and is named by the shared MD5 value. An empty list means
    #   no duplicates were found.
    # Errors: stops if x is not a character vector of existing folders.

    # list.files() silently returns character(0) for a missing folder, which
    # would hide a mistyped path, so reject it up front with a clear message.
    if (!is.character(x) || length(x) == 0L || !all(dir.exists(x))) {
      stop("`x` must be the path of an existing folder.", call. = FALSE)
    }

    files <- list.files(
      x,
      recursive = TRUE,
      all.files = TRUE,
      full.names = TRUE
    )

    # md5sum() is vectorised, so every path is hashed in a single call. Files
    # that cannot be read get NA, and split() drops NA groups.
    checksums <- tools::md5sum(files)
    groups <- split(files, unname(checksums))

    # Keep only the checksums that occur more than once. lengths() yields
    # integer(0) for an empty folder, so the result is then an empty list.
    groups[lengths(groups) > 1L]
  }
  15.  
  # Example run: collect the groups of identical files under the iPhone 7 folder.
  d_files <- find_duplicate_files("E:/iPhone_7")
Add Comment
Please, Sign In to add comment