fant0men

delete_dups.sh

Oct 17th, 2020
959
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/bin/bash
  2. # This script takes at least two directories as arguments, computes the MD5
  3. # hash of every file in the first directory, and then deletes files in the
  4. # other directories that have both the same hash and the same file name.
  5.  
  6. # for zip in *.tar.xz; do dir="${zip%.tar.xz}"; mkdir "$dir" || break; cd "$dir"; arch_unpack.sh ../"$zip" || break; cd ..; done
  7.  
  # Stop at the first failing command, and let a pipeline fail when any of its
  # stages fails rather than only the last one.
  set -o errexit -o pipefail
  9.  
  #######################################
  # Print the usage message and abort the script.
  # Called only when the arguments are invalid, so the message goes to stderr
  # and the exit status is non-zero.
  # Arguments: none
  # Outputs:   usage text (followed by a blank line) on stderr
  # Returns:   never returns; exits with status 1
  #######################################
  usage () {
      printf 'Usage: %s [source dir] [dirs...]\n\n' "${0##*/}" >&2
      exit 1
  }
  14.  
  # Collect the directory arguments: the first one is the source directory,
  # the remaining ones are the directories to prune. At least two are needed.
  dirs=("$@")

  if (( ${#dirs[@]} < 2 )); then
      usage
  fi

  # Every argument must be an existing directory. Each entry is replaced by
  # its canonical absolute path; `--` keeps a directory name that starts with
  # a dash from being parsed as a readlink option.
  for i in "${!dirs[@]}"; do
      if [[ ! -d ${dirs[i]} ]]; then
          usage
      fi

      dirs[i]=$(readlink -f -- "${dirs[i]}")
  done
  28.  
  #######################################
  # Delete files in the target directories that duplicate a file in the
  # source directory. A file is a duplicate only when BOTH its MD5 hash and
  # its base name are identical to those of some source file.
  #
  # Files that belong to the source tree are never deleted, even when a
  # target directory is the source directory itself or overlaps with it.
  # Paths are compared as text, so the directories are expected to be
  # canonical paths.
  #
  # Arguments: $1   - source directory
  #            $2.. - directories to remove duplicates from
  # Outputs:   path of every deleted file on stdout, one per line
  # Returns:   0 on success (also when fewer than two directories are given,
  #            because there is then nothing to delete); non-zero as soon as
  #            hashing or removing a file fails
  #######################################
  delete_dups () {
      (( $# >= 2 )) || return 0

      local src_dir=$1
      shift

      # src_paths: set of source file paths (protects them from deletion).
      # src_sigs:  set of "<md5>/<basename>" signatures. A base name cannot
      #            contain '/', so the key is unambiguous, and several source
      #            files with equal content but different names all get an
      #            entry instead of overwriting each other.
      local -A src_paths=() src_sigs=()
      local f sum dir

      # NUL-delimited find output keeps file names with newlines intact.
      while IFS= read -r -d '' f; do
          # Hash via stdin: md5sum escapes its output line when a file NAME
          # contains a backslash or newline, which would corrupt the hash
          # field. With stdin there is no name to escape.
          sum=$(md5sum -b <"$f") || return
          src_paths[$f]=1
          src_sigs[${sum%% *}/${f##*/}]=1
      done < <(find "$src_dir" -type f -print0)

      for dir in "$@"; do
          while IFS= read -r -d '' f; do
              # Never remove a file that is part of the source tree.
              [[ -z ${src_paths[$f]:-} ]] || continue

              sum=$(md5sum -b <"$f") || return

              # Exact set lookup: the base name is compared literally, not
              # as a glob pattern.
              if [[ -n ${src_sigs[${sum%% *}/${f##*/}]:-} ]]; then
                  printf '%s\n' "$f"
                  rm -- "$f" || return
              fi
          done < <(find "$dir" -type f -print0)
      done
  }

  delete_dups "${dirs[@]}"
  59.  
RAW Paste Data