Advertisement
GlowingApple

find_duplicates.sh

Feb 24th, 2012
693
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 2.78 KB | None | 0 0
#!/bin/bash

# find_duplicates.sh
# Updated: 2012-02-24
# Version: 1.0
# By: Jayson Kempinger < GlowingApple (at) Gmail (dot) com >
# http://www.kempinger.us
# License: GPLv3 (http://www.gnu.org/licenses/gpl-3.0.txt)

# Isolate duplicate files in a directory by recursively computing the MD5 checksums of every file.
# Usage: find_duplicates.sh [ DIR ]

# Known bugs:
#   - some Finder aliases have the same md5 sum, so are misconstrued as duplicates (the link is presumably stored in the resource fork, but md5 only sums the data fork)
#   - certain formats of webloc files are actually empty files (the link is presumably stored in the resource fork, but md5 only sums the data fork)
  17. VERBOSE=1       # Set to 0 to hide output; set to 1 to show some simple output
  18. CHECKSUM=`which md5`    # set to location of md5 command
  19.  
  20. # Check that the correct number of arguments are given
  21. if [ $# -eq 1 ]; then
  22.     DIR="${1}"
  23. else
  24.     if [ $# -eq 0 ]; then
  25.         DIR="${PWD}"
  26.     else
  27.         echo "Please specify a single directory, or leave blank to use the current directory."
  28.         exit 1
  29.     fi
  30. fi
  31.  
  32. OLD_IFS="${IFS}"
  33. IFS=$'\n'
  34.  
  35. if [ $VERBOSE -eq 1 ]; then echo "Checksumming files in ${DIR}..."; fi
  36. FILES=`find "${DIR}" -type f -exec $CHECKSUM -r '{}' \;`
  37. DUPS=`echo "${FILES}" | awk '{print $1}' | uniq -d`
  38.  
  39. mkdir -p "${DIR}/duplicates"
  40.  
  41. if [[ $VERBOSE == 1 && "${DUPS}" != "" ]]; then echo "Moving duplicate files to ${DIR}/duplicates..."; fi
  42. for DUP in "${DUPS}"
  43. do
  44.     I=0
  45.     for FILE in `echo "${FILES}" | grep "${DUP}"`
  46.     do
  47.         # move all files, except for first file, to duplicates folder
  48.         if [ $I -ne 0 ]; then
  49.             # Using -n for move, so if two files with the same name exist, will not overwrite
  50.             FILEPATH=`echo "${FILE}" | cut -f 2- -d " "`
  51.             RESULT=`mv -nv "${FILEPATH}" "${DIR}/duplicates/" | grep -c "not overwritten"`
  52.             # Does another file with the same name exist?  If so, append random number to filename and move file.
  53.             if [ $RESULT -ne 0 ]; then
  54.                 FILENAME=`basename "${FILEPATH}"`
  55.                 NEW_FILENAME="${FILENAME%.*}-$RANDOM.${FILENAME##*.}"
  56.                 # $RANDOM should be sufficiently random for this, but just to be safe, using -n to avoid overwritting any data
  57.                 mv -n "${FILEPATH}" "${DIR}/duplicates/${NEW_FILENAME}"
  58.             fi
  59.         fi
  60.         let I=$I+1
  61.     done
  62. done
  63.  
  64. IFS="${OLD_IFS}"
  65.  
  66. if [ $VERBOSE -eq 1 ]; then
  67.     COUNT=`ls "${DIR}/duplicates" | wc -l | bc`
  68.     echo ""
  69.     if [ $COUNT -eq 0 ]; then
  70.         rmdir "${DIR}/duplicates"
  71.         echo "No duplicate files were found."
  72.     else
  73.         echo -n "${COUNT} duplicate file"
  74.         if [ $COUNT -gt 1 ]; then
  75.             echo -n "s"
  76.         fi
  77.         echo -n " found and moved to "${DIR}/duplicates"; one copy of "
  78.         if [ $COUNT -gt 1 ]; then
  79.             echo -n "each"
  80.         else
  81.             echo -n "the"
  82.         fi
  83.         echo " duplicate set was left in ${DIR}."
  84.         echo ""
  85.         echo "${DIR}:"
  86.         ls -1 "${DIR}/duplicates/"
  87.     fi
  88. fi
  89.  
  90. exit 0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement