Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
# find_duplicates.sh
# Updated: 2012-02-24
# Version: 1.0
# By: Jayson Kempinger < GlowingApple (at) Gmail (dot) com >
# http://www.kempinger.us
# License: GPLv3 (http://www.gnu.org/licenses/gpl-3.0.txt)
#
# Isolate duplicate files in a directory by recursively computing the MD5
# checksum of every file. For each set of files with identical checksums the
# first file encountered is left in place and every other copy is moved to
# DIR/duplicates.
#
# Usage: find_duplicates.sh [ DIR ]
#   DIR  directory to scan; defaults to the current directory.
# Exit status: 0 on success, 1 on a usage or setup error.
#
# Known bugs:
# - some Finder aliases have the same md5 sum, so are misconstrued as duplicates (I assume the link is stored in the resource fork, but md5 only sums the data fork)
# - certain formats of webloc files are actually empty files (I assume the link is stored in the resource fork, but md5 only sums the data fork)
# - file names containing a newline character are skipped (the checksum list is line based)

VERBOSE=1 # Set to 0 to hide output; set to 1 to show some simple output
# Location of the checksum command: BSD/macOS `md5`, falling back to `md5sum`.
# The command is always fed the file on stdin, so either tool works unchanged.
CHECKSUM=$(command -v md5 || command -v md5sum)

# Print a diagnostic on stderr.
warn() {
  printf 'find_duplicates: %s\n' "$*" >&2
}

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a progress message on stdout, but only in verbose mode.
log() {
  if [[ "${VERBOSE}" == 1 ]]; then
    printf '%s\n' "$*"
  fi
}

# Print the MD5 checksum of file $1 on stdout; return non-zero if it cannot
# be computed. Reading from stdin keeps the file name out of the tool's
# output, so names with spaces or backslashes never have to be parsed back.
checksum_of() {
  local out
  out=$({ "${CHECKSUM}" < "$1"; } 2>/dev/null) || return 1
  out=${out%% *} # md5sum prints "<hash>  -"; md5 prints just the hash
  [[ -n "${out}" ]] || return 1
  printf '%s\n' "${out}"
}

# Print one "<checksum> <path>" line per regular file below directory $1, in
# the order find reports them. Anything inside directory $2 (the duplicates
# folder) is ignored so that re-running the script never treats an already
# isolated copy as the one to keep.
list_checksums() {
  local dir=$1 skip=$2 file sum
  while IFS= read -r -d '' file; do
    case "${file}" in
      "${skip}"/*)
        continue
        ;;
      *$'\n'*)
        warn "skipping file with a newline in its name: ${file}"
        continue
        ;;
    esac
    if sum=$(checksum_of "${file}"); then
      printf '%s %s\n' "${sum}" "${file}"
    else
      warn "cannot checksum ${file}; skipping"
    fi
  done < <(find "${dir}" -type f -print0)
}

# Filter "<checksum> <path>" lines from stdin: print the path of every file
# whose checksum has already been seen, i.e. every copy except the first of
# each duplicate set. The exact first-field comparison needs no sorting and
# cannot match a hash that merely appears inside a file name.
surplus_copies() {
  LC_ALL=C awk '{ if (seen[$1]++) print substr($0, length($1) + 2) }'
}

# Move file $1 into directory $2 without ever overwriting an existing entry.
# On a name clash "-$RANDOM" is inserted before the extension (or appended
# when the name has no extension). Returns 0 only if the file really moved.
move_to_duplicates() {
  local src=$1 dest_dir=$2 base stem ext dest
  base=${src##*/}
  dest="${dest_dir}/${base}"
  if [[ -e "${dest}" || -L "${dest}" ]]; then
    # "?*.*" requires a character before the dot, so dot-files such as
    # ".bashrc" are treated as having no extension.
    if [[ "${base}" == ?*.* ]]; then
      stem=${base%.*}
      ext=".${base##*.}"
    else
      stem=${base}
      ext=""
    fi
    while [[ -e "${dest}" || -L "${dest}" ]]; do
      dest="${dest_dir}/${stem}-${RANDOM}${ext}"
    done
  fi
  # -n stays as a last line of defence should the name appear meanwhile.
  # mv -n may report success even when it skipped the move, so verify it.
  mv -n -- "${src}" "${dest}" && [[ ! -e "${src}" ]]
}

# Print the closing summary for $1 files moved into directory $2
# (verbose mode only).
report() {
  local moved=$1 dup_dir=$2 plural="" article="the"
  [[ "${VERBOSE}" == 1 ]] || return 0
  echo ""
  if ((moved == 0)); then
    echo "No duplicate files were found."
    return 0
  fi
  if ((moved > 1)); then
    plural="s"
    article="each"
  fi
  printf '%s duplicate file%s found and moved to %s; one copy of %s duplicate set was left in %s.\n' \
    "${moved}" "${plural}" "${dup_dir}" "${article}" "${DIR}"
  echo ""
  printf '%s:\n' "${dup_dir}"
  ls -1 -- "${dup_dir}"
}

# Entry point: validate arguments, find the surplus copies, move them, report.
main() {
  local dup_dir path moved=0
  local -a surplus=()

  # Check that the correct number of arguments are given
  if (($# > 1)); then
    die "Please specify a single directory, or leave blank to use the current directory."
  fi
  if (($# == 1)); then
    DIR=$1
  else
    DIR=${PWD}
  fi
  [[ -d "${DIR}" ]] || die "Not a directory: ${DIR}"
  [[ -n "${CHECKSUM}" ]] || die "Neither md5 nor md5sum was found in PATH."

  # Normalise DIR: drop trailing slashes (BSD find would otherwise print
  # "dir//file") and keep a leading "-" from being parsed as an option.
  while [[ "${DIR}" == */ && "${DIR}" != "/" ]]; do
    DIR=${DIR%/}
  done
  if [[ "${DIR}" == -* ]]; then
    DIR="./${DIR}"
  fi
  dup_dir="${DIR%/}/duplicates"

  log "Checksumming files in ${DIR}..."
  while IFS= read -r path; do
    surplus+=("${path}")
  done < <(list_checksums "${DIR}" "${dup_dir}" | surplus_copies)

  # The duplicates folder is created only when there is something to put in
  # it, so a run without duplicates leaves no empty folder behind.
  if ((${#surplus[@]} > 0)); then
    log "Moving duplicate files to ${dup_dir}..."
    mkdir -p -- "${dup_dir}" || die "Cannot create ${dup_dir}"
    for path in "${surplus[@]}"; do
      if move_to_duplicates "${path}" "${dup_dir}"; then
        moved=$((moved + 1))
      else
        warn "could not move ${path}"
      fi
    done
  fi

  report "${moved}" "${dup_dir}"
  return 0
}

main "$@"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement