Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/bash
- ### Duplicate Song Detection ##########
- # Version 20140608-3 by Scott Garrett #
- # mail [(at)] exovenom.net #
- #######################################
# Scratch files shared by the scan, compare and formatting passes. Each path
# may be overridden through the environment (SONGS_FILE, TAGS_FILE,
# DUPS_FILE); when unset or empty the historical fixed /tmp locations are used.
# Song file paths, one per line; line N belongs to line N of $tags.
songs=${SONGS_FILE:-/tmp/songs.txt}
# Normalized "artist title" key for the song on the same line of $songs.
tags=${TAGS_FILE:-/tmp/tags.txt}
# Final report listing the groups of duplicate songs.
dups=${DUPS_FILE:-/tmp/dups.txt}
- #################
- ### Functions ###
- #################
# Prints a progress message on stderr, keeping it out of any data written to
# stdout.
# Arguments: an optional leading -n suppresses the trailing newline (for
#            "doing X... " prompts finished by a later call); all remaining
#            arguments are joined with single spaces.
# The text is printed literally, so a message that merely looks like an echo
# option (for example "-e") is shown rather than swallowed. With no message
# at all an empty line is printed.
status () {
  local IFS=' ' newline=1
  if [[ ${1-} == -n ]]; then
    newline=0
    shift
  fi
  if (( newline )); then
    printf '%s\n' "$*" >&2
  else
    printf '%s' "$*" >&2
  fi
}
# Recursively lists every song file (*.mp3, *.ogg or *.flac, matched without
# regard to case) found under the directory given as $1, one path per line.
# A directory name starting with "-" is prefixed with "./" so that find takes
# it as a starting point instead of trying to parse it as part of its
# expression.
songs_in () {
  local root=$1
  if [[ $root == -* ]]; then
    root=./$root
  fi
  find "$root" -type f \( -iname '*.mp3' -o -iname '*.ogg' -o -iname '*.flac' \)
}
# Runs the command given as arguments and prints the number of lines it wrote
# to stdout, as a bare integer followed by a newline.
# Some wc implementations pad their result with leading blanks; passing the
# value through arithmetic expansion strips that padding so callers always
# receive just the digits.
count () {
  local lines
  lines=$("$@" | wc -l)
  printf '%d\n' "$((lines))"
}
# Runs the command given as arguments with both its stdout and its stderr
# sent to /dev/null. The command's exit status is passed through unchanged.
quietly () {
  "$@" >/dev/null 2>&1
}
- ############
- ### Main ###
- ############
# Without at least one directory there is nothing to scan; say so up front
# instead of running the later passes against files that were never created.
if (( $# == 0 )); then
  status "Usage: ${0##*/} DIRECTORY..."
  exit 2
fi

status -n 'Counting songs... '
# Total number of songs to process, summed over every directory argument.
total=0
for path in "$@"; do
  # Held in its own variable rather than "count", so it cannot be mistaken for
  # the helper function of that name or for the per-song counter used later.
  found=$(count songs_in "$path")
  ((total += found))
done
# Completes the "Counting songs... " line started above.
status "$total"
# Both cache files are removed on every run, so the scan below normally
# always takes place.
quietly rm "$songs"
quietly rm "$tags"
# Do we need to rescan? Yes, unless BOTH cache files are still present (which
# can only happen if the removals above failed). Requiring both avoids reusing
# a tags file whose matching songs file is gone, or the reverse.
if [[ ! -f $songs || ! -f $tags ]]; then
  # Create/truncate both files: leftovers from a half-removed cache must not
  # be appended to, and the later passes need a file to read even when no song
  # yields usable tags.
  : > "$songs"
  : > "$tags"
  # Number of the song that is currently being processed.
  count=1
  # Number of songs that had the tags we needed.
  valid=0
  # Walk the songs found in each directory passed on the command line.
  for path in "$@"; do
    # IFS= keeps leading/trailing blanks in file names intact.
    while IFS= read -r file; do
      status "Reading tag $count of $total (valid: $valid): $file"
      unset title artist
      # Read tags from file and filter out all but the title and artist.
      # We assume only ID3 and VorbisComment-based tracks are being read.
      while read -r line; do
        # Split tag value from the line (everything after the first "=").
        data=${line#*=}
        # Look for title/artist tag name and remember its value. The exact
        # match here rejects longer names that the grep prefix lets through.
        case "${line%%=*}" in
          TIT2|TITLE)
            title=$data ;;
          TPE1|ARTIST)
            artist=$data ;;
        esac
        # If we've found both tags at this point...
        if [[ $title && $artist ]]; then
          # ...normalize them for comparison (lowercase, then drop every
          # punctuation character and space)...
          artist=${artist,,}
          artist=${artist//[[:punct:] ]/}
          title=${title,,}
          title=${title//[[:punct:] ]/}
          ((valid++))
          # ...and keep track of what file had what normalized tags. The two
          # files are appended in lockstep so line numbers correspond.
          printf '%s\n' "$file" >> "$songs"
          printf '%s\n' "$artist $title" >> "$tags"
          # We can then move on to the next file.
          break
        fi
      done < <(mutagen-inspect "$file" | grep -aE '^(TIT2|TPE1|TITLE|ARTIST)')
      ((count++))
    done < <(songs_in "$path")
  done
  status
else
  # Don't waste time rescanning if we don't need to. Reading from stdin keeps
  # the file name out of wc's output, and the arithmetic expansion strips any
  # padding around the number.
  valid=$(( $(wc -l < "$tags") ))
fi
status "$valid valid tags read."
count=1
# Read the tags list; for every normalized key, find all lines of the tags
# file identical to it and print the songs on the same line numbers of the
# songs file as one sorted, tab-separated group.
# Every key matches at least its own line, so single-song groups are produced
# as well: cut -s discards them (a line without a tab has no delimiter), and
# sort -u collapses the identical copies of a group emitted once per member.
# IFS= keeps a leading blank (a key whose artist part normalized to nothing)
# from being trimmed, which would turn the key into a bare title or an empty
# string.
while IFS= read -r line; do
  status "Comparing $count of $valid: $line"
  # -x: the whole tags line must equal the key, so a key can never match
  # inside a longer, unrelated line. -F: the key is literal text, not a regex.
  grep -hnxF -e "$line" -- "$tags" | while IFS=':' read -r lnum _; do
    # grep -n reported the line number; fetch that line of the songs file.
    f=$(head -n "$lnum" "$songs" | tail -n 1)
    printf '%s\n' "$f"
    status " Found: $f"
  done | sort | tr -s '\n' '\t' | sed 's/\t*$//'
  # Terminate the group: the trailing newline was turned into a tab and then
  # stripped by the pipeline above.
  echo
  ((count++))
done < "$tags" | cut -sf1- | sort -u > "$dups"
status
status 'Formatting duplicates list...'
# Rewrite each tab-separated group as one path per line followed by a blank
# line. The result is built in a sibling ".1" file and only moved over the
# report when the loop (including its redirections) reported success, so a
# failed rewrite does not replace the original list.
# IFS= stops read from trimming blanks at either end of a group.
while IFS= read -r line; do
  printf -- '%s\n\n' "${line//$'\t'/$'\n'}"
done < "$dups" > "$dups".1 && mv -- "$dups".1 "$dups"
status 'Done.'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement