Untitled

#!/bin/bash
### Duplicate Song Detection ##########
# Version 20140608-3 by Scott Garrett #
# mail [(at)] exovenom.net            #
#######################################

# Working files used by the stages below.  Line N of $songs and line N of
# $tags always describe the same song.
# NOTE(review): these are fixed, predictable names under /tmp that are shared
# between runs -- consider mktemp if that matters for your environment.

# Paths of the songs that had both a title and an artist tag.
songs=/tmp/songs.txt
# Normalized "artist title" line for each of those songs.
tags=/tmp/tags.txt
# Final report listing the groups of files whose normalized tags match.
dups=/tmp/dups.txt


#################
### Functions ###
#################
# Writes a progress message to standard error so it never mixes with data
# sent to standard output.  The arguments are handed to echo unchanged, so
# echo options such as -n may be passed first; with no arguments a blank
# line is printed.
status () {
    >&2 echo "$@"
}

# Recursively lists every song file under a directory, one path per line on
# standard output.  A song is any regular file whose name ends in .mp3, .ogg
# or .flac (matched case-insensitively).
# Arguments: $1 - directory to search.
# Returns:   the exit status of find.
songs_in () {
    local root=$1

    # find treats the first argument that starts with "-" (or is exactly "("
    # or "!") as the beginning of its expression instead of as a starting
    # point, so anchor such names to the current directory.
    case $root in
        -*|'('|'!')
            root=./$root ;;
    esac

    find "$root" -type f \( -iname '*.mp3' -o -iname '*.ogg' -o -iname '*.flac' \)
}

# Runs the command given as arguments and prints how many lines it wrote to
# standard output.
count () {
    local -a cmd=("$@")
    "${cmd[@]}" | wc -l
}

# Runs the command given as arguments with both standard output and standard
# error sent to /dev/null.  The command's exit status is passed through.
quietly () {
    "$@" >/dev/null 2>&1
}


############
### Main ###
############
# Announce the counting pass.  -n keeps the output on the same line so the
# total can be appended to it once it is known.
status -n 'Counting songs... '

# Total number of songs to process, summed over every directory given on the
# command line.
total=0

for path in "$@"; do
    found=$(count songs_in "$path")
    total=$((total + found))
done
status "$total"


# Discard the lists left behind by any previous run.  Failure (typically
# because the files do not exist yet) is deliberately ignored.
quietly rm "$songs"
quietly rm "$tags"

# Do we need to rescan?  The two lists are paired line by line, so a rescan is
# needed whenever either of them is missing.
# NOTE: both lists were just removed above, so in practice every run rescans;
# the else branch is only reached if that removal is disabled or fails.
if [[ ! -f $songs || ! -f $tags ]]; then
    # Start with both lists empty so they stay paired and so that later stages
    # always find them, even when no song yields usable tags.
    : > "$songs"
    : > "$tags"

    # Number of the song that is currently being processed.
    count=1

    # Number of songs that had the tags we needed.
    valid=0

    # Read list of songs found in each passed directory.
    for path in "$@"; do
        # IFS= keeps leading/trailing whitespace in file names intact.
        while IFS= read -r file; do
            status "Reading tag $count of $total (valid: $valid): $file"

            unset title artist

            # Read tags from file and filter out all but the title and artist.
            # We assume only ID3 and VorbisComment-based tracks are being read.
            while read -r line; do
                # Split the line into tag name and tag value.
                key=${line%%=*}
                data=${line#*=}

                # Look for title/artist tag name and remember its value.
                # VorbisComment field names are case-insensitive, so compare
                # the upper-cased name.
                case "${key^^}" in
                    TIT2|TITLE)
                        title=$data ;;
                    TPE1|ARTIST)
                        artist=$data ;;
                esac

                # If we've found both tags at this point...
                if [[ $title && $artist ]]; then
                    # ...normalize them for comparison (lower case, with
                    # punctuation and spaces removed)...
                    artist=${artist,,}
                    artist=${artist//[[:punct:] ]/}

                    title=${title,,}
                    title=${title//[[:punct:] ]/}

                    ((valid++))

                    # ...and keep track of what file had what normalized tags.
                    # Both lists get exactly one line per valid song.
                    echo "$file" >> "$songs"
                    echo "$artist $title" >> "$tags"

                    # We can then move on to the next file.
                    break
                fi
            done < <(mutagen-inspect "$file" | grep -aiE '^(TIT2|TPE1|TITLE|ARTIST)')
            ((count++))
        done < <(songs_in "$path")
    done
    status
else
    # Don't waste time rescanning if we don't need to.  Reading from stdin
    # makes wc print only the number; the arithmetic expansion strips any
    # padding some wc implementations add.
    valid=$(( $(wc -l < "$tags") ))
fi

status "$valid valid tags read."

# Number of the tag line currently being compared (progress display only).
count=1

# Make sure the tag list exists (without touching its contents) so that a run
# in which no song had usable tags simply produces an empty report.
: >> "$tags"

# For every tag line, look up all songs whose normalized tags are exactly the
# same and print their file names, sorted, as one tab-separated line.
#   * IFS= keeps the line exactly as stored, including the leading or trailing
#     space left when an artist or title normalized to an empty string.
#   * grep -x matches whole lines only, so a song is never paired with one
#     that merely shares a title or an artist; -n yields the line number, which
#     is also the song's line number in $songs.
# Afterwards, cut -s drops every line without a tab (songs that only matched
# themselves, and blank lines) and sort -u collapses the copies of each group
# that are produced once per member.
while IFS= read -r line; do
    status "Comparing $count of $valid: $line"
    grep -hnxF -e "$line" "$tags" | while IFS=':' read -r lnum _; do
        # Fetch line $lnum of the song list and stop reading right after it.
        f=$(sed -n "${lnum}{p;q;}" "$songs")
        printf '%s\n' "$f"
        status "    Found: $f"
    done | sort | tr -s '\n' '\t' | sed 's/\t*$//'
    echo
    ((count++))
done < "$tags" | cut -sf1- | sort -u > "$dups"
status

status 'Formatting duplicates list...'

# Rewrite the report so each group of duplicates lists one file name per line
# and is followed by a blank line.  The result is built in a scratch file and
# then moved over the original.
tab=$'\t'
newline=$'\n'
while read -r group; do
    printf -- '%s\n\n' "${group//$tab/$newline}"
done < "$dups" > "$dups".1

mv "$dups".1 "$dups"
status 'Done.'