Advertisement
Guest User

Untitled

a guest
Dec 27th, 2014
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.39 KB | None | 0 0
  1. #!/bin/bash
  2. ### Duplicate Song Detection ##########
  3. # Version 20140608-3 by Scott Garrett #
  4. # mail [(at)] exovenom.net #
  5. #######################################
  6.  
  # Scratch files shared by the phases of the script, all kept in one directory.
  #   songs - path of every song whose tags were usable, one per line
  #   tags  - normalized "artist title" key for the same line number of $songs
  #   dups  - the final duplicates report
  scratch_dir=/tmp
  songs=$scratch_dir/songs.txt
  tags=$scratch_dir/tags.txt
  dups=$scratch_dir/dups.txt
  10.  
  11.  
  12.  
  13. #################
  14. ### Functions ###
  15. #################
  # Writes a progress message to stderr so it never mixes with data on stdout.
  # Arguments: an optional leading -n suppresses the trailing newline; all
  #            remaining arguments are joined with single spaces and printed
  #            literally.
  # Outputs:   the message on stderr; nothing on stdout.
  status () {
    local terminator=$'\n'
    if [[ ${1-} == -n ]]; then
      terminator=''
      shift
    fi
    # printf instead of echo: echo would treat a message such as "-e" as an
    # option, and its backslash handling depends on shell options.
    local IFS=' '
    printf '%s%s' "$*" "$terminator" >&2
  }
  19.  
  # Recursively lists every song file under a directory.
  # A song is a regular file whose name ends in .mp3, .ogg or .flac, compared
  # case-insensitively.
  # Arguments: $1 - directory (or single file) to search
  # Outputs:   one matching path per line on stdout
  songs_in () {
    local root=$1
    # find stops reading starting points at the first argument that begins
    # with "-" or is "!" or "(", and parses it as an expression instead.
    # Anchoring such a name with ./ keeps it a path.
    case $root in
      -* | '!' | '(') root=./$root ;;
    esac
    find "$root" -type f \( -iname '*.mp3' -o -iname '*.ogg' -o -iname '*.flac' \)
  }
  24.  
  # Runs the command given as arguments and prints the number of lines it
  # wrote to stdout (as reported by wc -l).
  count () {
    wc -l < <("$@")
  }
  29.  
  # Runs the command given as arguments with both stdout and stderr thrown
  # away; the command's exit status is passed through unchanged.
  quietly () {
    "$@" >/dev/null 2>&1
  }
  34.  
  35.  
  36.  
  37. ############
  38. ### Main ###
  39. ############
  # Announce the phase without a newline; the total is appended to the same
  # line once it is known.
  status -n 'Counting songs... '

  # Running sum of the songs found under every path given on the command line.
  total=0

  for path in "$@"; do
    total=$(( total + $(count songs_in "$path") ))
  done
  status "$total"
  50.  
  51.  
  # Throw away the lists left behind by any previous run so that results from
  # other directories are never reused.
  quietly rm -f -- "$songs" "$tags"

  # Do we need to rescan?  The two lists are paired line by line, so they are
  # only usable when both exist; if either one is missing, rebuild both.
  # (Because of the removal above, the else branch is only reached when that
  # removal failed.)
  if [[ ! -f $songs || ! -f $tags ]]; then
    # Start from empty lists so the later phases always find both files, even
    # when no song turns out to have usable tags.
    : > "$songs"
    : > "$tags"

    # Number of the song that is currently being processed.
    count=1

    # Number of songs that had the tags we needed.
    valid=0

    # Walk the songs found under each path passed on the command line.
    for path in "$@"; do
      # IFS= keeps leading and trailing whitespace in file names intact.
      while IFS= read -r file; do
        status "Reading tag $count of $total (valid: $valid): $file"

        title=''
        artist=''

        # Read the file's tags, keeping only title and artist.  The tool is
        # expected to print one KEY=value pair per line; only ID3 frame IDs
        # (TIT2/TPE1) and Vorbis comment names (TITLE/ARTIST) are recognized.
        while read -r line; do
          # Split the line into tag name and tag value at the first "=".
          key=${line%%=*}
          data=${line#*=}

          # Vorbis comment field names are case-insensitive, so compare the
          # upper-cased name.
          case "${key^^}" in
            TIT2|TITLE)
              title=$data ;;
            TPE1|ARTIST)
              artist=$data ;;
          esac

          # Once both tags are known...
          if [[ -n $title && -n $artist ]]; then
            # ...normalize them for comparison: lower-case, then drop
            # punctuation and spaces...
            artist=${artist,,}
            artist=${artist//[[:punct:] ]/}

            title=${title,,}
            title=${title//[[:punct:] ]/}

            ((valid++))

            # ...and record file and key on the same line number of the two
            # lists.
            printf '%s\n' "$file" >> "$songs"
            printf '%s\n' "$artist $title" >> "$tags"

            # Nothing more is needed from this file.
            break
          fi
        done < <(mutagen-inspect "$file" | grep -aiE '^(TIT2|TPE1|TITLE|ARTIST)=')
        ((count++))
      done < <(songs_in "$path")
    done
    status
  else
    # Reuse the existing lists.  Reading from stdin keeps the file name out of
    # wc's output, and the arithmetic expansion drops any padding wc adds.
    valid=$(( $(wc -l < "$tags") ))
  fi

  status "$valid valid tags read."
  113.  
  count=1

  if [[ -s $tags ]]; then
    # Load the song paths once; line N of $tags describes element N-1.
    mapfile -t song_paths < "$songs"

    # For every key, collect the songs whose key is identical.  Each group is
    # written as one line of tab-separated paths.
    while IFS= read -r line; do
      status "Comparing $count of $valid: $line"
      # -x -F: whole-line, literal comparison, so a key can never match as a
      # mere part of another key.  -n yields the line number used to look the
      # path up.
      grep -nxF -e "$line" -- "$tags" | while IFS=':' read -r lnum _; do
        f=${song_paths[lnum - 1]}
        printf '%s\n' "$f"
        status " Found: $f"
      done | sort | tr -s '\n' '\t' | sed $'s/\t*$//'
      printf '\n'
      ((count++))
    # Every key matches at least its own line.  cut -s drops lines that
    # contain no tab, i.e. groups of one; sort -u merges the identical group
    # that each member of a duplicate set produces.
    done < "$tags" | cut -sf1- | sort -u > "$dups"
  else
    # No keys were recorded, so there is nothing to compare.
    : > "$dups"
  fi
  status
  128.  
  status 'Formatting duplicates list...'
  # Rewrite each tab-separated group as one path per line followed by a blank
  # line.  IFS= keeps leading whitespace in the first path of a group intact.
  # The report is only replaced if the rewrite itself succeeded.
  while IFS= read -r line; do
    printf -- '%s\n\n' "${line//$'\t'/$'\n'}"
  done < "$dups" > "$dups".1 &&
    mv -- "$dups".1 "$dups"
  status 'Done.'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement