fant0men

find_movies.sh

Sep 20th, 2019 (edited)
91
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/bin/bash
  2. # This script is intended to search all mounted drives for movies, sort
  3. # them in alphabetical order, remove duplicates, and store all the
  4. # filenames in a list in a temp file in /dev/shm. The script looks for,
  5. # and tries to parse, filenames that follow scene naming rules.
  6.  
  7. #mkdir /home/lucifer/add_drives
  8. #cd /home/lucifer/add_drives
  9. #for p in '/run/media/lucifer/6583ee20-9f0a-45d2-bdc4-915df28d7c60' '/run/media/lucifer/92a70222-75b7-4607-829c-88f15c6a8e04' '/run/media/lucifer/70c52833-1153-4e9d-b8bb-4278aa377b31'; do
  10. #    bname=$(basename "$p")
  11. #    ln -s "$p" "$bname"
  12. #done
  13.  
  14. # Creates a pause message that will be used in the for loops, with find,
  15. # to prevent the sudo prompt from timing out.
  16. pause_msg='Press Enter to continue...'
  17.  
  18. # The $list variable is the filename of the finished list, that will be
  19. # created at the end of this script.
  20. list="${HOME}/find_movies-123.txt"
  21.  
  22. # Creates the temp basename that will be used for all temp files.
  23. temp_bname="find_movies-${RANDOM}"
  24.  
  25. # Creates two variables containing the temp filename.
  26. temp="/dev/shm/${temp_bname}.txt"
  27. temp_sorted="/dev/shm/${temp_bname}_sorted.txt"
  28.  
  29. # Creates the temp file in RAM. /dev/shm is a RAM disk.
  30. touch "$temp"
  31.  
  32. # Creates an array with all the different scene tags to look for in
  33. # each filename.
  34. rip=(720p 1080p screener hc dvb hdtv tvrip webrip webdl web-dl hddvd hd-dvd bluray blu-ray bdrip dvdrip divx xvid h264 x264 h265 x265 hevc dts ac3 pcm vorbis aac mp3)
  35.  
  36. # Creates an array with the directories to search.
  37. dirs=("${HOME}" "/run/media/${USER}")
  38.  
  39. # Creates the 'break_name' function, which breaks the filename
  40. # into multiple words, and tries to match those words against
  41. # the words in $rip array. It echoes the number of matches.
  42. break_name () {
  43. # Sets $bname to the first argument passed to this function.
  44. # Sets the $bname_chars variable to the character length of $bname.
  45. # Initializes the $count variable.
  46.     bname="$1"
  47.     bname_chars=$(echo "$bname" | wc --chars)
  48.     count=0
  49.  
  50. # Break $bname up in a list of words, and store those words in arrays,
  51. # depending on whether $bname is separated by dots, hyphens, underscores
  52. # or spaces.
  53.     mapfile -d'.' -t bname_dots <<<"$bname"
  54.     mapfile -d'-' -t bname_hyphens <<<"$bname"
  55.     mapfile -d'_' -t bname_underscores <<<"$bname"
  56.     mapfile -d' ' -t bname_spaces <<<"$bname"
  57.  
  58. # Declares an associative array (hash), that stores the element numbers
  59. # for each kind of word separator: dots, hyphens, underscores, spaces.
  60.     declare -A bname_elements
  61.     bname_elements[dots]=${#bname_dots[@]}
  62.     bname_elements[hyphens]=${#bname_hyphens[@]}
  63.     bname_elements[underscores]=${#bname_underscores[@]}
  64.     bname_elements[spaces]=${#bname_spaces[@]}
  65.  
  66. # If there are more dots in $bname than hyphens, underscores or spaces,
  67. # that means $bname is separated by dots. Otherwise, it's separated by
  68. # hyphens, underscores or spaces. In either case, loop through the word
  69. # list in either array and compare each word with the words in $rip
  70. # array.
  71.  
  72.     elements=0
  73.  
  74. # This for loop is to figure out if $bname is separated by dots,
  75. # hyphens, underscores or spaces.
  76.     for type in dots hyphens underscores spaces; do
  77.         temp_number="bname_elements[${type}]"
  78.  
  79.         if [[ ${!temp_number} -gt $elements ]]; then
  80.             elements="${!temp_number}"
  81.             temp_type="$type"
  82.         fi
  83.     done
  84.  
  85. # This for loop is to go through the word list and compare each word
  86. # with the words in $rip array.
  87.     for (( i = 0; i < $elements; i++ )); do
  88.  
  89. # Creates a reference, pointing to the $i element of the
  90. # 'bname_$temp_type' array.
  91.         array_ref="bname_${temp_type}[${i}]"
  92.  
  93.         for g in ${rip[@]}; do
  94.             if [[ ${!array_ref} =~ $g ]]; then
  95.                 let count++
  96.             fi
  97.         done
  98.     done
  99.  
  100.     echo "$count"
  101. }
  102.  
  103. # Look for both files and directories that match the scene naming
  104. # rules.
  105. read -p "$pause_msg"
  106.  
  107. sleep 1
  108.  
  109. sudo find "${dirs[@]}" -type f \( -iname "*.avi" -o -iname "*.mp4" -o -iname "*.mkv" \) -print 2>&- | while read line; do
  110.  
  111. # Create the $bname variable, that contains the basename of $line.
  112. # Translate uppercase to lowercase while we're at it.
  113.     bname=$(basename "$line")
  114.     bname_lc=$(tr '[:upper:]' '[:lower:]' <<<"$bname")
  115.  
  116. # If file name pattern matches YouTube videos, ignore file, and continue
  117. # with the next iteration of the loop.
  118. # *(1080p_30fps_H264-128kbit_AAC).mp4
  119.     grep -Eq "\(([0-9]{3,4})p_([0-9]{1,2})fps_h264-([0-9]{2,3})kbit_aac\)" <<<"$bname_lc"
  120.  
  121.     if [[ $? -eq 0 ]]; then
  122.         continue
  123.     fi
  124.  
  125. # *(720p_H.264-AAC).mp4
  126.     grep -Eq "\(([0-9]{3,4})p_h\.264-aac\)" <<<"$bname_lc"
  127.  
  128.     if [[ $? -eq 0 ]]; then
  129.         continue
  130.     fi
  131.  
  132. # Loop through the rip array, in order to find at least two matches for
  133. # the current $line.
  134.     count=0
  135.     count=$(break_name "$bname_lc")
  136.  
  137. # If directory name contains at least two of the search terms in the
  138. # 'rip' array, continue on.
  139.     if [[ $count -ge 2 ]]; then
  140.         echo "$bname" >> "$temp"
  141.  
  142. # Since we've found what we're looking for, move to the next iteration
  143. # of the loop.
  144.         continue
  145.     fi
  146. done
  147.  
  148. # Look for "empty" directories, or at least directories that are smaller
  149. # than 100MB in size, and again, match them against the scene naming
  150. # rules. As for me personally, I tend to save the directories of
  151. # downloaded movies, even after having deleted the movie. Left in the
  152. # directory will typically be, subtitles and perhaps a sample file.
  153. # That's why I put the 100MB size limit, because any directory larger
  154. # than that will already have been matched by the previous loop.
  155.  
  156. # Pause until the user responds, since a password prompt will come
  157. # shortly, and we want to prevent that from timing out in case the user
  158. # isn't paying attention.
  159. read -p "$pause_msg"
  160.  
  161. sleep 1
  162.  
  163. sudo find "${dirs[@]}" -type d -iname "*" 2>&- | while read line; do
  164. # Create the $bname variable, that contains the basename of $line.
  165.     bname=$(basename "$line")
  166.     bname_lc=$(tr '[:upper:]' '[:lower:]' <<<"$bname")
  167.  
  168. # Loop through the rip array, in order to find at least two matches for
  169. # the current $line.
  170.     count=$(break_name "$bname_lc")
  171.  
  172. # If directory name contains at least two of the search terms in the
  173. # 'rip' array, continue on.
  174.     if [[ $count -ge 2 ]]; then
  175. # If size is lower than 100MB, include directory in temp text file.
  176.         size=$(du -BM -s "$line" | tr '[:space:]' ' ' | cut -d' ' -f1 | tr -d '[:alpha:]')
  177.  
  178.         if [[ $size -le 100 ]]; then
  179.             echo "$bname" >> "$temp"
  180.         fi
  181.  
  182. # Since we've found what we're looking for, move to the next iteration
  183. # of the loop.
  184.         continue
  185.     fi
  186. done
  187.  
  188. # Sort the output in alphanumerical order, remove duplicate lines.
  189. # Remove the unsorted temp file, by replacing it with the sorted file.
  190. cat "$temp" | sort -u > "$temp_sorted"
  191. mv "$temp_sorted" "$temp"
  192.  
  193. # If filename in $list already exists, then merge that list with the
  194. # new one, remove duplicates, redirect to the filename in $temp_sorted.
  195. # Then rename $temp_sorted to $list, effectively deleting the $temp
  196. # sorted file. After that, delete the $temp file from RAM.
  197. # If filename in $list does NOT exist, move the sorted file to the
  198. # $list filename.
  199. if [[ -f $list ]]; then
  200.     cat "$list" "$temp" | sort -u > "$temp_sorted"
  201.     mv "$temp_sorted" "$list"
  202.     rm "$temp"
  203. else
  204.     #true
  205.     mv "$temp" "$list"
  206. fi
  207.  
  208.  
RAW Paste Data