Advertisement
Guest User

Untitled

a guest
Mar 10th, 2022
153
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.66 KB | None | 0 0
  1. #!/bin/sh
  2. # REDDIT EDITION
  3. # cleans srt formatted subtitles of common blocks that may be considered unwanted, works well as a post-process script for software such as Bazarr or Sub-Zero
  4. # please consider leaving or modifying this regex to properly credit the hard work that is put into providing these subtitles
  5.  
  6. ### usage:
  7. ## Download this file from the command line to your current directory:
  8. # curl https://raw.githubusercontent.com/brianspilner01/media-server-scripts/master/sub-clean.sh > sub-clean.sh && chmod +x sub-clean.sh
  9. ## Run this script across your whole media library:
  10. # find /path/to/library -name '*.srt' -exec /path/to/sub-clean.sh "{}" \;
  11. ## Add to Bazarr (Settings > Subtitles > Use Custom Post-Processing > Post-processing command):
  12. # /path/to/sub-clean.sh "{{subtitles}}" --
  13. ## Add to Sub-Zero (in Plex > Settings > under Manage > Plugins > Sub-Zero Subtitles > Call this executable upon successful subtitle download, near the bottom):
  14. # /path/to/sub-clean.sh %(subtitle_path)s
  15. ## Test out what lines this script would remove:
  16. # REGEX_TO_REMOVE='(br|dvd|web).?(rip|scr)|english (- )?us|sdh|srt|(yahoo|mail|book|fb|4m|hd)\. ?com|(sub(title)?(bed)?(s)?(fix)?|encode(d)?|correct(ed|ion(s)?)|caption(s|ed)|sync(ed|hroniz(ation|ed))?|english)(.pr(esented|oduced))?.?(by|&)|[^a-z]www\.|http|\. ?(co|pl|link|org|net|mp4|mkv|avi|pdf)([^a-z]|$)|©|™'
  17. # REGEX_TO_REMOVE2='opensubtitles|sub(scene|rip)|podnapisi|addic7ed|titlovi|bozxphd|sazu489|psagmeno|normita|anoxmous|isubdb|americascardroom'
  18. # awk 'tolower($0) ~ '"/$REGEX_TO_REMOVE/" RS='' ORS='\n\n' "/path/to/sub.srt"
  19. # awk 'tolower($0) ~ '"/$REGEX_TO_REMOVE2/" RS='' ORS='\n\n' "/path/to/sub.srt"
  20.  
  # permission mode for the optional chmod of the cleaned subtitle file (a mode, not an owner)
  CHMOD=666

  # path of the subtitle file to clean, taken from the first positional argument
  SUB_FILEPATH="$1"

  # check usage: stop early unless the argument names an existing regular file
  if [ ! -f "$SUB_FILEPATH" ]
  then
    echo "usage: sub-clean.sh [FILE]"
    echo "Warning: subtitle file does not exist"
    exit 1
  fi

  # RUN on Synology:
  # find /<PATH>/Movies/ -not -path "*/@eaDir/*" -not -path "*/#snapshot/*" -not -path "*/#recycle/*" -name "*.srt" -exec /<PATH>/sub-clean.sh "{}" \;

  # lowercase regex alternatives; a subtitle block matching any of them is removed from the srt
  REGEX_TO_REMOVE='(br|dvd|web).?(rip|scr)|english (- )?us|(yahoo|mail|book|fb|4m|hd)\. ?com\/|(sub(title)?(bed)?(s)?(fix)?|encode(d)?|correct(ed|ion(s)?)|caption(s|ed)|sync(ed|hroniz(ation|ed))?|english)(.pr(esented|oduced))?.?(by|&)|[^a-z]www\.|http|\.(com|pl|link|org|net|mp4|mkv|avi|pdf)([^a-z]|$)'
  # the patterns are kept in two separate lists for compatibility with old implementations of awk that require <400 characters
  REGEX_TO_REMOVE2='opensubtitles|sub(scene|rip)|podnapisi|addic7ed|titlovi|bozxphd|sazu489|psagmeno|normita|anoxmous|isubdb|americascardroom'
  36.  
  # Clean the subtitle file named by SUB_FILEPATH in place.
  # Reads:   SUB_FILEPATH, REGEX_TO_REMOVE, REGEX_TO_REMOVE2
  # Outputs: a status line on stdout, followed by the removed blocks (if any);
  #          failure diagnostics go to stderr
  # Exit:    0 when the file was processed, 1 for a non-.srt path or any failure

  # only operate on srt files
  case "$SUB_FILEPATH" in
    *.srt) ;;
    *)
      echo "Provided file must be .srt"
      exit 1
      ;;
  esac

  # Work inside a private scratch directory rather than fixed names in /tmp, so
  # concurrent runs cannot clobber each other's intermediate files and no stale
  # list of removed blocks can leak in from an earlier, interrupted run.
  TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/sub-clean.XXXXXX") || {
    echo "sub-clean.sh: unable to create a temporary directory" >&2
    exit 1
  }
  # remove the scratch directory on every exit path
  trap 'rm -rf -- "$TMP_DIR"' EXIT
  trap 'exit 1' HUP INT TERM

  # filter_blocks REGEX INPUT OUTPUT TRASH
  # Copies the srt blocks of INPUT to OUTPUT, renumbering them from 1; a block
  # whose lowercased text matches REGEX is appended to TRASH instead.
  #
  # Each awk record is one block of srt formatted subs (RS='' makes blank lines
  # the record separator) and each line of the block is a separate field, i.e.:
  #   LINE NUMBER
  #   TIMESTAMP --> TIMESTAMP
  #   SUB LINE 1
  #   SUB LINE 2
  #   ...
  filter_blocks() {
    awk 'tolower($0) !~ /'"$1"'/ { $1 = VAR++ ; print ; next } { print >> TRASH }' \
      RS='' FS='\n' OFS='\n' ORS='\n\n' VAR=1 TRASH="$4" "$2" > "$3"
  }

  # 1. convert any DOS formatted file to UNIX (remove carriage return line endings)
  # 2. + 3. drop the blocks matching either regex list
  if awk '{ sub("\r$", ""); print }' "$SUB_FILEPATH" > "$TMP_DIR/unix.srt" &&
    filter_blocks "$REGEX_TO_REMOVE" "$TMP_DIR/unix.srt" "$TMP_DIR/pass1.srt" "$TMP_DIR/removed.txt" &&
    filter_blocks "$REGEX_TO_REMOVE2" "$TMP_DIR/pass1.srt" "$TMP_DIR/clean.srt" "$TMP_DIR/removed.txt"
  then
    # Replace the original only when the cleaned content actually differs.
    # cmp is POSIX, unlike the bash-only [[ ]] and GNU-only `stat -c` it replaces.
    if ! cmp -s "$TMP_DIR/clean.srt" "$SUB_FILEPATH"
    then
      mv "$TMP_DIR/clean.srt" "$SUB_FILEPATH" || {
        echo "sub-clean.sh: could not replace $SUB_FILEPATH" >&2
        exit 1
      }
      # chmod "$CHMOD" "$SUB_FILEPATH"
    fi
    echo "sub-clean.sh succesfully processed $SUB_FILEPATH"
  else
    echo "sub-clean.sh: failed to process $SUB_FILEPATH" >&2
    exit 1
  fi

  # report what was stripped, if anything
  if [ -s "$TMP_DIR/removed.txt" ]
  then
    REMOVED_LINES=$(cat "$TMP_DIR/removed.txt")
    if [ -n "$REMOVED_LINES" ]
    then
      echo "The following lines were removed:"
      printf '%s\n' "$REMOVED_LINES"
    fi
  fi
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement