Hellerick_Ferlibay

Extract subtitles from each MKV file in the given directory

Dec 7th, 2014
317
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.61 KB | None | 0 0
  #!/bin/bash
  # Extract subtitles from each MKV file in the given directory.
  #
  # Usage: <script> [DIR]
  #   DIR  directory searched recursively for *.mkv files
  #        (default: the current directory)
  #
  # For every subtitle track listed by `mkvmerge -i`, the track is extracted
  # with `mkvextract` to "<video-without-extension>.srt.tmp". A crude word
  # count then decides whether the track looks English:
  #   * 10 or more matching lines: the subtitle is cleaned up and saved as
  #     "<video-without-extension>.srt";
  #   * otherwise: the raw extraction is kept as
  #     "<video-without-extension>.<track-id>.srt", just in case.
  #
  # NOTE: every track that passes the language test is written to the same
  # "<video-without-extension>.srt", so with several English tracks the last
  # one extracted wins.
  #
  # `set -e` is deliberately not used: the script is best-effort and one
  # broken file must not stop the remaining files from being processed.
  set -u

  # Case-insensitive grep regex selecting subtitle blocks to drop (for
  # example credits). Empty means "drop nothing".
  trimregex=""

  # Minimum number of lines containing common English words for a track to
  # be treated as English.
  readonly MIN_ENGLISH_LINES=10

  #######################################
  # Print the track ID of a subtitle track described by one line of
  # `mkvmerge -i` output.
  # Arguments: $1 - one output line, e.g. "Track ID 2: subtitles (SubRip/SRT)"
  # Outputs:   the numeric track ID on stdout
  # Returns:   0 if the line describes a subtitle track, 1 otherwise
  #######################################
  subtitle_track_id() {
    # Anchoring on "Track ID N: subtitles" keeps the "File '...'" header line
    # from matching when the path itself contains the word "subtitles".
    local re='^Track ID ([0-9]+): subtitles'
    [[ $1 =~ $re ]] || return 1
    printf '%s\n' "${BASH_REMATCH[1]}"
  }

  #######################################
  # Clean up a subtitle stream: strip carriage returns, optionally drop whole
  # blocks matching a regex, and write every line except the blank block
  # separators with CRLF endings.
  # Arguments: $1 - case-insensitive regex of blocks to drop; empty keeps all
  # Inputs:    subtitle text on stdin
  # Outputs:   cleaned subtitle text on stdout
  #######################################
  clean_subtitle() {
    local drop_regex=$1

    # Stage 2 joins the lines of each block (terminated by an empty line)
    # into one tab-separated line, so the filter in stage 3 acts on whole
    # blocks; stage 4 splits the blocks back into lines. The text is passed
    # to printf as an argument, never as the format string.
    tr -d '\r' \
      | awk '{ if (a) printf "\t"; printf "%s", $0; a = 1 }
             /^$/ { print ""; a = 0 }' \
      | if [[ -n $drop_regex ]]; then
          grep -iv -- "$drop_regex"
        else
          # An empty pattern matches every line, so `grep -v ""` would
          # discard the entire subtitle; pass the data through instead.
          cat
        fi \
      | awk '{ gsub(/\t/, "\r\n"); print }'
  }

  #######################################
  # Extract one subtitle track and store it under its final name.
  # Globals:   trimregex (read), MIN_ENGLISH_LINES (read)
  # Arguments: $1 - path of the MKV file
  #            $2 - ID of the subtitle track
  # Outputs:   diagnostics on stderr
  # Returns:   0 on success, 1 if nothing could be extracted
  #######################################
  extract_track() {
    local video=$1 track=$2
    local base=${video%.*}
    local tmp="$base.srt.tmp"
    local english_lines

    # stdin is detached so the tool cannot consume the caller's input. The
    # exit status is not checked; whether the output file exists is what
    # decides success.
    mkvextract tracks "$video" "$track:$tmp" </dev/null >/dev/null 2>&1
    if [[ ! -f $tmp ]]; then
      printf 'could not extract track %s from %s\n' "$track" "$video" >&2
      return 1
    fi
    chmod g+rw -- "$tmp"

    # Super-primitive language guess: count lines with common English words.
    english_lines=$(grep -Eic -- ' you | to | the ' "$tmp")
    english_lines=${english_lines:-0}

    if ((english_lines >= MIN_ENGLISH_LINES)); then
      clean_subtitle "$trimregex" <"$tmp" >"$base.srt"
      rm -f -- "$tmp"
      chmod g+rw -- "$base.srt"
    else
      # Not the desired language: keep it anyway under a numbered name.
      mv -- "$tmp" "$base.$track.srt"
    fi
  }

  #######################################
  # Process every *.mkv file below a directory.
  # Arguments: $1 - directory to search (optional, default ".")
  # Returns:   1 if the argument is not a directory, 0 otherwise
  #######################################
  main() {
    local dir=${1:-.}
    local video line track

    if [[ ! -d $dir ]]; then
      printf '%s: not a directory: %s\n' "${0##*/}" "$dir" >&2
      return 1
    fi

    # NUL-delimited paths survive spaces, backslashes and newlines.
    while IFS= read -r -d '' video; do
      while IFS= read -r line; do
        track=$(subtitle_track_id "$line") || continue
        extract_track "$video" "$track"
      done < <(mkvmerge -i "$video" </dev/null)
    done < <(find "$dir" -type f -name '*.mkv' -print0)
  }

  main "$@"
Advertisement
Add Comment
Please, Sign In to add comment