SHOW:
|
|
- or go back to the newest paste.
#!/bin/bash
# Extract subtitles from each MKV file in the given directory.
#
# Usage: <script> [DIR]
#   DIR  Directory searched recursively for *.mkv files (default: ".").
#
# Every subtitle track listed by `mkvmerge -i` is extracted with `mkvextract`
# to "<video>.srt.tmp". A track that passes a primitive language guess is
# stripped of credit blocks and saved as "<video>.<langsuffix>.srt" (group
# read/write); the temporary file is always removed. If a video carries
# several matching tracks they share one output name, so the last one wins.
#
# There is deliberately no `set -e`: a failure on one video must not abort
# the rest of the batch.

# --- Language filter ---------------------------------------------------------
# langregex  : extended regex; a subtitle line matching it counts as one hit
# trimregex  : basic regex (GNU `\|` alternation) marking credit blocks to
#              drop; leave empty to keep every block
# langsuffix : language tag inserted before ".srt" in the output file name

# Do a super-primitive language guess: DUTCH
langregex=' ik | je | een '
trimregex='vertaling &\|vertaling:\|vertaald door\|bierdopje'
langsuffix='nl'

# Do a super-primitive language guess: ENGLISH
#langregex=' you | to | the '
#trimregex=''
#langsuffix='en'

# Do a super-primitive language guess: GERMAN
#langregex=' ich | ist | sie '
#trimregex=''
#langsuffix='de'

# Do a super-primitive language guess: SPANISH
#langregex=' el | es | por '
#trimregex=''
#langsuffix='es'

# A subtitle passes the language filter with this many matching lines or more.
minmatches=10

#######################################
# Print the ID of every subtitle track in an MKV file, one per line.
# Only "Track ID N: subtitles ..." lines count, so a file name containing
# the word "subtitles" is not mistaken for a track, and IDs of any length
# are returned in full.
# Arguments: $1 - path to the MKV file
# Outputs:   track IDs on stdout
#######################################
list_subtitle_tracks() {
  local line
  while IFS= read -r line; do
    if [[ "$line" =~ ^Track\ ID\ ([0-9]+):\ subtitles ]]; then
      printf '%s\n' "${BASH_REMATCH[1]}"
    fi
  done < <(mkvmerge -i "$1")
}

#######################################
# Copy stdin to stdout, dropping lines that match $trimregex
# (case-insensitive). An empty $trimregex keeps everything; passing an empty
# pattern to `grep -v` would instead delete every line.
# Globals:   trimregex (read)
#######################################
drop_credit_lines() {
  if [[ -n "$trimregex" ]]; then
    grep -iv -- "$trimregex"
  else
    cat
  fi
}

#######################################
# Filter an SRT stream from stdin to stdout, removing whole cue blocks that
# contain credits.
# Each blank-line-terminated block is first folded onto a single line (its
# lines joined by tabs) so that one grep can drop the entire block; the tabs
# are then expanded back into CRLF line breaks.
# Globals:   trimregex (read, via drop_credit_lines)
#######################################
strip_credits() {
  tr -d '\r' \
    | awk '{ if (joined) printf "\t"; printf "%s", $0; joined = 1 }
           /^$/ { print ""; joined = 0 }' \
    | drop_credit_lines \
    | awk '{ gsub(/\t/, "\r\n"); print }'
}

#######################################
# Extract one subtitle track and keep it if it looks like the wanted language.
# Globals:   langregex, langsuffix, minmatches (read)
# Arguments: $1 - path to the MKV file
#            $2 - subtitle track ID
# Returns:   0 if the track was handled (kept or discarded),
#            1 if nothing could be extracted
#######################################
process_track() {
  local video=$1 track_id=$2
  local base=${video%.*}
  local tmp_file="$base.srt.tmp"
  local out_file="$base.$langsuffix.srt"
  local hits

  # stdin is detached so the extractor can never swallow the caller's input.
  # The exit status is not checked; what matters is whether a file appeared.
  mkvextract tracks "$video" "$track_id:$tmp_file" </dev/null >/dev/null 2>&1
  if [[ ! -f "$tmp_file" ]]; then
    printf 'warning: could not extract track %s from %s\n' \
      "$track_id" "$video" >&2
    return 1
  fi

  # grep -c prints the count even when it is 0 (and then exits non-zero).
  hits=$(grep -Eic -- "$langregex" "$tmp_file")
  if (( ${hits:-0} >= minmatches )); then
    strip_credits <"$tmp_file" >"$out_file"
    chmod g+rw -- "$out_file"
  fi

  # Wanted language or not, the temporary extract is no longer needed.
  rm -f -- "$tmp_file"
}

#######################################
# Process every *.mkv file below a directory.
# Arguments: $1 - directory to scan (optional; defaults to ".")
#######################################
main() {
  local dir=${1:-.}
  local video track_id

  # NUL-delimited so any legal file name survives intact.
  while IFS= read -r -d '' video; do
    while IFS= read -r track_id; do
      process_track "$video" "$track_id"
    done < <(list_subtitle_tracks "$video")
  done < <(find "$dir" -type f -name '*.mkv' -print0)
}

main "$@"