Advertisement
Guest User

Untitled

a guest
Oct 10th, 2020
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.72 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import os
  4. import subprocess
  5. import json
  6. import os.path
  7. from multiprocessing import Pool
  8. from collections import defaultdict
  9.  
# Suffix that replaces ".mkv" in the video's name to form the final
# retimed subtitle's file name.
NEW_SRT_SUFFIX = ".jp.srt"
# Switch the working directory to the parent of the folder containing this
# script; every relative path below (os.listdir(), the tool folder returned
# by get_lib_folder()) is resolved from there.
os.chdir(os.path.dirname(os.path.realpath(__file__)))
os.chdir('..')
# Shared settings. 'split_pen' is handed to alass as the value of its
# --split-penalty option; it is kept as a string because it goes straight
# into a command line.
CONF = {'split_pen': '7'}
  14.  
  15. def get_lib_folder():
  16.     return "auto-sub-retimer"
  17.  
  18. def remove_styles(lines, styles):
  19.     res = []
  20.     for line in lines:
  21.         if line.startswith("Dialogue: "):
  22.             style = line.split(',')[3]
  23.             if style not in styles:
  24.                 continue
  25.         res.append(line)
  26.     return res
  27.  
  28. def count_style_occurences(lines, style):
  29.     res = 0
  30.     for line in lines:
  31.         if line.startswith("Dialogue: "):
  32.             line_style = line.split(',')[3]
  33.             if style == line_style:
  34.                 res += 1
  35.     return res
  36.  
  37. def get_example_lines(lines, style, sample=5):
  38.     res = []
  39.     for line in lines:
  40.         if line.startswith("Dialogue: "):
  41.             line_style = line.split(',')[3]
  42.             if style == line_style:
  43.                 res.append(line)
  44.                 sample -= 1
  45.                 if sample == 0:
  46.                     return res
  47.     return res
  48.  
  49. def get_styles(lines):
  50.     res = []
  51.     for line in lines:
  52.         if line.startswith('Style: '):
  53.             res.append(line[7:].split(',')[0])
  54.     return res
  55.  
  56. def select_keep_styles(all_styles, lines):
  57.     styles_occ = sorted([(s, count_style_occurences(lines, s)) for s in all_styles], key=lambda x: x[1], reverse=True)
  58.     for i, s in enumerate(styles_occ):
  59.         print("[{}] {} ({} lines use this style)".format(i, s[0], s[1]))
  60.         if i < 3:
  61.             print(''.join(get_example_lines(lines, s[0])))
  62.     keep_indices = input("Which sub styles to keep? Enter numbers separated by spaces or leave blank for all: ").split()
  63.     if not keep_indices:
  64.         return all_styles
  65.     return [s[0] for i, s in enumerate(styles_occ) if str(i) in keep_indices]
  66.  
  67. def extract_eng_subs(mkv, index=None):
  68.     print("\nExtracting english subtitles, this may take a while...\n")
  69.     mkv_json = json.loads(
  70.         subprocess.run(
  71.             [
  72.                 os.path.join(get_lib_folder(), "ffmpeg\\bin\\ffprobe.exe"),
  73.                 "-v",
  74.                 "quiet",
  75.                 "-print_format",
  76.                 "json",
  77.                 "-show_streams",
  78.                 "-select_streams",
  79.                 "s",
  80.                 mkv,
  81.             ],
  82.             capture_output=True,
  83.             universal_newlines=True,
  84.         ).stdout
  85.     )
  86.  
  87.     # Use the subtitle track as provided. If there is not one,
  88.     # ask instead.
  89.     if index is None:
  90.         if not mkv_json.get("streams"):
  91.             raise Exception("No subtitle streams to extract? Can't do any syncing. {}".format(mkv))
  92.         elif len(mkv_json["streams"]) == 1:
  93.             index = mkv_json["streams"][0]["index"]
  94.         else:
  95.             for s in mkv_json["streams"]:
  96.                 title = 'Unknown'
  97.                 try:
  98.                     title = s['tags']['title']
  99.                 except:
  100.                     pass
  101.                 print(f"{s['index']}: {s['tags']['title']}")
  102.             index = int(input("Pick the stream to retime against: "))
  103.  
  104.     # Extract
  105.     stream = {s["index"]: s for s in mkv_json["streams"]}[index]
  106.     extracted = mkv.replace(".mkv", f".EXTRACTED.{stream['codec_name']}")
  107.     subprocess.run([os.path.join(get_lib_folder(), "mkvtoolnix\\mkvextract.exe"), "tracks", mkv, f"{index}:{extracted}"])
  108.     return index
  109.  
  110. def fix_styling():
  111.     extracted_subs = [f for f in os.listdir() if '.EXTRACTED.' in f]
  112.     if extracted_subs[0].split('.')[-1] != 'ass':
  113.         print("Extracted subs are not .ASS format. Skipping style removal.")
  114.         return
  115.     all_lines = sum([open(f, encoding="utf-8").readlines() for f in extracted_subs], [])
  116.     all_styles = list(set(get_styles(all_lines)))
  117.     keep_styles = select_keep_styles(all_styles, all_lines)
  118.     for sub in extracted_subs:
  119.         with open(sub, encoding="utf-8") as ass:
  120.             lines = ass.readlines()
  121.         lines = remove_styles(lines, keep_styles)
  122.         with open(sub, 'w', encoding="utf-8") as ass:
  123.             ass.write('\r\n'.join(lines))
  124.  
  125. def retime_based_on_audio(mkv, srt):
  126.     retimed = mkv.replace(".mkv", ".jp.RETIMED.srt")
  127.     subprocess.run([os.path.join(get_lib_folder(), "alass\\alass.bat"), "--split-penalty", CONF['split_pen'], mkv, srt, retimed])
  128.     os.remove(srt)
  129.     os.rename(retimed, mkv.replace(".mkv", NEW_SRT_SUFFIX))
  130.  
  131. def retime(mkv, srt):
  132.     retimed = mkv.replace(".mkv", ".jp.RETIMED.srt")
  133.     extracted = [f for f in os.listdir() if mkv.replace(".mkv", ".EXTRACTED.") in f][0]
  134.     subprocess.run([os.path.join(get_lib_folder(), "alass\\alass.bat"), "--split-penalty", CONF['split_pen'], extracted, srt, retimed])
  135.     os.remove(srt)
  136.     os.rename(retimed, mkv.replace(".mkv", NEW_SRT_SUFFIX))
  137.     os.remove(extracted)
  138.  
  139. if __name__ == '__main__':
  140.     # Guess the local mkv/srt pairs that need syncing
  141.     files = os.listdir()
  142.     srts = sorted([f for f in files if f.endswith(".srt")])
  143.     mkvs = [f for f in files if f.endswith(".mkv")]
  144.     while len(mkvs) > len(srts):
  145.         print("Found more .mkv files than .srt files. This can occur when OP/ED/extras are in the same folder as episodes.")
  146.         print("Please enter a pattern to filter out of mkv results. (e.g. OP)")
  147.         pattern = input(">>> Pattern: ")
  148.         mkvs = [m for m in mkvs if pattern not in m]
  149.         removed = [m for m in mkvs if pattern in m]
  150.         print("Ignoring filtered mkv files: {}".format("\n".join(removed)))
  151.     mkvs = sorted([f for f in mkvs])
  152.     if len(mkvs) == 0 or len(mkvs) != len(srts):
  153.         print("ERROR: Some .srt files don't have a matching .mkv!")
  154.         print("{} srt files found. {} mkv files found".format(len(srts), len(mkvs)))
  155.         print("Ensure auto-sub-retimer folder is in same folder as your .mkv and .srt files.")
  156.         print("Ensure you have an equal number of .mkv and .srt files!")
  157.         input("press enter to exit...")
  158.         exit(1)
  159.     pool = Pool()
  160.     print("[1] Retime using embedded subs. (Default)")
  161.     print("[2] Retime using audio.")
  162.     retime_choice = input("Enter desired option number: ")
  163.     SPLIT_PEN = input("Split Penalty? Only change this if default resulted in mis-timed subs. (Default 7): ").strip()
  164.     if not SPLIT_PEN:
  165.         SPLIT_PEN = "7"
  166.     CONF['split_pen'] = SPLIT_PEN
  167.     if "2" in retime_choice:
  168.         pool.starmap(retime_based_on_audio, zip(mkvs, srts))
  169.     else:
  170.         index = extract_eng_subs(mkvs[0])
  171.         pool.starmap(extract_eng_subs, [(x, index) for x in mkvs])
  172.         print("English Sub Extraction Complete.")
  173.         fix_styling()
  174.         pool.starmap(retime, zip(mkvs, srts))
  175.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement