Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import os
- import subprocess
- import json
- import os.path
- from multiprocessing import Pool
- from collections import defaultdict
# Suffix that replaces ".mkv" to name each finished, retimed subtitle file.
NEW_SRT_SUFFIX = ".jp.srt"

# Work from the parent of the folder that contains this script, so that
# os.listdir() and the relative tool paths resolve against that folder.
os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))

# Runtime settings; 'split_pen' is passed to alass as --split-penalty.
CONF = dict(split_pen='7')
def get_lib_folder():
    """Return the relative folder name under which the helper tools are looked up.

    Callers join this name with the tool's sub-path (ffprobe, mkvextract,
    alass) to build the executable path.
    """
    lib_folder = "auto-sub-retimer"
    return lib_folder
def remove_styles(lines, styles):
    """Return *lines* without the dialogue entries whose style is not in *styles*.

    Only lines starting with ``"Dialogue: "`` are filtered; their style is
    taken from the fourth comma-separated field. Every other line is kept
    unchanged. The input list is not modified.
    """
    def is_kept(entry):
        # Non-dialogue lines (headers, style definitions, ...) always survive.
        if not entry.startswith("Dialogue: "):
            return True
        return entry.split(',')[3] in styles

    return [entry for entry in lines if is_kept(entry)]
def count_style_occurences(lines, style):
    """Count the ``Dialogue:`` lines in *lines* whose style field equals *style*.

    The style is the fourth comma-separated field of a line that starts
    with ``"Dialogue: "``; all other lines are ignored.
    """
    return sum(
        1
        for entry in lines
        if entry.startswith("Dialogue: ") and entry.split(',')[3] == style
    )
def get_example_lines(lines, style, sample=5):
    """Return up to *sample* ``Dialogue:`` lines that use *style*.

    Args:
        lines: Subtitle file lines to scan, in order.
        style: Style name to match against the fourth comma-separated
            field of each line starting with ``"Dialogue: "``.
        sample: Maximum number of lines to return. A value of zero or
            less yields an empty list.

    Returns:
        The first matching lines, in their original order.
    """
    examples = []
    # A countdown that only stops at exactly zero would never stop for a
    # non-positive sample and would return every match, so bail out early.
    if sample <= 0:
        return examples
    for line in lines:
        if line.startswith("Dialogue: ") and line.split(',')[3] == style:
            examples.append(line)
            if len(examples) >= sample:
                break
    return examples
def get_styles(lines):
    """Collect the style names declared by the ``Style:`` lines in *lines*.

    For every line starting with ``"Style: "`` the text between that prefix
    and the first comma is returned. Names appear in file order and
    duplicates are not removed.
    """
    prefix = 'Style: '
    return [
        entry[len(prefix):].split(',')[0]
        for entry in lines
        if entry.startswith(prefix)
    ]
def select_keep_styles(all_styles, lines):
    """Ask the user which subtitle styles should be kept.

    Styles are listed from most to least used, each with its usage count;
    the three most used styles are also shown with example dialogue lines.
    The user answers with space-separated list numbers.

    Returns:
        *all_styles* unchanged when the answer is blank, otherwise the
        names of the styles whose list number was entered.
    """
    ranked = [(name, count_style_occurences(lines, name)) for name in all_styles]
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    for position, (name, uses) in enumerate(ranked):
        print("[{}] {} ({} lines use this style)".format(position, name, uses))
        if position < 3:
            print(''.join(get_example_lines(lines, name)))

    prompt = "Which sub styles to keep? Enter numbers separated by spaces or leave blank for all: "
    chosen = input(prompt).split()
    if not chosen:
        return all_styles
    return [
        name
        for position, (name, _uses) in enumerate(ranked)
        if str(position) in chosen
    ]
def extract_eng_subs(mkv, index=None):
    """Extract one subtitle track from *mkv* into a file beside it.

    ffprobe is used to list the subtitle streams of the video. The chosen
    track is then written by mkvextract to the mkv's name with ".mkv"
    replaced by ".EXTRACTED.<codec_name>".

    Args:
        mkv: Path of the video file.
        index: Stream index to extract. When None, the only subtitle
            stream is used, or the user is asked to pick one if there
            are several.

    Returns:
        The stream index that was extracted.

    Raises:
        Exception: If *index* is None and the file has no subtitle streams.
        KeyError: If *index* is not among the file's subtitle streams.
    """
    print("\nExtracting english subtitles, this may take a while...\n")
    probe = subprocess.run(
        [
            os.path.join(get_lib_folder(), "ffmpeg\\bin\\ffprobe.exe"),
            "-v",
            "quiet",
            "-print_format",
            "json",
            "-show_streams",
            "-select_streams",
            "s",
            mkv,
        ],
        capture_output=True,
        universal_newlines=True,
    )
    mkv_json = json.loads(probe.stdout)

    # Use the subtitle track as provided. If there is not one,
    # ask instead.
    if index is None:
        streams = mkv_json.get("streams")
        if not streams:
            raise Exception("No subtitle streams to extract? Can't do any syncing. {}".format(mkv))
        elif len(streams) == 1:
            index = streams[0]["index"]
        else:
            for s in streams:
                # Not every stream carries a title tag; show a placeholder
                # instead of failing on the missing key.
                title = s.get('tags', {}).get('title', 'Unknown')
                print(f"{s['index']}: {title}")
            index = int(input("Pick the stream to retime against: "))

    # Extract
    stream = {s["index"]: s for s in mkv_json["streams"]}[index]
    extracted = mkv.replace(".mkv", f".EXTRACTED.{stream['codec_name']}")
    subprocess.run([os.path.join(get_lib_folder(), "mkvtoolnix\\mkvextract.exe"), "tracks", mkv, f"{index}:{extracted}"])
    return index
def fix_styling():
    """Strip unwanted styles from the extracted .ass subtitles in the cwd.

    Every file in the current directory whose name contains ".EXTRACTED."
    is considered. If there are none, or the first one is not an .ass
    file, nothing is changed. Otherwise the user is asked which styles to
    keep and each file is rewritten without the dialogue lines of the
    other styles.
    """
    extracted_subs = [f for f in os.listdir() if '.EXTRACTED.' in f]
    if not extracted_subs:
        print("No extracted subs found. Skipping style removal.")
        return
    if extracted_subs[0].split('.')[-1] != 'ass':
        print("Extracted subs are not .ASS format. Skipping style removal.")
        return

    # Read each file once, closing it promptly, and reuse the lines for
    # both the style survey and the rewrite.
    contents = {}
    all_lines = []
    for sub in extracted_subs:
        with open(sub, encoding="utf-8") as ass:
            contents[sub] = ass.readlines()
        all_lines.extend(contents[sub])

    all_styles = list(set(get_styles(all_lines)))
    keep_styles = select_keep_styles(all_styles, all_lines)

    for sub, lines in contents.items():
        # The lines already end in "\n"; joining them with another line
        # break would insert a blank line after every line. newline="\r\n"
        # makes the file use CRLF endings on any platform.
        with open(sub, 'w', encoding="utf-8", newline='\r\n') as ass:
            ass.writelines(remove_styles(lines, keep_styles))
def retime_based_on_audio(mkv, srt):
    """Retime *srt* against the video file itself using alass.

    alass is run with the configured split penalty, using *mkv* as the
    reference. On success *srt* is deleted and the retimed result is
    renamed to the mkv's name with ".mkv" replaced by NEW_SRT_SUFFIX.

    Args:
        mkv: Path of the reference video file.
        srt: Path of the subtitle file to retime; removed on success.

    Raises:
        FileNotFoundError: If alass did not produce the retimed file. The
            original *srt* is left in place in that case.
    """
    retimed = mkv.replace(".mkv", ".jp.RETIMED.srt")
    subprocess.run([os.path.join(get_lib_folder(), "alass\\alass.bat"), "--split-penalty", CONF['split_pen'], mkv, srt, retimed])
    # Only discard the user's subtitle once its replacement really exists.
    if not os.path.exists(retimed):
        raise FileNotFoundError(
            "alass did not produce {}; leaving {} untouched.".format(retimed, srt)
        )
    os.remove(srt)
    os.rename(retimed, mkv.replace(".mkv", NEW_SRT_SUFFIX))
def retime(mkv, srt):
    """Retime *srt* against the subtitle track previously extracted from *mkv*.

    The reference is the first file in the current directory whose name
    contains the mkv's name with ".mkv" replaced by ".EXTRACTED.". alass
    is run with the configured split penalty. On success *srt* and the
    extracted reference are deleted and the retimed result is renamed to
    the mkv's name with ".mkv" replaced by NEW_SRT_SUFFIX.

    Args:
        mkv: Path of the video file the reference track was extracted from.
        srt: Path of the subtitle file to retime; removed on success.

    Raises:
        FileNotFoundError: If no extracted reference exists for *mkv*, or
            if alass did not produce the retimed file. The original *srt*
            is left in place in both cases.
    """
    retimed = mkv.replace(".mkv", ".jp.RETIMED.srt")
    marker = mkv.replace(".mkv", ".EXTRACTED.")
    extracted = next((f for f in os.listdir() if marker in f), None)
    if extracted is None:
        raise FileNotFoundError(
            "No extracted subtitle track found for {}.".format(mkv)
        )
    subprocess.run([os.path.join(get_lib_folder(), "alass\\alass.bat"), "--split-penalty", CONF['split_pen'], extracted, srt, retimed])
    # Only discard the user's subtitle once its replacement really exists.
    if not os.path.exists(retimed):
        raise FileNotFoundError(
            "alass did not produce {}; leaving {} untouched.".format(retimed, srt)
        )
    os.remove(srt)
    os.rename(retimed, mkv.replace(".mkv", NEW_SRT_SUFFIX))
    os.remove(extracted)
def _init_worker(split_pen):
    """Store the chosen split penalty in this worker process's CONF.

    Worker processes do not see assignments the parent makes to CONF after
    they have started, so the value is handed over explicitly.
    """
    CONF['split_pen'] = split_pen


if __name__ == '__main__':
    # Guess the local mkv/srt pairs that need syncing
    files = os.listdir()
    srts = sorted(f for f in files if f.endswith(".srt"))
    mkvs = [f for f in files if f.endswith(".mkv")]
    while len(mkvs) > len(srts):
        print("Found more .mkv files than .srt files. This can occur when OP/ED/extras are in the same folder as episodes.")
        print("Please enter a pattern to filter out of mkv results. (e.g. OP)")
        pattern = input(">>> Pattern: ")
        # Collect the matches before filtering, otherwise there is nothing
        # left to report.
        removed = [m for m in mkvs if pattern in m]
        mkvs = [m for m in mkvs if pattern not in m]
        print("Ignoring filtered mkv files: {}".format("\n".join(removed)))
    mkvs.sort()

    if not mkvs or len(mkvs) != len(srts):
        print("ERROR: Some .srt files don't have a matching .mkv!")
        print("{} srt files found. {} mkv files found".format(len(srts), len(mkvs)))
        print("Ensure auto-sub-retimer folder is in same folder as your .mkv and .srt files.")
        print("Ensure you have an equal number of .mkv and .srt files!")
        input("press enter to exit...")
        raise SystemExit(1)

    print("[1] Retime using embedded subs. (Default)")
    print("[2] Retime using audio.")
    retime_choice = input("Enter desired option number: ")
    SPLIT_PEN = input("Split Penalty? Only change this if default resulted in mis-timed subs. (Default 7): ").strip()
    if not SPLIT_PEN:
        SPLIT_PEN = "7"
    CONF['split_pen'] = SPLIT_PEN

    # Start the workers only after the settings are known and pass the
    # split penalty to each of them; the with-block shuts the pool down.
    with Pool(initializer=_init_worker, initargs=(SPLIT_PEN,)) as pool:
        if "2" in retime_choice:
            pool.starmap(retime_based_on_audio, zip(mkvs, srts))
        else:
            # Extract the first file in this process so the user can be
            # asked for the track; the same index is reused for all files.
            index = extract_eng_subs(mkvs[0])
            pool.starmap(extract_eng_subs, [(x, index) for x in mkvs])
            print("English Sub Extraction Complete.")
            fix_styling()
            pool.starmap(retime, zip(mkvs, srts))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement