Advertisement
edfhgjkdc

scribe-json-to-ass

Apr 2nd, 2025 (edited)
42
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.31 KB | None | 0 0
  1. # ver.4 (Apr 18, 2025)
  2.  
  3. import msgspec
  4. import pysubs2
  5.  
  6. path = r"MIRU\ep3"
  7. max_word_duration = 1  # in seconds
  8.  
  9. punc1 = ["。", "!", "?", ".", "!", "?"]
  10. punc2 = ["、", ",", "、", ","]
  11. punc3 = ["…"]
  12. punc = punc1 + punc2
  13.  
  14. scribe = msgspec.json.decode(open(rf"{path}\scribe.json", "rb").read())
  15. # words = sum([segment["words"] for segment in scribe["segments"]], [])  # web ui
  16. words = scribe["words"]  # api
  17.  
  18.  
  19. def events_to_string(events):
  20.     ssa_file = pysubs2.SSAFile()
  21.     ssa_file.events = events
  22.     return (
  23.         ssa_file.to_string(format_="ass")
  24.         .split("\n[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n")[1]
  25.         .strip()
  26.     )
  27.  
  28.  
  29. subs = pysubs2.SSAFile()
  30. style = pysubs2.SSAStyle(
  31.     fontname="IPAexGothic",
  32.     fontsize=40,
  33.     primarycolor="&H00FFFFFF",
  34.     secondarycolor="&H00FFD500",
  35.     outlinecolor="&H00000000",
  36.     backcolor="&H004A4A4A",
  37.     bold=True,
  38.     alignment=2,
  39.     shadow=0,
  40. )
  41. subs.styles["ja"] = style
  42. subs.info["PlayResX"] = "1920"
  43. subs.info["PlayResY"] = "1080"
  44.  
  45. current_line = []
  46.  
  47. words = [w for w in words if w["type"] != "audio_event"]
  48. for word in words:
  49.     word["text"] = word["text"].replace("...", "…")
  50.  
  51. # if there are multiple words with text "・" in a row, replace them with a combined word with text "…" and the start time of the first word and end time of the last word. similar for "-"
  52. # ------------------------------------------
  53. i = 0
  54. while i < len(words) - 1:
  55.     if words[i]["text"] == "・" and words[i + 1]["text"] == "・":
  56.         start_idx = i
  57.         end_idx = i
  58.         while end_idx < len(words) - 1 and words[end_idx + 1]["text"] == "・":
  59.             end_idx += 1
  60.  
  61.         combined_word = dict(words[i])
  62.         combined_word["text"] = "…"
  63.         combined_word["start"] = words[start_idx]["start"]
  64.         combined_word["end"] = words[end_idx]["end"]
  65.  
  66.         words[start_idx : end_idx + 1] = [combined_word]
  67.         # no need to increment i since we've modified the list
  68.     else:
  69.         i += 1
  70.  
  71. i = 0
  72. while i < len(words) - 1:
  73.     if words[i]["text"] == "-" and words[i + 1]["text"] == "-":
  74.         start_idx = i
  75.         end_idx = i
  76.         while end_idx < len(words) - 1 and words[end_idx + 1]["text"] == "-":
  77.             end_idx += 1
  78.  
  79.         combined_word = dict(words[i])
  80.         combined_word["text"] = "——"
  81.         combined_word["start"] = words[start_idx]["start"]
  82.         combined_word["end"] = words[end_idx]["end"]
  83.  
  84.         words[start_idx : end_idx + 1] = [combined_word]
  85.         # no need to increment i since we've modified the list
  86.     else:
  87.         i += 1
  88. # ------------------------------------------
  89.  
  90. # iterate over all word in words. check if any punctation appears not in the end of word["text"] if word["type"]!="audio_event" (and word["text"]>1). for example, if word["text"] is "aaa.bbb", split it into 3 word dictionaries with text "aaa", ".", "bbb", corrsponding start and end time inferred from text length (punctation seen as taking zero time)
  91. # ------------------------------------------
  92. processed_words = []
  93.  
  94. for word in words:
  95.     if word["type"] == "audio_event" or len(word["text"]) <= 1:
  96.         processed_words.append(word)
  97.         continue
  98.  
  99.     text = word["text"]
  100.     segments = []
  101.     current_segment = ""
  102.  
  103.     for char in text:
  104.         if char in punc:
  105.             if current_segment:
  106.                 segments.append(current_segment)
  107.             segments.append(char)
  108.             current_segment = ""
  109.         else:
  110.             current_segment += char
  111.  
  112.     if current_segment:
  113.         segments.append(current_segment)
  114.  
  115.     if len(segments) <= 1:
  116.         processed_words.append(word)
  117.         continue
  118.  
  119.     non_punct_segments = [seg for seg in segments if seg not in punc]
  120.     total_length = sum(len(seg) for seg in non_punct_segments) if non_punct_segments else 0
  121.     start_time = word["start"]
  122.     total_duration = word["end"] - word["start"]
  123.  
  124.     for segment in segments:
  125.         new_word = dict(word)
  126.         new_word["text"] = segment
  127.  
  128.         if segment in punc:
  129.             new_word["start"] = start_time
  130.             new_word["end"] = start_time
  131.         else:
  132.             segment_duration = total_duration * (len(segment) / total_length) if total_length > 0 else 0
  133.             end_time = start_time + segment_duration
  134.             new_word["start"] = start_time
  135.             new_word["end"] = end_time
  136.             start_time = end_time
  137.  
  138.         processed_words.append(new_word)
  139.  
  140. words = processed_words
  141. # ------------------------------------------
  142.  
  143. flag_dict = {}
  144.  
  145. for i, word in enumerate(words):
  146.     current_line.append(word)
  147.  
  148.     # if we encounter punctuation that suggests end of line, or speaker changes, or it's the last word
  149.     if (
  150.         (word["text"][-1] in punc1 + punc3)
  151.         or (word["text"][-1] in punc2 and sum(len(i["text"]) for i in current_line) >= 15)
  152.         or (i < len(words) - 1 and word["speaker_id"] != words[i + 1]["speaker_id"])
  153.         or word == words[-1]
  154.     ):
  155.         normal_text = ""
  156.         karaoke_text = ""
  157.         speaker_id = current_line[0].get("speaker_id", "")
  158.  
  159.         duration_list = []
  160.  
  161.         for w in current_line:
  162.             while len(w["text"]) > 0 and w["text"][-1] in punc1:
  163.                 w["text"] = w["text"][:-1]
  164.             for p in punc2:
  165.                 w["text"] = w["text"].replace(p, " ")
  166.  
  167.         # if there are multiple words with text being space (" " or " ") in a row, replace them with a combined word with text " " and the start time of the first word and end time of the last word.
  168.         # ------------------------------------------
  169.         j = 0
  170.         while j < len(current_line) - 1:
  171.             if current_line[j]["text"] in [" ", " "] and current_line[j + 1]["text"] in [" ", " "]:
  172.                 start_idx2 = j
  173.                 end_idx2 = j
  174.                 while end_idx2 < len(current_line) - 1 and current_line[end_idx2 + 1]["text"] in [" ", " "]:
  175.                     end_idx2 += 1
  176.  
  177.                 combined_space = dict(current_line[start_idx2])
  178.                 combined_space["text"] = " "
  179.                 combined_space["start"] = current_line[start_idx2]["start"]
  180.                 combined_space["end"] = current_line[end_idx2]["end"]
  181.  
  182.                 current_line[start_idx2 : end_idx2 + 1] = [combined_space]
  183.             else:
  184.                 j += 1
  185.         # ------------------------------------------
  186.  
  187.         while current_line and not current_line[0]["text"].strip():
  188.             current_line = current_line[1:]
  189.         while current_line and not current_line[-1]["text"].strip():
  190.             current_line = current_line[:-1]
  191.  
  192.         if (
  193.             current_line
  194.             and sum(len(w["text"].strip()) for w in current_line) > 0
  195.             and current_line[-1]["end"] - current_line[0]["start"] > 0
  196.         ):
  197.             if current_line[0]["end"] - current_line[0]["start"] > max_word_duration:
  198.                 current_line[0]["start"] = current_line[0]["end"] - max_word_duration
  199.  
  200.             if current_line[-1]["end"] - current_line[-1]["start"] > max_word_duration:
  201.                 current_line[-1]["end"] = current_line[-1]["start"] + max_word_duration
  202.  
  203.             for w in current_line:
  204.                 duration = w["end"] - w["start"]
  205.                 duration_list.append(duration)
  206.                 normal_text += f"{w['text']}"
  207.                 karaoke_text += f"{{\\kf{round(duration * 100)}}}{w['text']}"
  208.  
  209.             if normal_text.strip():
  210.                 event = pysubs2.SSAEvent(
  211.                     start=current_line[0]["start"] * 1000,
  212.                     end=current_line[-1]["end"] * 1000,
  213.                     text=karaoke_text.strip(),
  214.                     style="ja",
  215.                     name=speaker_id.lstrip("Speaker ").lstrip("speaker_"),
  216.                 )
  217.                 subs.append(event)
  218.                 if max(duration_list) > max_word_duration:
  219.                     if len(subs) - 1 not in flag_dict:
  220.                         flag_dict[len(subs) - 1] = []
  221.                     flag_dict[len(subs) - 1].append("duration")
  222.  
  223.         current_line = []
  224.  
  225. for i, e in enumerate(subs):
  226.     if i in flag_dict:
  227.         print(f"[{i + 1}] {' '.join(flag_dict[i])}\n{events_to_string([e])}")
  228.         pass
  229.  
  230. subs.save(rf"{path}\scribe.ass")
  231. subs.save(rf"{path}\scribe.srt")
  232.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement