hhhzzzsss

srt parser

Jul 26th, 2025 (edited)
491
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.05 KB | None | 0 0
  1. import re
  2. from typing import List
  3. import json
  4.  
  5. class Subtitle:
  6.     def __init__(self, start: float, end: float, text: str):
  7.         self.start = start
  8.         self.end = end
  9.         self.text = text
  10.  
  11. def parse_srt(file_path: str) -> List[Subtitle]:
  12.     def timestamp_to_seconds(timestamp: str) -> float:
  13.         hours, minutes, seconds = re.split("[:,]", timestamp)
  14.         return int(hours) * 3600 + int(minutes) * 60 + float(seconds)
  15.  
  16.     with open(file_path, 'r', encoding='utf-8') as f:
  17.         content = f.read()
  18.  
  19.     blocks = re.split(r'\n\s*\n', content.strip())
  20.     subtitles = []
  21.  
  22.     for block in blocks:
  23.         lines = block.strip().splitlines()
  24.         if len(lines) >= 3:
  25.             timing_line = lines[1]
  26.             start_str, end_str = re.split(r'\s*-->\s*', timing_line)
  27.             start = timestamp_to_seconds(start_str.replace(',', '.'))
  28.             end = timestamp_to_seconds(end_str.replace(',', '.'))
  29.             text = '\\n'.join(lines[2:])
  30.             subtitles.append(Subtitle(start, end, text))
  31.  
  32.     return subtitles
Advertisement
Add Comment
Please, Sign In to add comment