Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- from typing import List
- import json
class Subtitle:
    """A single subtitle cue: a time span plus the text shown during it.

    Attributes:
        start: Time at which the cue appears (``parse_srt`` in this file
            passes seconds).
        end: Time at which the cue disappears, in the same unit as ``start``.
        text: The cue's text.
    """

    def __init__(self, start: float, end: float, text: str):
        self.start = start
        self.end = end
        self.text = text

    def __repr__(self) -> str:
        # Readable form for logs and debugging; not relied on for parsing.
        return (
            f"{type(self).__name__}("
            f"start={self.start!r}, end={self.end!r}, text={self.text!r})"
        )
def parse_srt(file_path: str) -> List["Subtitle"]:
    """Parse a SubRip (.srt) file into a list of ``Subtitle`` objects.

    The file is read as UTF-8 and split into cue blocks on blank lines.
    Within each block the first line is ignored, the second line is the
    ``start --> end`` timing line, and all remaining lines are the cue text.
    Blocks with fewer than three lines are skipped.

    Args:
        file_path: Path of the .srt file to read.

    Returns:
        The parsed cues in file order, with ``start`` and ``end`` expressed
        in seconds.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a timing line does not have exactly one ``-->``
            separator or a timestamp is not ``HH:MM:SS`` with an optional
            ``,mmm`` / ``.mmm`` fraction.
    """

    def timestamp_to_seconds(timestamp: str) -> float:
        """Convert ``HH:MM:SS,mmm`` (or ``HH:MM:SS.mmm``) to seconds."""
        # Normalise the decimal comma here so the helper is correct on its
        # own instead of relying on every caller to pre-process the string.
        hours, minutes, seconds = timestamp.replace(',', '.').split(':')
        return int(hours) * 3600 + int(minutes) * 60 + float(seconds)

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    subtitles = []
    for block in re.split(r'\n\s*\n', content.strip()):
        lines = block.strip().splitlines()
        if len(lines) < 3:
            # Needs at least: index line, timing line, one text line.
            continue

        start_str, end_str = re.split(r'\s*-->\s*', lines[1])
        # Some files append cue-position metadata after the end timestamp
        # (e.g. "X1:100 X2:200 Y1:50 Y2:80"). Cut at the first whitespace
        # that precedes a letter so that tail does not break float().
        end_str = re.split(r'\s+(?=[A-Za-z])', end_str, maxsplit=1)[0]

        start = timestamp_to_seconds(start_str)
        end = timestamp_to_seconds(end_str)

        # NOTE(review): the separator is a literal backslash followed by
        # "n" (two characters), not a newline. Kept exactly as written
        # because callers may depend on it -- confirm this is intended.
        text = '\\n'.join(lines[2:])
        subtitles.append(Subtitle(start, end, text))

    return subtitles
Advertisement
Add Comment
Please, Sign In to add comment