Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import codecs
def merge_subtitle_lines(input_file, output_file, max_characters):
    """Merge the text lines of each SRT cue into lines of bounded length.

    The input is read as UTF-8 (a leading BOM, if any, is dropped) and the
    result is written to ``output_file`` as UTF-8 without a BOM.

    Lines are classified after stripping surrounding whitespace:

    * digits only                       -> cue index, passed through
    * ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` -> timestamp, passed through
    * empty                             -> cue separator, passed through
    * anything else                     -> subtitle text

    Consecutive text lines are joined with a single space for as long as the
    joined text is at most ``max_characters`` characters.  When the next text
    line would not fit, the pending text is written out and a new output line
    is started.  A single text line longer than the limit is written
    unchanged; text is never split.

    Args:
        input_file: Path of the ``.srt`` file to read.
        output_file: Path of the ``.srt`` file to write (overwritten).
        max_characters: Maximum length of a merged text line.

    Returns:
        None. The result is written to ``output_file``.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    # Compile once instead of on every line of the file.
    index_re = re.compile(r'^\d+$')
    timestamp_re = re.compile(
        r'^\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}$'
    )

    # The built-in open() applies universal-newline translation, so "\r\n"
    # input does not leak stray "\r" characters into the output.
    with open(input_file, 'r', encoding='utf-8-sig') as f:
        content = f.readlines()

    new_content = []
    buffer = []  # pending text lines of the output line being assembled

    def flush_buffer():
        # The buffer holds either one line or several whose space-joined
        # length fits the limit, so joining with ' ' is always correct.
        if buffer:
            new_content.append(' '.join(buffer) + '\n')
            buffer.clear()

    for line in content:
        text = line.strip()
        if index_re.match(text) or timestamp_re.match(text):
            # Emit pending text first so output order follows input order
            # even when the blank separator between cues is missing.
            flush_buffer()
            new_content.append(text + '\n')
        elif text:
            # Start a new output line when this one would overflow the limit.
            if buffer and len(' '.join(buffer + [text])) > max_characters:
                flush_buffer()
            buffer.append(text)
        else:
            flush_buffer()
            new_content.append('\n')

    # The last cue may not be followed by a blank line.
    flush_buffer()

    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(new_content)
# Script driver: merge the text lines of one subtitle file, allowing at most
# 56 characters per merged line, and write the result to the output path.
input_file, output_file = (
    "E:/Desktop/testsub/16.Sub_ING.srt",
    "E:/Desktop/testsub/out/16.Sub_ING1.srt",
)
max_characters = 56
merge_subtitle_lines(
    input_file=input_file,
    output_file=output_file,
    max_characters=max_characters,
)
Advertisement
Advertisement