Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- import os
# Root directory handed to os.walk() by the main script; "." is the current
# working directory.
subtitle_dir = r"."
- """
- - Recursively walks the given directory looking for files with the *.srt and *.sub extensions.
- - Tries to open each of them, in turn, with the utf-8-sig, windows-1250 and utf-8 encodings.
- - Reads the content (decoding it to text) and writes it back to the original file
- in the utf-8-sig encoding, i.e. UTF-8 with a BOM.
- """
def readContent(fn, encodings=('utf-8-sig', 'windows-1250', 'utf-8')):
    """Read a text file, trying each candidate encoding in turn.

    Args:
        fn: Path of the file to read.
        encodings: Encoding names to try, in order. The first one that
            decodes the whole file without error is used.

    Returns:
        A tuple ``(encoding, lines)``: the name of the encoding that
        succeeded and the list of lines returned by ``readlines()``.

    Raises:
        UnicodeDecodeError: If none of the encodings can decode the file.

    Note:
        Python's 'utf-8-sig' codec also decodes UTF-8 data that has no BOM,
        so such files are reported as 'utf-8-sig' when it is tried first.
    """
    last_error = None
    for enc in encodings:
        try:
            with open(fn, "rt", encoding=enc) as f:
                return enc, f.readlines()
        except UnicodeDecodeError as e:
            # Remember the failure so its details can be re-used below;
            # the name bound by "as" is cleared when the handler exits.
            last_error = e

    reason = f"Unknown encoding: {fn}"
    if last_error is None:
        # No encoding was tried at all (empty ``encodings``).
        raise UnicodeDecodeError('unknown', b'', 0, 0, reason)
    # UnicodeDecodeError requires (encoding, object, start, end, reason);
    # constructing it with a single message raises TypeError instead.
    raise UnicodeDecodeError(
        last_error.encoding,
        last_error.object,
        last_error.start,
        last_error.end,
        reason,
    ) from last_error
def writeContent(fn, lines, encoding='utf-8-sig'):
    """Write *lines* to the file *fn* in text mode using *encoding*.

    Args:
        fn: Path of the file to create or overwrite.
        lines: Iterable of strings, written one after another with nothing
            added between them.
        encoding: Encoding of the output file; the default 'utf-8-sig'
            writes UTF-8 preceded by a BOM.
    """
    with open(fn, "wt", encoding=encoding) as out:
        out.writelines(lines)
- # ----------------------------------------------------------------------
- # MAIN
- # ----------------------------------------------------------------------
if __name__ == '__main__':
    # Walk the directory tree and re-save every subtitle file whose detected
    # encoding is not 'utf-8-sig' as UTF-8 with BOM.
    for folder, _subdirs, names in os.walk(subtitle_dir):
        print(f"[{folder}]")
        for name in names:
            path = os.path.join(folder, name)
            suffix = os.path.splitext(path)[1].lower()
            # Only subtitle files are of interest; skip everything else.
            if suffix not in (".srt", ".sub"):
                continue
            print(f"\t{name}")
            try:
                detected, content = readContent(path)
                if detected != 'utf-8-sig':
                    print(f"\t\tencoding: {detected}")
                    writeContent(path, content, 'utf-8-sig')
            except UnicodeDecodeError:
                print("\t\tunknown encoding - unchanged.")
    # NOTE(review): the original indentation was lost in the paste; the blank
    # line and the prompt are assumed to follow the whole walk - confirm.
    print()
    input("Press Enter to continue...")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement