Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""
Turn the .srt subtitle files in the current directory into .txt files.

1. Unzip the lesson files into a directory.
2. Run this script from that directory.

Every line that starts with a digit (the cue counters and the timestamp
lines) is blanked, and each run of four newlines that this leaves between
two cues is collapsed into a single space. The result is a text file that
you can use for reference and class notes.
"""
import re
from os import listdir, getcwd
from os.path import isfile, join, splitext

# Input is decoded as ISO-8859-1, which accepts every byte value. Writing the
# output with the same codec round-trips the bytes unchanged and keeps the
# result independent of the platform's default encoding.
_ENCODING = "ISO-8859-1"

# A whole line that begins with a digit: cue counters ("12") and timestamp
# lines ("00:00:01,000 --> 00:00:02,000").
# NOTE: a caption line that itself starts with a digit is removed as well.
_DIGIT_LINE = re.compile(r'^\d.*?$', flags=re.M | re.S)

# What remains between two cues once the counter and timestamp lines have
# been blanked: the blank separator line plus the two emptied lines.
_CUE_GAP = '\n\n\n\n'


def strip_srt_markup(raw_text):
    """Return *raw_text* with digit-led lines blanked and cue gaps collapsed.

    Lines starting with a digit are replaced by empty lines, then every run
    of four consecutive newlines is replaced by one space.
    """
    without_stamps = _DIGIT_LINE.sub('', raw_text)
    return without_stamps.replace(_CUE_GAP, ' ')


def convert_srt_files(directory):
    """Write a cleaned .txt file next to every .srt file in *directory*.

    Only regular files whose extension is ``.srt`` (case-insensitive) are
    read; every other file in the directory is left untouched. An existing
    .txt file with the same stem is overwritten.

    Returns the list of .txt paths that were written, in name order.
    """
    written = []
    for name in sorted(listdir(directory)):
        source = join(directory, name)
        stem, extension = splitext(name)
        # Skip anything that is not a subtitle file; otherwise the output
        # name would equal the input name and the file would be clobbered.
        if extension.lower() != '.srt' or not isfile(source):
            continue
        with open(source, mode='r', encoding=_ENCODING) as subtitle_file:
            raw_text = subtitle_file.read()
        target = join(directory, stem + '.txt')
        with open(target, mode='w', encoding=_ENCODING) as text_file:
            text_file.write(strip_srt_markup(raw_text))
        written.append(target)
    return written


# Script behaviour: convert everything in the directory the script is run from.
convert_srt_files(getcwd())
print('done')
Add Comment
Please, Sign In to add comment