Guest User

Untitled

a guest
Nov 12th, 2018
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.75 KB | None | 0 0
  1. """
  2. Turn the .srt files into .txt files
  3. 1. unzip the lesson files into a directory
  4. 2. run this code in that directory.
  5. the code will strip out the timestamps and carriage returns
  6. and produce a text file that you can use for reference and class notes.
  7. """
  8. import re
  9. from os import listdir, getcwd
  10. from os.path import isfile, join
  11.  
  12. currpath = getcwd()
  13. thefiles = [f for f in listdir(currpath) if isfile(join(currpath, f))]
  14.  
  15. for f in thefiles:
  16. file = open(f, mode='r', encoding = "ISO-8859-1")
  17. # read it all
  18. raw_text = file.read()
  19. raw_text = re.sub('^\d.*?$', '', raw_text, flags=re.M|re.S)
  20. raw_text = re.sub('\n\n\n\n', ' ', raw_text, flags=re.M|re.S)
  21. fixed = open(f.replace('.srt', '.txt') , mode='w')
  22. fixed.write(raw_text)
  23.  
  24. print('done')
Add Comment
Please, Sign In to add comment