Advertisement
Guest User

Python srt to csv converter

a guest
May 17th, 2013
3,046
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.69 KB | None | 0 0
  # This script converts every .srt subtitle file in the working directory to
  # a .csv file with the same base name. The subtitle number, full timestamp,
  # start time, end time and utterance of each cue are stored as separate
  # variables: one CSV row per variable, with the variable name in the first
  # cell of the row.
  #
  # Runs on Python 2 and Python 3. On Python 3 the subtitle files are read,
  # and the .csv files written, as UTF-8.

  import csv
  import os
  import string

  # True only on Python 2, where str is the byte-string type and the csv
  # module expects its files to be opened in binary mode.
  _PY2 = str is bytes

  # How a UTF-8 byte-order mark looks at the start of the first line read:
  # three raw bytes on Python 2, one decoded character on Python 3.
  _BOM = "\xef\xbb\xbf" if _PY2 else "\ufeff"


  def parseSrt(lines):
      """Collect the fields of every cue found in an iterable of .srt lines.

      A cue is read the way the SubRip layout defines it: an all-digit index
      line, a timing line containing "-->", then text lines up to the next
      blank line. Text lines of one cue are joined with single spaces.

      Args:
          lines: iterable of text lines (an open file works); line endings
              and surrounding whitespace are stripped here.

      Returns:
          A list of five lists, in this order: indices, timestamps, start
          times, end times, utterances. Each list starts with its header
          cell ("index", "timestamp", "start time", "end time", "utterance").
      """
      subtitleIndices = ["index"]
      timestamps = ["timestamp"]  # the complete timing line of each cue
      timeStarts = ["start time"]  # when each subtitle appears on screen
      timeEnds = ["end time"]  # when each subtitle is removed from the screen
      utterances = ["utterance"]

      inCueText = False  # are we between a timing line and the next blank?
      for lineNumber, line in enumerate(lines):
          if lineNumber == 0 and line.startswith(_BOM):
              # otherwise the first index would not look like a number
              line = line[len(_BOM):]
          line = line.strip()
          if not line:
              inCueText = False  # a blank line ends the current cue
              continue
          if line.startswith("-"):  # prevents Excel from viewing entry as formula
              line = "'" + line

          if "-->" in line:
              startPart, _, endPart = line.partition("-->")
              timestamps.append(line)
              # keep HH:MM:SS only; splitting on the arrow instead of using
              # fixed offsets tolerates unusual spacing around it
              timeStarts.append(startPart.strip()[:8])
              timeEnds.append(endPart.strip()[:8])
              # reserve the utterance slot now so that a cue without any
              # text cannot shift the utterances of the cues after it
              utterances.append("")
              inCueText = True
          elif inCueText:
              # inside a cue everything is text, including one-character
              # lines and lines that consist only of digits
              if utterances[-1]:
                  utterances[-1] = utterances[-1] + " " + line
              else:
                  utterances[-1] = line
          elif line.isdigit():
              subtitleIndices.append(line)
          else:
              # text that is not preceded by a timing line is still kept
              utterances.append(line)
              inCueText = True

      return [subtitleIndices, timestamps, timeStarts, timeEnds, utterances]


  def convertSrtFile(srtPath, csvPath):
      """Read the subtitle file at srtPath and write its fields to csvPath.

      The output uses the csv module's 'excel' dialect and contains the five
      rows returned by parseSrt. Both files are closed before returning,
      also when reading or writing fails.
      """
      if _PY2:
          srtFile = open(srtPath)
      else:
          srtFile = open(srtPath, encoding="utf-8")
      with srtFile:
          rows = parseSrt(srtFile)

      if _PY2:
          csvFile = open(csvPath, "wb")
      else:
          # the csv module does its own newline handling on Python 3
          csvFile = open(csvPath, "w", newline="", encoding="utf-8")
      with csvFile:
          csv.writer(csvFile, dialect="excel").writerows(rows)


  filePath = "./"
  dirList = os.listdir(filePath)

  for filename in dirList:
      root, extension = os.path.splitext(filename)
      if extension != ".srt":  # skip files other than subtitle files
          continue
      # put all the lists in a csv file with the same name as the subtitle file
      convertSrtFile(os.path.join(filePath, filename),
                     os.path.join(filePath, root + ".csv"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement