Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Tries to fix an SRT file so that the subtitle times match the actual dialog times.
# Inputs:
# - the SRT file
# - a file with a list of (srt time, spoken dialog time) pairs, one per line,
#   written as "HH:MM:SS,mmm --> HH:MM:SS,mmm", that will be used to infer
#   the parameters for the srt time adjustment
# Outputs:
# - the corrected SRT file, written to standard output
# - the correction parameters, written to standard error
- import sys
- import datetime
- import re
- import math
class SrtEntry(object):
    """One subtitle cue of an SRT file.

    Attributes:
        number: sequence number of the cue (formatted with %d).
        startTime: time at which the cue appears, in seconds.
        endTime: time at which the cue disappears, in seconds.
        text: the cue text; multi-line text is newline-separated.
    """

    def __init__(self, number, startTime, endTime, text):
        self.number = number
        self.startTime = startTime
        self.endTime = endTime
        self.text = text

    def __repr__(self):
        return "SrtEntry(%d, %lf, %lf, %s)" % (self.number,
                                               self.startTime,
                                               self.endTime,
                                               repr(self.text))

    def __str__(self):
        """Format the cue as an SRT block: number, timing line, then text."""
        def breakTime(t):
            # Convert to a whole number of milliseconds first.  Truncating the
            # float fraction separately loses a millisecond for values such as
            # 1.001 (stored as 1.000999...), and working on one integer lets a
            # rounded-up fraction carry into the seconds field.
            # The HH:MM:SS,mmm layout cannot express negative times, so a time
            # pushed below zero by the correction is clamped to zero.
            totalMs = max(0, int(round(t * 1000)))
            s, ms = divmod(totalMs, 1000)
            m, s = divmod(s, 60)
            h, m = divmod(m, 60)
            return [h, m, s, ms]

        args = ([self.number]
                + breakTime(self.startTime)
                + breakTime(self.endTime)
                + [self.text])
        return "%d\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\n%s\n" % tuple(args)
def parseSubtitle(lines):
    """Build an SrtEntry from the raw lines of one subtitle cue.

    Args:
        lines: list of strings.  lines[0] is the cue number, lines[1] the
            timing line ("HH:MM:SS,mmm --> HH:MM:SS,mmm") and any remaining
            lines are the cue text.

    Returns:
        An SrtEntry whose start/end times are in seconds and whose text is
        the remaining lines, each stripped, joined with newlines.

    Raises:
        ValueError: if the cue number is not an integer, or the timing line
            is missing or does not match the expected format.
    """
    if len(lines) < 2:
        raise ValueError("incomplete subtitle entry: %r" % (lines,))
    number = int(lines[0])

    # Raw string so the \d escapes reach the regex engine untouched.
    pattern = r'(\d\d):(\d\d):(\d\d),(\d\d\d) --> (\d\d):(\d\d):(\d\d),(\d\d\d)'
    match = re.search(pattern, lines[1])
    if match is None:
        raise ValueError("malformed timing line in subtitle %d: %r"
                         % (number, lines[1]))

    h1, m1, s1, ms1, h2, m2, s2, ms2 = [int(g) for g in match.groups()]
    startTime = h1 * 3600 + m1 * 60 + s1 + ms1 / 1000.0
    endTime = h2 * 3600 + m2 * 60 + s2 + ms2 / 1000.0

    text = "\n".join([line.strip() for line in lines[2:]])
    return SrtEntry(number, startTime, endTime, text)
def parseFile(fileName):
    """Read an SRT file and return its cues as a list of SrtEntry objects.

    The file is split into groups of consecutive non-blank lines; each group
    is handed to parseSubtitle.

    Args:
        fileName: path of the SRT file to read.

    Returns:
        A list with one parsed entry per group, in file order.  A file that
        contains only blank lines yields an empty list.
    """
    # open() exists on both Python 2 and 3 (file() is Python 2 only), and the
    # with-block closes the handle even if reading fails.
    with open(fileName) as f:
        content = f.readlines()

    subtitles = []
    currentSubtitle = []
    for line in content:
        if line.strip():
            currentSubtitle.append(line)
        elif currentSubtitle:
            # A blank line terminates the group collected so far.
            subtitles.append(currentSubtitle)
            currentSubtitle = []
    # The last group is not necessarily followed by a blank line.
    if currentSubtitle:
        subtitles.append(currentSubtitle)

    return [parseSubtitle(lines) for lines in subtitles]
def parseOneCorrection(line):
    """Parse one correction line into a (srtTime, correctTime) pair.

    Args:
        line: text containing "HH:MM:SS,mmm --> HH:MM:SS,mmm".  The left
            timestamp is the time found in the srt file, the right one the
            time it should be corrected to.

    Returns:
        A tuple (srtTime, correctTime), both in seconds.

    Raises:
        ValueError: if the line does not contain a pair in that format.
    """
    # Raw string so the \d escapes reach the regex engine untouched.
    pattern = r'(\d\d):(\d\d):(\d\d),(\d\d\d) --> (\d\d):(\d\d):(\d\d),(\d\d\d)'
    match = re.search(pattern, line)
    if match is None:
        raise ValueError("malformed correction line: %r" % (line,))

    h1, m1, s1, ms1, h2, m2, s2, ms2 = [int(g) for g in match.groups()]
    srtTime = h1 * 3600 + m1 * 60 + s1 + ms1 / 1000.0
    correctTime = h2 * 3600 + m2 * 60 + s2 + ms2 / 1000.0
    return (srtTime, correctTime)
def parseCorrections(fileName):
    """Read the corrections file and return its (srtTime, correctTime) pairs.

    Args:
        fileName: path of a text file with one correction per line; blank
            lines are ignored.

    Returns:
        A list with the result of parseOneCorrection for every non-blank
        line, in file order.
    """
    # open() exists on both Python 2 and 3 (file() is Python 2 only), and the
    # with-block closes the handle even if reading fails.
    with open(fileName) as f:
        content = f.readlines()
    # strip must be *called*: comparing the bound method itself to "" is
    # always unequal, so blank lines were never filtered out.
    return [parseOneCorrection(line) for line in content if line.strip() != ""]
def leastSquares(corrections):
    """Fit y = a * x + b to the given points by ordinary least squares.

    Args:
        corrections: sequence of (x, y) pairs; here x is a time from the srt
            file and y the time it should be mapped to.

    Returns:
        A tuple (a, b) with the slope and the intercept of the fitted line.
        Degenerate inputs do not raise:
          * no points at all gives the identity mapping (1.0, 0.0);
          * a single point, or points that all share the same x, cannot
            determine a slope, so the slope is fixed at 1.0 and b is the
            mean offset y - x.
    """
    corrections = list(corrections)
    n = len(corrections)
    if n == 0:
        return (1.0, 0.0)

    # Float accumulators keep the divisions below true divisions on Python 2
    # even if the caller passes integers.
    sum_x = 0.0
    sum_y = 0.0
    sum_xx = 0.0
    sum_xy = 0.0
    for (x, y) in corrections:
        sum_x += x
        sum_y += y
        sum_xx += x * x
        sum_xy += x * y

    denominator = n * sum_xx - sum_x * sum_x
    # The denominator is n^2 times the variance of x.  When it vanishes
    # (up to float rounding, judged relative to the magnitude of its terms)
    # the slope is undetermined, so fall back to a pure time shift.
    if abs(denominator) <= 1e-12 * n * sum_xx:
        return (1.0, (sum_y - sum_x) / n)

    a = (n * sum_xy - sum_x * sum_y) / denominator
    b = (sum_xx * sum_y - sum_x * sum_xy) / denominator
    return (a, b)
def processSub(sub, a, b):
    """Return a new SrtEntry equal to sub with both times mapped to t * a + b.

    The number and text are copied unchanged; sub itself is not modified.
    """
    newStart = sub.startTime * a + b
    newEnd = sub.endTime * a + b
    return SrtEntry(sub.number, newStart, newEnd, sub.text)
if __name__ == "__main__":
    # Usage: <script> SRT_FILE CORRECTIONS_FILE
    # Writes the corrected subtitles to standard output and the fitted
    # parameters "a, b" (new_time = a * old_time + b) to standard error.
    if len(sys.argv) < 3:
        sys.stderr.write("usage: %s SRT_FILE CORRECTIONS_FILE\n" % sys.argv[0])
        sys.exit(2)

    subs = parseFile(sys.argv[1])
    corrections = parseCorrections(sys.argv[2])
    a, b = leastSquares(corrections)
    sys.stderr.write("%lf, %lf\n" % (a, b))
    for sub in subs:
        # Same output as the Python 2 statement `print sub` (str() plus a
        # newline), but also valid syntax on Python 3.
        sys.stdout.write(str(processSub(sub, a, b)) + "\n")
Add Comment
Please, Sign In to add comment