Guest User

Untitled

a guest
Jul 19th, 2018
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.99 KB | None | 0 0
# Tries to fix an SRT file so that the subtitle times match the actual dialog times.
# Inputs:
# - the SRT file
# - a list of (srt time, spoken dialog time) pairs that will be used
#   to infer the parameters for the SRT time adjustment
# Outputs:
# - the corrected SRT file to standard output
# - the correction parameters to standard error
  9.  
  10. import sys
  11. import datetime
  12. import re
  13. import math
  14.  
  15. class SrtEntry(object):
  16. def __init__(self, number, startTime, endTime, text):
  17. self.number = number
  18. self.startTime = startTime
  19. self.endTime = endTime
  20. self.text = text
  21.  
  22. def __repr__(self):
  23. return "SrtEntry(%d, %lf, %lf, %s)" % (self.number,
  24. self.startTime,
  25. self.endTime,
  26. repr(self.text))
  27.  
  28. def __str__(self):
  29. def breakTime(t):
  30. h = int(t / 3600)
  31. m = int((t - h * 3600) / 60)
  32. s = int(t - h * 3600 - m * 60)
  33. ms = int((t - math.floor(t)) * 1000)
  34. return [h,m,s,ms]
  35.  
  36. args = [self.number] + breakTime(self.startTime) + breakTime(self.endTime) + [self.text]
  37. return "%d\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\n%s\n" % tuple(args)
  38.  
  39. def parseSubtitle(lines):
  40. result = SrtEntry(int(lines[0]), 0, 0, "\n".join([line.strip() for line in lines[2:]]))
  41. pattern = '(\d\d):(\d\d):(\d\d),(\d\d\d) --> (\d\d):(\d\d):(\d\d),(\d\d\d)'
  42. match = re.search(pattern, lines[1])
  43. result.startTime = int(match.group(1)) * 3600 \
  44. + int(match.group(2)) * 60 \
  45. + int(match.group(3)) \
  46. + int(match.group(4)) / 1000.0
  47. result.endTime = int(match.group(5)) * 3600 \
  48. + int(match.group(6)) * 60 \
  49. + int(match.group(7)) \
  50. + int(match.group(8)) / 1000.0
  51. return result
  52.  
  53. def parseFile(fileName):
  54. f = file(fileName)
  55. content = f.readlines()
  56. f.close()
  57. subtitles = []
  58. currentSubtitle = []
  59. for line in content:
  60. if line.strip() == "":
  61. if len(currentSubtitle) > 0:
  62. subtitles.append(currentSubtitle)
  63. currentSubtitle = []
  64. else:
  65. currentSubtitle.append(line)
  66. if len(currentSubtitle) > 0:
  67. subtitles.append(currentSubtitle)
  68. return [parseSubtitle(lines) for lines in subtitles]
  69.  
  70. def parseOneCorrection(line):
  71. pattern = '(\d\d):(\d\d):(\d\d),(\d\d\d) --> (\d\d):(\d\d):(\d\d),(\d\d\d)'
  72. match = re.search(pattern, line)
  73. srtTime = int(match.group(1)) * 3600 \
  74. + int(match.group(2)) * 60 \
  75. + int(match.group(3)) \
  76. + int(match.group(4)) / 1000.0
  77. correctTime = int(match.group(5)) * 3600 \
  78. + int(match.group(6)) * 60 \
  79. + int(match.group(7)) \
  80. + int(match.group(8)) / 1000.0
  81. return (srtTime, correctTime)
  82.  
  83. def parseCorrections(fileName):
  84. f = file(fileName)
  85. content = f.readlines()
  86. f.close()
  87. return [parseOneCorrection(line) for line in content if line.strip != ""]
  88.  
  89. def leastSquares(corrections):
  90. sum_x=0
  91. sum_y=0
  92. sum_xx=0
  93. sum_xy=0
  94. for (x, y) in corrections:
  95. sum_x = sum_x + x
  96. sum_y = sum_y + y
  97. xx = math.pow(x, 2)
  98. sum_xx = sum_xx + xx
  99. xy = x*y
  100. sum_xy = sum_xy + xy
  101. n = len(corrections)
  102. b = (-sum_x * sum_xy + sum_xx * sum_y) / (n * sum_xx-sum_x * sum_x)
  103. a = (-sum_x * sum_y + n * sum_xy) / (n * sum_xx-sum_x * sum_x)
  104. return (a, b)
  105.  
  106. def processSub(sub, a, b):
  107. return SrtEntry(sub.number,
  108. sub.startTime * a + b,
  109. sub.endTime * a + b,
  110. sub.text)
  111.  
  112. if __name__ == "__main__":
  113. subs = parseFile(sys.argv[1])
  114. corrections = parseCorrections(sys.argv[2])
  115. a, b = leastSquares(corrections)
  116. sys.stderr.write("%lf, %lf\n" % (a, b))
  117. fixedSubs = [processSub(sub, a, b) for sub in subs]
  118. for sub in fixedSubs:
  119. print sub
Add Comment
Please, Sign In to add comment