Guest User

Untitled

a guest
Dec 17th, 2017
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.05 KB | None | 0 0
  1. import tkinter as tk
  2. from tkinter import ttk, filedialog
  3. import wave
  4. import webrtcvad
  5. import collections
  6. import contextlib
  7. import sys
  8. import speech_recognition as sr
  9. import subprocess
  10. import os
  11. import threading
  12. import multiprocessing as mp
  13. import time
  14. import io
  15.  
  16. AV_FORMAT = ['mp4', 'avi', 'mkv', 'wmv', 'flac', 'wav', 'mp3', 'aac', 'ac3', 'rmvb', 'flv', 'ts', 'mts']
  17. FFMPEG_PATH = 'bin/ffmpeg.exe'
  18. SAMPLE_RATE = 32000
  19. LENGTH_CAP = 10
  20. FRAME_DURATION = 30
  21. PADDING_DURATION = 300
  22. MAX_SEGMENT_DURATION = 5000 # ms
  23. LANG = 'zh-TW'
  24.  
  25.  
  26. def read_wave(path):
  27. with contextlib.closing(wave.open(path, 'rb')) as wf:
  28. num_channels = wf.getnchannels()
  29. assert num_channels == 1
  30. sample_width = wf.getsampwidth()
  31. assert sample_width == 2
  32. sample_rate = wf.getframerate()
  33. assert sample_rate in (8000, 16000, 32000)
  34. pcm_data = wf.readframes(wf.getnframes())
  35. return pcm_data, sample_rate
  36.  
  37.  
  38. class Frame(object):
  39. def __init__(self, bytes, timestamp, duration):
  40. self.bytes = bytes
  41. self.timestamp = timestamp
  42. self.duration = duration
  43.  
  44.  
  45. def frame_generator(frame_duration_ms, audio, sample_rate):
  46. n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)
  47. offset = 0
  48. timestamp = 0.0
  49. duration = (float(n) / sample_rate) / 2.0
  50. while offset + n < len(audio):
  51. yield Frame(audio[offset:offset + n], timestamp, duration)
  52. timestamp += duration
  53. offset += n
  54.  
  55.  
  56. def vad_collector(sample_rate, frame_duration_ms,
  57. padding_duration_ms, vad, frames, max_frame_duration_ms):
  58. num_padding_frames = int(padding_duration_ms / frame_duration_ms)
  59. ring_buffer = collections.deque(maxlen=num_padding_frames)
  60. max_seg_frame = int(max_frame_duration_ms / frame_duration_ms)
  61. triggered = False
  62. voiced_frames = []
  63. for frame in frames:
  64. if not triggered:
  65. ring_buffer.append(frame)
  66. num_voiced = len([f for f in ring_buffer
  67. if vad.is_speech(f.bytes, sample_rate)])
  68. if num_voiced > 0.9 * ring_buffer.maxlen:
  69. begin = ring_buffer[0].timestamp
  70. triggered = True
  71. voiced_frames.extend(ring_buffer)
  72. ring_buffer.clear()
  73. else:
  74. voiced_frames.append(frame)
  75. ring_buffer.append(frame)
  76. num_unvoiced = len([f for f in ring_buffer
  77. if not vad.is_speech(f.bytes, sample_rate)])
  78. if num_unvoiced > 0.9 * ring_buffer.maxlen or len(voiced_frames) > max_seg_frame:
  79. end = frame.timestamp + frame.duration
  80. triggered = False
  81. yield b''.join([f.bytes for f in voiced_frames]), begin, end
  82. ring_buffer.clear()
  83. voiced_frames = []
  84. if triggered:
  85. end = frame.timestamp + frame.duration
  86. if voiced_frames:
  87. yield b''.join([f.bytes for f in voiced_frames]), begin, end
  88.  
  89.  
  90. def getFilenameExt(filename):
  91. s = str(filename).split('.')
  92. return s[len(s) - 1].lower()
  93.  
  94.  
  95. def changeFilenameExt(path, newExt):
  96. path = str(path)
  97. pos = 0;
  98. for i in range(0, len(path)):
  99. if (path[i] == '.'):
  100. pos = i
  101. path = path[0:pos + 1]
  102. return path + str(newExt)
  103.  
  104.  
  105. class ffmpegProcess(threading.Thread):
  106. def __init__(self, file_path):
  107. threading.Thread.__init__(self)
  108. self.file_path = file_path
  109.  
  110. def run(self):
  111. sp = subprocess.Popen(
  112. FFMPEG_PATH + " -y -i \"%s\" -c copy -vn -acodec pcm_s16le -ar %d -ac 1 \"%s\"" %
  113. (self.file_path, SAMPLE_RATE, changeFilenameExt(self.file_path, 'wav'))
  114. )
  115. self.stdout, self.stderr = sp.communicate()
  116.  
  117.  
  118. def extractAudio(file_path):
  119. ext = getFilenameExt(file_path)
  120. if ext in AV_FORMAT:
  121. pFFmepg = ffmpegProcess(file_path)
  122. pFFmepg.start()
  123. pFFmepg.join()
  124. else:
  125. print('Not support file')
  126. sys.exit(1)
  127.  
  128.  
  129. def recognize_mp(segments, samprate, lang, window, num_process):
  130. manager = mp.Manager()
  131. subtitles = manager.list()
  132. mpPool = mp.Pool(processes=int(num_process))
  133. progress = manager.Value('i', 0)
  134. lock = manager.Lock()
  135. total = 0
  136. window.progressbar['value'] = 0
  137. window.update()
  138. for segment, begin, end in segments:
  139. total += 1
  140. mpPool.apply_async(APIProcess.recognize,
  141. args=(segment, begin, end, samprate, lang, subtitles, progress, lock))
  142. mpPool.close()
  143. window.progressbar.config(maximum=total)
  144. while progress.value < total:
  145. window.progressbar['value'] = progress.value
  146. window.update()
  147. # print('%d/%d'%(progress.value,total))
  148. time.sleep(0.1)
  149. mpPool.join()
  150. window.progressbar['value'] = total
  151. window.update()
  152. return list(subtitles)
  153.  
  154.  
  155. class APIProcess(threading.Thread):
  156. def __init__(self, lang, samprate, maxlength, sensetive, progress, window, num_process):
  157. threading.Thread.__init__(self)
  158. self.lang = lang
  159. self.samprate = int(samprate)
  160. self.maxlength = int(maxlength)
  161. self.sensetive = int(sensetive)
  162. self.progress = progress
  163. self.window = window
  164. self.start()
  165. self.num_process = num_process
  166.  
  167. @staticmethod
  168. def recognize(segment, begin, end, samprate, lang, subtitles, progess, lock):
  169. r = sr.Recognizer()
  170. audio = sr.AudioData(segment, samprate, 4)
  171. try:
  172. subtitle = r.recognize_google(audio, language=lang)
  173. print(subtitle)
  174. subtitles.append((begin, end, subtitle))
  175. finally:
  176. lock.acquire()
  177. progess.value += 1
  178. lock.release()
  179. return
  180.  
  181. def run(self):
  182. self.window.startButton.config(state='disable')
  183. self.window.update()
  184. progress = self.progress
  185. file_list = filedialog.askopenfilenames()
  186. complete = 0
  187. for file_path in file_list:
  188. self.window.root.title('ScribeArbeit - %s' % (file_path))
  189. progress.set('Extracting audio...(%d/%d)' % (complete, len(file_list)))
  190. self.window.update()
  191. extractAudio(file_path)
  192. progress.set('Generating subtitles...(%d/%d)' % (complete, len(file_list)))
  193. self.window.update()
  194. tmp_path = changeFilenameExt(file_path, 'wav')
  195.  
  196. audio, sample_rate = read_wave(tmp_path)
  197. vad = webrtcvad.Vad(self.sensetive)
  198. frames = frame_generator(FRAME_DURATION, audio, sample_rate)
  199. frames = list(frames)
  200. segments = vad_collector(sample_rate, FRAME_DURATION, PADDING_DURATION, vad, frames, self.maxlength)
  201. subtitles = recognize_mp(segments, self.samprate, self.lang, self.window, self.num_process)
  202.  
  203. srt = io.open(changeFilenameExt(file_path, 'srt'), 'wb')
  204. subtitles.sort()
  205. index = 0
  206. for sub in subtitles:
  207. begin = sub[0]
  208. end = sub[1]
  209. srt.write(("%d\r\n" % (index)).encode(encoding='utf8'))
  210. srt.write(("%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\r\n" % (
  211. begin / 3600, begin / 60, begin % 60, begin % 1 * 1000,
  212. end / 3600, end / 60, end % 60, end % 1 * 1000)
  213. ).encode(encoding='utf8'))
  214. srt.write((sub[2] + '\r\n\r\n').encode(encoding='utf8'))
  215. index += 1
  216. srt.close()
  217. os.remove(tmp_path)
  218. complete += 1
  219. progress.set('Complete!(%d/%d)' % (complete, len(file_list)))
  220. self.window.root.title('ScribeArbeit - %s' % ('All complete!'))
  221. self.window.startButton.config(state='normal')
  222. self.window.update()
  223. self.window.update()
  224.  
  225.  
  226. class MainWindow(tk.Frame):
  227. def __init__(self, root=None, daemon=None):
  228. tk.Frame.__init__(self, root)
  229. self.root = root
  230. self.labelLang = ttk.Label(root, text='Language : ')
  231. self.labelLang.grid(column=0, row=0)
  232. self.comboLang = ttk.Combobox(root)
  233. self.comboLang['values'] = ('zh-TW', 'en-US', 'ja-JP')
  234. self.comboLang.current(0)
  235. self.comboLang.grid(column=1, row=0)
  236. self.labelSample = ttk.Label(root, text='Sampling Rate : ')
  237. self.labelSample.grid(column=0, row=1)
  238. self.comboSample = ttk.Combobox(root)
  239. self.comboSample['values'] = (8000, 16000, 32000)
  240. self.comboSample.current(1)
  241. self.comboSample.grid(column=1, row=1)
  242. self.labelMaxLen = ttk.Label(root, text='Max Segment Length(ms) : ')
  243. self.labelMaxLen.grid(column=0, row=2)
  244. self.entryMaxLen = ttk.Entry(root)
  245. self.entryMaxLen.insert(0, '5000')
  246. self.entryMaxLen.grid(column=1, row=2)
  247. self.labelSense = ttk.Label(root, text='Sensitive')
  248. self.labelSense.grid(column=0, row=3)
  249. self.comboSense = ttk.Combobox(root)
  250. self.comboSense['values'] = (0, 1, 2, 3)
  251. self.comboSense.grid(column=1, row=3)
  252. self.comboSense.current(1)
  253. self.labelProcess = ttk.Label(root, text='Processes')
  254. self.labelProcess.grid(column=0, row=4)
  255. self.comboProcess = ttk.Combobox(root)
  256. self.comboProcess['values'] = tuple([i + 1 for i in range(32)])
  257. self.comboProcess.grid(column=1, row=4)
  258. self.comboProcess.current(7)
  259. self.stringVar = tk.StringVar()
  260. self.labelProgress = ttk.Label(root, textvariable=self.stringVar)
  261. self.labelProgress.grid(column=0, row=5)
  262. self.progressbar = ttk.Progressbar(root, length=400, maximum=100, mode='determinate')
  263. self.progressbar.grid(column=0, row=6, columnspan=2)
  264. self.startButton = ttk.Button(root, text='start', command=lambda:
  265. APIProcess(self.comboLang.get(), self.comboSample.get(), self.entryMaxLen.get(), self.comboSense.get(),
  266. self.stringVar, self, self.comboProcess.get()))
  267. self.startButton.grid(column=1, row=5)
  268.  
  269.  
  270. class Daemon:
  271. def __init__(self):
  272. self.root = tk.Tk()
  273. self.root.title('ScribeArbeit')
  274. self.mainWindow = MainWindow(self.root, self)
  275. self.mainWindow.mainloop()
  276.  
  277.  
  278. if __name__ == '__main__':
  279. mp.freeze_support()
  280. daemon = Daemon()
Add Comment
Please, Sign In to add comment