Advertisement
Guest User

Untitled

a guest
Jun 16th, 2019
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.95 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3.  
  4. DEFAULT_VOLUME = 150
  5. DEFAULT_AGGRESSIVENESS = 2
  6. DEFAULT_MODEL_DIR = '/opt/kaldi/model/kaldi-generic-en-tdnn_250'
  7. DEFAULT_ACOUSTIC_SCALE = 1.0
  8. DEFAULT_BEAM = 7.0
  9. DEFAULT_FRAME_SUBSAMPLING_FACTOR = 3
  10.  
  11. # listening config
  12. CONFIG = {
  13.  
  14. # need to match transcription
  15. "commands": {
  16. # simple commands
  17. "hello": {
  18. "transcriptions": ["hello"],
  19. "sound": None,
  20. "intent": "greeting",
  21. "active": True
  22. },
  23. "thank you": {
  24. "transcriptions": ["thank you"],
  25. "sound": None,
  26. "intent": "thank",
  27. "active": True
  28. },
  29. "lights on": {
  30. "transcriptions": ["lights on"],
  31. "sound": None,
  32. "intent": "turn on lights",
  33. "active": True
  34. },
  35. "lights off": {
  36. "transcriptions": ["lights off"],
  37. "sound": None,
  38. "intent": "turn off lights",
  39. "active": True
  40. },
  41. # full sentences
  42. # CRITERIA
  43. # - fairly accurate,
  44. # - important enough to want offline functionality
  45. # - worth answering even if speech not directed at device
  46. "time": {
  47. "transcriptions": ["what time is it"],
  48. "sound": None,
  49. "intent": "what time is it",
  50. "active": True
  51. },
  52. "weather": {
  53. "transcriptions": ["what's the weather like",
  54. "what's the weather life"],
  55. "sound": None,
  56. "intent": "turn off lights",
  57. "active": True
  58. }
  59.  
  60. },
  61.  
  62. # need to be present in transcription
  63. "hotwords": {
  64.  
  65. # wake words
  66. # PROTIP: just run the live demo and see which transcriptions come up
  67. "christopher": {
  68. "transcriptions": ["christopher"],
  69. "sound": None,
  70. "intent": "listen",
  71. "active": True
  72. },
  73. "hey marty": {
  74. "transcriptions": ["hey marty"],
  75. "sound": None,
  76. "intent": "listen",
  77. "active": True
  78. },
  79. "hey mycroft": {
  80. # not in language model
  81. "transcriptions": ["hey mike off", "hey microsoft",
  82. "hey migrants"],
  83. "sound": None,
  84. "intent": "listen",
  85. "active": True
  86. },
  87. "hey robin": {
  88. # seems to struggle with this one
  89. "transcriptions": ["hey rob him", "hey rob in", "hey robin",
  90. "hey rob it", "hey rob"],
  91. "sound": None,
  92. "intent": "listen",
  93. "active": True
  94. },
  95. "hey mike": {
  96. "transcriptions": ["hey mike"],
  97. "sound": None,
  98. "intent": "listen",
  99. "active": True
  100. },
  101. "hey joe": {
  102. "transcriptions": ["hey joe"],
  103. "sound": None,
  104. "intent": "listen",
  105. "active": True
  106. },
  107. "hey johnnie": {
  108. "transcriptions": ["hey johnnie"],
  109. "sound": None,
  110. "intent": "listen",
  111. "active": True
  112. },
  113. "hey jonathan": {
  114. "transcriptions": ["hey jonathan"],
  115. "sound": None,
  116. "intent": "listen",
  117. "active": True
  118. },
  119. "hey bob": {
  120. "transcriptions": ["hey bob"],
  121. "sound": None,
  122. "intent": "listen",
  123. "active": True
  124. },
  125. "hey lex": {
  126. "transcriptions": ["hey lex"],
  127. "sound": None,
  128. "intent": "listen",
  129. "active": True
  130. },
  131. "hey computer": {
  132. "transcriptions": ["hey computer", "a computer", "they computer"],
  133. "sound": None,
  134. "intent": "listen",
  135. "active": True
  136. }
  137. }
  138. }
  139.  
  140. import logging
  141. from nltools import misc
  142. from nltools.pulserecorder import PulseRecorder
  143. from nltools.vad import VAD, BUFFER_DURATION
  144. from nltools.asr import ASR, ASR_ENGINE_NNET3
  145. from optparse import OptionParser
  146.  
  147. from pyee import EventEmitter
  148. import json
  149.  
  150.  
  151. class LocalListener(EventEmitter):
  152. hotwords = CONFIG["hotwords"]
  153. commands = CONFIG["commands"]
  154.  
  155. def initialize(self, source , volume, aggressiveness, model_dir):
  156. self.rec = PulseRecorder(source_name=source, volume=volume)
  157. self.vad = VAD(aggressiveness=aggressiveness)
  158. logging.info("Loading model from %s ..." % model_dir)
  159.  
  160. self.asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=model_dir,
  161. kaldi_beam=DEFAULT_BEAM,
  162. kaldi_acoustic_scale=DEFAULT_ACOUSTIC_SCALE,
  163. kaldi_frame_subsampling_factor=DEFAULT_FRAME_SUBSAMPLING_FACTOR)
  164.  
  165. def _emit(self, message_type, message_data):
  166. serialized_message = json.dumps(
  167. {"type": message_type, "data": message_data})
  168. logging.debug(serialized_message)
  169. # TODO plug into mycroft message bus
  170. self.emit(serialized_message)
  171.  
  172. def on_transcription(self, user_utt, confidence):
  173. for cmd in self.commands:
  174. if not self.commands[cmd].get("active"):
  175. continue
  176. for c in self.commands[cmd]["transcriptions"]:
  177. if c.lower().strip() == user_utt.lower().strip():
  178. data = self.commands[cmd]
  179. data["confidence"] = confidence
  180. data["command"] = cmd
  181. self._emit("command", data)
  182. return
  183. for hotw in self.hotwords:
  184. if not self.hotwords[hotw].get("active"):
  185. continue
  186. for w in self.hotwords[hotw]["transcriptions"]:
  187. if w in user_utt:
  188. data = self.hotwords[hotw]
  189. data["confidence"] = confidence
  190. data["hotword"] = hotw
  191. self._emit("hotword", data)
  192. return
  193.  
  194. def run(self):
  195.  
  196. self.rec.start_recording()
  197.  
  198. logging.info("Listening")
  199.  
  200. while True:
  201.  
  202. samples = self.rec.get_samples()
  203.  
  204. audio, finalize = self.vad.process_audio(samples)
  205.  
  206. if not audio:
  207. continue
  208.  
  209. logging.debug('decoding audio len=%d finalize=%s audio=%s' % (
  210. len(audio), repr(finalize), audio[0].__class__))
  211.  
  212. user_utt, confidence = self.asr.decode(audio, finalize,
  213. stream_id=STREAM_ID)
  214.  
  215. if finalize:
  216. self.on_transcription(user_utt, confidence)
  217.  
  218.  
  219. if __name__ == "__main__":
  220. PROC_TITLE = 'kaldi_live'
  221.  
  222. STREAM_ID = 'mic'
  223.  
  224. misc.init_app(PROC_TITLE)
  225.  
  226. parser = OptionParser("usage: %prog [options]")
  227.  
  228. parser.add_option("-a", "--aggressiveness", dest="aggressiveness", type="int",
  229. default=DEFAULT_AGGRESSIVENESS,
  230. help="VAD aggressiveness, default: %d" % DEFAULT_AGGRESSIVENESS)
  231.  
  232. parser.add_option("-m", "--model-dir", dest="model_dir", type="string",
  233. default=DEFAULT_MODEL_DIR,
  234. help="kaldi model directory, default: %s" % DEFAULT_MODEL_DIR)
  235.  
  236. parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
  237. help="verbose output")
  238.  
  239. parser.add_option("-s", "--source", dest="source", type="string", default=None,
  240. help="pulseaudio source, default: auto-detect mic")
  241.  
  242. parser.add_option("-V", "--volume", dest="volume", type="int",
  243. default=DEFAULT_VOLUME,
  244. help="broker port, default: %d" % DEFAULT_VOLUME)
  245.  
  246. (options, args) = parser.parse_args()
  247.  
  248. if options.verbose:
  249. logging.basicConfig(level=logging.DEBUG)
  250. else:
  251. logging.basicConfig(level=logging.INFO)
  252.  
  253. source = options.source
  254. volume = options.volume
  255. aggressiveness = options.aggressiveness
  256. model_dir = options.model_dir
  257.  
  258. listener = LocalListener()
  259. listener.initialize(source, volume, aggressiveness, model_dir)
  260. listener.run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement