#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Local hotword/command listener: records audio from PulseAudio, runs VAD,
# decodes speech with a Kaldi nnet3 model (via nltools) and emits matched
# commands and hotwords as pyee events.

import json
import logging
from optparse import OptionParser

from nltools import misc
from nltools.pulserecorder import PulseRecorder
from nltools.vad import VAD
from nltools.asr import ASR, ASR_ENGINE_NNET3
from pyee import EventEmitter

DEFAULT_VOLUME = 150
DEFAULT_AGGRESSIVENESS = 2
DEFAULT_MODEL_DIR = '/opt/kaldi/model/kaldi-generic-en-tdnn_250'
DEFAULT_ACOUSTIC_SCALE = 1.0
DEFAULT_BEAM = 7.0
DEFAULT_FRAME_SUBSAMPLING_FACTOR = 3

# listening config
CONFIG = {

    # need to match transcription exactly
    "commands": {
        # simple commands
        "hello": {
            "transcriptions": ["hello"],
            "sound": None,
            "intent": "greeting",
            "active": True
        },
        "thank you": {
            "transcriptions": ["thank you"],
            "sound": None,
            "intent": "thank",
            "active": True
        },
        "lights on": {
            "transcriptions": ["lights on"],
            "sound": None,
            "intent": "turn on lights",
            "active": True
        },
        "lights off": {
            "transcriptions": ["lights off"],
            "sound": None,
            "intent": "turn off lights",
            "active": True
        },
        # full sentences
        # CRITERIA
        # - fairly accurate
        # - important enough to want offline functionality
        # - worth answering even if speech is not directed at the device
        "time": {
            "transcriptions": ["what time is it"],
            "sound": None,
            "intent": "what time is it",
            "active": True
        },
        "weather": {
            # "life" covers a frequent misrecognition of "like"
            "transcriptions": ["what's the weather like",
                               "what's the weather life"],
            "sound": None,
            "intent": "what's the weather like",
            "active": True
        }

    },

    # need to be present in transcription
    "hotwords": {

        # wake words
        # PROTIP: just run the live demo and see which transcriptions come up
        "christopher": {
            "transcriptions": ["christopher"],
            "sound": None,
            "intent": "listen",
            "active": True
        },
        "hey marty": {
            "transcriptions": ["hey marty"],
            "sound": None,
            "intent": "listen",
            "active": True
        },
        "hey mycroft": {
            # not in language model
            "transcriptions": ["hey mike off", "hey microsoft",
                               "hey migrants"],
            "sound": None,
            "intent": "listen",
            "active": True
        },
        "hey robin": {
            # seems to struggle with this one
            "transcriptions": ["hey rob him", "hey rob in", "hey robin",
                               "hey rob it", "hey rob"],
            "sound": None,
            "intent": "listen",
            "active": True
        },
        "hey mike": {
            "transcriptions": ["hey mike"],
            "sound": None,
            "intent": "listen",
            "active": True
        },
        "hey joe": {
            "transcriptions": ["hey joe"],
            "sound": None,
            "intent": "listen",
            "active": True
        },
        "hey johnnie": {
            "transcriptions": ["hey johnnie"],
            "sound": None,
            "intent": "listen",
            "active": True
        },
        "hey jonathan": {
            "transcriptions": ["hey jonathan"],
            "sound": None,
            "intent": "listen",
            "active": True
        },
        "hey bob": {
            "transcriptions": ["hey bob"],
            "sound": None,
            "intent": "listen",
            "active": True
        },
        "hey lex": {
            "transcriptions": ["hey lex"],
            "sound": None,
            "intent": "listen",
            "active": True
        },
        "hey computer": {
            "transcriptions": ["hey computer", "a computer", "they computer"],
            "sound": None,
            "intent": "listen",
            "active": True
        }
    }
}


class LocalListener(EventEmitter):
    hotwords = CONFIG["hotwords"]
    commands = CONFIG["commands"]

    def initialize(self, source, volume, aggressiveness, model_dir):
        self.rec = PulseRecorder(source_name=source, volume=volume)
        self.vad = VAD(aggressiveness=aggressiveness)
        logging.info("Loading model from %s ..." % model_dir)

        self.asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=model_dir,
                       kaldi_beam=DEFAULT_BEAM,
                       kaldi_acoustic_scale=DEFAULT_ACOUSTIC_SCALE,
                       kaldi_frame_subsampling_factor=DEFAULT_FRAME_SUBSAMPLING_FACTOR)

    def _emit(self, message_type, message_data):
        serialized_message = json.dumps(
            {"type": message_type, "data": message_data})
        logging.debug(serialized_message)
        # TODO plug into mycroft message bus (see the sketch below the class)
        # publish under the message type so consumers can subscribe with
        # .on("command") / .on("hotword")
        self.emit(message_type, serialized_message)

    def on_transcription(self, user_utt, confidence):
        # commands must match the transcription exactly
        for cmd in self.commands:
            if not self.commands[cmd].get("active"):
                continue
            for c in self.commands[cmd]["transcriptions"]:
                if c.lower().strip() == user_utt.lower().strip():
                    data = dict(self.commands[cmd])  # copy so CONFIG stays unmodified
                    data["confidence"] = confidence
                    data["command"] = cmd
                    self._emit("command", data)
                    return
        # hotwords only need to appear somewhere in the transcription
        for hotw in self.hotwords:
            if not self.hotwords[hotw].get("active"):
                continue
            for w in self.hotwords[hotw]["transcriptions"]:
                if w in user_utt:
                    data = dict(self.hotwords[hotw])  # copy so CONFIG stays unmodified
                    data["confidence"] = confidence
                    data["hotword"] = hotw
                    self._emit("hotword", data)
                    return

    def run(self):

        self.rec.start_recording()

        logging.info("Listening")

        while True:

            samples = self.rec.get_samples()

            audio, finalize = self.vad.process_audio(samples)

            if not audio:
                continue

            logging.debug('decoding audio len=%d finalize=%s audio=%s' % (
                len(audio), repr(finalize), audio[0].__class__))

            # STREAM_ID is defined in the __main__ block below
            user_utt, confidence = self.asr.decode(audio, finalize,
                                                   stream_id=STREAM_ID)

            if finalize:
                self.on_transcription(user_utt, confidence)


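# --- Sketch: plugging _emit() into the Mycroft message bus (the TODO above) ---
# Illustrative only, not part of the original script. It assumes the separate
# mycroft-messagebus-client package and a Mycroft core instance whose bus is
# reachable on the default host/port; the names below are not used elsewhere
# in this file.
#
#     from mycroft_bus_client import MessageBusClient, Message
#
#     bus = MessageBusClient()
#     bus.run_in_thread()
#
#     # inside LocalListener._emit() one could then forward each event, e.g.:
#     #     bus.emit(Message(message_type, message_data))
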
if __name__ == "__main__":
    PROC_TITLE = 'kaldi_live'

    STREAM_ID = 'mic'

    misc.init_app(PROC_TITLE)

    parser = OptionParser("usage: %prog [options]")

    parser.add_option("-a", "--aggressiveness", dest="aggressiveness", type="int",
                      default=DEFAULT_AGGRESSIVENESS,
                      help="VAD aggressiveness, default: %d" % DEFAULT_AGGRESSIVENESS)

    parser.add_option("-m", "--model-dir", dest="model_dir", type="string",
                      default=DEFAULT_MODEL_DIR,
                      help="kaldi model directory, default: %s" % DEFAULT_MODEL_DIR)

    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      help="verbose output")

    parser.add_option("-s", "--source", dest="source", type="string", default=None,
                      help="pulseaudio source, default: auto-detect mic")

    parser.add_option("-V", "--volume", dest="volume", type="int",
                      default=DEFAULT_VOLUME,
                      help="recording volume, default: %d" % DEFAULT_VOLUME)

    (options, args) = parser.parse_args()

    if options.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    source = options.source
    volume = options.volume
    aggressiveness = options.aggressiveness
    model_dir = options.model_dir

    listener = LocalListener()
    listener.initialize(source, volume, aggressiveness, model_dir)
    listener.run()
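
# --- Usage sketch (illustrative, not part of the original script) ---
# Because _emit() publishes under the message type, a consumer that creates the
# listener itself can subscribe via the pyee EventEmitter API before calling
# run(). The handler name below is hypothetical; handlers receive the JSON
# string produced by _emit().
#
#     def handle_command(serialized_message):
#         message = json.loads(serialized_message)
#         print("intent:", message["data"]["intent"],
#               "confidence:", message["data"]["confidence"])
#
#     listener = LocalListener()
#     listener.on("command", handle_command)
#     listener.on("hotword", lambda m: print("wake word:", json.loads(m)["data"]["hotword"]))
#     listener.initialize(source, volume, aggressiveness, model_dir)
#     listener.run()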