import os
import queue
import threading
import time
import warnings

import numpy as np
import soundcard as sc
import azure.cognitiveservices.speech as speechsdk
from dotenv import load_dotenv

# Suppress the SoundcardRuntimeWarning raised on audio discontinuities
warnings.filterwarnings("ignore", category=sc.SoundcardRuntimeWarning)

# Azure Speech subscription key and region, loaded from a .env file
load_dotenv()
audio_key = os.getenv("audio_key")
audio_region = os.getenv("audio_region")

# Queues for passing audio data from the capture threads to the Speech SDK callbacks
mic_queue = queue.Queue()
speaker_queue = queue.Queue()
# Capture microphone audio and queue it as 16-bit mono PCM
def capture_mic_audio(mic_queue):
    mic = sc.get_microphone(id=str(sc.default_microphone().name))
    with mic.recorder(samplerate=48000) as mic_recorder:
        while True:
            # soundcard yields float32 frames in [-1, 1]; downmix to mono and convert to int16 bytes
            data = mic_recorder.record(numframes=1024)
            mono = np.clip(data.mean(axis=1), -1.0, 1.0)
            mic_queue.put((mono * 32767).astype(np.int16).tobytes())
# Capture speaker (loopback) audio and queue it as 16-bit mono PCM
def capture_speaker_audio(speaker_queue):
    speaker = sc.get_microphone(id=str(sc.default_speaker().name), include_loopback=True)
    with speaker.recorder(samplerate=48000) as speaker_recorder:
        while True:
            data = speaker_recorder.record(numframes=1024)
            mono = np.clip(data.mean(axis=1), -1.0, 1.0)
            speaker_queue.put((mono * 32767).astype(np.int16).tobytes())
# Create an audio input stream for the Azure Speech SDK backed by one of the queues
def create_audio_input_stream(audio_queue):
    class AudioInputStream(speechsdk.audio.PullAudioInputStreamCallback):
        def __init__(self):
            super().__init__()
            self._leftover = b""

        def read(self, buffer):
            # Block until audio is available; returning 0 would signal end of stream
            data = self._leftover or audio_queue.get()
            n = min(len(buffer), len(data))
            buffer[:n] = data[:n]
            self._leftover = data[n:]
            return n

        def close(self):
            pass

    # The capture threads deliver 48 kHz, 16-bit, mono PCM, so declare that format explicitly
    stream_format = speechsdk.audio.AudioStreamFormat(samples_per_second=48000, bits_per_sample=16, channels=1)
    stream = speechsdk.audio.PullAudioInputStream(AudioInputStream(), stream_format)
    return speechsdk.audio.AudioConfig(stream=stream)
# Start Azure conversation transcription on the given audio stream
def start_recognition(audio_config, speech_config, name):
    transcriber = speechsdk.transcription.ConversationTranscriber(speech_config=speech_config, audio_config=audio_config)
    print("starting recognition")

    def transcribed(evt):
        print(f"{name} transcribed: {evt.result.text}")

    def transcribing(evt):
        print(f"{name} transcribing: {evt.result.text}")

    transcriber.transcribed.connect(transcribed)
    transcriber.transcribing.connect(transcribing)
    transcriber.start_transcribing_async().get()
    print("started transcribing")
    return transcriber
# Main function
def main():
    speech_config = speechsdk.SpeechConfig(subscription=audio_key, region=audio_region)
    speech_config.speech_recognition_language = "en-US"

    # Start capturing audio from the microphone and the speaker loopback
    threading.Thread(target=capture_mic_audio, args=(mic_queue,), daemon=True).start()
    threading.Thread(target=capture_speaker_audio, args=(speaker_queue,), daemon=True).start()

    # Create audio input streams
    mic_audio_input = create_audio_input_stream(mic_queue)
    speaker_audio_input = create_audio_input_stream(speaker_queue)

    # Start speech recognizers
    mic_transcriber = start_recognition(mic_audio_input, speech_config, "Microphone")
    speaker_transcriber = start_recognition(speaker_audio_input, speech_config, "Speaker")

    # Keep the program running to process transcriptions without burning a CPU core
    try:
        while True:
            time.sleep(0.1)
    except KeyboardInterrupt:
        mic_transcriber.stop_transcribing_async().get()
        speaker_transcriber.stop_transcribing_async().get()

if __name__ == "__main__":
    main()
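# Usage note (a sketch, not part of the original paste): the script expects a .env
# file next to it that python-dotenv can load, defining the two values read above, e.g.
#
#   audio_key=<your Azure Speech resource key>
#   audio_region=<your Azure region, e.g. eastus>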