Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
## Whisper + Pyannote, audio-to-text transcription with speaker diarization ##
"""Transcribe an audio file with Whisper and label each segment with a speaker.

Whisper produces timestamped text segments; the pyannote diarization pipeline
produces timestamped speaker turns. Each text segment is assigned the speaker
whose turns overlap it the most, and the result is written to a text file
(and optionally to JSON).

References:
https://github.com/pyannote/pyannote-audio/issues/1474#issuecomment-1746998271
https://huggingface.co/pyannote/speaker-diarization-3.0
https://huggingface.co/pyannote/speaker-diarization-3.1
https://huggingface.co/pyannote/speaker-diarization
https://huggingface.co/pyannote/segmentation
"""
import json
import os

import whisper
from pyannote.audio import Pipeline

# --- Configuration ---------------------------------------------------------
AUDIO_FILE = "r:\\output\\chunk_4.wav"
TEXT_OUTPUT = "r:\\transcription_with_speakers.txt"
JSON_OUTPUT = "r:\\output\\transcription_with_speakers.json"
# Set to True to also save the result as JSON for easier parsing later.
SAVE_JSON = False
# Prefer a token from the environment so it does not have to live in the file;
# the placeholder keeps the original "edit me" behaviour as a fallback.
HF_TOKEN = os.environ.get("HF_TOKEN", "YOUR_TOKEN_HERE")


def _pick_speaker(start, end, turns):
    """Return the speaker label that best matches the span [start, end].

    ``turns`` is a list of ``(turn_start, turn_end, label)`` tuples. The label
    with the largest summed overlap wins. If nothing overlaps with positive
    duration (for example a zero-length segment), fall back to the first turn
    that contains ``start``. Returns ``None`` when no turn matches at all.
    """
    overlap_by_speaker = {}
    for turn_start, turn_end, label in turns:
        overlap = min(end, turn_end) - max(start, turn_start)
        if overlap > 0:
            overlap_by_speaker[label] = overlap_by_speaker.get(label, 0.0) + overlap

    if overlap_by_speaker:
        return max(overlap_by_speaker, key=overlap_by_speaker.get)

    for turn_start, turn_end, label in turns:
        if turn_start <= start < turn_end:
            return label
    return None


# Transcribe with Whisper
model = whisper.load_model("small")
result = model.transcribe(AUDIO_FILE, language="slovenian")

# Perform diarization with Pyannote
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1", use_auth_token=HF_TOKEN
)
# NOTE(review): pyannote.audio 3.x appears to return None (rather than raise)
# when the model cannot be fetched, e.g. with an invalid token or unaccepted
# model terms -- fail here with a clear message instead of a TypeError below.
if pipeline is None:
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization-3.1'; check the "
        "Hugging Face token and that the model's user conditions were accepted."
    )
diarization = pipeline(AUDIO_FILE, num_speakers=2)

# Materialize the speaker turns once instead of re-iterating per segment.
turns = [
    (turn.start, turn.end, spk)
    for turn, _, spk in diarization.itertracks(yield_label=True)
]

# Match diarization with transcription
output = [
    {
        "start": segment["start"],
        "end": segment["end"],
        "speaker": _pick_speaker(segment["start"], segment["end"], turns),
        "text": segment["text"],
    }
    for segment in result["segments"]
]

# Write results to file
with open(TEXT_OUTPUT, "w", encoding="utf-8") as f:
    for item in output:
        f.write(f"Speaker {item['speaker']}:\n")
        f.write(f"{item['text']}\n")
        f.write(f"[{item['start']:.2f} - {item['end']:.2f}]\n\n")

# Optionally, also save as JSON for easier parsing later
if SAVE_JSON:
    with open(JSON_OUTPUT, "w", encoding="utf-8") as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

print("Transcription with speaker diarization completed and saved to file.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement