Advertisement
voidmesmer

whisperx mode

Apr 23rd, 2024 (edited)
744
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.84 KB | None | 0 0
  1. import whisperx as wx
  2. from pydub import AudioSegment
  3. import os
  4. from dotenv import load_dotenv
  5. from config import AUDIO_FILE_DIR
  6.  
  7. # Load .env file if present
  8. load_dotenv()
  9. device = "cuda"
  10. batch_size = 16
  11. compute_type = "int8"
  12. model_dir = "C:\\test"
  13. language = "en"
  14. model = wx.load_model("tiny", device, language=language, compute_type=compute_type, download_root=model_dir)
  15.  
  16. def transcribe_audio(file_path):
  17. try:
  18. audio = AudioSegment.from_file(f"{AUDIO_FILE_DIR}/{file_path}")
  19. chunk_size = 10 * 60 * 1000
  20. num_chunks = len(audio) // chunk_size + (1 if len(audio) % chunk_size else 0)
  21. transcript = ""
  22. file_size = os.path.getsize(f"{AUDIO_FILE_DIR}/{file_path}")
  23.  
  24. if file_size <= 24 * 1024 * 1024:
  25. result = model.transcribe(f"{AUDIO_FILE_DIR}/{file_path}", batch_size=batch_size)
  26. for segment in result['segments']:
  27. transcript += segment['text'] + " "
  28. else:
  29. for i in range(num_chunks):
  30. temp_chunk_path = f"{AUDIO_FILE_DIR}/temp_chunk.mp3"
  31. chunk = audio[i*chunk_size:(i+1)*chunk_size]
  32. with open(temp_chunk_path, 'wb') as f:
  33. chunk.export(f, format="mp3")
  34. try:
  35. result = model.transcribe(temp_chunk_path, batch_size=batch_size)
  36. for segment in result['segments']:
  37. transcript += segment['text'] + " "
  38. finally:
  39. os.remove(temp_chunk_path)
  40.  
  41. os.remove(f"{AUDIO_FILE_DIR}/{file_path}")
  42.  
  43. return transcript
  44.  
  45. except FileNotFoundError as e:
  46. raise FileNotFoundError(f"The audio file {file_path} was not found.") from e
  47. except Exception as e:
  48. raise Exception(f"An error occurred during the transcription process: {e}") from e
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement