Advertisement
Guest User

Untitled

a guest
Sep 18th, 2024
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.87 KB | None | 0 0
# Process a WAV file: split it into 8-minute parts, transcribe each part, and write the result to output.txt.
import datetime
import math
import os
import tempfile

import speech_recognition as sr
from pydub import AudioSegment

  7. def format_timestamp(milliseconds):
  8. seconds = milliseconds / 1000
  9. return str(datetime.timedelta(seconds=seconds)).split('.')[0]
  10.  
  11. def split_and_recognize(wav_file, chunk_length_ms=480000): # 480000 ms = 8 minutes 0 seconds
  12. audio = AudioSegment.from_wav(wav_file)
  13. total_duration = len(audio)
  14. chunks = math.ceil(total_duration / chunk_length_ms)
  15.  
  16. recognizer = sr.Recognizer()
  17. full_text = []
  18.  
  19. for i in range(chunks):
  20. start_time = i * chunk_length_ms
  21. end_time = min((i + 1) * chunk_length_ms, total_duration)
  22.  
  23. chunk = audio[start_time:end_time]
  24. chunk_file = f"temp_chunk_{i}.wav"
  25. chunk.export(chunk_file, format="wav")
  26.  
  27. with sr.AudioFile(chunk_file) as source:
  28. audio_data = recognizer.record(source)
  29.  
  30. try:
  31. print(f"Recognizing chunk {i + 1} of {chunks}...")
  32. text = recognizer.recognize_google(audio_data, language="sl-SI")
  33. timestamp = format_timestamp(start_time)
  34. full_text.append(f"[{timestamp}] {text}")
  35. except sr.UnknownValueError:
  36. print(f"Chunk {i + 1}: Speech recognition could not understand the audio")
  37. except sr.RequestError as e:
  38. print(f"Chunk {i + 1}: Could not request results; {e}")
  39.  
  40. # Clean up temporary file
  41. import os
  42. os.remove(chunk_file)
  43.  
  44. return "\n\n".join(full_text) # Join with double newline for paragraph separation
  45.  
  46. # Usage
  47. wav_file = "r:\\sandi.wav"
  48. result = split_and_recognize(wav_file)
  49.  
  50. # Write result to file
  51. with open("r:\\output.txt", "w", encoding="utf-8") as output_file:
  52. output_file.write(result)
  53.  
  54. print("Transcription complete. Text written to r:\\output.txt")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement