Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## process wav, split to 8 min parts and produce output.txt
- import speech_recognition as sr
- from pydub import AudioSegment
- import math
- import datetime
def format_timestamp(milliseconds):
    """Format a millisecond offset as an ``H:MM:SS`` string.

    Sub-second precision is dropped. Unlike ``str(datetime.timedelta)``,
    offsets of 24 hours or more keep the same shape (e.g. ``25:00:00``
    rather than ``1 day, 1:00:00``), so every timestamp in a transcript
    has a uniform format.

    Args:
        milliseconds: Offset from the start of the recording, in ms.

    Returns:
        The offset as ``H:MM:SS``; hours are not zero-padded and may
        exceed 23. Negative offsets are rendered with a leading ``-``.
    """
    delta = datetime.timedelta(seconds=milliseconds / 1000)
    # timedelta normalizes to (days, seconds, microseconds) with
    # non-negative seconds/microseconds, so this is the floor in seconds.
    whole_seconds = delta.days * 86400 + delta.seconds
    sign = "-" if whole_seconds < 0 else ""
    hours, remainder = divmod(abs(whole_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{sign}{hours}:{minutes:02d}:{seconds:02d}"
def split_and_recognize(wav_file, chunk_length_ms=480000):  # 480000 ms = 8 minutes 0 seconds
    """Transcribe a WAV file chunk by chunk with Google speech recognition.

    The audio is cut into consecutive pieces of ``chunk_length_ms``
    milliseconds. Each piece is exported to a temporary
    ``temp_chunk_<i>.wav`` file in the current working directory, loaded
    through ``speech_recognition`` and sent to ``recognize_google`` with
    the language set to ``sl-SI``. Progress and per-chunk failures are
    printed to stdout.

    Args:
        wav_file: Path of the WAV file to transcribe.
        chunk_length_ms: Length of each chunk in milliseconds.

    Returns:
        The recognized chunks, each prefixed with ``[H:MM:SS]`` (the
        chunk's start offset) and separated by a blank line. Chunks that
        could not be understood or requested are skipped, so the result
        may be an empty string.
    """
    import os  # kept function-local, as in the original block

    audio = AudioSegment.from_wav(wav_file)
    total_duration = len(audio)
    chunks = math.ceil(total_duration / chunk_length_ms)
    recognizer = sr.Recognizer()
    full_text = []

    for i in range(chunks):
        start_time = i * chunk_length_ms
        end_time = min((i + 1) * chunk_length_ms, total_duration)
        chunk = audio[start_time:end_time]
        chunk_file = f"temp_chunk_{i}.wav"

        try:
            # export() hands back the open output file; close it explicitly
            # instead of relying on garbage collection, otherwise the later
            # remove can fail on platforms that lock open files.
            exported = chunk.export(chunk_file, format="wav")
            exported.close()
            with sr.AudioFile(chunk_file) as source:
                audio_data = recognizer.record(source)
        finally:
            # Clean up the temporary file even when export/reading fails.
            try:
                os.remove(chunk_file)
            except FileNotFoundError:
                pass  # export failed before the file was created

        print(f"Recognizing chunk {i + 1} of {chunks}...")
        try:
            text = recognizer.recognize_google(audio_data, language="sl-SI")
        except sr.UnknownValueError:
            print(f"Chunk {i + 1}: Speech recognition could not understand the audio")
        except sr.RequestError as e:
            print(f"Chunk {i + 1}: Could not request results; {e}")
        else:
            timestamp = format_timestamp(start_time)
            full_text.append(f"[{timestamp}] {text}")

    return "\n\n".join(full_text)  # Join with double newline for paragraph separation
# Example run: transcribe one recording from a fixed location.
wav_file = "r:\\sandi.wav"
result = split_and_recognize(wav_file)

# Store the transcript as UTF-8 text next to the input.
with open("r:\\output.txt", mode="w", encoding="utf-8") as output_file:
    output_file.write(result)

print("Transcription complete. Text written to r:\\output.txt")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement