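# Pipeline: edge-tts synthesizes the prompt, the audio is resampled to 16 kHz mono,
# and a so-vits-svc model ("Herta-Svc") converts it to the target voice before playback.
#
# Assumed setup (not stated in the paste): run this from a so-vits-svc checkout so that
# inference.infer_tool is importable, with the checkpoint and config under Herta-Svc/,
# and install the Python dependencies, e.g.:
#   pip install edge-tts librosa soundfile sounddevice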
import asyncio
import io

import edge_tts
import librosa
import soundfile
import sounddevice as sd
from inference.infer_tool import Svc

TEXT = "こんにちは"              # sample/default text
VOICE = "ja-JP-NanamiNeural"    # edge-tts voice used for the intermediate TTS step
OUTPUT_FILE = "test.mp3"        # temporary file written by edge-tts


def text2tts(text):
    """Synthesize `text` with edge-tts and return it as an in-memory 16 kHz mono WAV."""
    asyncio.run(edge_tts.Communicate(text, VOICE).save(OUTPUT_FILE))
    audio, sr = librosa.load(OUTPUT_FILE, sr=16000, mono=True)
    raw_path = io.BytesIO()
    soundfile.write(raw_path, audio, 16000, format="wav")
    raw_path.seek(0)
    return raw_path


def tts2herta(raw_path):
    """Run the TTS audio through the so-vits-svc model and return the converted waveform."""
    out_audio, out_sr = model.infer(
        "speaker0",          # target speaker name from the model's config.json
        0,                   # pitch shift (transpose), in semitones
        raw_path,
        auto_predict_f0=True,
    )
    return out_audio.cpu().numpy()


model = Svc("Herta-Svc/G_10000.pth", "Herta-Svc/config.json", device="cuda")

while True:
    prompt = input("TTS text (100 words limitation): ")
    out = tts2herta(text2tts(prompt))
    soundfile.write("out_audio.wav", out, 44100)  # the paste hardcodes 44.1 kHz output
    sd.play(out, 44100)
    sd.wait()  # block until playback finishes before prompting again
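# Example session (assumed, not from the paste):
#   TTS text (100 words limitation): こんにちは
#   -> writes out_audio.wav in the converted voice and plays it on the default output device.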