Advertisement
Guest User

Untitled

a guest
Jul 30th, 2023
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.02 KB | None | 0 0
  1. import edge_tts
  2. import asyncio
  3. import librosa
  4. import soundfile
  5. import io
  6. import sounddevice as sd
  7.  
  8. from inference.infer_tool import Svc
  9.  
# Sample Japanese text; not referenced by any code visible in this file.
TEXT = "こんにちは"
# Voice name handed to edge_tts.Communicate() inside text2tts().
VOICE = "ja-JP-NanamiNeural"
# File that text2tts() saves the synthesized speech to and then reloads.
OUTPUT_FILE = "test.mp3"
  13.  
  14.  
  15. def text2tts(text):
  16.     asyncio.run(edge_tts.Communicate(text, VOICE).save(OUTPUT_FILE))
  17.     audio, sr = librosa.load(OUTPUT_FILE, sr=16000, mono=True)
  18.     raw_path = io.BytesIO()
  19.     soundfile.write(raw_path, audio, 16000, format="wav")
  20.     raw_path.seek(0)
  21.     return raw_path
  22.  
  23.  
  24. def tts2herta(raw_path):
  25.     out_audio, out_sr = model.infer('speaker0', 0, raw_path,
  26.                                         auto_predict_f0 = True,
  27.                                         )
  28.     outsound = out_audio.cpu().numpy()
  29.     return outsound
  30.  
  31. model = Svc(fr"Herta-Svc/G_10000.pth", f"Herta-Svc/config.json", device = 'cuda')
  32.  
  33. while(1):
  34.     prompt = input("TTS text (100 words limitation): ")
  35.     out = tts2herta(text2tts(prompt))
  36.     soundfile.write('out_audio.wav', out, 44100)
  37.     sd.play(out, 44100)
  38.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement