import argparse
import asyncio
import io

import edge_tts
import librosa
import soundfile

from inference.infer_tool import Svc


def text2tts(text, voice, output_file):
    # Synthesize speech with edge-tts, then reload it as 16 kHz mono,
    # the input format expected by the Svc model.
    asyncio.run(edge_tts.Communicate(text, voice).save(output_file))
    audio, sr = librosa.load(output_file, sr=16000, mono=True)
    raw_path = io.BytesIO()
    soundfile.write(raw_path, audio, 16000, format="wav")
    raw_path.seek(0)
    return raw_path


def tts2herta(raw_path, model):
    # Run voice conversion; auto_predict_f0=True lets the model estimate pitch
    # instead of transposing by a fixed number of semitones (here 0).
    out_audio, out_sr = model.infer("speaker0", 0, raw_path, auto_predict_f0=True)
    return out_audio.cpu().numpy()


def main(text, output_file):
    voice = "ru-RU-SvetlanaNeural"
    model = Svc("Herta-Svc/G_10000.pth", "Herta-Svc/config.json", device="cpu")
    # output_file holds the intermediate edge-tts audio; the converted result
    # is always written to out_audio.wav at 44.1 kHz.
    raw_path = text2tts(text, voice, output_file)
    out = tts2herta(raw_path, model)
    soundfile.write("out_audio.wav", out, 44100)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Text-to-Speech using Herta-Svc")
    parser.add_argument("-text", type=str, required=True, help="The text to convert to speech")
    parser.add_argument("-file_name", type=str, default="output.wav", help="The output file name")
    args = parser.parse_args()
    main(args.text, args.file_name)
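
# Example invocation (a sketch: the script name herta_tts.py is assumed, and the
# Herta-Svc checkpoint/config must exist at the paths hard-coded in main()):
#
#   python herta_tts.py -text "Привет, мир!" -file_name tts_raw.wav
#
# This writes the raw edge-tts audio to tts_raw.wav and the converted
# Herta voice to out_audio.wav.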