Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import torch
- import time
- import outetts
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the
# CPU so the script can still start on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Configure the GGUF model
# NOTE(review): n_gpu_layers is passed through unchanged on the CPU fallback
# path; confirm the GGUF backend ignores it when no GPU is available.
model_config = outetts.GGUFModelConfig_v1(
    model_path="OuteTTS-0.2-500M-Q6_K.gguf",
    language="en",  # Supported languages in v0.2: en, zh, ja, ko
    n_gpu_layers=24,
    device=device,
)

# Initialize the GGUF interface
interface = outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)

# Load a default speaker profile
speaker = interface.load_default_speaker(name="male_1")
def calculate_audio_duration(output):
    """Return the length of the generated audio in seconds.

    Args:
        output: Object exposing ``audio`` (an array/tensor with a ``shape``
            attribute) and ``sr`` (the sample rate).
            NOTE(review): assumes samples lie along the last axis of
            ``audio`` (e.g. ``(channels, samples)``) - confirm against the
            producer of ``output``.

    Returns:
        The number of samples divided by the sample rate.
    """
    audio_tensor = output.audio
    sample_rate = output.sr
    # Read the last axis rather than axis 1 so that 1-D audio works as well
    # as the 2-D layout; for 2-D input the value is the same as size(1).
    num_samples = audio_tensor.shape[-1]
    print(f"Samples: {num_samples}")
    print(f"Sample Rate: {sample_rate}")
    return num_samples / sample_rate
# Function to perform inference and time it
def synthesize(text):
    """Generate speech for ``text`` and print how long generation took.

    Calls ``interface.generate`` with the module-level ``speaker`` and fixed
    sampling settings, then prints the audio duration (as computed by
    ``calculate_audio_duration``) next to the elapsed generation time.

    Args:
        text: The text to synthesize.

    Returns:
        The object returned by ``interface.generate``.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    output = interface.generate(
        text=text,
        temperature=0.1,
        repetition_penalty=1.1,
        max_length=4096,
        speaker=speaker,
    )
    inference_time = time.perf_counter() - start_time

    audio_duration = calculate_audio_duration(output)
    print(f"Inference time for '{text}': {audio_duration:.4f} seconds audio in {inference_time:.4f} seconds")
    return output
# Texts to synthesize
text_to_synthesize_1 = (
    "Speech synthesis is the artificial production of human speech. "
    "A computer system used for this purpose is called a speech synthesizer, "
    "and it can be implemented in software or hardware products."
)
text_to_synthesize_2 = "Hello my boy"

# Synthesize and time the first text, then write the audio to disk.
# NOTE(review): text_to_synthesize_2 is defined but is not synthesized here.
first_output = synthesize(text_to_synthesize_1)
first_output.save("output1.wav")
Advertisement
Add Comment
Please, Sign In to add comment