Spaces:
Running
Running
| # It is helpful if you want to use it in a voice assistant project. | |
| # To learn more about the API, open {your gradio app url}/?view=api. Example: http://127.0.0.1:7860/?view=api | |
| import shutil | |
| import os | |
| from gradio_client import Client | |
# Folder that receives the generated audio; created up front (no error if it
# already exists) so files can be moved into it later.
output_dir = "temp_audio"
os.makedirs(output_dir, exist_ok=True)

# Gradio client pointed at the app served on the loopback address.
api_url = "http://127.0.0.1:7860/"
client = Client(api_url)
def text_to_speech(
    text="Hello!!",
    model_name="kokoro-v0_19.pth",
    voice_name="af_bella",
    speed=1,
    trim=0,
    pad_between_segments=0,
    remove_silence=False,
    minimum_silence=0.05,
):
    """
    Synthesize speech through the Gradio app and keep the audio locally.

    All arguments are forwarded unchanged to the ``/text_to_speech`` endpoint
    of the module-level ``client``. The value the endpoint returns is treated
    as a file path and moved into ``output_dir`` under its original file name.

    Parameters:
        text (str): The text to convert to speech.
        model_name (str): The name of the model to use for synthesis.
        voice_name (str): The name of the voice to use.
        speed (float): The speed of speech.
        trim (int): Whether to trim silence at the beginning and end.
        pad_between_segments (int): Padding between audio segments.
        remove_silence (bool): Whether to remove silence from the audio.
        minimum_silence (float): Minimum silence duration to consider.

    Returns:
        str: Path to the saved audio file.
    """
    # Collect the endpoint arguments once, then unpack them into the call.
    request = {
        "text": text,
        "model_name": model_name,
        "voice_name": voice_name,
        "speed": speed,
        "trim": trim,
        "pad_between_segments": pad_between_segments,
        "remove_silence": remove_silence,
        "minimum_silence": minimum_silence,
    }
    generated_file = client.predict(**request, api_name="/text_to_speech")

    # Keep the file name chosen by the endpoint; only the folder changes.
    file_name = os.path.basename(generated_file)
    save_at = f"{output_dir}/{file_name}"
    shutil.move(generated_file, save_at)
    print(f"Saved at {save_at}")
    return save_at
# Example usage
if __name__ == "__main__":
    # One keyword per text_to_speech parameter, gathered in a single place.
    example_settings = {
        "text": "This is Kokoro TTS. I am a text-to-speech model and Super Fast.",
        "model_name": "kokoro-v0_19.pth",  # kokoro-v0_19-half.pth is noted as an alternative
        "voice_name": "af_bella",
        "speed": 1,
        "trim": 0,  # only trims silence at both ends
        "pad_between_segments": 0,  # silence added between segments; used for large text
        "remove_silence": False,
        "minimum_silence": 0.05,  # silence to keep, in seconds
    }
    audio_path = text_to_speech(**example_settings)
    print(f"Audio file saved at: {audio_path}")