"""Gradio voice-cloning demo.

Loads the ``tts_models/multilingual/multi-dataset/xtts_v2`` model through
``TTS.api.TTS`` and exposes a small web UI that synthesizes the entered text
using an uploaded reference recording as the speaker.

Importing this module has side effects, in this order: it may build the
``monotonic_align`` extension in place, it patches two ``gradio_client``
schema helpers, it loads the TTS model, and it launches the Gradio interface.
"""
import subprocess
import sys
import tempfile
from pathlib import Path

# Make the application directory importable before the third-party imports
# below, so modules that live next to this file can be resolved.
APP_ROOT = Path(__file__).resolve().parent
if str(APP_ROOT) not in sys.path:
    sys.path.insert(0, str(APP_ROOT))

import gradio as gr  # noqa: E402
import gradio_client.utils as grc_utils  # noqa: E402
import torch  # noqa: E402
from TTS.api import TTS  # noqa: E402

# The monotonic_align core is a compiled extension. If it cannot be imported,
# try building it in place once and import again; fail loudly if that still
# does not work.
try:
    from TTS.tts.utils.monotonic_align import core as _monotonic_align_core  # noqa: F401
except ImportError:
    build_cmd = [sys.executable, "setup.py", "build_ext", "--inplace"]
    subprocess.run(build_cmd, cwd=APP_ROOT, check=True)
    try:
        from TTS.tts.utils.monotonic_align import core as _monotonic_align_core  # noqa: F401
    except ImportError as exc:
        raise RuntimeError(
            "Failed to build monotonic_align extension; ensure build dependencies are installed."
        ) from exc

# Patch Gradio schema helpers to guard against boolean schemas until an
# upstream fix lands. The originals are looked up with getattr so that a
# gradio_client version lacking either helper is simply left unpatched.
_ORIG_GET_TYPE = getattr(grc_utils, "get_type", None)
_ORIG_JSON_TO_PY = getattr(grc_utils, "_json_schema_to_python_type", None)


def _safe_get_type(schema):  # pragma: no cover - runtime patching
    """Wrap ``gradio_client.utils.get_type`` so boolean schemas are accepted.

    A boolean schema is mapped to ``"Any"`` (``True``) or ``"Never"``
    (``False``); every other schema is forwarded to the original helper.

    Raises:
        AttributeError: if the original helper was not found at import time.
    """
    if isinstance(schema, bool):
        return "Any" if schema else "Never"
    if _ORIG_GET_TYPE is None:
        raise AttributeError("gradio_client.utils.get_type is unavailable")
    return _ORIG_GET_TYPE(schema)


def _safe_json_schema_to_python_type(schema, defs=None):  # pragma: no cover
    """Wrap ``gradio_client.utils._json_schema_to_python_type`` for bool schemas.

    A boolean schema is mapped to ``"Any"`` (``True``) or ``"Never"``
    (``False``); every other schema is forwarded, together with ``defs``, to
    the original helper.

    Raises:
        AttributeError: if the original helper was not found at import time.
    """
    if isinstance(schema, bool):
        return "Any" if schema else "Never"
    if _ORIG_JSON_TO_PY is None:
        raise AttributeError("gradio_client.utils._json_schema_to_python_type is unavailable")
    return _ORIG_JSON_TO_PY(schema, defs)


if _ORIG_GET_TYPE is not None:
    grc_utils.get_type = _safe_get_type
if _ORIG_JSON_TO_PY is not None:
    grc_utils._json_schema_to_python_type = _safe_json_schema_to_python_type

# Get device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Init TTS
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

# Language codes offered in the UI.
SUPPORTED_LANGUAGES = [
    'ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt',
    'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu',
]


def voice_clone(text: str, speaker_wav: str, language: str) -> str:
    """Synthesize ``text`` with the voice from ``speaker_wav``.

    Args:
        text: The text to speak.
        speaker_wav: Path to the reference recording (the UI supplies a file
            path because the audio input uses ``type="filepath"``).
        language: One of the language codes offered by the UI.

    Returns:
        Path to the generated WAV file.

    Raises:
        gr.Error: if the text is empty, no reference audio was provided, or
            no language was selected.
    """
    # Unset UI components arrive as None/empty; report that clearly instead of
    # letting the failure surface from inside the TTS call.
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")
    if not speaker_wav:
        raise gr.Error("Please upload a reference audio file.")
    if not language:
        raise gr.Error("Please select a language.")

    print("Speaker wav:", speaker_wav)

    # Use a unique file per call: a single shared "output.wav" would be
    # overwritten by concurrent requests, which could hand one user another
    # user's audio.
    # NOTE: the file is intentionally not deleted here because it must still
    # exist after this function returns; it is left in the OS temp directory.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_wav,
        language=language,
        file_path=output_path,
    )
    return output_path


iface = gr.Interface(
    fn=voice_clone,
    theme="Nymbo/Nymbo_Theme",
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
        gr.Audio(type="filepath", label="Upload audio file"),
        gr.Radio(SUPPORTED_LANGUAGES, label="language"),
    ],
    outputs=gr.Audio(type="filepath", label="Generated audio file"),
    title="Voice Cloning",
    allow_flagging="never",
)
iface.launch()