import importlib
import subprocess
import sys
import tempfile
from pathlib import Path

APP_ROOT = Path(__file__).resolve().parent
if str(APP_ROOT) not in sys.path:
    sys.path.insert(0, str(APP_ROOT))

import gradio as gr
import gradio_client.utils as grc_utils
import torch
from TTS.api import TTS

# The monotonic_align helper is a compiled extension. When it cannot be
# imported, build it in place with the project's setup.py and retry once.
try:
    from TTS.tts.utils.monotonic_align import core as _monotonic_align_core  # noqa: F401
except ImportError:
    # Run the build with the interpreter that is executing this app;
    # check=True raises CalledProcessError if the build command fails.
    build_cmd = [sys.executable, "setup.py", "build_ext", "--inplace"]
    subprocess.run(build_cmd, cwd=APP_ROOT, check=True)
    # The build created module files after the first import attempt. Clear the
    # import system's cached directory listings so the retry can find them.
    importlib.invalidate_caches()
    try:
        from TTS.tts.utils.monotonic_align import core as _monotonic_align_core  # noqa: F401
    except ImportError as exc:
        raise RuntimeError(
            "Failed to build monotonic_align extension; ensure build dependencies are installed."
        ) from exc

# Gradio's schema helpers do not cope with boolean JSON schemas, so they are
# wrapped until an upstream fix lands. Capture the originals first; a helper
# that this gradio_client version does not define is recorded as None.
_ORIG_GET_TYPE, _ORIG_JSON_TO_PY = (
    getattr(grc_utils, helper_name, None)
    for helper_name in ("get_type", "_json_schema_to_python_type")
)

def _safe_get_type(schema):  # pragma: no cover - runtime patching
    """Resolve a schema's type name, answering boolean schemas directly.

    A schema of ``True`` yields ``"Any"`` and ``False`` yields ``"Never"``.
    Any other schema is forwarded unchanged to the captured original helper.

    Raises:
        AttributeError: If the schema is not a bool and no original
            ``get_type`` was captured.
    """
    if not isinstance(schema, bool):
        upstream = _ORIG_GET_TYPE
        if upstream is None:
            raise AttributeError("gradio_client.utils.get_type is unavailable")
        return upstream(schema)

    if schema:
        return "Any"
    return "Never"


def _safe_json_schema_to_python_type(schema, defs=None):  # pragma: no cover
    """Convert a JSON schema to a Python type string, tolerating bool schemas.

    Boolean schemas are answered here (``True`` -> ``"Any"``, ``False`` ->
    ``"Never"``) without consulting ``defs``. Everything else is delegated to
    the captured original helper together with ``defs``.

    Raises:
        AttributeError: If the schema is not a bool and no original
            ``_json_schema_to_python_type`` was captured.
    """
    if isinstance(schema, bool):
        if schema:
            return "Any"
        return "Never"

    delegate = _ORIG_JSON_TO_PY
    if delegate is not None:
        return delegate(schema, defs)
    raise AttributeError("gradio_client.utils._json_schema_to_python_type is unavailable")


# Install each wrapper only when the helper it wraps exists in this
# gradio_client version; a missing helper is left untouched rather than being
# replaced by a wrapper that could only raise for non-boolean schemas.
if _ORIG_GET_TYPE is not None:
    grc_utils.get_type = _safe_get_type
if _ORIG_JSON_TO_PY is not None:
    grc_utils._json_schema_to_python_type = _safe_json_schema_to_python_type

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the model once at module import and move it to the selected device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

def voice_clone(text: str, speaker_wav: str, language: str) -> str:
    """Synthesize ``text`` using the voice from a reference recording.

    Args:
        text: Text to speak.
        speaker_wav: Path to the reference audio file supplying the voice.
        language: Language code forwarded to the TTS model.

    Returns:
        Path of the generated WAV file.

    Raises:
        gr.Error: If the text is blank, no reference audio was supplied, or
            no language was selected.
    """
    # The audio and language inputs have no default, so the UI can submit
    # None for them. Reject incomplete requests with a readable message
    # instead of passing them on to the model.
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")
    if not speaker_wav:
        raise gr.Error("Please upload a reference audio file.")
    if not language:
        raise gr.Error("Please select a language.")

    print("Speaker wav:", speaker_wav)

    # Give every request its own output file. A single shared "output.wav"
    # would be overwritten by overlapping requests and depends on the current
    # working directory. delete=False keeps the file after the handle closes
    # so the model can write to the path; the file is not removed here.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    tts.tts_to_file(text=text, speaker_wav=speaker_wav, language=language, file_path=output_path)
    return output_path

# Build the web UI around voice_clone. The three inputs are listed in the same
# order as the function's parameters (text, speaker_wav, language), and the
# file path it returns is shown in the audio output.
iface = gr.Interface(
    fn=voice_clone,
    theme="Nymbo/Nymbo_Theme",  # NOTE(review): presumably a shared theme resolved by name at startup — confirm
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
        # type="filepath" matches voice_clone's speaker_wav: str parameter.
        gr.Audio(type="filepath", label="Upload audio file"),
        # Language codes offered to the user; no initial value is set here.
        gr.Radio(['ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt', 'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu'], label="language"),
    ],
    outputs=gr.Audio(type="filepath", label="Generated audio file"),
    title="Voice Cloning",
    allow_flagging="never",
)

# Start the app. This call is unguarded at module level, so it runs both when
# the file is executed as a script and when it is imported.
iface.launch()