File size: 5,017 Bytes
e2c61ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import json
import numpy as np
import gradio as gr
from dotenv import load_dotenv
from matplotlib import cm
 
load_dotenv()
 
# -------------------------
# 0) Env & defaults
# -------------------------
# Which inference backend to load; normalized to lowercase for the comparison below.
BACKEND = os.getenv("DETECTOR_BACKEND", "wav2vec2").strip().lower()  # "wav2vec2" or "cnn"

# Default checkpoint paths per backend; the MODEL_WEIGHTS_PATH env var overrides both.
DEFAULT_W2V_WEIGHTS = "app/models/weights/wav2vec2_classifier.pth"
DEFAULT_CNN_WEIGHTS = "app/models/weights/cnn_melspec.pth"
DEFAULT_WEIGHTS = DEFAULT_W2V_WEIGHTS if BACKEND == "wav2vec2" else DEFAULT_CNN_WEIGHTS
# Final weights path handed to the Detector: env override if set, else the backend default.
MODEL_WEIGHTS_PATH = os.getenv("MODEL_WEIGHTS_PATH", DEFAULT_WEIGHTS).strip()
 
 
# -------------------------
# 1) Import your Detector
# -------------------------
def _import_detector(backend):
    """

    Import the correct Detector class depending on backend and package layout.

    Works both when run as a module ('.inference_*') and as a script ('app.inference_*').

    """
    try:
        if backend == "wav2vec2":
            from .inference_wav2vec import Detector  # type: ignore
        else:
            from .inference import Detector  # type: ignore
    except Exception:
        if backend == "wav2vec2":
            from app.inference_wav2vec import Detector  # type: ignore
        else:
            from app.inference import Detector  # type: ignore
    return Detector
 
 
try:
    Detector = _import_detector(BACKEND)
except Exception as e:
    # Capture the message NOW: per PEP 3110, `e` is unbound as soon as this
    # except clause exits, so the fallback methods below must not reference
    # it directly (the original code did, raising NameError at call time).
    _IMPORT_ERROR_MSG = f"Detector import failed: {e}"

    # Fallback dummy to keep the UI alive even if import fails,
    # so you can see the error in the JSON panel.
    class Detector:  # type: ignore
        def __init__(self, *args, **kwargs):
            # Frozen copy of the import error, surfaced via predict_proba().
            self._err = _IMPORT_ERROR_MSG

        def predict_proba(self, *args, **kwargs):
            # Report the import failure instead of probabilities.
            return {"error": self._err}

        def explain(self, *args, **kwargs):
            # Blank 128x128 saliency map so the heatmap panel still renders.
            return {"cam": np.zeros((128, 128), dtype=np.float32).tolist()}
 
 
# Single, shared detector (created lazily so startup is fast on Spaces)
_DET = None


def _get_detector():
    """Return the process-wide Detector, constructing it on first use."""
    global _DET
    if _DET is not None:
        return _DET
    _DET = Detector(weights_path=MODEL_WEIGHTS_PATH)
    return _DET
 
 
# -------------------------
# 2) Core functions
# -------------------------
def predict_and_explain(audio_path: str | None, source_hint: str):
    """

    audio_path: filepath from Gradio (since type='filepath')

    source_hint: "Auto", "Microphone", "Upload"

    """
    source = (source_hint or "Auto").strip().lower()
 
    if not audio_path or not os.path.exists(audio_path):
        return {"error": "No audio received. Record or upload a 2–4s clip."}, None
 
    det = _get_detector()
 
    # Your Detector is expected to accept a file path and optional source hint
    proba = det.predict_proba(audio_path, source_hint=source)
    exp = det.explain(audio_path, source_hint=source)
 
    # Explanation to heatmap (float [0,1] -> magma RGB uint8)
    cam = np.array(exp.get("cam", []), dtype=np.float32)
    if cam.ndim == 1:
        # if model returned a 1D vector, tile to square-ish map
        side = int(np.sqrt(cam.size))
        side = max(side, 2)
        cam = cam[: side * side].reshape(side, side)
    cam = np.clip(cam, 0.0, 1.0)
    cam_rgb = (cm.magma(cam)[..., :3] * 255).astype(np.uint8)
 
    # Ensure proba is JSON-serializable
    if not isinstance(proba, dict):
        proba = {"result": proba}
 
    return proba, cam_rgb
 
 
def provenance(audio_path: str | None):
    # Stub (you can wire a provenance model or checksum here)
    return {"ok": True, "note": "Provenance check not wired in this app.py."}
 
 
# -------------------------
# 3) UI
# -------------------------
# NOTE: component creation order inside the Blocks context defines the page
# layout, so statements here must not be reordered.
with gr.Blocks(title=f"AI Voice Detector · {BACKEND.upper()}") as demo:
    gr.Markdown(f"# 🔎 AI Voice Detector — Backend: **{BACKEND.upper()}**")
    gr.Markdown(
        "Record or upload a short clip (~3s). Get probabilities, a label, and an explanation heatmap."
    )

    # Input row: audio widget on the left, controls stacked on the right.
    with gr.Row():
        audio_in = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")
        with gr.Column():
            src = gr.Radio(choices=["Auto", "Microphone", "Upload"], value="Auto", label="Source")
            btn_predict = gr.Button("Analyze", variant="primary")
            btn_prov = gr.Button("Provenance Check (optional)")

    # Output row: prediction JSON beside the saliency heatmap image.
    with gr.Row():
        json_out = gr.JSON(label="Prediction (probabilities + label)")
        cam_out = gr.Image(label="Explanation Heatmap (saliency)")
    prov_out = gr.JSON(label="Provenance Result (if available)")

    # Wire buttons to the core functions defined above.
    btn_predict.click(predict_and_explain, inputs=[audio_in, src], outputs=[json_out, cam_out])
    btn_prov.click(provenance, inputs=audio_in, outputs=prov_out)
 
 
# -------------------------
# 4) Launch (Spaces-friendly)
# -------------------------
if __name__ == "__main__":
    # queue() keeps UI responsive under load; host/port are Spaces-safe and local-friendly
    # (0.0.0.0:7860 is the address Hugging Face Spaces expects a Gradio app to bind).
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)