Mahmoud Elsamadony committed
Commit: ce8875c
Parent(s): cf179b4

Update GPU Usage

Files changed:
- api_client.py +1 -1
- app.py +41 -6
- spaces.yml +7 -0

api_client.py CHANGED

@@ -125,4 +125,4 @@ if __name__ == "__main__":
     # Install gradio_client first:
     # pip install gradio_client
 
-    main()
+    main()
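
The comments in this hunk point to gradio_client as the intended way to drive the app remotely. A rough, self-contained sketch of such a call follows; the Space id, audio file name, and api_name are placeholders for illustration, not values taken from this repo.

# Hypothetical gradio_client call; the Space id, file name, and
# api_name below are placeholders, not values from this repo.
from gradio_client import Client, handle_file

client = Client("username/speaker-diarization-demo")  # placeholder Space id
result = client.predict(
    handle_file("meeting.wav"),  # local audio file to upload
    api_name="/predict",         # assumed endpoint name
)
print(result)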

app.py CHANGED

@@ -1,6 +1,7 @@
+from __future__ import annotations
 import os
 import tempfile
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Any
 
 import gradio as gr
 import torch
@@ -24,8 +25,22 @@ load_dotenv()
 # Whisper model: use same model names as Django app (tiny, base, small, medium, large-v3)
 # faster-whisper will download these automatically from Hugging Face on first run
 WHISPER_MODEL_SIZE = os.environ.get("WHISPER_MODEL_SIZE", "large-v3")
-
-
+
+# Prefer GPU on Hugging Face Spaces if available, but allow override via env
+def _default_device() -> str:
+    try:
+        return "cuda" if torch.cuda.is_available() else "cpu"
+    except Exception:
+        return "cpu"
+
+WHISPER_DEVICE = os.environ.get("WHISPER_DEVICE") or _default_device()
+
+# Choose a sensible default compute type based on device (can be overridden by env)
+# - GPU: float16 is fastest and fits T4 for small/medium; use int8_float16 to save VRAM for large-v3
+# - CPU: int8_float32 works well
+WHISPER_COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE_TYPE") or (
+    "float16" if WHISPER_DEVICE == "cuda" else "int8_float32"
+)
 
 # Diarization: NVIDIA NeMo Sortformer model
 DIARIZATION_MODEL_NAME = os.environ.get(
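
For context on the two new settings: faster-whisper's WhisperModel constructor accepts device and compute_type directly, so the constants above pass through unchanged. A minimal, self-contained sketch of that usage (the audio file name is hypothetical; the repo's actual loader is the _load_whisper_model singleton touched in the hunks below):

# Sketch of how WHISPER_DEVICE / WHISPER_COMPUTE_TYPE feed into faster-whisper.
import torch
from faster_whisper import WhisperModel

device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8_float32"

model = WhisperModel("large-v3", device=device, compute_type=compute_type)
segments, info = model.transcribe("meeting.wav")  # hypothetical input file
for seg in segments:
    print(f"[{seg.start:.2f} -> {seg.end:.2f}] {seg.text}")
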
@@ -57,7 +72,7 @@ expected_speakers_default = int(os.environ.get("EXPECTED_SPEAKERS", 2))
 # Lazy singletons for the heavy models
 # ---------------------------------------------------------------------------
 _whisper_model: Optional[WhisperModel] = None
-_diarization_model: Optional[SortformerEncLabelModel] = None
+_diarization_model: Optional[Any] = None
 
 
 def _ensure_snapshot(repo_id: str, local_dir: str, allow_patterns: Optional[List[str]] = None) -> str:
@@ -92,7 +107,7 @@ def _load_whisper_model() -> WhisperModel:
     return _whisper_model
 
 
-def _load_diarization_model() -> Optional[SortformerEncLabelModel]:
+def _load_diarization_model() -> Optional[Any]:
     """Load NVIDIA NeMo Sortformer diarization model lazily (singleton)"""
     global _diarization_model
    if _diarization_model is None:
@@ -111,6 +126,19 @@ def _load_diarization_model() -> Optional[SortformerEncLabelModel]:
 
         # Switch to evaluation mode
         _diarization_model.eval()
+
+        # Move to GPU if available on Spaces
+        if torch.cuda.is_available():
+            try:
+                _diarization_model.to("cuda")
+                print("[DEBUG] Moved Sortformer model to CUDA device")
+            except Exception:
+                # Fallback for modules exposing .cuda()
+                try:
+                    _diarization_model.cuda()
+                    print("[DEBUG] Moved Sortformer model to CUDA via .cuda()")
+                except Exception as _e:
+                    print(f"[WARN] Could not move Sortformer model to GPU: {_e}")
 
         # Configure streaming parameters (high latency preset for better accuracy)
         # See: https://huggingface.co/nvidia/diar_streaming_sortformer_4spk-v2#setting-up-streaming-configuration
@@ -443,6 +471,10 @@ def build_interface() -> gr.Blocks:
             """
         )
 
+        gr.Markdown(
+            f"Running on device: `{WHISPER_DEVICE}` with compute type: `{WHISPER_COMPUTE_TYPE}`"
+        )
+
         with gr.Row():
             audio_input = gr.Audio(type="filepath", label="Upload audio (mp3, wav, m4a, ...)")
             options = gr.Column()
@@ -512,10 +544,13 @@
             """
         )
 
+    # Use a queue to serialize work on GPU and avoid OOM on Spaces free/shared GPUs
+    demo.queue(default_concurrency_limit=1, max_size=16)
+
     return demo
 
 
 demo = build_interface()
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
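
A note on the queue call in the last hunk: Gradio 4.x replaced the old queue(concurrency_count=...) argument with default_concurrency_limit, which is the spelling used above. A minimal sketch of the pattern with a stand-in event handler:

# Sketch of GPU-serializing queue setup in Gradio 4.x: default_concurrency_limit=1
# runs events one at a time, and max_size bounds how many requests may wait.
import gradio as gr

def echo(text: str) -> str:
    return text  # stand-in for the real GPU-bound transcription function

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    inp.submit(echo, inputs=inp, outputs=out)

demo.queue(default_concurrency_limit=1, max_size=16)

if __name__ == "__main__":
    demo.launch()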

spaces.yml CHANGED

@@ -1,3 +1,10 @@
 sdk: gradio
 sdk_version: 4.42.0
 python_version: 3.10
+
+# Request a GPU on Hugging Face Spaces. Common options include:
+# - t4-small (entry-level GPU tier)
+# - a10g-small
+# - a100-large
+# Adjust as needed in the Space settings UI.
+hardware: t4-small