set -euo pipefail
echo "================= RUNTIME CAPABILITIES ================="
date
echo
if command -v nvidia-smi >/dev/null 2>&1; then
  nvidia-smi
else
  echo "nvidia-smi: not available"
fi
echo
echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
if command -v nvcc >/dev/null 2>&1; then
  nvcc --version || true
else
  echo "nvcc: not available"
fi
echo
echo "[PyTorch / CUDA backend]"
python3 - <<'PY'
import json, os, torch, inspect

def to_bool(x):
    try:
        if callable(x):
            try:
                sig = inspect.signature(x)
                if len(sig.parameters) == 0:
                    return bool(x())
            except Exception:
                pass
            return True
        return bool(x)
    except Exception:
        return None

info = {
    "torch": getattr(torch, "__version__", None),
    "cuda_available": torch.cuda.is_available(),
    "cuda_device_count": torch.cuda.device_count(),
    "cuda_runtime_version": getattr(torch.version, "cuda", None),
    "cudnn_version": torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None,
    "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
    # Prefer the zero-argument query functions (torch >= 2.0) so the actual enabled state is
    # reported; fall back to the one-argument setters, which to_bool() reports as True simply
    # because they exist.
    "flash_sdp": (to_bool(getattr(torch.backends.cuda, "flash_sdp_enabled", getattr(torch.backends.cuda, "enable_flash_sdp", None))) if torch.cuda.is_available() else None),
    "mem_efficient_sdp": (to_bool(getattr(torch.backends.cuda, "mem_efficient_sdp_enabled", getattr(torch.backends.cuda, "enable_mem_efficient_sdp", None))) if torch.cuda.is_available() else None),
    "math_sdp": (to_bool(getattr(torch.backends.cuda, "math_sdp_enabled", getattr(torch.backends.cuda, "enable_math_sdp", None))) if torch.cuda.is_available() else None),
}
print(json.dumps(info, indent=2))
for i in range(min(torch.cuda.device_count(), 16)):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
PY
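echo
# Hedged addition (not in the original script): a tiny functional check that actually runs a
# matmul on GPU 0, complementing the introspection above. Purely illustrative.
echo "[Optional: CUDA matmul sanity check]"
python3 - <<'PY'
import torch
if torch.cuda.is_available():
    # A 64x64 fp32 matmul is enough to confirm the CUDA path executes end to end.
    x = torch.randn(64, 64, device="cuda")
    y = x @ x
    torch.cuda.synchronize()
    print("cuda matmul: OK", tuple(y.shape))
else:
    print("cuda matmul: skipped (no CUDA device)")
PY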
echo
echo "[Apex (FusedLayerNorm/RMSNorm)]"
python3 - <<'PY'
try:
    from apex.normalization import FusedLayerNorm, FusedRMSNorm
    import importlib; importlib.import_module("fused_layer_norm_cuda")
    print("apex.normalization: OK")
except Exception as e:
    print("apex.normalization: FAIL ->", e)
PY
echo
echo "[FlashAttention (CUDA/Triton/RMSNorm)]"
python3 - <<'PY'
import importlib
mods = [
    'flash_attn', 'flash_attn_2_cuda',
    'flash_attn.ops.rms_norm', 'flash_attn.ops.layer_norm',
    'flash_attn.layers.layer_norm'
]
for m in mods:
    try:
        importlib.import_module(m)
        print(f"{m}: OK")
    except Exception as e:
        print(f"{m}: FAIL -> {e}")
PY
echo
| echo "[FlashAttention versão/details]" | |
| python3 - <<'PY' | |
| try: | |
| import flash_attn | |
| fa_ver = getattr(flash_attn, "__version__", None) | |
| print(f"flash_attn: {fa_ver}") | |
| except Exception: | |
| print("flash_attn: not importable.") | |
| try: | |
| import torch | |
| print(f"torch: {torch.__version__} | cuda: {getattr(torch.version, 'cuda', None)}") | |
| except Exception: | |
| pass | |
| PY | |
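echo
# Hedged addition (not in the original script): a minimal functional call through
# flash_attn_func on tiny fp16 tensors, assuming flash-attn 2.x and a supported GPU.
echo "[Optional: FlashAttention functional check]"
python3 - <<'PY'
import torch
try:
    from flash_attn import flash_attn_func
    if torch.cuda.is_available():
        # Shapes follow flash-attn's (batch, seqlen, nheads, headdim) convention.
        q = torch.randn(1, 8, 4, 64, device="cuda", dtype=torch.float16)
        k = torch.randn_like(q)
        v = torch.randn_like(q)
        out = flash_attn_func(q, k, v, causal=True)
        print("flash_attn_func: OK", tuple(out.shape))
    else:
        print("flash_attn_func: skipped (no CUDA device)")
except Exception as e:
    print("flash_attn_func: FAIL ->", e)
PY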
echo
echo "[Triton]"
python3 - <<'PY'
try:
    import triton
    print("triton:", triton.__version__)
    try:
        import triton.ops as _; print("triton.ops: OK")
    except Exception:
        print("triton.ops: not present (ok on Triton >= 3.x)")
except Exception as e:
    print("triton: FAIL ->", e)
PY
echo
echo "[BitsAndBytes (Q8/Q4)]"
python3 - <<'PY'
try:
    import bitsandbytes as bnb
    print("bitsandbytes:", bnb.__version__)
    try:
        from bitsandbytes.triton import _custom_ops as _; print("bnb.triton._custom_ops: OK")
    except Exception as e:
        print("bnb.triton: partial ->", e)
except Exception as e:
    print("bitsandbytes: FAIL ->", e)
PY
echo
echo "[Transformers / Diffusers / XFormers / EcoML]"
python3 - <<'PY'
def _v(m):
    try:
        mod = __import__(m)
        print(f"{m}: {getattr(mod, '__version__', 'unknown')}")
    except Exception as e:
        print(f"{m}: FAIL -> {e}")

for m in ("transformers", "diffusers", "xformers", "ecuml", "mlx", "ecobase"):
    _v(m)
PY
echo
echo "[Distributed / NCCL Env]"
# `|| true` keeps set -e/pipefail from aborting the script when no matching variables are set.
env | grep -E '^(CUDA_VISIBLE_DEVICES|NCCL_|TORCH_|ENABLE_.*SDP|HF_HUB_.*|CUDA_|NV_.*NCCL.*|PYTORCH_CUDA_ALLOC_CONF)=' | sort || true
echo
echo "[Output dir/perms]"
OUT="/app/outputs"
echo "OUT dir: $OUT"
mkdir -p "$OUT"
ls -la "$OUT" || true
echo "================= END CAPABILITIES ================="