#!/usr/bin/env bash

set -euo pipefail
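
# Prints a snapshot of the ML runtime stack: GPU/driver (nvidia-smi), CUDA toolkit
# (nvcc), PyTorch backend features, and the optional acceleration libraries checked
# below (Apex, FlashAttention, Triton, bitsandbytes, Transformers/Diffusers/xformers).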

echo "================= RUNTIME CAPABILITIES ================="
date

echo
if command -v nvidia-smi >/dev/null 2>&1; then
  nvidia-smi || true
else
  echo "nvidia-smi: not available"
fi
echo

echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
if command -v nvcc >/dev/null 2>&1; then
  nvcc --version || true
else
  echo "nvcc: not available"
fi
echo

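# PyTorch: core CUDA/cuDNN versions plus whether TF32 and the SDP attention
# backends (flash / memory-efficient / math) are currently enabled.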
echo "[PyTorch / CUDA backend]"
python3 - <<'PY'
import json, torch

def sdp_enabled(name):
    # Prefer the *_sdp_enabled() query helpers (available in newer PyTorch). If they
    # are missing, fall back to reporting whether the enable_* toggle exists at all.
    fn = getattr(torch.backends.cuda, f"{name}_enabled", None)
    if callable(fn):
        try:
            return bool(fn())
        except Exception:
            return None
    return getattr(torch.backends.cuda, f"enable_{name}", None) is not None

info = {
    "torch": getattr(torch, "__version__", None),
    "cuda_available": torch.cuda.is_available(),
    "cuda_device_count": torch.cuda.device_count(),
    "cuda_runtime_version": getattr(torch.version, "cuda", None),
    "cudnn_version": torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None,
    "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
    "flash_sdp": (sdp_enabled("flash_sdp") if torch.cuda.is_available() else None),
    "mem_efficient_sdp": (sdp_enabled("mem_efficient_sdp") if torch.cuda.is_available() else None),
    "math_sdp": (sdp_enabled("math_sdp") if torch.cuda.is_available() else None),
}
print(json.dumps(info, indent=2))
for i in range(min(torch.cuda.device_count(), 16)):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
PY
echo

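# Apex: importing the compiled fused_layer_norm_cuda extension confirms the fused
# LayerNorm/RMSNorm CUDA kernels were actually built, not just the Python wrappers.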
echo "[Apex (FusedLayerNorm/RMSNorm)]"
python3 - <<'PY'
try:
    from apex.normalization import FusedLayerNorm, FusedRMSNorm
    import importlib; importlib.import_module("fused_layer_norm_cuda")
    print("apex.normalization: OK")
except Exception as e:
    print("apex.normalization: FAIL ->", e)
PY
echo

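# FlashAttention: flash_attn_2_cuda is the compiled attention kernel extension; the
# ops/layers submodules cover the optional fused RMSNorm/LayerNorm kernels.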
echo "[FlashAttention (CUDA/Triton/RMSNorm)]"
python3 - <<'PY'
import importlib
mods = [
    'flash_attn', 'flash_attn_2_cuda',
    'flash_attn.ops.rms_norm', 'flash_attn.ops.layer_norm',
    'flash_attn.layers.layer_norm'
]
for m in mods:
    try:
        importlib.import_module(m)
        print(f"{m}: OK")
    except Exception as e:
        print(f"{m}: FAIL -> {e}")
PY
echo

echo "[FlashAttention versão/details]"
python3 - <<'PY'
try:
    import flash_attn
    fa_ver = getattr(flash_attn, "__version__", None)
    print(f"flash_attn: {fa_ver}")
except Exception:
    print("flash_attn: not importable.")
try:
    import torch
    print(f"torch: {torch.__version__} | cuda: {getattr(torch.version, 'cuda', None)}")
except Exception:
    pass
PY
echo

echo "[Triton]"
python3 - <<'PY'
try:
    import triton
    print("triton:", triton.__version__)
    try:
        import triton.ops as _; print("triton.ops: OK")
    except Exception:
        print("triton.ops: not present (ok on Triton>=3.x)")
except Exception as e:
    print("triton: FAIL ->", e)
PY
echo

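# bitsandbytes: report the version and whether its optional Triton custom ops import.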
echo "[BitsAndBytes (Q8/Q4)]"
python3 - <<'PY'
try:
    import bitsandbytes as bnb
    print("bitsandbytes:", bnb.__version__)
    try:
        from bitsandbytes.triton import _custom_ops as _; print("bnb.triton._custom_ops: OK")
    except Exception as e:
        print("bnb.triton: partial ->", e)
except Exception as e:
    print("bitsandbytes: FAIL ->", e)
PY
echo

echo "[Transformers / Diffusers / XFormers / EcoML]"
python3 - <<'PY'
def _v(m):
    try:
        mod = __import__(m)
        print(f"{m}: {getattr(mod, '__version__', 'unknown')}")
    except Exception as e:
        print(f"{m}: FAIL -> {e}")
for m in ("transformers", "diffusers", "xformers", "ecuml", "mlx", "ecobase"):
    _v(m)
PY
echo

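# Environment variables that influence distributed training (NCCL), CUDA visibility,
# SDP backend selection, HF Hub caching, and the CUDA allocator.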
echo "[Distribuído / NCCL Env]"
env | grep -E '^(CUDA_VISIBLE_DEVICES|NCCL_|TORCH_|ENABLE_.*SDP|HF_HUB_.*|CUDA_|NV_.*NCCL.*|PYTORCH_CUDA_ALLOC_CONF)=' | sort
echo

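# Ensure the output directory exists and show its permissions.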
echo "[Output dir/perms]"
OUT="/app/outputs"
echo "OUT dir: $OUT"
mkdir -p "$OUT"
ls -la "$OUT" || true

echo "================= END CAPABILITIES ================="