Spaces:
Paused
Paused
# Strict mode: abort on errors, on unset variables and on failures inside pipelines.
set -euo pipefail

echo "🚀 Builder (FlashAttn LayerNorm extra + Apex + Q8) — runtime com GPU visível"

# ===== Configuration and directories =====
# /app/wheels holds built/downloaded wheels, /app/wheels/src the source
# checkouts, /app/cuda_cache is exported as the CUDA cache path.
mkdir -p /app/wheels /app/cuda_cache /app/wheels/src
chmod -R 777 /app/wheels || true
export CUDA_CACHE_PATH="/app/cuda_cache"

# Keep the NGC license next to the wheels when the base image ships one.
if [[ -f "/NGC-DL-CONTAINER-LICENSE" ]]; then
  cp -f /NGC-DL-CONTAINER-LICENSE /app/wheels/NGC-DL-CONTAINER-LICENSE || true
fi

# ===== Minimal build dependencies (best-effort) =====
python -m pip install -v -U \
  pip build setuptools wheel hatchling hatch-vcs scikit-build-core cmake ninja packaging \
  "huggingface_hub[hf_transfer]" || true

# ===== Environment tags (Python / Torch / CUDA) =====
# CPython tag such as "cp310"; falls back to cp310 when the probe fails.
PY_TAG="$(python -c 'import sys; print("cp%d%d" % sys.version_info[:2])' 2>/dev/null || echo cp310)"

# Torch version without the local "+cuXXX" suffix, or "unknown".
TORCH_VER="$(python - <<'PY'
try:
    import torch
    print(torch.__version__.split("+", 1)[0])
except Exception:
    print("unknown")
PY
)"

# CUDA tag such as "cu121" taken from torch.version.cuda; empty when unavailable.
CU_TAG="$(python - <<'PY'
try:
    import torch
    cuda_version = getattr(torch.version, "cuda", None)
    tag = "cu" + cuda_version.replace(".", "") if cuda_version else ""
except Exception:
    tag = ""
print(tag)
PY
)"

echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} CU_TAG=${CU_TAG}"
| # ============================================================================ | |
| # CHECKERS | |
| # ============================================================================ | |
| # Checa especificamente o módulo nativo requerido pelo layer_norm (sem checar 'flash-attn' geral) | |
# Returns 0 when the native layer_norm backend of FlashAttention is importable,
# 1 otherwise. Only the layer_norm pieces are probed, not "flash-attn" as a whole.
check_flash_layer_norm_bin () {
  python - <<'PY'
import importlib

# A torch C++/CUDA extension must be imported after torch itself so the
# dynamic linker can resolve the libtorch symbols; without this the bare
# "dropout_layer_norm" probe fails even when the extension is installed.
try:
    import torch  # noqa: F401
except Exception:
    pass

# Modules known to be backed by csrc/layer_norm.
candidates = (
    "dropout_layer_norm",         # native extension module
    "flash_attn.ops.layer_norm",  # python wrapper that uses the native module
    "flash_attn.ops.rms_norm",    # may rely on the same backend in some packagings
)

ok = False
for name in candidates:
    try:
        importlib.import_module(name)
    except Exception:
        continue
    ok = True
    break

raise SystemExit(0 if ok else 1)
PY
}
# Returns 0 when Apex's FusedLayerNorm and its native module
# "fused_layer_norm_cuda" both import, 1 otherwise.
check_apex () {
  python - <<'PY'
import importlib
import sys

try:
    from apex.normalization import FusedLayerNorm  # noqa: F401
    importlib.import_module("fused_layer_norm_cuda")
except Exception:
    sys.exit(1)
sys.exit(0)
PY
}
# Returns 0 when a Q8 kernels package ("ltx_q8_kernels" or "q8_kernels") can be
# located by the import system, 1 otherwise. The package is only located, not imported.
check_q8 () {
  python - <<'PY'
import sys
from importlib.util import find_spec

found = any(find_spec(name) for name in ("ltx_q8_kernels", "q8_kernels"))
sys.exit(0 if found else 1)
PY
}
| # ============================================================================ | |
| # DOWNLOAD DO HUB (GENÉRICO) | |
| # ============================================================================ | |
| # Instala uma wheel do HF por prefixo simples (ex.: apex-, q8_kernels-) | |
# Downloads the best matching "<prefix>-*.whl" from the Hugging Face repo into
# /app/wheels.
#
# Arguments:
#   $1  wheel name prefix (e.g. "apex", "q8_kernels")
# Reads:
#   PY_TAG, CU_TAG, SELF_HF_REPO_ID (optional), HF_TOKEN (optional)
# Output:
#   stdout carries ONLY the local path of the downloaded wheel (nothing when no
#   wheel matches or the download fails); all logging goes to stderr so callers
#   can safely capture stdout with $(...).
# Returns:
#   always 0 (best-effort lookup).
install_from_hf_by_prefix () {
  local PREFIX="$1"
  # The variable is optional; default it here so `set -u` does not abort.
  local REPO="${SELF_HF_REPO_ID:-euIaxs22/Aduc-sdr}"
  echo "[hub] Procurando wheels '${PREFIX}-*.whl' em ${REPO} com tags ${PY_TAG}/${CU_TAG}" >&2
  # `return` (not `exit`) so a direct call never terminates the whole script.
  python - "$PREFIX" "$PY_TAG" "$CU_TAG" <<'PY' || return 0
import os
import sys

from huggingface_hub import HfApi, hf_hub_download, HfFolder

prefix, py_tag, cu_tag = sys.argv[1], sys.argv[2], sys.argv[3]
repo = os.environ.get("SELF_HF_REPO_ID") or "euIaxs22/Aduc-sdr"
api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())

try:
    files = api.list_repo_files(repo_id=repo, repo_type="model")
except Exception:
    raise SystemExit(0)


def match(name: str) -> bool:
    """Wheel whose basename starts with '<prefix>-' and carries the Python tag."""
    base = name.rsplit("/", 1)[-1]
    return name.endswith(".whl") and base.startswith(prefix + "-") and py_tag in name


cands = [f for f in files if match(f)]
# Prefer wheels built for the same CUDA tag; otherwise accept any candidate.
pref = [f for f in cands if cu_tag and cu_tag in f] or cands
if not pref:
    raise SystemExit(0)

target = max(pref)  # lexicographically greatest name, as a "newest" heuristic
print(target, file=sys.stderr)
try:
    path = hf_hub_download(
        repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels"
    )
except Exception as exc:
    # Keep stdout empty so the caller falls back to building from source.
    print(f"[hub] download failed: {exc}", file=sys.stderr)
    raise SystemExit(0)
print(path)
PY
}
| # Instala wheels do submódulo layer_norm aceitando variantes de nome | |
# Downloads a FlashAttention layer_norm wheel from the Hugging Face repo into
# /app/wheels, accepting the known naming variants of that sub-package.
#
# Reads:
#   PY_TAG, CU_TAG, SELF_HF_REPO_ID (optional), HF_TOKEN (optional)
# Output:
#   stdout carries ONLY the local path of the downloaded wheel (nothing when no
#   wheel matches or the download fails); all logging goes to stderr so callers
#   can safely capture stdout with $(...).
# Returns:
#   always 0 (best-effort lookup).
install_flash_layer_norm_from_hf () {
  # The variable is optional; default it here so `set -u` does not abort.
  local REPO="${SELF_HF_REPO_ID:-euIaxs22/Aduc-sdr}"
  echo "[hub] Procurando wheels FlashAttention LayerNorm em ${REPO}" >&2
  # `return` (not `exit`) so a direct call never terminates the whole script.
  python - "$PY_TAG" "$CU_TAG" <<'PY' || return 0
import os
import re
import sys

from huggingface_hub import HfApi, hf_hub_download, HfFolder

py_tag, cu_tag = sys.argv[1], sys.argv[2]
repo = os.environ.get("SELF_HF_REPO_ID") or "euIaxs22/Aduc-sdr"
api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())

try:
    files = api.list_repo_files(repo_id=repo, repo_type="model")
except Exception:
    raise SystemExit(0)

# Accepted wheel basenames for the layer_norm sub-package.
pats = [
    re.compile(p, flags=re.I)
    for p in (
        r"^flash[_-]?attn[_-]?.*layer[_-]?norm-.*\.whl$",
        r"^dropout[_-]?layer[_-]?norm-.*\.whl$",
    )
]


def ok(fn: str) -> bool:
    """Basename carries the Python tag and matches one of the patterns."""
    name = fn.rsplit("/", 1)[-1]
    return py_tag in name and any(p.search(name) for p in pats)


cands = [f for f in files if ok(f)]
# Prefer wheels built for the same CUDA tag; otherwise accept any candidate.
pref = [f for f in cands if cu_tag and cu_tag in f] or cands
if not pref:
    raise SystemExit(0)

target = max(pref)  # lexicographically greatest name, as a "newest" heuristic
print(target, file=sys.stderr)
try:
    path = hf_hub_download(
        repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels"
    )
except Exception as exc:
    # Keep stdout empty so the caller falls back to building from source.
    print(f"[hub] download failed: {exc}", file=sys.stderr)
    raise SystemExit(0)
print(path)
PY
}
| # ============================================================================ | |
| # BUILDERS | |
| # ============================================================================ | |
| # Passo extra: SEMPRE tenta instalar o submódulo layer_norm via wheel do HF; | |
| # se não houver wheel compatível, compila a partir de csrc/layer_norm e gera wheel. | |
# Installs the FlashAttention layer_norm extension.
#
# Flow:
#   1) try a prebuilt wheel from the Hub (install_flash_layer_norm_from_hf);
#   2) otherwise build csrc/layer_norm from source into a wheel, keep a copy in
#      /app/wheels and install it;
#   3) verify with check_flash_layer_norm_bin.
# Exports: TORCH_CUDA_ARCH_LIST, MAX_JOBS.
# Returns: 0 when the extension imports, 1 otherwise.
build_or_install_flash_layer_norm () {
  # Locals keep this function from clobbering same-named script globals.
  local HF_OUT WHEEL_PATH OUT_DIR W
  local SRC="/app/wheels/src/flash-attn"

  echo "[flow] === FlashAttn LayerNorm (passo extra) ==="

  # 1) Prefer a wheel from the Hub (avoids recompiling). Only the last output
  #    line is considered, and only if it is an existing file, so stray log
  #    lines are never handed to pip.
  HF_OUT="$(install_flash_layer_norm_from_hf || true)"
  WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
  if [ -n "${WHEEL_PATH}" ] && [ -f "${WHEEL_PATH}" ]; then
    echo "[hub] Baixado: ${WHEEL_PATH}"
    python -m pip install -v -U --no-build-isolation --no-deps "${WHEEL_PATH}" || true
    if check_flash_layer_norm_bin; then
      echo "[flow] FlashAttn LayerNorm: OK via wheel do Hub"
      return 0
    fi
    echo "[flow] Wheel do Hub não resolveu import; seguirá com build"
  else
    echo "[hub] Nenhuma wheel compatível encontrada para FlashAttn LayerNorm"
  fi

  # 2) Build the csrc/layer_norm sub-package from source into a wheel.
  echo "[build] Preparando fonte FlashAttention (layer_norm) em ${SRC}"
  if [ -d "$SRC/.git" ]; then
    git -C "$SRC" fetch --all -p || true
    git -C "$SRC" reset --hard origin/main || true
    git -C "$SRC" clean -fdx || true
  else
    rm -rf "$SRC"
    git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC" || return 1
  fi

  # Target only the active GPU's compute capability (shorter, quieter build).
  # The torch import lives inside the try so the fallback also covers a
  # missing or broken torch, not just a missing GPU.
  TORCH_CUDA_ARCH_LIST="$(python - <<'PY' || true
try:
    import torch
    print("%d.%d" % torch.cuda.get_device_capability(0))
except Exception:
    print("8.9")  # fallback for Ada (L40S) when no GPU is visible at build time
PY
)"
  export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.9}"
  echo "[build] TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}"

  export MAX_JOBS="${MAX_JOBS:-90}"

  # Build into a scratch directory: whatever wheel appears there is the one
  # just built, so an unrelated or stale wheel in /app/wheels is never picked.
  OUT_DIR="$(mktemp -d)" || return 1
  (
    # Subshell + `&&`: pip never runs in the wrong directory if the cd fails.
    cd "$SRC/csrc/layer_norm" &&
      python -m pip wheel -v --no-build-isolation --no-deps . -w "$OUT_DIR"
  ) || true

  W="$(ls -t "$OUT_DIR"/*.whl 2>/dev/null | head -n1 || true)"
  if [ -n "${W}" ]; then
    # Keep a reusable copy alongside the other wheels (picked up by the upload step).
    if cp -f "$W" /app/wheels/; then
      W="/app/wheels/$(basename "$W")"
    fi
    python -m pip install -v -U --no-deps "${W}" || true
    echo "[build] FlashAttn LayerNorm instalado da wheel: ${W}"
  else
    echo "[build] Nenhuma wheel gerada; instalando direto do source (último recurso)"
    python -m pip install -v --no-build-isolation "$SRC/csrc/layer_norm" || true
  fi
  rm -rf "$OUT_DIR"

  # 3) Final check of the native module.
  if check_flash_layer_norm_bin; then
    echo "[flow] FlashAttn LayerNorm: import OK após build"
    return 0
  fi
  echo "[flow] FlashAttn LayerNorm: falhou import após build"
  return 1
}
# Builds NVIDIA Apex (C++ and CUDA extensions, no contrib extensions) from
# source into a wheel, keeps a copy in /app/wheels and installs it. Falls back
# to installing straight from the source tree when no wheel is produced.
# Exports: APEX_CPP_EXT, APEX_CUDA_EXT, APEX_ALL_CONTRIB_EXT.
build_apex () {
  local OUT_DIR W
  local SRC="/app/wheels/src/apex"

  echo "[build] Preparando fonte Apex em ${SRC}"
  if [ -d "$SRC/.git" ]; then
    git -C "$SRC" fetch --all -p || true
    # Move to what was just fetched; resetting to HEAD alone would make the
    # fetch a no-op. Fall back to HEAD when origin/HEAD is not available.
    git -C "$SRC" reset --hard origin/HEAD || git -C "$SRC" reset --hard HEAD || true
    git -C "$SRC" clean -fdx || true
  else
    rm -rf "$SRC"
    git clone --depth 1 https://github.com/NVIDIA/apex "$SRC" || return 1
  fi

  echo "[build] Compilando Apex -> wheel"
  export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0

  # Build into a scratch directory so a stale apex wheel already present in
  # /app/wheels is never mistaken for the freshly compiled one.
  OUT_DIR="$(mktemp -d)" || return 1
  python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$OUT_DIR" || true

  W="$(ls -t "$OUT_DIR"/*.whl 2>/dev/null | head -n1 || true)"
  if [ -n "${W}" ]; then
    # Keep a reusable copy alongside the other wheels (picked up by the upload step).
    if cp -f "$W" /app/wheels/; then
      W="/app/wheels/$(basename "$W")"
    fi
    python -m pip install -v -U --no-deps "${W}" || true
    echo "[build] Apex instalado da wheel recém-compilada: ${W}"
  else
    echo "[build] Nenhuma wheel Apex gerada; instalando do source"
    python -m pip install -v --no-build-isolation "$SRC" || true
  fi
  rm -rf "$OUT_DIR"
}
# Source repository and pinned commit of the Q8 kernels; both can be
# overridden through the environment.
Q8_REPO="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels}"
Q8_COMMIT="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"

# Builds the Q8 kernels at the pinned commit into a wheel, keeps a copy in
# /app/wheels and installs it. Falls back to installing straight from the
# source tree when no wheel is produced.
# Returns: 1 when the source checkout cannot be prepared, 0 otherwise.
build_q8 () {
  local OUT_DIR W
  local SRC="/app/wheels/src/q8_kernels"

  # Always start from a fresh checkout of the pinned commit; stop early when
  # the source cannot be prepared instead of running pip on a broken tree.
  rm -rf "$SRC"
  if ! { git clone --filter=blob:none "$Q8_REPO" "$SRC" &&
         git -C "$SRC" checkout "$Q8_COMMIT" &&
         git -C "$SRC" submodule update --init --recursive; }; then
    echo "[build] Falha ao preparar fonte q8_kernels" >&2
    return 1
  fi

  echo "[build] Compilando Q8 Kernels -> wheel"
  # --no-deps: without it pip also writes wheels for every dependency into the
  # output directory. The scratch directory guarantees that the wheel picked
  # below is the one just built, whatever its distribution name is.
  OUT_DIR="$(mktemp -d)" || return 1
  python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$OUT_DIR" || true

  W="$(ls -t "$OUT_DIR"/*.whl 2>/dev/null | head -n1 || true)"
  if [ -n "${W}" ]; then
    # Keep a reusable copy alongside the other wheels (picked up by the upload step).
    if cp -f "$W" /app/wheels/; then
      W="/app/wheels/$(basename "$W")"
    fi
    python -m pip install -v -U --no-deps "${W}" || true
    echo "[build] Q8 instalado da wheel recém-compilada: ${W}"
  else
    echo "[build] Nenhuma wheel q8_kernels gerada; instalando do source"
    python -m pip install -v --no-build-isolation "$SRC" || true
  fi
  rm -rf "$OUT_DIR"
}
| # ============================================================================ | |
| # EXECUÇÃO | |
| # ============================================================================ | |
# Additional step that does NOT depend on an installed "flash-attn": handles only layer_norm.
#build_q8 || true

# Apex (kept, currently disabled)
# Tries a wheel from HF first and, when none is available, compiles and installs a wheel.
#echo "[flow] === apex ==="
#HF_OUT="$(install_from_hf_by_prefix "apex" || true)"
#if [ -n "${HF_OUT:-}" ]; then
#  WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
#  echo "[hub] Baixado: ${WHEEL_PATH}"
#  python -m pip install -v -U --no-build-isolation "${WHEEL_PATH}" || true
#  if ! check_apex; then
#    echo "[flow] apex: import falhou após wheel; compilando"
#    #build_apex || true
#  fi
#else
#  echo "[hub] Nenhuma wheel apex compatível; compilando"
#  build_apex || true
#fi

# Q8 (optional): wheel from the Hub first, source build as fallback.
echo "[flow] === q8_kernels ==="
HF_OUT="$(install_from_hf_by_prefix "q8_kernels" || true)"
# Only the last output line is a candidate path, and it must be an existing
# file; this keeps stray log lines from being handed to pip.
WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
if [ -n "${WHEEL_PATH}" ] && [ -f "${WHEEL_PATH}" ]; then
  echo "[hub] Baixado: ${WHEEL_PATH}"
  python -m pip install -v -U --no-build-isolation "${WHEEL_PATH}" || true
  if ! check_q8; then
    echo "[flow] q8_kernels: import falhou após wheel; compilando"
    build_q8 || true
  fi
else
  echo "[hub] Nenhuma wheel q8_kernels compatível; compilando"
  build_q8 || true
fi

# Upload the produced wheels to HF (cache across restarts). Best-effort like
# every other step: a missing token or a failed upload must not abort the
# script under `set -e` and skip the final permission fix below.
python - <<'PY' || echo "[hub] Upload falhou; seguindo sem atualizar o cache" >&2
import os
import sys

from huggingface_hub import HfApi, HfFolder

repo = os.environ.get("SELF_HF_REPO_ID") or "euIaxs22/Aduc-sdr"
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
if not token:
    # Nothing to do without credentials; report it and exit successfully.
    print("HF_TOKEN ausente; upload desabilitado", file=sys.stderr)
    raise SystemExit(0)

api = HfApi(token=token)
api.upload_folder(
    folder_path="/app/wheels",
    repo_id=repo,
    repo_type="model",
    allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
    # "src/**" is needed in addition to "**/src/**": the latter requires a
    # leading path component and so does not match the top-level src/ tree.
    ignore_patterns=["src/**", "**/src/**", "**/*.log", "**/logs/**", ".git/**"],
)
print("Upload concluído (wheels + licença).")
PY

chmod -R 777 /app/wheels || true
echo "✅ Builder finalizado."