Test

Paused

App Files Files Community

Test / Dockerfile

EuuIia

Update Dockerfile

03c32c6 verified about 1 month ago

raw

history blame contribute delete

4.89 kB

	# =============================================================================
	# ADUC-SDR Video Suite — High-Perf Diffusers for 8× L40S (SM 8.9)
	# CUDA 12.8 | PyTorch 2.8.0+cu128 | Ubuntu 22.04
	# =============================================================================
	FROM nvidia/cuda:12.8.0-devel-ubuntu22.04

	# Image metadata, declared in a single LABEL instruction.
	LABEL maintainer="Carlos Rodrigues dos Santos & Development Partner" \
	      description="High-performance Diffusers stack with FA2/SDPA, 8×L40S" \
	      version="4.4.0" \
	      cuda_version="12.8.0" \
	      python_version="3.10" \
	      pytorch_version="2.8.0+cu128" \
	      gpu_optimized_for="8x_NVIDIA_L40S"

	# ---------------- Core env & caches ----------------
	# Non-interactive apt, UTC/UTF-8 locale, and Python/pip runtime switches
	# (unbuffered output, no .pyc files, no pip cache, no pip version check).
	ENV DEBIAN_FRONTEND=noninteractive \
	    TZ=UTC \
	    LANG=C.UTF-8 \
	    LC_ALL=C.UTF-8 \
	    PYTHONUNBUFFERED=1 \
	    PYTHONDONTWRITEBYTECODE=1 \
	    PIP_NO_CACHE_DIR=1 \
	    PIP_DISABLE_PIP_VERSION_CHECK=1

	# GPU/Compute: expose all GPUs, target compute capability 8.9 for CUDA
	# extension builds, and order devices by PCI bus ID.
	ENV NVIDIA_VISIBLE_DEVICES=all \
	    TORCH_CUDA_ARCH_LIST="8.9" \
	    CUDA_DEVICE_ORDER=PCI_BUS_ID \
	    CUDA_DEVICE_MAX_CONNECTIONS=32

	# Threads: OpenMP/MKL thread counts and the parallel-job cap for native builds
	ENV OMP_NUM_THREADS=8
	ENV MKL_NUM_THREADS=8
	ENV MAX_JOBS=160

	# Alloc/caches: PyTorch caching-allocator tuning plus CUDA launch/JIT-cache
	# settings (cache size limit is 2147483648 bytes = 2 GiB).
	ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,garbage_collection_threshold:0.8 \
	    CUDA_LAUNCH_BLOCKING=0 \
	    CUDA_CACHE_MAXSIZE=2147483648 \
	    CUDA_CACHE_DISABLE=0

	# App paths: everything below runs relative to /app
	ENV APP_HOME=/app
	WORKDIR ${APP_HOME}

	# Persistent data and caches in /data: every framework cache is redirected
	# under /data/.cache; hf_transfer downloads are enabled and tokenizer
	# parallelism is turned off.
	ENV HF_HOME=/data/.cache/huggingface \
	    TORCH_HOME=/data/.cache/torch \
	    HF_DATASETS_CACHE=/data/.cache/datasets \
	    TRANSFORMERS_CACHE=/data/.cache/transformers \
	    DIFFUSERS_CACHE=/data/.cache/diffusers \
	    HF_HUB_ENABLE_HF_TRANSFER=1 \
	    TOKENIZERS_PARALLELISM=false

	# Create the unprivileged account (UID 1000, bash shell, home directory)
	# and the /data tree up front, then give the whole tree to that account.
	RUN useradd --create-home --uid 1000 --shell /bin/bash appuser \
	 && mkdir --parents \
	        /data \
	        /data/models \
	        /data/.cache/huggingface \
	        /data/.cache/torch \
	        /data/.cache/datasets \
	        /data/.cache/transformers \
	        /data/.cache/diffusers \
	 && chown --recursive appuser:appuser /data

	# Models are stored under /data/models; /app/models is a symlink to it.
	ENV MODELS_DIR=/data/models
	RUN ln --symbolic --force /data/models /app/models

	# ---------------- System & Python ----------------
	# Build toolchain, VCS/download tools, ffmpeg, Python 3.10 and the shared
	# libraries listed below. Package list is sorted alphabetically; the apt
	# index is removed in the same layer.
	RUN apt-get update \
	 && apt-get install --yes --no-install-recommends \
	        build-essential \
	        ca-certificates \
	        cmake \
	        curl \
	        ffmpeg \
	        git \
	        git-lfs \
	        gosu \
	        libgl1 \
	        libglib2.0-0 \
	        ninja-build \
	        python3-pip \
	        python3.10 \
	        python3.10-dev \
	        python3.10-distutils \
	        tree \
	        wget \
	 && apt-get clean \
	 && rm -rf /var/lib/apt/lists/*

	# Point both `python` and `python3` at the 3.10 interpreter, then upgrade pip.
	RUN ln --symbolic --force /usr/bin/python3.10 /usr/bin/python \
	 && ln --symbolic --force /usr/bin/python3.10 /usr/bin/python3 \
	 && python3 -m pip install --upgrade pip

	# ---------------- PyTorch cu128 (pinned) ----------------
	# Each requirement is quoted and pinned with `==`:
	#  - unquoted, the shell parses `>` in `torch>=2.8.0+cu128` as an output
	#    redirection, so pip would receive bare `torch torchvision torchaudio`
	#    and install whatever is newest on the index;
	#  - a local version label (`+cu128`) is only valid with `==` / `!=`.
	RUN pip install --index-url https://download.pytorch.org/whl/cu128 \
	    "torch==2.8.0+cu128" \
	    "torchvision==0.23.0+cu128" \
	    "torchaudio==2.8.0+cu128"

	# ---------------- Toolchain, Triton, FA2 (no bnb build) ----------------
	# Python-side build helpers needed by the source builds that follow,
	# plus hf_transfer and a numpy floor.
	RUN pip install \
	    packaging \
	    ninja \
	    cmake \
	    pybind11 \
	    scikit-build \
	    cython \
	    hf_transfer \
	    "numpy>=1.24.4"

	# Triton 3.x (no triton.ops)
	# Remove any Triton already present, then install the exact 3.4.0 release.
	# `|| true` keeps the build going if the uninstall step fails; it must be a
	# real shell operator (an escaped `\|\|` is passed to pip as package names).
	RUN pip uninstall -y triton || true && \
	    pip install -v --no-build-isolation triton==3.4.0


	# FlashAttention 2.8.x
	# Try the newest 2.8 release first and fall back to older ones; the step
	# fails only if every candidate fails. `||` must be an unescaped shell
	# operator, otherwise the fallbacks are handed to the first pip call as
	# extra arguments instead of being run.
	RUN pip install flash-attn==2.8.3 --no-build-isolation || \
	    pip install flash-attn==2.8.2 --no-build-isolation || \
	    pip install flash-attn==2.8.1 --no-build-isolation || \
	    pip install flash-attn==2.8.0.post2 --no-build-isolation

	# ---------------- App dependencies ----------------
	# Only the manifest is copied at this point, so this layer is rebuilt only
	# when requirements.txt itself changes.
	COPY requirements.txt ./
	RUN pip install --no-cache-dir --requirement requirements.txt

	# bitsandbytes: intended to be pinned to avoid surprise CUDA/PTX mismatches.
	# NOTE(review): the command below does NOT pin — `--upgrade` with no version
	# installs the newest release on every uncached build. Replace with an exact
	# `bitsandbytes==X.Y.Z` once a version is validated against this CUDA stack.
	RUN pip install --upgrade bitsandbytes

	# Custom .whl (Apex + dropout_layer_norm)
	# Prebuilt wheels tagged cp310 / linux_x86_64, downloaded straight from the
	# `main` ref of a Hugging Face repo.
	# NOTE(review): `main` is a moving ref and no checksum is verified — consider
	# pinning a commit hash in the URL.
	RUN echo "Installing custom wheels..." && \
	pip install --no-cache-dir \
	"https://huggingface.co/euIaxs22/Aduc-sdr/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl" \
	"https://huggingface.co/euIaxs22/Aduc-sdr/resolve/main/dropout_layer_norm-0.1-cp310-cp310-linux_x86_64.whl"

	# ====================================================================
	# q8_kernels: prebuilt cp310 / linux_x86_64 wheel. This install is ACTIVE
	# (not optional as written); comment the RUN out to skip it.
	# NOTE(review): ensure the wheel ABI matches the Python/torch/CUDA versions
	# installed earlier in this file.
	RUN pip install --no-cache-dir \
	"https://huggingface.co/euIaxs22/Aduc-sdr/resolve/main/q8_kernels-0.0.5-cp310-cp310-linux_x86_64.whl"
	# Optional: LTX-Video editable install (disabled; uncomment if needed)
	# RUN git clone https://github.com/Lightricks/LTX-Video.git /data/LTX-Video && \
	# cd /data/LTX-Video && python -m pip install -e .[inference]
	# ====================================================================

	# Scripts and app
	# WORKDIR is /app, so the destination must be `./` (i.e. /app/). The earlier
	# `./app/<script>` form resolved to /app/app/<script>, which is not where
	# the chmod step and ENTRYPOINT/CMD look for these files.
	COPY info.sh builder.sh start.sh entrypoint.sh ./

	# The full build context is copied after all dependency layers so that
	# source edits do not invalidate them.
	COPY . .

	# Mark the launch scripts executable and hand the app and data trees to
	# appuser.
	RUN chmod 0755 \
	        /app/builder.sh \
	        /app/entrypoint.sh \
	        /app/info.sh \
	        /app/start.sh \
	 && chown --recursive appuser:appuser /app /data

	# /data is declared a volume only after it has been populated and chowned.
	VOLUME /data

	# Drop to the unprivileged account for the running container.
	USER appuser

	# ---------------- Entry ----------------
	# entrypoint.sh is the fixed executable; start.sh is passed to it as the
	# default (overridable) argument.
	ENTRYPOINT ["/app/entrypoint.sh"]
	CMD ["/app/start.sh"]