# syntax=docker/dockerfile:1
# ks-version-1-1 / Dockerfile — Subtitle KIS v1.1 (initial, commit 5181b3c, by NIKKI77)
FROM python:3.12-slim

# DEBIAN_FRONTEND is a build-time concern only: declare it as ARG so it
# does not leak into the runtime environment of containers (an ENV here
# would persist into every `docker run`).
ARG DEBIAN_FRONTEND=noninteractive
ENV PIP_NO_CACHE_DIR=1

# Create non-root user with a stable, explicit UID (1000) so runtimes
# that enforce runAsNonRoot can verify it
RUN useradd -m -u 1000 appuser
ENV PATH="/home/appuser/.local/bin:$PATH"
WORKDIR /app

# Minimal OS deps; clean the apt lists in the same layer so the cache
# never reaches the image
RUN apt-get update && apt-get install -y --no-install-recommends \
      libgomp1 \
    && rm -rf /var/lib/apt/lists/*
# Runtime environment for the non-root user:
# - every model/data cache is rooted under /home/appuser so it stays writable
# - BLAS/OpenMP thread pools pinned to 1 (single-worker container)
# - PYTHONPATH exposes the backend package for imports
ENV HF_HOME=/home/appuser/.cache \
    HUGGINGFACE_HUB_CACHE=/home/appuser/.cache/huggingface \
    MKL_NUM_THREADS=1 \
    NLTK_DATA=/home/appuser/nltk_data \
    OMP_NUM_THREADS=1 \
    OPENBLAS_NUM_THREADS=1 \
    PYTHONPATH=/app/backend:$PYTHONPATH \
    SENTENCE_TRANSFORMERS_HOME=/home/appuser/.cache/sentence-transformers \
    TRANSFORMERS_CACHE=/home/appuser/.cache/transformers \
    XDG_CACHE_HOME=/home/appuser/.cache
# Disable HF fast downloader (avoids missing hf_transfer during build)
ENV HF_HUB_ENABLE_HF_TRANSFER=0
# Copy ONLY the dependency manifest first: the expensive pip layer below
# is then cached until requirements.txt itself changes, instead of being
# rebuilt on every source edit.
COPY --chown=appuser:appuser requirements.txt ./
# Switch to non-root BEFORE installs so caches/dirs are writable
USER appuser
# Python deps (user site)
RUN pip install --no-cache-dir --user -r requirements.txt
# Now copy the rest of the application code as appuser
COPY --chown=appuser:appuser . .
# NLTK data (you use wordnet + tokenizers). spaCy removed since unused.
# Note: nltk.download() returns False on failure instead of raising, so
# check it explicitly — otherwise a network hiccup bakes a broken image.
# (Heredoc indentation restored; the pasted original was flattened.)
RUN python - <<'PY'
import nltk
for pkg in ["punkt", "punkt_tab", "wordnet", "omw-1.4"]:
    if not nltk.download(pkg):
        raise SystemExit(f"NLTK download failed: {pkg}")
print("NLTK OK")
PY
# Prefetch models (deliberately non-fatal: if the network hiccups, the
# app can still download them lazily at runtime).
# (Heredoc indentation restored; the pasted original had the try/except
# bodies flattened, which would fail the build with IndentationError.)
RUN python - <<'PY'
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForTokenClassification

try:
    SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    AutoTokenizer.from_pretrained("oliverguhr/fullstop-punctuation-multilang-large")
    AutoModelForTokenClassification.from_pretrained("oliverguhr/fullstop-punctuation-multilang-large")
    print("HF models cached")
except Exception as e:
    print("Prefetch skipped:", e)
PY
# (Optional) build-time smoke test: importing faiss and torch here
# surfaces ABI/wheel problems at build time rather than on first request.
RUN python - <<'PY'
import sys

# Interpreter first — confirms we run the expected Python
print("PY:", sys.version)

# Heavy native deps, imported separately so a failure names the culprit
import faiss
import torch

print("FAISS:", faiss.__version__)
print("Torch:", torch.__version__, "CUDA:", torch.cuda.is_available())
PY
# Spaces port + gunicorn binding
ENV PORT=7860
EXPOSE 7860
# A shell stage is needed only to expand ${PORT}; `exec` then replaces
# the shell so gunicorn runs as PID 1 and receives SIGTERM from
# `docker stop` (without exec, bash is PID 1 and swallows the signal).
# `-l` (login shell) dropped: profile sourcing is unnecessary here.
CMD ["bash","-c","exec gunicorn -w 1 -k gthread --threads 4 --timeout 300 -b 0.0.0.0:${PORT:-7860} backend.app:app"]