NIKKI77 committed on
Commit
464d4fa
·
1 Parent(s): d6a554f

Disable HF fast downloader (or install hf_transfer) and make model prefetch non-fatal

Browse files
Files changed (1) hide show
  1. Dockerfile +20 -22
Dockerfile CHANGED
@@ -1,20 +1,17 @@
1
  FROM python:3.12-slim
2
 
3
- ENV DEBIAN_FRONTEND=noninteractive \
4
- PIP_NO_CACHE_DIR=1 \
5
- PYTHONUNBUFFERED=1
6
 
7
- # Non-root user so caches are writable at runtime
8
  RUN useradd -m -u 1000 appuser
9
  ENV PATH="/home/appuser/.local/bin:$PATH"
10
 
11
  WORKDIR /app
12
 
13
- # Minimal OS deps (OpenMP runtime)
14
- RUN apt-get update && apt-get install -y --no-install-recommends libgomp1 \
15
- && rm -rf /var/lib/apt/lists/*
16
 
17
- # Put ALL caches in the user's home; plus perf/env knobs
18
  ENV HF_HOME=/home/appuser/.cache \
19
  TRANSFORMERS_CACHE=/home/appuser/.cache/transformers \
20
  HUGGINGFACE_HUB_CACHE=/home/appuser/.cache/huggingface \
@@ -24,19 +21,19 @@ ENV HF_HOME=/home/appuser/.cache \
24
  OMP_NUM_THREADS=1 \
25
  OPENBLAS_NUM_THREADS=1 \
26
  MKL_NUM_THREADS=1 \
27
- HF_HUB_ENABLE_HF_TRANSFER=1 \
28
- PYTHONPATH=/app/backend:$PYTHONPATH
29
 
30
- # Copy code with correct ownership
31
  COPY --chown=appuser:appuser . .
32
 
33
- # Switch to non-root BEFORE installs so caches land in /home/appuser
34
  USER appuser
35
 
36
- # Python deps
37
  RUN pip install --no-cache-dir --user -r requirements.txt
38
 
39
- # Preload NLP data
40
  RUN python -m spacy download en_core_web_sm
41
  RUN python - <<'PY'
42
  import nltk
@@ -45,19 +42,20 @@ for pkg in ["punkt","punkt_tab","wordnet","omw-1.4"]:
45
  print("NLTK OK")
46
  PY
47
 
48
- # Pre-fetch models to avoid cold-start timeouts
49
  RUN python - <<'PY'
50
  from sentence_transformers import SentenceTransformer
51
  from transformers import AutoTokenizer, AutoModelForTokenClassification
52
- # semantic encoder
53
- SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
54
- # punctuation model
55
- AutoTokenizer.from_pretrained("oliverguhr/fullstop-punctuation-multilang-large")
56
- AutoModelForTokenClassification.from_pretrained("oliverguhr/fullstop-punctuation-multilang-large")
57
- print("HF models cached")
 
58
  PY
59
 
60
  EXPOSE 7860
61
 
62
- # Give the worker time for initial GPU warmup
63
  CMD ["gunicorn","-w","1","-k","gthread","--threads","4","--timeout","300","-b","0.0.0.0:7860","backend.app:app"]
 
1
  FROM python:3.12-slim
2
 
3
# PIP_NO_CACHE_DIR avoids baking pip's download cache into image layers;
# PYTHONUNBUFFERED keeps gunicorn/app logs flushed straight to the container log
# (it was set in the previous revision and should not be dropped).
# DEBIAN_FRONTEND is deliberately NOT exported here: it is a build-time knob and
# would otherwise leak into the runtime environment — set it inline on apt-get.
ENV PIP_NO_CACHE_DIR=1 \
    PYTHONUNBUFFERED=1
 
 
4
 
5
+ # Create non-root user
6
  RUN useradd -m -u 1000 appuser
7
  ENV PATH="/home/appuser/.local/bin:$PATH"
8
 
9
  WORKDIR /app
10
 
11
# Minimal OS deps: libgomp1 is the OpenMP runtime required by torch/numpy wheels.
# DEBIAN_FRONTEND is set inline (build-time only, does not leak into the image);
# apt lists are removed in the same layer so they never persist in the image.
RUN DEBIAN_FRONTEND=noninteractive apt-get update \
    && apt-get install -y --no-install-recommends \
       libgomp1 \
    && rm -rf /var/lib/apt/lists/*
 
13
 
14
+ # Put caches in user's home + sane threading + pythonpath
15
  ENV HF_HOME=/home/appuser/.cache \
16
  TRANSFORMERS_CACHE=/home/appuser/.cache/transformers \
17
  HUGGINGFACE_HUB_CACHE=/home/appuser/.cache/huggingface \
 
21
  OMP_NUM_THREADS=1 \
22
  OPENBLAS_NUM_THREADS=1 \
23
  MKL_NUM_THREADS=1 \
24
+ PYTHONPATH=/app/backend:$PYTHONPATH \
25
+ HF_HUB_ENABLE_HF_TRANSFER=0 # disable fast downloader to avoid missing package error
26
 
27
# Copy only the dependency manifest first so the pip layer below stays cached
# until requirements.txt itself changes (COPY . . before the install would
# re-run the whole pip install on every source edit).
COPY --chown=appuser:appuser requirements.txt .

# Switch to non-root BEFORE installs so caches/dirs are writable at runtime
USER appuser

# Python deps into the user site (~/.local), already on PATH above
RUN pip install --no-cache-dir --user -r requirements.txt

# Now copy the rest of the application code with correct ownership
COPY --chown=appuser:appuser . .
35
 
36
+ # Preload spaCy + NLTK data
37
  RUN python -m spacy download en_core_web_sm
38
  RUN python - <<'PY'
39
  import nltk
 
42
  print("NLTK OK")
43
  PY
44
 
45
# Prefetch models so the first request doesn't hit a cold-start download;
# best-effort only — a transient network failure must not fail the image build
# (the app will fall back to downloading at runtime).
RUN python - <<'PY'
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForTokenClassification

try:
    # semantic encoder
    SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    # punctuation-restoration model (tokenizer + weights)
    AutoTokenizer.from_pretrained("oliverguhr/fullstop-punctuation-multilang-large")
    AutoModelForTokenClassification.from_pretrained("oliverguhr/fullstop-punctuation-multilang-large")
    print("HF models cached")
except Exception as e:
    # Deliberate best-effort: log and continue so network hiccups don't fail CI.
    print("Prefetch skipped:", e)
PY
57
 
58
  EXPOSE 7860
59
 
60
+ # Give cold start more time
61
  CMD ["gunicorn","-w","1","-k","gthread","--threads","4","--timeout","300","-b","0.0.0.0:7860","backend.app:app"]