Spaces:
Sleeping
Sleeping
| # Hugging Face Spaces compatible Dockerfile - V4 GPU INT4 | |
| FROM python:3.10-slim | |
| # Set environment variables for V4 GPU deployment | |
| ENV PYTHONDONTWRITEBYTECODE=1 \ | |
| PYTHONUNBUFFERED=1 \ | |
| PYTHONPATH=/app \ | |
| HOME=/tmp \ | |
| USER=user \ | |
| ENABLE_V1_WARMUP=false \ | |
| ENABLE_V2_WARMUP=false \ | |
| ENABLE_V4_WARMUP=true \ | |
| V4_MODEL_ID=Qwen/Qwen2.5-1.5B-Instruct \ | |
| V4_ENABLE_QUANTIZATION=true \ | |
| V4_USE_FP16_FOR_SPEED=true \ | |
| HF_HOME=/tmp/huggingface \ | |
| TRANSFORMERS_NO_TORCHAO=1 \ | |
| OMP_NUM_THREADS=1 | |
| # Set work directory | |
| WORKDIR /app | |
| # Install minimal system dependencies | |
| RUN apt-get update && apt-get install -y --no-install-recommends \ | |
| curl ca-certificates && rm -rf /var/lib/apt/lists/* | |
| # Copy requirements first for better caching | |
| COPY requirements.txt . | |
| # Install Python dependencies | |
| RUN pip install --no-cache-dir --upgrade pip && \ | |
| pip install --no-cache-dir -r requirements.txt | |
| # Copy application code | |
| COPY app/ ./app/ | |
| COPY pytest.ini . | |
| # Create cache directory for HuggingFace models | |
| RUN mkdir -p /tmp/huggingface && chmod -R 777 /tmp/huggingface | |
| # Expose port (Hugging Face Spaces uses port 7860) | |
| EXPOSE 7860 | |
| # Health check | |
| HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \ | |
| CMD curl -f http://localhost:7860/health || exit 1 | |
| # Simple startup - V4 model will download during warmup (with GPU INT4 quantization) | |
| CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"] | |