Spaces:
Sleeping
Sleeping
# syntax=docker/dockerfile:1.4
FROM python:3.10-slim AS source

# Build-time inputs consumed by the clone step of this stage.
# NOTE(review): build-arg values are recorded in image history; prefer the
# BuildKit secret mounts over passing the token as a build arg.
ARG HF_API_TOKEN
ARG SRC_URL

# git performs the clone; ca-certificates lets it verify HTTPS remotes.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*
# Clone the application repository into /repo (done once, in its own stage).
# A shallow clone keeps the step fast and the cache stable.
#
# Inputs, in priority order:
#   token : secret HF_API_TOKEN > secret HF_TOKEN > env HF_API_TOKEN > env HF_TOKEN
#   source: secret SRC_URL > build-arg SRC_URL
# The step fails immediately with a clear message when either input is missing,
# and `set -e` aborts the build if the clone itself fails.
RUN --mount=type=secret,id=HF_API_TOKEN,required=false \
    --mount=type=secret,id=HF_TOKEN,required=false \
    --mount=type=secret,id=SRC_URL,required=false \
    <<EOF
set -e

# Resolve the access token.
if [ -f /run/secrets/HF_API_TOKEN ]; then
    TOKEN=$(cat /run/secrets/HF_API_TOKEN)
elif [ -f /run/secrets/HF_TOKEN ]; then
    TOKEN=$(cat /run/secrets/HF_TOKEN)
elif [ -n "${HF_API_TOKEN:-}" ]; then
    TOKEN=$HF_API_TOKEN
elif [ -n "${HF_TOKEN:-}" ]; then
    TOKEN=$HF_TOKEN
else
    echo "ERROR: HF token not provided (set BuildKit secret HF_API_TOKEN/HF_TOKEN or HF_API_TOKEN/HF_TOKEN env)"
    exit 1
fi

# Resolve the repository location.
if [ -f /run/secrets/SRC_URL ]; then
    SRC=$(cat /run/secrets/SRC_URL)
elif [ -n "${SRC_URL:-}" ]; then
    SRC=$SRC_URL
else
    echo "ERROR: SRC_URL not provided (set BuildKit secret SRC_URL or build-arg SRC_URL)"
    exit 1
fi

# Strip a leading http(s):// (if any) so the token credentials can be inserted.
NO_SCHEME=$(printf '%s' "$SRC" | sed -E 's#^https?://##')

# The URL may come from a secret, so it is deliberately not printed.
echo "Cloning source repository"
mkdir -p /repo
git clone --depth 1 "https://__token__:${TOKEN}@${NO_SCHEME}" /repo_tmp

# Drop the clone metadata before anything is moved: .git/config stores the
# remote URL including the token and must never reach /repo.
rm -rf /repo_tmp/.git

echo "--- Debug: listing /repo_tmp (show hidden and nested) ---"
ls -la /repo_tmp || true

# If the checkout consists of exactly one non-hidden entry and it is a
# directory, hoist that directory's contents so /repo/frontend exists.
set -- /repo_tmp/*
if [ "$#" -eq 1 ] && [ -d "$1" ]; then
    echo "--- Single top-level dir detected: moving its contents into /repo ---"
    CONTENT_DIR=$1
else
    echo "--- Multiple entries detected: moving all into /repo ---"
    CONTENT_DIR=/repo_tmp
fi

# "dir/." copies every entry, dotfiles included, without relying on globs.
cp -a "$CONTENT_DIR"/. /repo/
rm -rf /repo_tmp
EOF
# Sanity check: /repo must exist and contain at least one entry, otherwise stop
# the build here with a readable error instead of failing later in a COPY.
RUN if [ ! -d /repo ] || [ -z "$(ls -A /repo)" ]; then \
        echo "ERROR: clone failed or /repo is empty"; \
        exit 1; \
    fi
# --- Stage 1: Build React frontend ---
FROM node:20-alpine AS frontend
WORKDIR /app/frontend

# Copy only the dependency manifests first so the install layer stays cached
# until package.json or package-lock.json changes.
COPY --from=source /repo/frontend/package.json /repo/frontend/package-lock.json ./

# `npm ci` installs exactly what the lockfile specifies and fails if it is out
# of sync with package.json. (`--frozen-lockfile` is a yarn/pnpm flag that npm
# does not honour, so `npm install` did not enforce the lockfile.)
RUN npm ci

# Build frontend (source files copied from the cloned source stage)
COPY --from=source /repo/frontend/ ./
RUN npm run build
# --- Stage 2: Python backend (CPU only) ---
FROM python:3.10-slim AS backend

# Runtime environment: no .pyc files, unbuffered output, no pip download cache,
# and the Hugging Face cache under /app.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/app/.cache/huggingface

# System dependencies (curl is used by the healthcheck and the start script).
# DEBIAN_FRONTEND is set inline so it only affects this build step and is not
# baked into the runtime environment.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN useradd -m appuser

# Create the cache and static directories and hand /app to the non-root user
RUN mkdir -p /app/.cache/huggingface /app/static \
    && chown -R appuser:appuser /app

# Everything below runs as the non-root user
USER appuser
WORKDIR /app
# Install Python dependencies before copying the code so this layer is reused
# until requirements.txt changes.
# COPY does not honour USER: without --chown the copied files (and the
# /app/backend directory it creates) would be owned by root.
COPY --from=source --chown=appuser:appuser /repo/backend/requirements.txt /app/backend/requirements.txt
RUN python3 -m pip install --upgrade pip && \
    python3 -m pip install -r /app/backend/requirements.txt

# Copy backend code
# Fathom-Search-4B files are now part of the backend app directory
COPY --from=source --chown=appuser:appuser /repo/backend/ /app/backend/

# Copy frontend build to static directory
COPY --from=frontend --chown=appuser:appuser /app/frontend/out/ /app/static/
# --- Application configuration (runtime environment) ---

# Static assets and model selection
ENV STATIC_DIR=/app/static \
    MODEL_ID=FractalAIResearch/Fathom-R1-14B \
    PIPELINE_TASK=text-generation \
    QUANTIZE=auto

# Serper host service endpoint (port 2221) and outbound limit
ENV PORT_SERPER_HOST=2221 \
    HOST_SERPER_URL=http://0.0.0.0:2221 \
    SERPER_URL=http://0.0.0.0:2221 \
    MAX_OUTBOUND=256

# Module search path, cache locations and other app settings
ENV PYTHONPATH=/app/backend/app:/app/backend \
    JINA_CACHE_DIR=/app/.cache/jina_cache \
    SERPER_CACHE_DIR=/app/.cache/serper_cache \
    BOXED_WRAP_WIDTH=130 \
    CRAWL4AI_EP=http://localhost:8080

# NOTE(review): empty CA bundle variables together with SSL_VERIFY=false look
# intended to switch off TLS certificate verification for outbound requests.
# Confirm this is required before shipping; values are kept as-is.
ENV CURL_CA_BUNDLE="" \
    REQUESTS_CA_BUNDLE="" \
    SSL_VERIFY=false
# Pre-create the Jina and Serper cache directories and make sure they belong
# to the application user.
RUN mkdir -p \
        /app/.cache/jina_cache \
        /app/.cache/serper_cache \
    && chown -R appuser:appuser /app/.cache

# Healthcheck: the container is healthy only when both the backend (port 7860)
# and the Serper service (port 2221) answer successfully.
HEALTHCHECK \
    --interval=30s \
    --timeout=10s \
    --start-period=15s \
    --retries=3 \
    CMD curl -f http://localhost:7860/docs && curl -f http://localhost:2221/health || exit 1

# Ports the two services listen on (documentation only; does not publish them)
EXPOSE 7860
EXPOSE 2221
# Startup script: launches the Serper host service and the backend, waits for
# Serper to become ready, then supervises both processes.
# The heredoc delimiter is quoted so Docker copies the script verbatim and does
# not expand the shell variables at build time.
COPY --chown=appuser:appuser --chmod=755 <<'EOF' /app/start.sh
#!/bin/bash
set -e

SERPER_PID=""
BACKEND_PID=""

# Stop whichever services are still running, then exit.
# $1: exit status to use (defaults to 0, which is what the signal trap uses).
cleanup() {
    local exit_code=${1:-0}
    echo "🛑 Shutting down services..."
    if [ -n "$SERPER_PID" ] && kill -0 "$SERPER_PID" 2>/dev/null; then
        # "|| true": the process may exit between the check and the kill;
        # that must not abort cleanup under `set -e`.
        kill "$SERPER_PID" 2>/dev/null || true
        echo "✅ Serper service stopped"
    fi
    if [ -n "$BACKEND_PID" ] && kill -0 "$BACKEND_PID" 2>/dev/null; then
        kill "$BACKEND_PID" 2>/dev/null || true
        echo "✅ Backend service stopped"
    fi
    exit "$exit_code"
}

# Set up signal handlers
trap cleanup SIGTERM SIGINT

echo "🚀 Starting FathomPlayground on Hugging Face Spaces"
echo "✅ Environment variables configured:"
echo " HF_MODEL_URL: configured"
echo " HOST_SERPER_URL: configured"
echo " PORT_SERPER_HOST: configured"
echo " HF_API_TOKEN: SET"
echo " SERPER_API_KEY: SET"
echo " OPENAI_API_KEY: SET"
echo " HF_TOKEN: SET"
echo " SUMMARY_HF_MODEL_URL: configured"
echo " CRAWL4AI_EP: configured"
echo " JINA_API_KEY: SET"
echo " JINA_CACHE_DIR: configured"
echo " SERPER_CACHE_DIR: configured"

echo "🚀 Starting Serper Host Server..."
cd /app/backend/app
python3 -m web_agents_5.sandbox_serper --port 2221 --workers 1 &
SERPER_PID=$!
echo "✅ Serper service started"

# Wait for Serper service to be ready (one probe per second, 30 attempts)
echo "⏳ Waiting for Serper service to be ready..."
for i in {1..30}; do
    if curl -s http://localhost:2221/health > /dev/null 2>&1; then
        echo "✅ Serper service is ready"
        break
    fi
    if [ "$i" -eq 30 ]; then
        echo "❌ Serper service failed to start within 30 seconds"
        # Report the failure through the exit status.
        cleanup 1
    fi
    sleep 1
done

echo "🚀 Starting Backend Service..."
python3 -m uvicorn main:app --host 0.0.0.0 --port 7860 &
BACKEND_PID=$!
echo "✅ Backend service started on port 7860 (PID: $BACKEND_PID)"

# Monitor both services: Serper is restarted when it dies, while a dead
# backend ends the container with a non-zero status.
while true; do
    if ! kill -0 "$SERPER_PID" 2>/dev/null; then
        echo "❌ Serper service died, restarting..."
        python3 -m web_agents_5.sandbox_serper --port 2221 --workers 1 &
        SERPER_PID=$!
        echo "✅ Serper service restarted (PID: $SERPER_PID)"
    fi
    if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
        echo "❌ Backend service died, exiting..."
        cleanup 1
    fi
    # Sleep in the background and wait on it so a trapped SIGTERM/SIGINT is
    # handled immediately instead of after the sleep finishes.
    sleep 5 &
    wait $! || true
done
EOF

ENTRYPOINT ["/app/start.sh"]