File size: 8,050 Bytes
790fb60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# syntax=docker/dockerfile:1.4
FROM python:3.10-slim AS source

# Build-time inputs consumed by the clone step of this stage
ARG HF_API_TOKEN
ARG SRC_URL

# The clone step needs git plus CA certificates; the apt package lists are
# removed in the same layer so they do not add to its size
RUN apt-get update \
 && apt-get install -y --no-install-recommends ca-certificates git \
 && rm -rf /var/lib/apt/lists/*

# Clone the repository once in its own stage; the checkout ends up in /repo.
# A shallow clone keeps time and bandwidth down and makes caching more stable.
#
# Token lookup order: BuildKit secret HF_API_TOKEN, BuildKit secret HF_TOKEN,
# then the HF_API_TOKEN / HF_TOKEN environment variables.
# Source URL lookup order: BuildKit secret SRC_URL, then build-arg SRC_URL.
# The step fails early with a clear message when either value is missing.
#
# Notes on the implementation:
#  * The commands run directly under the RUN shell (no nested sh -c '...'), so
#    the sed pattern needs no quote juggling.
#  * `set -e` plus the explicit check on `git clone` make a failed clone abort
#    the build here instead of being masked by a later `|| true`.
#  * The source URL is not echoed because it may have been supplied as a secret.
#  * .git is deleted before anything is copied: its config records the clone
#    URL, which embeds the token.
#  * If the checkout has exactly one visible top-level entry and it is a
#    directory, that directory's contents become /repo (so /repo/frontend
#    exists); otherwise the whole checkout does. `cp -a DIR/.` also carries
#    over hidden files of any name length.
RUN --mount=type=secret,id=HF_API_TOKEN,required=false \
    --mount=type=secret,id=HF_TOKEN,required=false \
    --mount=type=secret,id=SRC_URL,required=false \
    set -e; \
    if [ -f /run/secrets/HF_API_TOKEN ]; then TOKEN=$(cat /run/secrets/HF_API_TOKEN); \
    elif [ -f /run/secrets/HF_TOKEN ]; then TOKEN=$(cat /run/secrets/HF_TOKEN); \
    elif [ -n "$HF_API_TOKEN" ]; then TOKEN=$HF_API_TOKEN; \
    elif [ -n "$HF_TOKEN" ]; then TOKEN=$HF_TOKEN; \
    else TOKEN=""; fi; \
    if [ -z "$TOKEN" ]; then \
        echo "ERROR: HF token not provided (set BuildKit secret HF_API_TOKEN/HF_TOKEN or HF_API_TOKEN/HF_TOKEN env)"; \
        exit 1; \
    fi; \
    if [ -f /run/secrets/SRC_URL ]; then SRC=$(cat /run/secrets/SRC_URL); \
    elif [ -n "$SRC_URL" ]; then SRC=$SRC_URL; \
    else SRC=""; fi; \
    if [ -z "$SRC" ]; then \
        echo "ERROR: SRC_URL not provided (set BuildKit secret SRC_URL or build-arg SRC_URL)"; \
        exit 1; \
    fi; \
    NO_SCHEME=$(printf "%s" "$SRC" | sed -E "s|^https?://||"); \
    echo "Cloning source repository (URL withheld from the build log)"; \
    mkdir -p /repo; \
    if ! git clone --depth 1 "https://__token__:${TOKEN}@${NO_SCHEME}" /repo_tmp; then \
        echo "ERROR: git clone failed"; \
        exit 1; \
    fi; \
    rm -rf /repo_tmp/.git; \
    echo "--- Debug: listing /repo_tmp (show hidden and nested) ---"; \
    ls -la /repo_tmp || true; \
    set -- /repo_tmp/*; \
    if [ "$#" -eq 1 ] && [ -d "$1" ]; then \
        echo "--- Single top-level dir detected: moving its contents into /repo ---"; \
        CHECKOUT_ROOT=$1; \
    else \
        echo "--- Multiple entries detected: moving all into /repo ---"; \
        CHECKOUT_ROOT=/repo_tmp; \
    fi; \
    cp -a "$CHECKOUT_ROOT"/. /repo/; \
    rm -rf /repo_tmp

# Guard for the following stages: stop the build with a readable message when
# /repo is missing or has no entries
RUN if [ ! -d /repo ] || [ "$(ls -A /repo | wc -c)" -le 0 ]; then \
        echo "ERROR: clone failed or /repo is empty"; \
        exit 1; \
    fi
    
# --- Stage 1: Build React frontend ---
FROM node:20-alpine AS frontend

WORKDIR /app/frontend

# Copy only the manifest and lockfile first so the dependency layer is reused
# until one of them changes. Naming package-lock.json explicitly makes the
# COPY itself fail when the lockfile is missing from the cloned source.
COPY --from=source /repo/frontend/package.json /repo/frontend/package-lock.json ./

# `npm ci` installs exactly what package-lock.json records and errors out when
# it disagrees with package.json. (`--frozen-lockfile` is a yarn/pnpm option;
# `npm install` does not implement it.)
RUN npm ci

# Build frontend (source files copied from the cloned source stage)
COPY --from=source /repo/frontend/ ./
RUN npm run build

# --- Stage 2: Python backend (CPU only) ---
FROM python:3.10-slim AS backend

# Defaults for apt, Python and pip, plus the Hugging Face cache location
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/app/.cache/huggingface

# System packages; the apt package lists are dropped in the same layer
RUN apt-get update \
 && apt-get install -y --no-install-recommends curl git \
 && rm -rf /var/lib/apt/lists/*

# Unprivileged account (with a home directory) used from here on
RUN useradd -m appuser

# Pre-create the Hugging Face cache and static directories, then hand the
# whole /app tree to appuser
RUN mkdir -p /app/.cache/huggingface /app/static \
 && chown -R appuser:appuser /app

# Everything below runs as appuser, relative to /app
USER appuser
WORKDIR /app

# Bring in requirements.txt on its own, ahead of the backend code, then
# upgrade pip and install the listed Python dependencies
COPY --from=source /repo/backend/requirements.txt /app/backend/requirements.txt
RUN python3 -m pip install --upgrade pip \
 && python3 -m pip install -r /app/backend/requirements.txt

# Backend code from the cloned source stage
COPY --from=source /repo/backend/ /app/backend/

# Fathom-Search-4B files are now part of the backend app directory

# Frontend build output (the frontend stage's out/ directory) goes into the static directory
COPY --from=frontend /app/frontend/out/ /app/static/

# App-specific environment variables: static files, model selection, import path
ENV STATIC_DIR=/app/static \
    MODEL_ID=FractalAIResearch/Fathom-R1-14B \
    PIPELINE_TASK=text-generation \
    QUANTIZE=auto \
    PYTHONPATH=/app/backend/app:/app/backend \
    MAX_OUTBOUND=256 \
    BOXED_WRAP_WIDTH=130

# Service endpoints (Serper host on port 2221, Crawl4AI on port 8080)
ENV PORT_SERPER_HOST=2221 \
    HOST_SERPER_URL=http://0.0.0.0:2221 \
    SERPER_URL=http://0.0.0.0:2221 \
    CRAWL4AI_EP=http://localhost:8080

# Cache locations under /app/.cache
ENV JINA_CACHE_DIR=/app/.cache/jina_cache \
    SERPER_CACHE_DIR=/app/.cache/serper_cache

# NOTE(review): the CA bundle variables are set to empty strings and
# SSL_VERIFY to false - presumably to turn off TLS certificate verification
# for outbound requests; confirm this is intended before shipping.
ENV CURL_CA_BUNDLE="" \
    REQUESTS_CA_BUNDLE="" \
    SSL_VERIFY=false

# Create the cache directories named above and make sure appuser owns the cache tree
RUN mkdir -p /app/.cache/jina_cache /app/.cache/serper_cache \
 && chown -R appuser:appuser /app/.cache

# Optional healthcheck: passes only when both the backend (/docs on 7860) and
# the Serper service (/health on 2221) respond successfully
HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
  CMD curl -f http://localhost:7860/docs && curl -f http://localhost:2221/health || exit 1

EXPOSE 7860 2221

# Create startup script with proper service management.
# The script is written with a here-document (available with the dockerfile:1.4
# syntax declared at the top of this file) instead of `echo '...\n\'`, so its
# content no longer depends on how /bin/sh's echo treats "\n" and the comments
# inside the script are kept. The quoted delimiter disables build-time variable
# expansion, so every $VAR below is evaluated when the container starts.
# --chown/--chmod replace the separate chmod +x / chown commands.
COPY --chown=appuser:appuser --chmod=755 <<"EOF" /app/start.sh
#!/bin/bash
set -e

# Stop whichever services are still running, then exit.
# $1 is the exit status to report; it defaults to 0 so that a shutdown
# triggered by SIGTERM/SIGINT counts as a clean stop, while failure paths
# pass 1 explicitly.
cleanup() {
    local status="${1:-0}"
    echo "🛑 Shutting down services..."
    if [ -n "$SERPER_PID" ] && kill -0 "$SERPER_PID" 2>/dev/null; then
        # "|| true": the process may exit between the check and the kill;
        # under set -e that must not abort the rest of the cleanup.
        kill "$SERPER_PID" 2>/dev/null || true
        echo "✅ Serper service stopped"
    fi
    if [ -n "$BACKEND_PID" ] && kill -0 "$BACKEND_PID" 2>/dev/null; then
        kill "$BACKEND_PID" 2>/dev/null || true
        echo "✅ Backend service stopped"
    fi
    exit "$status"
}

# Set up signal handlers
trap cleanup SIGTERM SIGINT

echo "🚀 Starting FathomPlayground on Hugging Face Spaces"
echo "✅ Environment variables configured:"
echo "   HF_MODEL_URL: configured"
echo "   HOST_SERPER_URL: configured"
echo "   PORT_SERPER_HOST: configured"
echo "   HF_API_TOKEN: SET"
echo "   SERPER_API_KEY: SET"
echo "   OPENAI_API_KEY: SET"
echo "   HF_TOKEN: SET"
echo "   SUMMARY_HF_MODEL_URL: configured"
echo "   CRAWL4AI_EP: configured"
echo "   JINA_API_KEY: SET"
echo "   JINA_CACHE_DIR: configured"
echo "   SERPER_CACHE_DIR: configured"

echo "🔍 Starting Serper Host Server..."
cd /app/backend/app
python3 -m web_agents_5.sandbox_serper --port 2221 --workers 1 &
SERPER_PID=$!
echo "✅ Serper service started"

# Wait for Serper service to be ready (poll /health once per second)
echo "⏳ Waiting for Serper service to be ready..."
for i in {1..30}; do
    if curl -s http://localhost:2221/health > /dev/null 2>&1; then
        echo "✅ Serper service is ready"
        break
    fi
    if [ "$i" -eq 30 ]; then
        echo "❌ Serper service failed to start within 30 seconds"
        cleanup 1
    fi
    sleep 1
done

echo "🚀 Starting Backend Service..."
python3 -m uvicorn main:app --host 0.0.0.0 --port 7860 &
BACKEND_PID=$!
echo "✅ Backend service started on port 7860 (PID: $BACKEND_PID)"

# Monitor both services: Serper is restarted when it dies; a dead backend
# stops the container with a non-zero status.
while true; do
    if ! kill -0 "$SERPER_PID" 2>/dev/null; then
        echo "❌ Serper service died, restarting..."
        python3 -m web_agents_5.sandbox_serper --port 2221 --workers 1 &
        SERPER_PID=$!
        echo "✅ Serper service restarted (PID: $SERPER_PID)"
    fi
    if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
        echo "❌ Backend service died, exiting..."
        cleanup 1
    fi
    sleep 5
done
EOF

ENTRYPOINT ["/app/start.sh"]