Spaces:

dmartincy
/

document-translation

Running

App Files Files Community

dmartincy committed on Feb 2

Commit

b778f8d

1 Parent(s): 39cd3c4

Use HF Serverless inference

Browse files

Files changed (7) hide show

Dockerfile +13 -27
auth-service.js +29 -0
document-authoring.js +68 -56
nginx.conf +6 -8
package.json +6 -0
service-config.yml +7 -8
start-services.sh +5 -48

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.6.3-devel-ubuntu22.04
 # Create non-root user
 RUN useradd -m -u 1000 user
@@ -6,32 +6,17 @@ RUN useradd -m -u 1000 user
 # Set environment variables
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH \
-    API_AUTH_TOKEN=secret \
     JWT_ALGORITHM=RS256 \
     DASHBOARD_USERNAME=dashboard \
     DASHBOARD_PASSWORD=secret \
-    SECRET_KEY_BASE=secret-key-base \
-    JWT_PUBLIC_KEY="-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2gzhmJ9TDanEzWdP1WG+\n0Ecwbe7f3bv6e5UUpvcT5q68IQJKP47AQdBAnSlFVi4X9SaurbWoXdS6jpmPpk24\nQvitzLNFphHdwjFBelTAOa6taZrSusoFvrtK9x5xsW4zzt/bkpUraNx82Z8MwLwr\nt6HlY7dgO9+xBAabj4t1d2t+0HS8O/ed3CB6T2lj6S8AbLDSEFc9ScO6Uc1XJlSo\nrgyJJSPCpNhSq3AubEZ1wMS1iEtgAzTPRDsQv50qWIbn634HLWxTP/UH6YNJBwzt\n3O6q29kTtjXlMGXCvin37PyX4Jy1IiPFwJm45aWJGKSfVGMDojTJbuUtM+8P9Rrn\nAwIDAQAB\n-----END PUBLIC KEY-----"
 # Install minimal dependencies
 RUN apt-get update && apt-get install -y \
-    wget \
     curl \
-    unzip \
-    clang \
-    cuda-toolkit \
     nginx \
-    build-essential \
-    cmake \
-    git \
-    libcurl4-openssl-dev \
     && rm -rf /var/lib/apt/lists/*
-# Copy llama.cpp server files from official image
-COPY --from=ghcr.io/ggerganov/llama.cpp:server-cuda /app/llama-server $HOME/app/llama-server
-COPY --from=ghcr.io/ggerganov/llama.cpp:server-cuda /app/*.so* $HOME/app/
-RUN chmod +x $HOME/app/llama-server
 # Install Node.js and pnpm
 RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
     apt-get update && \
@@ -43,9 +28,7 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
 RUN corepack enable && corepack prepare pnpm@latest --activate
 # Create directories and set permissions
-RUN mkdir -p /tmp/llamacpp && \
-    mkdir -p $HOME/models && \
-    mkdir -p $HOME/app && \
     mkdir -p $HOME/app/docauth && \
     mkdir -p $HOME/app/aia && \
     mkdir -p /var/cache/nginx && \
@@ -75,19 +58,22 @@ COPY --chown=user:user index.html $HOME/app/docauth/
 COPY --chown=user:user document-authoring.js $HOME/app/docauth/
 COPY --chown=user:user Sample.docx $HOME/app/docauth/
 # Copy start script
 COPY --chown=user:user start-services.sh $HOME/app/
 RUN chmod +x $HOME/app/start-services.sh
 # Switch to non-root user
 USER user
 WORKDIR $HOME/app
-# Download models
-RUN wget -q https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q8_0.gguf -O $HOME/models/gemma-2b.gguf && \
-    wget -q https://huggingface.co/leliuga/all-MiniLM-L6-v2-GGUF/resolve/main/all-MiniLM-L6-v2.F16.gguf -O $HOME/models/embeddings.gguf
-# Expose (7860, for Hugging Face, 4000 for AI Assistant)
-EXPOSE 7860
 CMD ["./start-services.sh"]

+FROM ubuntu:22.04
 # Create non-root user
 RUN useradd -m -u 1000 user
 # Set environment variables
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH \
     JWT_ALGORITHM=RS256 \
+    API_AUTH_TOKEN=secret \
     DASHBOARD_USERNAME=dashboard \
     DASHBOARD_PASSWORD=secret \
+    SECRET_KEY_BASE=secret-key-base
 # Install minimal dependencies
 RUN apt-get update && apt-get install -y \
     curl \
     nginx \
     && rm -rf /var/lib/apt/lists/*
 # Install Node.js and pnpm
 RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
     apt-get update && \
 RUN corepack enable && corepack prepare pnpm@latest --activate
 # Create directories and set permissions
+RUN mkdir -p $HOME/app && \
     mkdir -p $HOME/app/docauth && \
     mkdir -p $HOME/app/aia && \
     mkdir -p /var/cache/nginx && \
 COPY --chown=user:user document-authoring.js $HOME/app/docauth/
 COPY --chown=user:user Sample.docx $HOME/app/docauth/
+# Copy auth service files
+COPY --chown=user:user auth-service.js $HOME/app/auth/
+COPY --chown=user:user package.json $HOME/app/auth/
 # Copy start script
 COPY --chown=user:user start-services.sh $HOME/app/
 RUN chmod +x $HOME/app/start-services.sh
+# Install auth service dependencies
+RUN cd $HOME/app/auth && pnpm install
 # Switch to non-root user
 USER user
 WORKDIR $HOME/app
+# Expose port 4000 for AI Assistant
+EXPOSE 4000
 CMD ["./start-services.sh"]

auth-service.js ADDED Viewed

	@@ -0,0 +1,29 @@

+const express = require('express');
+const jwt = require('jsonwebtoken');
+const app = express();
+const port = 4001;
+const privateKey = process.env.JWT_PRIVATE_KEY;
+if (!privateKey) {
+  console.error('JWT_PRIVATE_KEY environment variable is required');
+  process.exit(1);
+}
+app.get('/auth-token', (req, res) => {
+  try {
+    const token = jwt.sign({}, privateKey, {
+      algorithm: process.env.JWT_ALGORITHM || 'RS256',
+      expiresIn: '1h'
+    });
+    res.json({ token });
+  } catch (error) {
+    console.error('Error generating token:', error);
+    res.status(500).json({ error: 'Failed to generate token' });
+  }
+});
+app.listen(port, () => {
+  console.log(`Auth service listening on port ${port}`);
+});

document-authoring.js CHANGED Viewed

@@ -5,61 +5,73 @@ const app = document.getElementById('app');
 let retryCount = 0;
 const MAX_RETRIES = 300; // 600 seconds / 2 second interval = 300 attempts
-function checkServicesStatus() {
-  fetch('/inference/api/v1/chat/completions', {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json'
-    },
-    body: JSON.stringify({
-      model: 'gemma-2b',
-      messages: [{role: 'user', content: 'hi'}]
-    })
-  })
-    .then(response => {
-      if (response.ok) {
-        window.servicesReady = true;
-        const translationControls = document.getElementById('translationControls');
-        const statusIndicator = document.getElementById('statusIndicator');
-        const loadingOverlay = document.getElementById('loadingOverlay');
-        if (translationControls) {
-          translationControls.style.display = 'block';
-        }
-        if (statusIndicator) {
-          statusIndicator.style.display = 'none';
-        }
-        if (loadingOverlay) {
-          loadingOverlay.classList.add('hidden');
-        }
-        return true;
-      }
-      throw new Error('Services not ready');
-    })
-    .catch(error => {
-      retryCount++;
       const statusIndicator = document.getElementById('statusIndicator');
       if (statusIndicator) {
-        if (retryCount >= MAX_RETRIES) {
-          statusIndicator.innerHTML = '❌ Failed to initialize AI services. Try restarting the space.';
-          statusIndicator.style.color = '#dc3545';
-          clearInterval(statusInterval);
-        } else {
-          statusIndicator.innerHTML = `<span class="spinner"></span> Initializing AI services...`;
-        }
       }
-      console.log('Waiting for services...', error);
-      return false;
-    });
 }
-// Check status every 2 seconds until ready or max retries reached.
-const statusInterval = setInterval(() => {
-  if (window.servicesReady || retryCount >= MAX_RETRIES) {
-    clearInterval(statusInterval);
-  } else {
-    checkServicesStatus();
-  }
-}, 2000);
 // Load Document Authoring SDK.
 const script = document.createElement('script');
@@ -122,15 +134,17 @@ script.onload = async () => {
   async function translate(content, targetLang, sourceLang = 'English') {
     try {
       const response = await fetch('/inference/api/v1/chat/completions', {
         method: 'POST',
         headers: {
           'Content-Type': 'application/json',
         },
         body: JSON.stringify({
           messages: [
             {
-              role: "system",
               content: `CRITICAL INSTRUCTION: The word "Nutrient" is a company name and must stay EXACTLY as "Nutrient" in the translation - never translate it to "Nutriente" or any other word.
 Translate the following text from ${sourceLang} to ${targetLang}.
@@ -143,11 +157,9 @@ Additional rules:
 Example:
 EN: "Companies use Nutrient to..."
-${targetLang}: "Las empresas usan Nutrient para..."`
-            },
-            {
-              role: "user",
-              content: content
             }
           ],
           model: "gemma-2b",

 let retryCount = 0;
 const MAX_RETRIES = 300; // 600 seconds / 2 second interval = 300 attempts
+// Add function to get JWT token
+async function getAuthToken() {
+  try {
+    const response = await fetch('/api/auth-token');
+    if (!response.ok) {
+      throw new Error('Failed to fetch auth token');
+    }
+    const { token } = await response.json();
+    return token;
+  } catch (error) {
+    console.error('Error getting auth token:', error);
+    throw error;
+  }
+}
+async function checkServicesStatus() {
+  try {
+    const token = await getAuthToken();
+    const response = await fetch('/inference/api/v1/chat/completions', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Token token=${token}`
+      },
+      body: JSON.stringify({
+        model: 'gemma-2b',
+        messages: [{role: 'user', content: 'hi'}]
+      })
+    });
+    if (response.ok) {
+      window.servicesReady = true;
+      const translationControls = document.getElementById('translationControls');
       const statusIndicator = document.getElementById('statusIndicator');
+      const loadingOverlay = document.getElementById('loadingOverlay');
+      if (translationControls) {
+        translationControls.style.display = 'block';
+      }
       if (statusIndicator) {
+        statusIndicator.style.display = 'none';
       }
+      if (loadingOverlay) {
+        loadingOverlay.classList.add('hidden');
+      }
+      return true;
+    }
+    throw new Error('Services not ready');
+  } catch (error) {
+    retryCount++;
+    const statusIndicator = document.getElementById('statusIndicator');
+    if (statusIndicator) {
+      if (retryCount >= MAX_RETRIES) {
+        statusIndicator.innerHTML = '❌ Failed to initialize AI services. Try restarting the space.';
+        statusIndicator.style.color = '#dc3545';
+      } else {
+        statusIndicator.innerHTML = `<span class="spinner"></span> Initializing AI services...`;
+        // Schedule next check only if we haven't exceeded retries
+        setTimeout(checkServicesStatus, 2000);
+      }
+    }
+    console.log('Waiting for services...', error);
+    return false;
+  }
 }
+// Start the first check
+checkServicesStatus();
 // Load Document Authoring SDK.
 const script = document.createElement('script');
   async function translate(content, targetLang, sourceLang = 'English') {
     try {
+      const token = await getAuthToken();
       const response = await fetch('/inference/api/v1/chat/completions', {
         method: 'POST',
         headers: {
           'Content-Type': 'application/json',
+          'Authorization': `Token token=${token}`
         },
         body: JSON.stringify({
           messages: [
             {
+              role: "user",
               content: `CRITICAL INSTRUCTION: The word "Nutrient" is a company name and must stay EXACTLY as "Nutrient" in the translation - never translate it to "Nutriente" or any other word.
 Translate the following text from ${sourceLang} to ${targetLang}.
 Example:
 EN: "Companies use Nutrient to..."
+${targetLang}: "Las empresas usan Nutrient para..."
+${content}`
             }
           ],
           model: "gemma-2b",

nginx.conf CHANGED Viewed

@@ -19,17 +19,15 @@ http {
              proxy_pass http://127.0.0.1:4000;
         }
-        location /v1/embeddings {
-            proxy_pass http://127.0.0.1:8081;
-        }
-        location /v1 {
-            proxy_pass http://127.0.0.1:8082;
-        }
         location /api/license-key {
             default_type application/json;
             return 200 '{"licenseKey": "$DOCAUTH_LICENSE_KEY"}';
         }
     }
 }

              proxy_pass http://127.0.0.1:4000;
         }
         location /api/license-key {
             default_type application/json;
             return 200 '{"licenseKey": "$DOCAUTH_LICENSE_KEY"}';
         }
+        location /api/auth-token {
+            proxy_pass http://localhost:4001/auth-token;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+        }
     }
 }

package.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "dependencies": {
+    "express": "^4.18.2",
+    "jsonwebtoken": "^9.0.2"
+  }
+}

service-config.yml CHANGED Viewed

@@ -3,18 +3,17 @@ version: '1'
 aiServices:
   chat:
     provider:
-      name: 'openai-compat'
-      baseUrl: http://127.0.0.1:7861/v1
-    model: 'gemma-2b'
   textEmbeddings:
     provider:
-      name: 'openai-compat'
-      baseUrl: http://127.0.0.1:7861/v1
-    model: 'all-MiniLM-L6-v2'
   inference:
     - provider:
         name: 'openai-compat'
-        baseUrl: http://127.0.0.1:7861/v1
       model:
-        name: 'gemma-2b'
         id: 'gemma-2b'

 aiServices:
   chat:
     provider:
+      name: 'openai'
+    model: 'gpt-4o'
   textEmbeddings:
     provider:
+      name: 'openai'
+    model: 'text-embedding-3-small'
   inference:
     - provider:
         name: 'openai-compat'
+        baseUrl: https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1
       model:
+        name: 'google/gemma-2-2b-it'
         id: 'gemma-2b'
+        topP: 0.9

start-services.sh CHANGED Viewed

@@ -1,15 +1,6 @@
 #!/bin/bash
 set -e
-# Check GPU status and compute capability
-echo "Checking GPU status..."
-nvidia-smi || echo "Warning: nvidia-smi failed. GPU might not be available"
-echo "GPU Compute Capability:"
-nvidia-smi --query-gpu=compute_cap --format=csv,noheader || echo "Warning: Could not get compute capability"
-# Create temporary directory for llamafiler
-mkdir -p /tmp/llamafiler
 # Start nginx
 echo "Starting nginx..."
 /usr/sbin/nginx -c /etc/nginx/nginx.conf
@@ -25,45 +16,11 @@ if ! ps aux | grep nginx | grep -v grep > /dev/null; then
 fi
 echo "Nginx started successfully"
-# Start the models
-echo "Starting models..."
-TMPDIR=/tmp/llamacpp ./llama-server -m $HOME/models/gemma-2b.gguf -ngl 999 --host 0.0.0.0 --port 8082 &
-GEMMA_PID=$!
-TMPDIR=/tmp/llamacpp ./llama-server --embedding -m $HOME/models/embeddings.gguf -ngl 999 --host 0.0.0.0 --port 8081 &
-EMBEDDINGS_PID=$!
-# Wait for models to be ready
-echo "Waiting for models to be ready..."
-START_TIME=$SECONDS
-TIMEOUT=600  # 10 minutes
-wait_for_models() {
-    CHAT_HEALTH=$(curl -s http://127.0.0.1:8082/health)
-    EMBED_HEALTH=$(curl -s http://127.0.0.1:8081/health)
-    [[ "$CHAT_HEALTH" == *"\"status\":\"ok\""* ]] && [[ "$EMBED_HEALTH" == *"\"status\":\"ok\""* ]]
-}
-until wait_for_models; do
-    ELAPSED=$((SECONDS - START_TIME))
-    if [ $ELAPSED -gt $TIMEOUT ]; then
-        echo "Timeout after ${TIMEOUT} seconds"
-        exit 1
-    fi
-    if ! kill -0 $GEMMA_PID 2>/dev/null || ! kill -0 $EMBEDDINGS_PID 2>/dev/null; then
-        echo "Model process died"
-        exit 1
-    fi
-    echo "Waiting for models... (${ELAPSED}s elapsed)"
-    sleep 2
-done
 # Start AI Assistant
-echo "Models ready after ${ELAPSED}s. Starting AI Assistant..."
 cd $HOME/app/aia
-PORT=4000 node app/main.bundle.js &
-AIA_PID=$!
-# Keep container running
-wait $GEMMA_PID

 #!/bin/bash
 set -e
 # Start nginx
 echo "Starting nginx..."
 /usr/sbin/nginx -c /etc/nginx/nginx.conf
 fi
 echo "Nginx started successfully"
+# Start auth service
+cd $HOME/app/auth
+node auth-service.js &
 # Start AI Assistant
+echo "Starting AI Assistant..."
 cd $HOME/app/aia
+PORT=4000 node app/main.bundle.js