Use HF Serverless inference
- Dockerfile +13 -27
- auth-service.js +29 -0
- document-authoring.js +68 -56
- nginx.conf +6 -8
- package.json +6 -0
- service-config.yml +7 -8
- start-services.sh +5 -48
Dockerfile
CHANGED
@@ -1,4 +1,4 @@
-FROM …
+FROM ubuntu:22.04
 
 # Create non-root user
 RUN useradd -m -u 1000 user
@@ -6,32 +6,17 @@ RUN useradd -m -u 1000 user
 # Set environment variables
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH \
-    API_AUTH_TOKEN=secret \
     JWT_ALGORITHM=RS256 \
+    API_AUTH_TOKEN=secret \
     DASHBOARD_USERNAME=dashboard \
     DASHBOARD_PASSWORD=secret \
-    SECRET_KEY_BASE=secret-key-base \
-    JWT_PUBLIC_KEY="-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2gzhmJ9TDanEzWdP1WG+\n0Ecwbe7f3bv6e5UUpvcT5q68IQJKP47AQdBAnSlFVi4X9SaurbWoXdS6jpmPpk24\nQvitzLNFphHdwjFBelTAOa6taZrSusoFvrtK9x5xsW4zzt/bkpUraNx82Z8MwLwr\nt6HlY7dgO9+xBAabj4t1d2t+0HS8O/ed3CB6T2lj6S8AbLDSEFc9ScO6Uc1XJlSo\nrgyJJSPCpNhSq3AubEZ1wMS1iEtgAzTPRDsQv50qWIbn634HLWxTP/UH6YNJBwzt\n3O6q29kTtjXlMGXCvin37PyX4Jy1IiPFwJm45aWJGKSfVGMDojTJbuUtM+8P9Rrn\nAwIDAQAB\n-----END PUBLIC KEY-----"
-
+    SECRET_KEY_BASE=secret-key-base
 # Install minimal dependencies
 RUN apt-get update && apt-get install -y \
-    wget \
     curl \
-    unzip \
-    clang \
-    cuda-toolkit \
     nginx \
-    build-essential \
-    cmake \
-    git \
-    libcurl4-openssl-dev \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy llama.cpp server files from official image
-COPY --from=ghcr.io/ggerganov/llama.cpp:server-cuda /app/llama-server $HOME/app/llama-server
-COPY --from=ghcr.io/ggerganov/llama.cpp:server-cuda /app/*.so* $HOME/app/
-RUN chmod +x $HOME/app/llama-server
-
 # Install Node.js and pnpm
 RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
     apt-get update && \
@@ -43,9 +28,7 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
 RUN corepack enable && corepack prepare pnpm@latest --activate
 
 # Create directories and set permissions
-RUN mkdir -p /…
-    mkdir -p $HOME/models && \
-    mkdir -p $HOME/app && \
+RUN mkdir -p $HOME/app && \
     mkdir -p $HOME/app/docauth && \
     mkdir -p $HOME/app/aia && \
     mkdir -p /var/cache/nginx && \
@@ -75,19 +58,22 @@ COPY --chown=user:user index.html $HOME/app/docauth/
 COPY --chown=user:user document-authoring.js $HOME/app/docauth/
 COPY --chown=user:user Sample.docx $HOME/app/docauth/
 
+# Copy auth service files
+COPY --chown=user:user auth-service.js $HOME/app/auth/
+COPY --chown=user:user package.json $HOME/app/auth/
+
 # Copy start script
 COPY --chown=user:user start-services.sh $HOME/app/
 RUN chmod +x $HOME/app/start-services.sh
 
+# Install auth service dependencies
+RUN cd $HOME/app/auth && pnpm install
+
 # Switch to non-root user
 USER user
 WORKDIR $HOME/app
 
-# …
-…
-wget -q https://huggingface.co/leliuga/all-MiniLM-L6-v2-GGUF/resolve/main/all-MiniLM-L6-v2.F16.gguf -O $HOME/models/embeddings.gguf
-
-# Expose (7860, for Hugging Face, 4000 for AI Assistant)
-EXPOSE 7860
+# Expose port 4000 for AI Assistant
+EXPOSE 4000
 
 CMD ["./start-services.sh"]
auth-service.js
ADDED
@@ -0,0 +1,29 @@
+const express = require('express');
+const jwt = require('jsonwebtoken');
+
+const app = express();
+const port = 4001;
+
+const privateKey = process.env.JWT_PRIVATE_KEY;
+if (!privateKey) {
+  console.error('JWT_PRIVATE_KEY environment variable is required');
+  process.exit(1);
+}
+
+app.get('/auth-token', (req, res) => {
+  try {
+    const token = jwt.sign({}, privateKey, {
+      algorithm: process.env.JWT_ALGORITHM || 'RS256',
+      expiresIn: '1h'
+    });
+
+    res.json({ token });
+  } catch (error) {
+    console.error('Error generating token:', error);
+    res.status(500).json({ error: 'Failed to generate token' });
+  }
+});
+
+app.listen(port, () => {
+  console.log(`Auth service listening on port ${port}`);
+});
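Note: this file only issues tokens; they are presumably validated by the AI Assistant service behind /inference. As a minimal sketch of how such a token could be checked with the matching public key (the JWT_PUBLIC_KEY variable name mirrors the key removed from the Dockerfile above; this verifier file is hypothetical and not part of the commit):

// verify-token.js — hypothetical helper, not included in this change
const jwt = require('jsonwebtoken');

// Assumption: the PEM public key paired with JWT_PRIVATE_KEY is provided via env.
const publicKey = process.env.JWT_PUBLIC_KEY;

function verifyAuthToken(token) {
  // Throws if the signature is invalid or the 1-hour expiry has passed.
  return jwt.verify(token, publicKey, {
    algorithms: [process.env.JWT_ALGORITHM || 'RS256']
  });
}

module.exports = { verifyAuthToken };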
document-authoring.js
CHANGED
@@ -5,61 +5,73 @@ const app = document.getElementById('app');
 let retryCount = 0;
 const MAX_RETRIES = 300; // 600 seconds / 2 second interval = 300 attempts
 
-function …
-…
-}
-…
-}
-…
+// Add function to get JWT token
+async function getAuthToken() {
+  try {
+    const response = await fetch('/api/auth-token');
+    if (!response.ok) {
+      throw new Error('Failed to fetch auth token');
+    }
+    const { token } = await response.json();
+    return token;
+  } catch (error) {
+    console.error('Error getting auth token:', error);
+    throw error;
+  }
+}
+
+async function checkServicesStatus() {
+  try {
+    const token = await getAuthToken();
+    const response = await fetch('/inference/api/v1/chat/completions', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Token token=${token}`
+      },
+      body: JSON.stringify({
+        model: 'gemma-2b',
+        messages: [{role: 'user', content: 'hi'}]
+      })
+    });
+
+    if (response.ok) {
+      window.servicesReady = true;
+      const translationControls = document.getElementById('translationControls');
       const statusIndicator = document.getElementById('statusIndicator');
+      const loadingOverlay = document.getElementById('loadingOverlay');
+      if (translationControls) {
+        translationControls.style.display = 'block';
+      }
       if (statusIndicator) {
-…
-        statusIndicator.innerHTML = '❌ Failed to initialize AI services. Try restarting the space.';
-        statusIndicator.style.color = '#dc3545';
-        clearInterval(statusInterval);
-      } else {
-        statusIndicator.innerHTML = `<span class="spinner"></span> Initializing AI services...`;
-      }
+        statusIndicator.style.display = 'none';
       }
-…
+      if (loadingOverlay) {
+        loadingOverlay.classList.add('hidden');
+      }
+      return true;
+    }
+    throw new Error('Services not ready');
+  } catch (error) {
+    retryCount++;
+    const statusIndicator = document.getElementById('statusIndicator');
+    if (statusIndicator) {
+      if (retryCount >= MAX_RETRIES) {
+        statusIndicator.innerHTML = '❌ Failed to initialize AI services. Try restarting the space.';
+        statusIndicator.style.color = '#dc3545';
+      } else {
+        statusIndicator.innerHTML = `<span class="spinner"></span> Initializing AI services...`;
+        // Schedule next check only if we haven't exceeded retries
+        setTimeout(checkServicesStatus, 2000);
+      }
+    }
+    console.log('Waiting for services...', error);
+    return false;
+  }
 }
 
-// …
-…
-if (window.servicesReady || retryCount >= MAX_RETRIES) {
-  clearInterval(statusInterval);
-} else {
-  checkServicesStatus();
-}
-}, 2000);
+// Start the first check
+checkServicesStatus();
 
 // Load Document Authoring SDK.
 const script = document.createElement('script');
@@ -122,15 +134,17 @@ script.onload = async () => {
 
 async function translate(content, targetLang, sourceLang = 'English') {
   try {
+    const token = await getAuthToken();
     const response = await fetch('/inference/api/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
+        'Authorization': `Token token=${token}`
      },
      body: JSON.stringify({
        messages: [
          {
-            role: "…
+            role: "user",
            content: `CRITICAL INSTRUCTION: The word "Nutrient" is a company name and must stay EXACTLY as "Nutrient" in the translation - never translate it to "Nutriente" or any other word.
 
 Translate the following text from ${sourceLang} to ${targetLang}.
@@ -143,11 +157,9 @@ Additional rules:
 
 Example:
 EN: "Companies use Nutrient to..."
-${targetLang}: "Las empresas usan Nutrient para..."
-…
-…
-            role: "user",
-            content: content
+${targetLang}: "Las empresas usan Nutrient para..."
+
+${content}`
          }
        ],
        model: "gemma-2b",
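For illustration only, a sketch of how the token-aware translate() above might be driven from the page. The button ID and the assumption that translate() resolves to the translated text are hypothetical, since the rest of the file is not shown in this diff:

// Hypothetical UI wiring — not part of this commit.
const translateButton = document.getElementById('translateButton'); // assumed element ID
if (translateButton) {
  translateButton.addEventListener('click', async () => {
    try {
      // translate() first calls getAuthToken(), then posts to /inference/api/v1/chat/completions.
      const result = await translate('Companies use Nutrient to build document workflows.', 'Spanish');
      console.log('Translated text:', result);
    } catch (error) {
      console.error('Translation failed:', error);
    }
  });
}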
nginx.conf
CHANGED
@@ -19,17 +19,15 @@ http {
             proxy_pass http://127.0.0.1:4000;
         }
 
-        location /v1/embeddings {
-            proxy_pass http://127.0.0.1:8081;
-        }
-
-        location /v1 {
-            proxy_pass http://127.0.0.1:8082;
-        }
-
         location /api/license-key {
             default_type application/json;
             return 200 '{"licenseKey": "$DOCAUTH_LICENSE_KEY"}';
         }
+
+        location /api/auth-token {
+            proxy_pass http://localhost:4001/auth-token;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+        }
     }
 }
package.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "dependencies": {
+    "express": "^4.18.2",
+    "jsonwebtoken": "^9.0.2"
+  }
+}
service-config.yml
CHANGED
@@ -3,18 +3,17 @@ version: '1'
 aiServices:
   chat:
     provider:
-      name: 'openai…
-…
-      model: 'gemma-2b'
+      name: 'openai'
+      model: 'gpt-4o'
   textEmbeddings:
     provider:
-      name: 'openai…
-…
-      model: 'all-MiniLM-L6-v2'
+      name: 'openai'
+      model: 'text-embedding-3-small'
 inference:
 - provider:
     name: 'openai-compat'
-    baseUrl: …
+    baseUrl: https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1
   model:
-    name: 'gemma-2b'
+    name: 'google/gemma-2-2b-it'
     id: 'gemma-2b'
+    topP: 0.9
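For context on the new inference provider, a minimal standalone sketch of an OpenAI-compatible request against the baseUrl configured above. The HF_TOKEN variable name, the max_tokens value, and the response handling are assumptions for illustration, not part of this change; it assumes Node 18+ (global fetch) and a Hugging Face access token with Inference API permissions:

// hf-inference-probe.js — hypothetical probe of the configured serverless endpoint
const baseUrl = 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1';

async function probe() {
  const response = await fetch(`${baseUrl}/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${process.env.HF_TOKEN}` // assumed env var holding an HF access token
    },
    body: JSON.stringify({
      model: 'google/gemma-2-2b-it',
      messages: [{ role: 'user', content: 'hi' }],
      max_tokens: 16
    })
  });
  console.log(response.status, await response.text());
}

probe();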
start-services.sh
CHANGED
@@ -1,15 +1,6 @@
 #!/bin/bash
 set -e
 
-# Check GPU status and compute capability
-echo "Checking GPU status..."
-nvidia-smi || echo "Warning: nvidia-smi failed. GPU might not be available"
-echo "GPU Compute Capability:"
-nvidia-smi --query-gpu=compute_cap --format=csv,noheader || echo "Warning: Could not get compute capability"
-
-# Create temporary directory for llamafiler
-mkdir -p /tmp/llamafiler
-
 # Start nginx
 echo "Starting nginx..."
 /usr/sbin/nginx -c /etc/nginx/nginx.conf
@@ -25,45 +16,11 @@ if ! ps aux | grep nginx | grep -v grep > /dev/null; then
 fi
 echo "Nginx started successfully"
 
-# Start …
-…
-…
-GEMMA_PID=$!
-
-TMPDIR=/tmp/llamacpp ./llama-server --embedding -m $HOME/models/embeddings.gguf -ngl 999 --host 0.0.0.0 --port 8081 &
-EMBEDDINGS_PID=$!
-
-# Wait for models to be ready
-echo "Waiting for models to be ready..."
-START_TIME=$SECONDS
-TIMEOUT=600 # 10 minutes
-
-wait_for_models() {
-  CHAT_HEALTH=$(curl -s http://127.0.0.1:8082/health)
-  EMBED_HEALTH=$(curl -s http://127.0.0.1:8081/health)
-
-  [[ "$CHAT_HEALTH" == *"\"status\":\"ok\""* ]] && [[ "$EMBED_HEALTH" == *"\"status\":\"ok\""* ]]
-}
-
-until wait_for_models; do
-  ELAPSED=$((SECONDS - START_TIME))
-  if [ $ELAPSED -gt $TIMEOUT ]; then
-    echo "Timeout after ${TIMEOUT} seconds"
-    exit 1
-  fi
-  if ! kill -0 $GEMMA_PID 2>/dev/null || ! kill -0 $EMBEDDINGS_PID 2>/dev/null; then
-    echo "Model process died"
-    exit 1
-  fi
-  echo "Waiting for models... (${ELAPSED}s elapsed)"
-  sleep 2
-done
+# Start auth service
+cd $HOME/app/auth
+node auth-service.js &
 
 # Start AI Assistant
-echo "…
+echo "Starting AI Assistant..."
 cd $HOME/app/aia
-PORT=4000 node app/main.bundle.js
-AIA_PID=$!
-
-# Keep container running
-wait $GEMMA_PID
+PORT=4000 node app/main.bundle.js