Spaces: Sleeping

Jordi Catafal committed · 023e423 · Parent(s): 03eefac

    cleaning + readme

Files changed:
- README.md (+259 -334)
- app_endpoints.py (deleted, -308)
- app_hybrid_backup.py (deleted, -189)
- app_old.py (deleted, -159)
- app_old_minimal.py (deleted, -165)
- test_api.py (deleted, -64)
- test_hybrid.py (deleted, -98)
README.md (CHANGED)

@@ -7,54 +7,67 @@ sdk: docker

Removed (old README, -334 lines; only fragments survive in this diff view): the previous README kept the Spaces configuration-reference note (https://huggingface.co/docs/hub/spaces-config-reference) and documented a single `POST /embed` endpoint that generated embeddings "for up to 50 texts in a single request" and selected the model through a `"model"` field in the request body, alongside `GET /models` ("Get detailed information about available models"), a health check ("Check API status and model availability"), and `GET /` ("Basic API information and status"). It carried Python, cURL, JavaScript, and LangChain-style examples built around that single endpoint, a model-comparison snippet (`"inteligencia artificial"` vs. `"intel·ligència artificial"` across `jina`, `roberta-ca`, and `jina-v3`), a request/response schema whose `model` field defaulted to `"jina"`, batch-processing and semantic-search recipes, an error table with suggested fixes, an authentication section, links to [Jina Embeddings v2 Spanish](https://huggingface.co/jinaai/jina-embeddings-v2-base-es) and [RoBERTalex](https://huggingface.co/PlanTL-GOB-ES/RoBERTalex), and the closing line "Built with ❤️ using FastAPI and Hugging Face Transformers". A representative removed cURL example, preserved intact from the diff:

```bash
# Basic embedding generation with Jina v2 Spanish (old single-endpoint API)
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["Texto de ejemplo", "Otro texto en español"],
    "model": "jina",
    "normalize": true
  }'
```

Added (new README, +259 lines), reproduced below:
pinned: false
---

# Multilingual & Legal Embeddings API

A high-performance FastAPI application providing access to **5 specialized embedding models** for Spanish, Catalan, English, and multilingual text. Each model has its own dedicated endpoint for optimal performance and clarity.

🌐 **Live API**: [https://aurasystems-spanish-embeddings-api.hf.space](https://aurasystems-spanish-embeddings-api.hf.space)
📖 **Interactive Docs**: [https://aurasystems-spanish-embeddings-api.hf.space/docs](https://aurasystems-spanish-embeddings-api.hf.space/docs)

## 🚀 Quick Start

### Basic Usage

```bash
# Test jina-v3 endpoint (multilingual, loads at startup)
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3" \
  -H "Content-Type: application/json" \
  -d '{"texts": ["Hello world", "Hola mundo"], "normalize": true}'

# Test Catalan RoBERTa endpoint
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/roberta-ca" \
  -H "Content-Type: application/json" \
  -d '{"texts": ["Bon dia", "Com estàs?"], "normalize": true}'
```

## 📚 Available Models & Endpoints

| Endpoint | Model | Languages | Dimensions | Max Tokens | Loading Strategy |
|----------|--------|-----------|------------|------------|------------------|
| `/embed/jina-v3` | jinaai/jina-embeddings-v3 | Multilingual (30+) | 1024 | 8192 | **Startup** |
| `/embed/roberta-ca` | projecte-aina/roberta-large-ca-v2 | Catalan | 1024 | 512 | On-demand |
| `/embed/jina` | jinaai/jina-embeddings-v2-base-es | Spanish, English | 768 | 8192 | On-demand |
| `/embed/robertalex` | PlanTL-GOB-ES/RoBERTalex | Spanish Legal | 768 | 512 | On-demand |
| `/embed/legal-bert` | nlpaueb/legal-bert-base-uncased | English Legal | 768 | 512 | On-demand |

### Model Recommendations

- **🌍 General multilingual**: Use `/embed/jina-v3` - Best overall performance
- **🇪🇸 Spanish general**: Use `/embed/jina` - Excellent for Spanish/English
- **🇪🇸 Spanish legal**: Use `/embed/robertalex` - Specialized for legal texts
- **🏴 Catalan**: Use `/embed/roberta-ca` - Best for Catalan text
- **🇬🇧 English legal**: Use `/embed/legal-bert` - Specialized for legal documents

A small client-side helper that applies these recommendations is sketched below.
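The routing table above can be wrapped in a tiny client-side helper. This is an illustrative sketch only: the `ENDPOINT_BY_USE_CASE` mapping and `embed()` function are hypothetical names, and only the endpoint paths and request fields come from the documentation above.

```python
import requests

API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"

# Hypothetical mapping from use case to the documented endpoints above
ENDPOINT_BY_USE_CASE = {
    "multilingual": "jina-v3",
    "spanish": "jina",
    "spanish-legal": "robertalex",
    "catalan": "roberta-ca",
    "english-legal": "legal-bert",
}

def embed(texts, use_case="multilingual", normalize=True):
    """Route texts to the recommended endpoint for the given use case."""
    endpoint = ENDPOINT_BY_USE_CASE[use_case]
    response = requests.post(
        f"{API_URL}/embed/{endpoint}",
        json={"texts": texts, "normalize": normalize},
        timeout=120,  # on-demand models may take ~30-60s on their first request
    )
    response.raise_for_status()
    return response.json()["embeddings"]

# Example: Catalan text is sent to /embed/roberta-ca
vectors = embed(["Bon dia"], use_case="catalan")
```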
## 🔗 API Endpoints

### Model-Specific Embedding Endpoints

Each model has its dedicated endpoint:

```
POST /embed/jina-v3      # Multilingual (startup model)
POST /embed/roberta-ca   # Catalan
POST /embed/jina         # Spanish/English
POST /embed/robertalex   # Spanish Legal
POST /embed/legal-bert   # English Legal
```

### Utility Endpoints

```
GET /          # API information
GET /health    # Health check and model status
GET /models    # List all models with specifications
```

## 📖 Usage Examples

```python
import requests

API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"

# Example 1: Multilingual with Jina v3 (startup model - fastest)
response = requests.post(
    f"{API_URL}/embed/jina-v3",
    json={
        "texts": [
            "Hello world",      # English
            "Hola mundo",       # Spanish
            "Bonjour monde",    # French
            "こんにちは世界"       # Japanese
        ],
        "normalize": True
    }
)
result = response.json()
print(f"Jina v3: {result['dimensions']} dimensions")  # 1024

# Example 2: Catalan text with RoBERTa-ca
response = requests.post(
    f"{API_URL}/embed/roberta-ca",
    json={
        "texts": [
            "Bon dia, com estàs?",
            "Barcelona és una ciutat meravellosa",
            "M'agrada la cultura catalana"
        ],
        "normalize": True
    }
)
catalan_result = response.json()
print(f"Catalan: {catalan_result['dimensions']} dimensions")  # 1024

# Example 3: Spanish legal text with RoBERTalex
response = requests.post(
    f"{API_URL}/embed/robertalex",
    json={
        "texts": [
            "Artículo primero de la constitución",
            "El contrato será válido desde la fecha de firma",
            "La jurisprudencia establece que..."
        ],
        "normalize": True
    }
)
legal_result = response.json()
print(f"Spanish Legal: {legal_result['dimensions']} dimensions")  # 768

# Example 4: English legal text with Legal-BERT
response = requests.post(
    f"{API_URL}/embed/legal-bert",
    json={
        "texts": [
            "This agreement is legally binding",
            "The contract shall be governed by English law",
            "The party hereby agrees and covenants"
        ],
        "normalize": True
    }
)
english_legal_result = response.json()
print(f"English Legal: {english_legal_result['dimensions']} dimensions")  # 768

# Example 5: Spanish/English bilingual with Jina v2
response = requests.post(
    f"{API_URL}/embed/jina",
    json={
        "texts": [
            "Inteligencia artificial y machine learning",
            "Artificial intelligence and machine learning",
            "Procesamiento de lenguaje natural"
        ],
        "normalize": True
    }
)
bilingual_result = response.json()
print(f"Bilingual: {bilingual_result['dimensions']} dimensions")  # 768
```
### JavaScript/Node.js

```javascript
const API_URL = 'https://aurasystems-spanish-embeddings-api.hf.space';

// Function to get embeddings from a specific endpoint
async function getEmbeddings(endpoint, texts) {
  const response = await fetch(`${API_URL}/embed/${endpoint}`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      texts: texts,
      normalize: true
    })
  });

  return await response.json();
}

// Usage examples
try {
  // Multilingual embeddings
  const multilingualResult = await getEmbeddings('jina-v3', [
    'Hello world',
    'Hola mundo',
    'Ciao mondo'
  ]);
  console.log('Multilingual dimensions:', multilingualResult.dimensions);

  // Catalan embeddings
  const catalanResult = await getEmbeddings('roberta-ca', [
    'Bon dia',
    'Com estàs?'
  ]);
  console.log('Catalan dimensions:', catalanResult.dimensions);

} catch (error) {
  console.error('Error:', error);
}
```

### cURL Examples

```bash
# Multilingual with Jina v3 (startup model)
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["Hello", "Hola", "Bonjour"],
    "normalize": true
  }'

# Catalan with RoBERTa-ca
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/roberta-ca" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["Bon dia", "Com estàs?"],
    "normalize": true
  }'

# Spanish legal with RoBERTalex
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/robertalex" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["Artículo primero"],
    "normalize": true
  }'

# English legal with Legal-BERT
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/legal-bert" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["This agreement is binding"],
    "normalize": true
  }'

# Spanish/English bilingual with Jina v2
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["Texto en español", "Text in English"],
    "normalize": true
  }'
```
## 📋 Request/Response Schema

### Request Body

```json
{
  "texts": ["text1", "text2", "..."],
  "normalize": true,
  "max_length": null
}
```

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `texts` | array[string] | ✅ Yes | - | 1-50 texts to embed |
| `normalize` | boolean | No | `true` | L2-normalize embeddings |
| `max_length` | integer/null | No | `null` | Max tokens (model-specific limits) |

### Response Body

```json
{
  "embeddings": [[0.123, -0.456, ...], [0.789, -0.012, ...]],
  "model_used": "jina-v3",
  "dimensions": 1024,
  "num_texts": 2
}
```
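As a quick sanity check on the schema above, the snippet below consumes a response and verifies that normalized embeddings are unit length, so a dot product acts as cosine similarity. It is an illustrative sketch, not part of the API; only the URL, request fields, and response keys come from the schema above.

```python
import numpy as np
import requests

response = requests.post(
    "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3",
    json={"texts": ["Hello world", "Hola mundo"], "normalize": True},
)
payload = response.json()

vectors = np.array(payload["embeddings"])  # shape: (num_texts, dimensions)
assert vectors.shape == (payload["num_texts"], payload["dimensions"])

# With "normalize": true each vector should be (approximately) unit length,
# so a plain dot product behaves like cosine similarity.
print(np.linalg.norm(vectors, axis=1))   # ~1.0 for each text
print(float(vectors[0] @ vectors[1]))    # cosine similarity of the two texts
```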
## ⚡ Performance & Limits

- **Maximum texts per request**: 50
- **Startup model**: `jina-v3` loads at startup (fastest response)
- **On-demand models**: Load on first request (~30-60s first time)
- **Typical response time**: 100-300ms after models are loaded
- **Memory optimization**: Automatic cleanup for large batches
- **CORS enabled**: Works from any domain

Larger collections have to be split into batches of at most 50 texts; a sketch follows this list.
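The previous README carried a batching recipe for the old single endpoint; this is a re-sketch of the same idea for the per-model endpoints. `batch_embed` is an illustrative name rather than anything the API provides; only the 50-text limit, endpoint paths, and request fields come from the documentation above.

```python
import requests

API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"

def batch_embed(texts, endpoint="jina-v3", batch_size=50):
    """Embed an arbitrarily long list of texts in chunks of at most 50 (the API limit)."""
    all_embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        response = requests.post(
            f"{API_URL}/embed/{endpoint}",
            json={"texts": batch, "normalize": True},
            timeout=120,
        )
        response.raise_for_status()
        all_embeddings.extend(response.json()["embeddings"])
    return all_embeddings

# Example: 120 documents are sent as 3 requests (50 + 50 + 20)
documents = [f"Documento número {i}" for i in range(120)]
embeddings = batch_embed(documents, endpoint="jina")
print(len(embeddings))  # 120
```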
## 🔧 Advanced Usage

### LangChain Integration

```python
from langchain.embeddings.base import Embeddings
from typing import List
import requests

class MultilingualEmbeddings(Embeddings):
    """LangChain integration for multilingual embeddings"""

    def __init__(self, endpoint: str = "jina-v3"):
        """
        Initialize with specific endpoint

        Args:
            endpoint: One of "jina-v3", "roberta-ca", "jina", "robertalex", "legal-bert"
        """
        self.api_url = f"https://aurasystems-spanish-embeddings-api.hf.space/embed/{endpoint}"
        self.endpoint = endpoint

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        response = requests.post(
            self.api_url,
            json={"texts": texts, "normalize": True}
        )
        response.raise_for_status()
        return response.json()["embeddings"]

    def embed_query(self, text: str) -> List[float]:
        return self.embed_documents([text])[0]

# Usage examples
multilingual_embeddings = MultilingualEmbeddings("jina-v3")
catalan_embeddings = MultilingualEmbeddings("roberta-ca")
spanish_legal_embeddings = MultilingualEmbeddings("robertalex")
```
### Semantic Search

```python
import numpy as np
import requests
from typing import List

def semantic_search(query: str, documents: List[str], endpoint: str = "jina-v3", top_k: int = 5):
    """Semantic search using a specific model endpoint"""

    response = requests.post(
        f"https://aurasystems-spanish-embeddings-api.hf.space/embed/{endpoint}",
        json={"texts": [query] + documents, "normalize": True}
    )

    embeddings = np.array(response.json()["embeddings"])
    query_embedding = embeddings[0]
    doc_embeddings = embeddings[1:]

    # Calculate cosine similarities (vectors are already normalized)
    similarities = np.dot(doc_embeddings, query_embedding)
    top_indices = np.argsort(similarities)[::-1][:top_k]

    return [(idx, similarities[idx]) for idx in top_indices]

# Example: Multilingual search
documents = [
    "Python programming language",
    "Lenguaje de programación Python",
    "Llenguatge de programació Python",
    "Language de programmation Python"
]

results = semantic_search("código en Python", documents, "jina-v3")
for idx, score in results:
    print(f"{score:.4f}: {documents[idx]}")
```
## 🚨 Error Handling

### HTTP Status Codes

| Code | Description |
|------|-------------|
| 200 | Success |
| 400 | Bad Request (validation error) |
| 422 | Unprocessable Entity (schema error) |
| 500 | Internal Server Error (model loading failed) |

### Common Errors

```python
import requests

# Handle errors properly
try:
    response = requests.post(
        "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3",
        json={"texts": ["text"], "normalize": True}
    )
    response.raise_for_status()
    result = response.json()
except requests.exceptions.HTTPError as e:
    print(f"HTTP error: {e}")
    print(f"Response: {response.text}")
except requests.exceptions.RequestException as e:
    print(f"Request error: {e}")
```
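The previous README listed the usual causes of 400 errors together with their fixes (use a positive integer or omit `max_length`, batch your requests, filter out empty strings before sending). The sketch below folds those fixes into a small client-side pre-validation step; `prepare_texts` is a hypothetical helper, not something the API exposes.

```python
def prepare_texts(texts, max_length=None, limit=50):
    """Client-side pre-validation mirroring the common 400 errors (illustrative only)."""
    cleaned = [t for t in texts if t and t.strip()]  # filter out empty strings
    if not cleaned:
        raise ValueError("No non-empty texts to embed")
    if max_length is not None and max_length <= 0:
        raise ValueError("max_length must be a positive integer or omitted")
    # Respect the 50-texts-per-request limit by returning ready-to-send batches
    return [cleaned[i:i + limit] for i in range(0, len(cleaned), limit)]

batches = prepare_texts(["Hola", "", "   ", "Bon dia"])
print(batches)  # [['Hola', 'Bon dia']]
```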
## 📊 Model Status Check

```python
import requests

# Check which models are loaded
health = requests.get("https://aurasystems-spanish-embeddings-api.hf.space/health")
status = health.json()

print(f"API Status: {status['status']}")
print(f"Startup model loaded: {status['startup_model_loaded']}")
print(f"Available models: {status['available_models']}")
print(f"Models loaded: {status['models_count']}/5")

# Check endpoint status
for model, endpoint_status in status['endpoints'].items():
    print(f"{model}: {endpoint_status}")
```
## 🔒 Authentication & Rate Limits

- **Authentication**: None required (open API)
- **Rate limits**: Generous limits on Hugging Face Spaces
- **CORS**: Enabled for all origins
- **Usage**: Free for research and commercial use
## 🏗️ Architecture

### Endpoint-Per-Model Design

- **Startup model**: `jina-v3` loads at application startup for fastest response
- **On-demand loading**: Other models load when first requested
- **Memory optimization**: Progressive loading reduces startup time
- **Model caching**: Once loaded, models remain in memory for fast inference

A simplified sketch of this lazy-loading pattern follows this list.
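The deleted `app_endpoints.py` further down in this commit implements the pattern with `ensure_model_loaded()` and a module-level `models_cache`. The fragment below is only an illustrative sketch of the same idea (a cache filled on first use), not the production code; model identifiers are taken from the table above.

```python
from functools import lru_cache
from transformers import AutoModel, AutoTokenizer

# Illustrative sketch of on-demand loading: each model is fetched the first
# time its endpoint is hit and then kept in memory for later requests.
MODEL_IDS = {
    "jina-v3": "jinaai/jina-embeddings-v3",
    "roberta-ca": "projecte-aina/roberta-large-ca-v2",
    "jina": "jinaai/jina-embeddings-v2-base-es",
    "robertalex": "PlanTL-GOB-ES/RoBERTalex",
    "legal-bert": "nlpaueb/legal-bert-base-uncased",
}

@lru_cache(maxsize=None)
def get_model(name: str):
    """Load tokenizer and model once; repeated calls return the cached objects."""
    model_id = MODEL_IDS[name]
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModel.from_pretrained(model_id, trust_remote_code=True)
    model.eval()
    return tokenizer, model
```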
### Technical Stack

- **FastAPI**: Modern async web framework
- **Transformers**: Hugging Face model library
- **PyTorch**: Deep learning backend
- **Docker**: Containerized deployment
- **Hugging Face Spaces**: Cloud hosting platform
## 📄 Model Licenses

- **Jina models**: Apache 2.0
- **RoBERTa models**: MIT/Apache 2.0
- **Legal-BERT**: Apache 2.0
## 🤝 Support & Contributing

- **Issues**: [Hugging Face Space discussions](https://huggingface.co/spaces/AuraSystems/spanish-embeddings-api/discussions)
- **Interactive Docs**: [FastAPI Swagger UI](https://aurasystems-spanish-embeddings-api.hf.space/docs)
- **Model Papers**: Check individual model pages on Hugging Face

---

Built with ❤️ using **FastAPI** and **Hugging Face Transformers**
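The deleted application files below import `load_models` and `get_embeddings` from `utils.helpers`, which is not part of this diff. As context only, here is a minimal, assumption-labeled sketch of what such a helper commonly looks like (mean pooling over the last hidden state plus optional L2 normalization); the real helper in this Space may differ.

```python
from typing import List, Optional
import torch

def get_embeddings_sketch(texts: List[str], tokenizer, model,
                          normalize: bool = True,
                          max_length: Optional[int] = None) -> List[List[float]]:
    """Hypothetical stand-in for utils.helpers.get_embeddings (not the actual code)."""
    inputs = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean-pool token embeddings, ignoring padding positions
    mask = inputs["attention_mask"].unsqueeze(-1).float()
    summed = (outputs.last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)
    embeddings = summed / counts

    if normalize:
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
    return embeddings.tolist()
```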
app_endpoints.py (DELETED)

@@ -1,308 +0,0 @@
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from typing import List
import torch
import uvicorn

from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
from utils.helpers import load_models, get_embeddings, cleanup_memory

# Global model cache
models_cache = {}

# Load jina-v3 at startup (most important model)
STARTUP_MODEL = "jina-v3"

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler for startup and shutdown"""
    # Startup - load jina-v3 model
    try:
        global models_cache
        print(f"Loading startup model: {STARTUP_MODEL}...")
        models_cache = load_models([STARTUP_MODEL])
        print(f"Startup model loaded successfully: {list(models_cache.keys())}")
        yield
    except Exception as e:
        print(f"Failed to load startup model: {str(e)}")
        # Continue anyway - jina-v3 can be loaded on demand if startup fails
        yield
    finally:
        # Shutdown - cleanup resources
        cleanup_memory()

def ensure_model_loaded(model_name: str, max_length_limit: int):
    """Load a specific model on demand if not already loaded"""
    global models_cache
    if model_name not in models_cache:
        try:
            print(f"Loading model on demand: {model_name}...")
            new_models = load_models([model_name])
            models_cache.update(new_models)
            print(f"Model {model_name} loaded successfully!")
        except Exception as e:
            print(f"Failed to load model {model_name}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")

def validate_request_for_model(request: EmbeddingRequest, model_name: str, max_length_limit: int):
    """Validate request parameters for specific model"""
    if not request.texts:
        raise HTTPException(status_code=400, detail="No texts provided")

    if len(request.texts) > 50:
        raise HTTPException(status_code=400, detail="Maximum 50 texts per request")

    if request.max_length is not None and request.max_length > max_length_limit:
        raise HTTPException(status_code=400, detail=f"Max length for {model_name} is {max_length_limit}")

app = FastAPI(
    title="Multilingual & Legal Embedding API",
    description="Multi-model embedding API with dedicated endpoints per model",
    version="4.0.0",
    lifespan=lifespan
)

# Add CORS middleware to allow cross-origin requests
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/")
async def root():
    return {
        "message": "Multilingual & Legal Embedding API - Endpoint Per Model",
        "version": "4.0.0",
        "status": "running",
        "docs": "/docs",
        "startup_model": STARTUP_MODEL,
        "available_endpoints": {
            "jina-v3": "/embed/jina-v3",
            "roberta-ca": "/embed/roberta-ca",
            "jina": "/embed/jina",
            "robertalex": "/embed/robertalex",
            "legal-bert": "/embed/legal-bert"
        }
    }

# Jina v3 - Multilingual (loads at startup)
@app.post("/embed/jina-v3", response_model=EmbeddingResponse)
async def embed_jina_v3(request: EmbeddingRequest):
    """Generate embeddings using Jina v3 model (multilingual)"""
    try:
        ensure_model_loaded("jina-v3", 8192)
        validate_request_for_model(request, "jina-v3", 8192)

        embeddings = get_embeddings(
            request.texts,
            "jina-v3",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="jina-v3",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

# Catalan RoBERTa
@app.post("/embed/roberta-ca", response_model=EmbeddingResponse)
async def embed_roberta_ca(request: EmbeddingRequest):
    """Generate embeddings using Catalan RoBERTa model"""
    try:
        ensure_model_loaded("roberta-ca", 512)
        validate_request_for_model(request, "roberta-ca", 512)

        embeddings = get_embeddings(
            request.texts,
            "roberta-ca",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="roberta-ca",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

# Jina v2 - Spanish/English
@app.post("/embed/jina", response_model=EmbeddingResponse)
async def embed_jina(request: EmbeddingRequest):
    """Generate embeddings using Jina v2 Spanish/English model"""
    try:
        ensure_model_loaded("jina", 8192)
        validate_request_for_model(request, "jina", 8192)

        embeddings = get_embeddings(
            request.texts,
            "jina",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="jina",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

# RoBERTalex - Spanish Legal
@app.post("/embed/robertalex", response_model=EmbeddingResponse)
async def embed_robertalex(request: EmbeddingRequest):
    """Generate embeddings using RoBERTalex Spanish legal model"""
    try:
        ensure_model_loaded("robertalex", 512)
        validate_request_for_model(request, "robertalex", 512)

        embeddings = get_embeddings(
            request.texts,
            "robertalex",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="robertalex",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

# Legal BERT - English Legal
@app.post("/embed/legal-bert", response_model=EmbeddingResponse)
async def embed_legal_bert(request: EmbeddingRequest):
    """Generate embeddings using Legal BERT English model"""
    try:
        ensure_model_loaded("legal-bert", 512)
        validate_request_for_model(request, "legal-bert", 512)

        embeddings = get_embeddings(
            request.texts,
            "legal-bert",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="legal-bert",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """List available models and their specifications"""
    return [
        ModelInfo(
            model_id="jina-v3",
            name="jinaai/jina-embeddings-v3",
            dimensions=1024,
            max_sequence_length=8192,
            languages=["Multilingual"],
            model_type="multilingual",
            description="Latest Jina v3 with superior multilingual performance - loaded at startup"
        ),
        ModelInfo(
            model_id="roberta-ca",
            name="projecte-aina/roberta-large-ca-v2",
            dimensions=1024,
            max_sequence_length=512,
            languages=["Catalan"],
            model_type="general",
            description="Catalan RoBERTa-large model trained on large corpus"
        ),
        ModelInfo(
            model_id="jina",
            name="jinaai/jina-embeddings-v2-base-es",
            dimensions=768,
            max_sequence_length=8192,
            languages=["Spanish", "English"],
            model_type="bilingual",
            description="Bilingual Spanish-English embeddings with long context support"
        ),
        ModelInfo(
            model_id="robertalex",
            name="PlanTL-GOB-ES/RoBERTalex",
            dimensions=768,
            max_sequence_length=512,
            languages=["Spanish"],
            model_type="legal domain",
            description="Spanish legal domain specialized embeddings"
        ),
        ModelInfo(
            model_id="legal-bert",
            name="nlpaueb/legal-bert-base-uncased",
            dimensions=768,
            max_sequence_length=512,
            languages=["English"],
            model_type="legal domain",
            description="English legal domain BERT model"
        )
    ]

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    startup_loaded = STARTUP_MODEL in models_cache

    return {
        "status": "healthy" if startup_loaded else "partial",
        "startup_model": STARTUP_MODEL,
        "startup_model_loaded": startup_loaded,
        "available_models": list(models_cache.keys()),
        "models_count": len(models_cache),
        "endpoints": {
            "jina-v3": f"/embed/jina-v3 {'(ready)' if 'jina-v3' in models_cache else '(loads on demand)'}",
            "roberta-ca": f"/embed/roberta-ca {'(ready)' if 'roberta-ca' in models_cache else '(loads on demand)'}",
            "jina": f"/embed/jina {'(ready)' if 'jina' in models_cache else '(loads on demand)'}",
            "robertalex": f"/embed/robertalex {'(ready)' if 'robertalex' in models_cache else '(loads on demand)'}",
            "legal-bert": f"/embed/legal-bert {'(ready)' if 'legal-bert' in models_cache else '(loads on demand)'}"
        }
    }

if __name__ == "__main__":
    # Set multi-threading for CPU
    torch.set_num_threads(8)
    torch.set_num_interop_threads(1)

    uvicorn.run(app, host="0.0.0.0", port=7860)
app_hybrid_backup.py (DELETED)

@@ -1,189 +0,0 @@
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from typing import List
import torch
import uvicorn

from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
from utils.helpers import load_models, get_embeddings, cleanup_memory

# Global model cache
models_cache = {}

# Models to load at startup (most frequently used)
STARTUP_MODELS = ["jina-v3", "roberta-ca"]
# Models to load on demand
ON_DEMAND_MODELS = ["jina", "robertalex", "legal-bert"]

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler for startup and shutdown"""
    # Startup - load priority models
    try:
        global models_cache
        print(f"Loading startup models: {STARTUP_MODELS}...")
        models_cache = load_models(STARTUP_MODELS)
        print(f"Startup models loaded successfully: {list(models_cache.keys())}")
        yield
    except Exception as e:
        print(f"Failed to load startup models: {str(e)}")
        # Continue anyway - models can be loaded on demand
        yield
    finally:
        # Shutdown - cleanup resources
        cleanup_memory()

def ensure_model_loaded(model_name: str):
    """Load a specific model on demand if not already loaded"""
    global models_cache
    if model_name not in models_cache:
        if model_name in ON_DEMAND_MODELS:
            try:
                print(f"Loading model on demand: {model_name}...")
                new_models = load_models([model_name])
                models_cache.update(new_models)
                print(f"Model {model_name} loaded successfully!")
            except Exception as e:
                print(f"Failed to load model {model_name}: {str(e)}")
                raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")
        else:
            raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")

app = FastAPI(
    title="Multilingual & Legal Embedding API",
    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
    version="3.0.0",
    lifespan=lifespan
)

# Add CORS middleware to allow cross-origin requests
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/")
async def root():
    return {
        "message": "Multilingual & Legal Embedding API",
        "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
        "status": "running",
        "docs": "/docs",
        "total_models": 5
    }

@app.post("/embed", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
    """Generate embeddings for input texts"""
    try:
        # Load specific model on demand if needed
        ensure_model_loaded(request.model)

        if not request.texts:
            raise HTTPException(status_code=400, detail="No texts provided")

        if len(request.texts) > 50:  # Rate limiting
            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")

        embeddings = get_embeddings(
            request.texts,
            request.model,
            models_cache,
            request.normalize,
            request.max_length
        )

        # Cleanup memory after large batches
        if len(request.texts) > 20:
            cleanup_memory()

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used=request.model,
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """List available models and their specifications"""
    return [
        ModelInfo(
            model_id="jina",
            name="jinaai/jina-embeddings-v2-base-es",
            dimensions=768,
            max_sequence_length=8192,
            languages=["Spanish", "English"],
            model_type="bilingual",
            description="Bilingual Spanish-English embeddings with long context support"
        ),
        ModelInfo(
            model_id="robertalex",
            name="PlanTL-GOB-ES/RoBERTalex",
            dimensions=768,
            max_sequence_length=512,
            languages=["Spanish"],
            model_type="legal domain",
            description="Spanish legal domain specialized embeddings"
        ),
        ModelInfo(
            model_id="jina-v3",
            name="jinaai/jina-embeddings-v3",
            dimensions=1024,
            max_sequence_length=8192,
            languages=["Multilingual"],
            model_type="multilingual",
            description="Latest Jina v3 with superior multilingual performance"
        ),
        ModelInfo(
            model_id="legal-bert",
            name="nlpaueb/legal-bert-base-uncased",
            dimensions=768,
            max_sequence_length=512,
            languages=["English"],
            model_type="legal domain",
            description="English legal domain BERT model"
        ),
        ModelInfo(
            model_id="roberta-ca",
            name="projecte-aina/roberta-large-ca-v2",
            dimensions=1024,
            max_sequence_length=512,
            languages=["Catalan"],
            model_type="general",
            description="Catalan RoBERTa-large model trained on large corpus"
        )
    ]

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    startup_models_loaded = all(model in models_cache for model in STARTUP_MODELS)
    all_models_loaded = len(models_cache) == 5

    return {
        "status": "healthy" if startup_models_loaded else "partial",
        "startup_models_loaded": startup_models_loaded,
        "all_models_loaded": all_models_loaded,
        "available_models": list(models_cache.keys()),
        "startup_models": STARTUP_MODELS,
        "on_demand_models": ON_DEMAND_MODELS,
        "models_count": len(models_cache),
        "note": f"Startup models: {STARTUP_MODELS} | On-demand: {ON_DEMAND_MODELS}"
    }

if __name__ == "__main__":
    # Set multi-threading for CPU
    torch.set_num_threads(8)
    torch.set_num_interop_threads(1)

    uvicorn.run(app, host="0.0.0.0", port=7860)
app_old.py
DELETED
@@ -1,159 +0,0 @@
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from contextlib import asynccontextmanager
-from typing import List
-import torch
-import uvicorn
-
-from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
-from utils.helpers import load_models, get_embeddings, cleanup_memory
-
-# Global model cache
-models_cache = {}
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Application lifespan handler for startup and shutdown"""
-    # Startup
-    try:
-        global models_cache
-        print("Loading models...")
-        models_cache = load_models()
-        print("All models loaded successfully!")
-        yield
-    except Exception as e:
-        print(f"Failed to load models: {str(e)}")
-        raise
-    finally:
-        # Shutdown - cleanup resources
-        cleanup_memory()
-
-app = FastAPI(
-    title="Multilingual & Legal Embedding API",
-    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
-    version="3.0.0",
-    lifespan=lifespan
-)
-
-# Add CORS middleware to allow cross-origin requests
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # In production, specify actual domains
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-@app.get("/")
-async def root():
-    return {
-        "message": "Multilingual & Legal Embedding API",
-        "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
-        "status": "running",
-        "docs": "/docs",
-        "total_models": 5
-    }
-
-@app.post("/embed", response_model=EmbeddingResponse)
-async def create_embeddings(request: EmbeddingRequest):
-    """Generate embeddings for input texts"""
-    try:
-        if not request.texts:
-            raise HTTPException(status_code=400, detail="No texts provided")
-
-        if len(request.texts) > 50:  # Rate limiting
-            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
-
-        embeddings = get_embeddings(
-            request.texts,
-            request.model,
-            models_cache,
-            request.normalize,
-            request.max_length
-        )
-
-        # Cleanup memory after large batches
-        if len(request.texts) > 20:
-            cleanup_memory()
-
-        return EmbeddingResponse(
-            embeddings=embeddings,
-            model_used=request.model,
-            dimensions=len(embeddings[0]) if embeddings else 0,
-            num_texts=len(request.texts)
-        )
-
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
-
-@app.get("/models", response_model=List[ModelInfo])
-async def list_models():
-    """List available models and their specifications"""
-    return [
-        ModelInfo(
-            model_id="jina",
-            name="jinaai/jina-embeddings-v2-base-es",
-            dimensions=768,
-            max_sequence_length=8192,
-            languages=["Spanish", "English"],
-            model_type="bilingual",
-            description="Bilingual Spanish-English embeddings with long context support"
-        ),
-        ModelInfo(
-            model_id="robertalex",
-            name="PlanTL-GOB-ES/RoBERTalex",
-            dimensions=768,
-            max_sequence_length=512,
-            languages=["Spanish"],
-            model_type="legal domain",
-            description="Spanish legal domain specialized embeddings"
-        ),
-        ModelInfo(
-            model_id="jina-v3",
-            name="jinaai/jina-embeddings-v3",
-            dimensions=1024,
-            max_sequence_length=8192,
-            languages=["Multilingual"],
-            model_type="multilingual",
-            description="Latest Jina v3 with superior multilingual performance"
-        ),
-        ModelInfo(
-            model_id="legal-bert",
-            name="nlpaueb/legal-bert-base-uncased",
-            dimensions=768,
-            max_sequence_length=512,
-            languages=["English"],
-            model_type="legal domain",
-            description="English legal domain BERT model"
-        ),
-        ModelInfo(
-            model_id="roberta-ca",
-            name="projecte-aina/roberta-large-ca-v2",
-            dimensions=1024,
-            max_sequence_length=512,
-            languages=["Catalan"],
-            model_type="general",
-            description="Catalan RoBERTa-large model trained on large corpus"
-        )
-    ]
-
-@app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    models_loaded = len(models_cache) == 5
-    return {
-        "status": "healthy" if models_loaded else "degraded",
-        "models_loaded": models_loaded,
-        "available_models": list(models_cache.keys()),
-        "expected_models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
-        "models_count": len(models_cache)
-    }
-
-if __name__ == "__main__":
-    # Set multi-threading for CPU
-    torch.set_num_threads(8)
-    torch.set_num_interop_threads(1)
-
-    uvicorn.run(app, host="0.0.0.0", port=7860)
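
This legacy entrypoint eagerly loaded all five models inside the FastAPI lifespan handler before serving any traffic. For local debugging it could also be launched through uvicorn's import-string form, which enables auto-reload; this is only a sketch, assuming `app_old.py` is importable from the working directory, and was never part of the repository:

```python
# Hypothetical local-development launcher for the legacy app (an assumption,
# not a file from this repository). The import-string form lets uvicorn
# watch app_old.py and restart the server whenever it changes.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app_old:app", host="127.0.0.1", port=7860, reload=True)
```
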
app_old_minimal.py
DELETED
@@ -1,165 +0,0 @@
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from typing import List
-import torch
-import uvicorn
-
-from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
-from utils.helpers import load_models, get_embeddings, cleanup_memory
-
-# Global model cache - completely on-demand loading
-models_cache = {}
-
-# All models load on demand to test deployment
-ON_DEMAND_MODELS = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]
-
-def ensure_model_loaded(model_name: str):
-    """Load a specific model on demand if not already loaded"""
-    global models_cache
-    if model_name not in models_cache:
-        if model_name in ON_DEMAND_MODELS:
-            try:
-                print(f"Loading model on demand: {model_name}...")
-                new_models = load_models([model_name])
-                models_cache.update(new_models)
-                print(f"Model {model_name} loaded successfully!")
-            except Exception as e:
-                print(f"Failed to load model {model_name}: {str(e)}")
-                raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")
-        else:
-            raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")
-
-app = FastAPI(
-    title="Multilingual & Legal Embedding API",
-    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
-    version="3.0.0"
-)
-
-# Add CORS middleware to allow cross-origin requests
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # In production, specify actual domains
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-@app.get("/")
-async def root():
-    return {
-        "message": "Multilingual & Legal Embedding API - Minimal Version",
-        "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
-        "status": "running",
-        "docs": "/docs",
-        "total_models": 5,
-        "note": "All models load on first request"
-    }
-
-@app.post("/embed", response_model=EmbeddingResponse)
-async def create_embeddings(request: EmbeddingRequest):
-    """Generate embeddings for input texts"""
-    try:
-        # Load specific model on demand
-        ensure_model_loaded(request.model)
-
-        if not request.texts:
-            raise HTTPException(status_code=400, detail="No texts provided")
-
-        if len(request.texts) > 50:  # Rate limiting
-            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
-
-        embeddings = get_embeddings(
-            request.texts,
-            request.model,
-            models_cache,
-            request.normalize,
-            request.max_length
-        )
-
-        # Cleanup memory after large batches
-        if len(request.texts) > 20:
-            cleanup_memory()
-
-        return EmbeddingResponse(
-            embeddings=embeddings,
-            model_used=request.model,
-            dimensions=len(embeddings[0]) if embeddings else 0,
-            num_texts=len(request.texts)
-        )
-
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
-
-@app.get("/models", response_model=List[ModelInfo])
-async def list_models():
-    """List available models and their specifications"""
-    return [
-        ModelInfo(
-            model_id="jina",
-            name="jinaai/jina-embeddings-v2-base-es",
-            dimensions=768,
-            max_sequence_length=8192,
-            languages=["Spanish", "English"],
-            model_type="bilingual",
-            description="Bilingual Spanish-English embeddings with long context support"
-        ),
-        ModelInfo(
-            model_id="robertalex",
-            name="PlanTL-GOB-ES/RoBERTalex",
-            dimensions=768,
-            max_sequence_length=512,
-            languages=["Spanish"],
-            model_type="legal domain",
-            description="Spanish legal domain specialized embeddings"
-        ),
-        ModelInfo(
-            model_id="jina-v3",
-            name="jinaai/jina-embeddings-v3",
-            dimensions=1024,
-            max_sequence_length=8192,
-            languages=["Multilingual"],
-            model_type="multilingual",
-            description="Latest Jina v3 with superior multilingual performance"
-        ),
-        ModelInfo(
-            model_id="legal-bert",
-            name="nlpaueb/legal-bert-base-uncased",
-            dimensions=768,
-            max_sequence_length=512,
-            languages=["English"],
-            model_type="legal domain",
-            description="English legal domain BERT model"
-        ),
-        ModelInfo(
-            model_id="roberta-ca",
-            name="projecte-aina/roberta-large-ca-v2",
-            dimensions=1024,
-            max_sequence_length=512,
-            languages=["Catalan"],
-            model_type="general",
-            description="Catalan RoBERTa-large model trained on large corpus"
-        )
-    ]
-
-@app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    all_models_loaded = len(models_cache) == 5
-
-    return {
-        "status": "healthy",
-        "all_models_loaded": all_models_loaded,
-        "available_models": list(models_cache.keys()),
-        "on_demand_models": ON_DEMAND_MODELS,
-        "models_count": len(models_cache),
-        "note": "All models load on first embedding request - minimal deployment version"
-    }
-
-if __name__ == "__main__":
-    # Set multi-threading for CPU
-    torch.set_num_threads(8)
-    torch.set_num_interop_threads(1)
-
-    uvicorn.run(app, host="0.0.0.0", port=7860)
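
A side effect of this fully on-demand design is that the first `/embed` call for every model pays the full download-and-load cost. A possible middle ground, sketched below as an assumption rather than code that ever existed in this Space, would be to pre-warm a couple of frequently used models right after the middleware is configured while leaving the rest on demand:

```python
# Hypothetical pre-warm step for app_old_minimal.py (assumed to be placed after
# the CORS middleware setup). It loads the listed models eagerly so their first
# request is fast; all other models keep the on-demand behaviour.
PREWARM_MODELS = ["jina-v3", "roberta-ca"]  # assumed choice of "hot" models

for model_name in PREWARM_MODELS:
    try:
        ensure_model_loaded(model_name)
    except Exception as exc:
        # Keep the app bootable even if a pre-warm download fails.
        print(f"Pre-warm of {model_name} failed: {exc}")
```
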
test_api.py
DELETED
@@ -1,64 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple test script for the embedding API
-"""
-
-import requests
-import json
-import time
-
-def test_api(base_url="https://aurasystems-spanish-embeddings-api.hf.space"):
-    """Test the API endpoints"""
-
-    print(f"Testing API at {base_url}")
-
-    # Test root endpoint
-    try:
-        response = requests.get(f"{base_url}/")
-        print(f"✓ Root endpoint: {response.status_code}")
-        print(f" Response: {response.json()}")
-    except Exception as e:
-        print(f"✗ Root endpoint failed: {e}")
-        return False
-
-    # Test health endpoint
-    try:
-        response = requests.get(f"{base_url}/health")
-        print(f"✓ Health endpoint: {response.status_code}")
-        health_data = response.json()
-        print(f" Models loaded: {health_data.get('models_loaded', False)}")
-        print(f" Available models: {health_data.get('available_models', [])}")
-    except Exception as e:
-        print(f"✗ Health endpoint failed: {e}")
-
-    # Test models endpoint
-    try:
-        response = requests.get(f"{base_url}/models")
-        print(f"✓ Models endpoint: {response.status_code}")
-        models = response.json()
-        print(f" Found {len(models)} model definitions")
-    except Exception as e:
-        print(f"✗ Models endpoint failed: {e}")
-
-    # Test embedding endpoint
-    try:
-        payload = {
-            "texts": ["Hello world", "Test text"],
-            "model": "jina",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ Embed endpoint: {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Embed endpoint failed: {e}")
-
-    return True
-
-if __name__ == "__main__":
-    test_api()
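
Since `test_api()` takes the base URL as a parameter, the same smoke test can be pointed at a locally running instance instead of the hosted Space. This usage sketch assumes the server is already listening on port 7860:

```python
# Run the smoke test against a local instance (assumes test_api.py is on the
# import path and the API is already serving on localhost:7860).
from test_api import test_api

test_api(base_url="http://localhost:7860")
```
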
test_hybrid.py
DELETED
@@ -1,98 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for hybrid model loading
-"""
-
-import requests
-import json
-import time
-
-def test_hybrid_api(base_url="https://aurasystems-spanish-embeddings-api.hf.space"):
-    """Test the hybrid API"""
-
-    print(f"Testing hybrid API at {base_url}")
-
-    # Test health endpoint first
-    try:
-        response = requests.get(f"{base_url}/health")
-        print(f"✓ Health endpoint: {response.status_code}")
-        if response.status_code == 200:
-            health_data = response.json()
-            print(f" Startup models loaded: {health_data.get('startup_models_loaded', False)}")
-            print(f" Available models: {health_data.get('available_models', [])}")
-            print(f" Note: {health_data.get('note', 'N/A')}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Health endpoint failed: {e}")
-        return False
-
-    # Test startup model (jina-v3)
-    try:
-        payload = {
-            "texts": ["Hola mundo", "Bonjour le monde"],
-            "model": "jina-v3",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ Startup model (jina-v3): {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Startup model test failed: {e}")
-
-    # Test startup model (roberta-ca)
-    try:
-        payload = {
-            "texts": ["Bon dia", "Com estàs?"],
-            "model": "roberta-ca",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ Startup model (roberta-ca): {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Startup model test failed: {e}")
-
-    # Test on-demand model (jina)
-    try:
-        payload = {
-            "texts": ["Texto en español"],
-            "model": "jina",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ On-demand model (jina): {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ On-demand model test failed: {e}")
-
-    # Check health again to see all models
-    try:
-        response = requests.get(f"{base_url}/health")
-        if response.status_code == 200:
-            health_data = response.json()
-            print(f"✓ Final health check:")
-            print(f" All models loaded: {health_data.get('all_models_loaded', False)}")
-            print(f" Available models: {health_data.get('available_models', [])}")
-    except Exception as e:
-        print(f"✗ Final health check failed: {e}")
-
-    return True
-
-if __name__ == "__main__":
-    test_hybrid_api()
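
The hybrid behaviour is easiest to see in response times: the first request to an on-demand model includes the model download and load, while a repeated request hits the in-memory cache. A rough manual probe, sketched here as an assumption rather than part of the original test suite, could look like this:

```python
# Rough cold-vs-warm latency probe for an on-demand model (assumed to be run
# by hand; the long timeout covers the initial model download on the Space).
import time
import requests

BASE_URL = "https://aurasystems-spanish-embeddings-api.hf.space"
payload = {"texts": ["prueba de latencia"], "model": "legal-bert", "normalize": True}

for attempt in ("cold", "warm"):
    start = time.time()
    requests.post(f"{BASE_URL}/embed", json=payload, timeout=300)
    print(f"{attempt} request took {time.time() - start:.1f}s")
```
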