Spaces: Sleeping

Jordi Catafal committed · 023e423 · Parent(s): 03eefac

    cleaning + readme

Files changed:
- README.md (+259 -334)
- app_endpoints.py (deleted, -308)
- app_hybrid_backup.py (deleted, -189)
- app_old.py (deleted, -159)
- app_old_minimal.py (deleted, -165)
- test_api.py (deleted, -64)
- test_hybrid.py (deleted, -98)
README.md (CHANGED)

@@ -7,54 +7,67 @@ sdk: docker

Removed (old README, -334 lines; only fragments survive in this diff view): the previous README kept the Spaces configuration-reference note (https://huggingface.co/docs/hub/spaces-config-reference) and documented a single `POST /embed` endpoint that generated embeddings "for up to 50 texts in a single request" and selected the model through a `"model"` field in the request body, alongside `GET /models` ("Get detailed information about available models"), a health check ("Check API status and model availability"), and `GET /` ("Basic API information and status"). It carried Python, cURL, JavaScript, and LangChain-style examples built around that single endpoint, a model-comparison snippet (`"inteligencia artificial"` vs. `"intel·ligència artificial"` across `jina`, `roberta-ca`, and `jina-v3`), a request/response schema whose `model` field defaulted to `"jina"`, batch-processing and semantic-search recipes, an error table with suggested fixes, an authentication section, links to [Jina Embeddings v2 Spanish](https://huggingface.co/jinaai/jina-embeddings-v2-base-es) and [RoBERTalex](https://huggingface.co/PlanTL-GOB-ES/RoBERTalex), and the closing line "Built with ❤️ using FastAPI and Hugging Face Transformers". A representative removed cURL example, preserved intact from the diff:

```bash
# Basic embedding generation with Jina v2 Spanish (old single-endpoint API)
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["Texto de ejemplo", "Otro texto en español"],
    "model": "jina",
    "normalize": true
  }'
```

Added (new README, +259 lines), reproduced below:
pinned: false
---

# Multilingual & Legal Embeddings API

A high-performance FastAPI application providing access to **5 specialized embedding models** for Spanish, Catalan, English, and multilingual text. Each model has its own dedicated endpoint for optimal performance and clarity.

🌐 **Live API**: [https://aurasystems-spanish-embeddings-api.hf.space](https://aurasystems-spanish-embeddings-api.hf.space)
📖 **Interactive Docs**: [https://aurasystems-spanish-embeddings-api.hf.space/docs](https://aurasystems-spanish-embeddings-api.hf.space/docs)

## 🚀 Quick Start

### Basic Usage

```bash
# Test jina-v3 endpoint (multilingual, loads at startup)
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3" \
  -H "Content-Type: application/json" \
  -d '{"texts": ["Hello world", "Hola mundo"], "normalize": true}'

# Test Catalan RoBERTa endpoint
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/roberta-ca" \
  -H "Content-Type: application/json" \
  -d '{"texts": ["Bon dia", "Com estàs?"], "normalize": true}'
```

## 📚 Available Models & Endpoints

| Endpoint | Model | Languages | Dimensions | Max Tokens | Loading Strategy |
|----------|--------|-----------|------------|------------|------------------|
| `/embed/jina-v3` | jinaai/jina-embeddings-v3 | Multilingual (30+) | 1024 | 8192 | **Startup** |
| `/embed/roberta-ca` | projecte-aina/roberta-large-ca-v2 | Catalan | 1024 | 512 | On-demand |
| `/embed/jina` | jinaai/jina-embeddings-v2-base-es | Spanish, English | 768 | 8192 | On-demand |
| `/embed/robertalex` | PlanTL-GOB-ES/RoBERTalex | Spanish Legal | 768 | 512 | On-demand |
| `/embed/legal-bert` | nlpaueb/legal-bert-base-uncased | English Legal | 768 | 512 | On-demand |

### Model Recommendations

- **🌍 General multilingual**: Use `/embed/jina-v3` - Best overall performance
- **🇪🇸 Spanish general**: Use `/embed/jina` - Excellent for Spanish/English
- **🇪🇸 Spanish legal**: Use `/embed/robertalex` - Specialized for legal texts
- **🏴 Catalan**: Use `/embed/roberta-ca` - Best for Catalan text
- **🇬🇧 English legal**: Use `/embed/legal-bert` - Specialized for legal documents

A small client-side helper that applies these recommendations is sketched below.
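The routing table above can be wrapped in a tiny client-side helper. This is an illustrative sketch only: the `ENDPOINT_BY_USE_CASE` mapping and `embed()` function are hypothetical names, and only the endpoint paths and request fields come from the documentation above.

```python
import requests

API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"

# Hypothetical mapping from use case to the documented endpoints above
ENDPOINT_BY_USE_CASE = {
    "multilingual": "jina-v3",
    "spanish": "jina",
    "spanish-legal": "robertalex",
    "catalan": "roberta-ca",
    "english-legal": "legal-bert",
}

def embed(texts, use_case="multilingual", normalize=True):
    """Route texts to the recommended endpoint for the given use case."""
    endpoint = ENDPOINT_BY_USE_CASE[use_case]
    response = requests.post(
        f"{API_URL}/embed/{endpoint}",
        json={"texts": texts, "normalize": normalize},
        timeout=120,  # on-demand models may take ~30-60s on their first request
    )
    response.raise_for_status()
    return response.json()["embeddings"]

# Example: Catalan text is sent to /embed/roberta-ca
vectors = embed(["Bon dia"], use_case="catalan")
```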
## 🔗 API Endpoints

### Model-Specific Embedding Endpoints

Each model has its dedicated endpoint:

```
POST /embed/jina-v3      # Multilingual (startup model)
POST /embed/roberta-ca   # Catalan
POST /embed/jina         # Spanish/English
POST /embed/robertalex   # Spanish Legal
POST /embed/legal-bert   # English Legal
```

### Utility Endpoints

```
GET /          # API information
GET /health    # Health check and model status
GET /models    # List all models with specifications
```

## 📖 Usage Examples

```python
import requests

API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"

# Example 1: Multilingual with Jina v3 (startup model - fastest)
response = requests.post(
    f"{API_URL}/embed/jina-v3",
    json={
        "texts": [
            "Hello world",      # English
            "Hola mundo",       # Spanish
            "Bonjour monde",    # French
            "こんにちは世界"       # Japanese
        ],
        "normalize": True
    }
)
result = response.json()
print(f"Jina v3: {result['dimensions']} dimensions")  # 1024

# Example 2: Catalan text with RoBERTa-ca
response = requests.post(
    f"{API_URL}/embed/roberta-ca",
    json={
        "texts": [
            "Bon dia, com estàs?",
            "Barcelona és una ciutat meravellosa",
            "M'agrada la cultura catalana"
        ],
        "normalize": True
    }
)
catalan_result = response.json()
print(f"Catalan: {catalan_result['dimensions']} dimensions")  # 1024

# Example 3: Spanish legal text with RoBERTalex
response = requests.post(
    f"{API_URL}/embed/robertalex",
    json={
        "texts": [
            "Artículo primero de la constitución",
            "El contrato será válido desde la fecha de firma",
            "La jurisprudencia establece que..."
        ],
        "normalize": True
    }
)
legal_result = response.json()
print(f"Spanish Legal: {legal_result['dimensions']} dimensions")  # 768

# Example 4: English legal text with Legal-BERT
response = requests.post(
    f"{API_URL}/embed/legal-bert",
    json={
        "texts": [
            "This agreement is legally binding",
            "The contract shall be governed by English law",
            "The party hereby agrees and covenants"
        ],
        "normalize": True
    }
)
english_legal_result = response.json()
print(f"English Legal: {english_legal_result['dimensions']} dimensions")  # 768

# Example 5: Spanish/English bilingual with Jina v2
response = requests.post(
    f"{API_URL}/embed/jina",
    json={
        "texts": [
            "Inteligencia artificial y machine learning",
            "Artificial intelligence and machine learning",
            "Procesamiento de lenguaje natural"
        ],
        "normalize": True
    }
)
bilingual_result = response.json()
print(f"Bilingual: {bilingual_result['dimensions']} dimensions")  # 768
```
### JavaScript/Node.js

```javascript
const API_URL = 'https://aurasystems-spanish-embeddings-api.hf.space';

// Function to get embeddings from a specific endpoint
async function getEmbeddings(endpoint, texts) {
  const response = await fetch(`${API_URL}/embed/${endpoint}`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      texts: texts,
      normalize: true
    })
  });

  return await response.json();
}

// Usage examples
try {
  // Multilingual embeddings
  const multilingualResult = await getEmbeddings('jina-v3', [
    'Hello world',
    'Hola mundo',
    'Ciao mondo'
  ]);
  console.log('Multilingual dimensions:', multilingualResult.dimensions);

  // Catalan embeddings
  const catalanResult = await getEmbeddings('roberta-ca', [
    'Bon dia',
    'Com estàs?'
  ]);
  console.log('Catalan dimensions:', catalanResult.dimensions);

} catch (error) {
  console.error('Error:', error);
}
```

### cURL Examples

```bash
# Multilingual with Jina v3 (startup model)
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["Hello", "Hola", "Bonjour"],
    "normalize": true
  }'

# Catalan with RoBERTa-ca
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/roberta-ca" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["Bon dia", "Com estàs?"],
    "normalize": true
  }'

# Spanish legal with RoBERTalex
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/robertalex" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["Artículo primero"],
    "normalize": true
  }'

# English legal with Legal-BERT
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/legal-bert" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["This agreement is binding"],
    "normalize": true
  }'

# Spanish/English bilingual with Jina v2
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina" \
  -H "Content-Type: application/json" \
  -d '{
    "texts": ["Texto en español", "Text in English"],
    "normalize": true
  }'
```
## 📋 Request/Response Schema

### Request Body

```json
{
  "texts": ["text1", "text2", "..."],
  "normalize": true,
  "max_length": null
}
```

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `texts` | array[string] | ✅ Yes | - | 1-50 texts to embed |
| `normalize` | boolean | No | `true` | L2-normalize embeddings |
| `max_length` | integer/null | No | `null` | Max tokens (model-specific limits) |

### Response Body

```json
{
  "embeddings": [[0.123, -0.456, ...], [0.789, -0.012, ...]],
  "model_used": "jina-v3",
  "dimensions": 1024,
  "num_texts": 2
}
```
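As a quick sanity check on the schema above, the snippet below consumes a response and verifies that normalized embeddings are unit length, so a dot product acts as cosine similarity. It is an illustrative sketch, not part of the API; only the URL, request fields, and response keys come from the schema above.

```python
import numpy as np
import requests

response = requests.post(
    "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3",
    json={"texts": ["Hello world", "Hola mundo"], "normalize": True},
)
payload = response.json()

vectors = np.array(payload["embeddings"])  # shape: (num_texts, dimensions)
assert vectors.shape == (payload["num_texts"], payload["dimensions"])

# With "normalize": true each vector should be (approximately) unit length,
# so a plain dot product behaves like cosine similarity.
print(np.linalg.norm(vectors, axis=1))   # ~1.0 for each text
print(float(vectors[0] @ vectors[1]))    # cosine similarity of the two texts
```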
## ⚡ Performance & Limits

- **Maximum texts per request**: 50
- **Startup model**: `jina-v3` loads at startup (fastest response)
- **On-demand models**: Load on first request (~30-60s first time)
- **Typical response time**: 100-300ms after models are loaded
- **Memory optimization**: Automatic cleanup for large batches
- **CORS enabled**: Works from any domain

Larger collections have to be split into batches of at most 50 texts; a sketch follows this list.
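The previous README carried a batching recipe for the old single endpoint; this is a re-sketch of the same idea for the per-model endpoints. `batch_embed` is an illustrative name rather than anything the API provides; only the 50-text limit, endpoint paths, and request fields come from the documentation above.

```python
import requests

API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"

def batch_embed(texts, endpoint="jina-v3", batch_size=50):
    """Embed an arbitrarily long list of texts in chunks of at most 50 (the API limit)."""
    all_embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        response = requests.post(
            f"{API_URL}/embed/{endpoint}",
            json={"texts": batch, "normalize": True},
            timeout=120,
        )
        response.raise_for_status()
        all_embeddings.extend(response.json()["embeddings"])
    return all_embeddings

# Example: 120 documents are sent as 3 requests (50 + 50 + 20)
documents = [f"Documento número {i}" for i in range(120)]
embeddings = batch_embed(documents, endpoint="jina")
print(len(embeddings))  # 120
```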
## 🔧 Advanced Usage

### LangChain Integration

```python
from langchain.embeddings.base import Embeddings
from typing import List
import requests

class MultilingualEmbeddings(Embeddings):
    """LangChain integration for multilingual embeddings"""

    def __init__(self, endpoint: str = "jina-v3"):
        """
        Initialize with specific endpoint

        Args:
            endpoint: One of "jina-v3", "roberta-ca", "jina", "robertalex", "legal-bert"
        """
        self.api_url = f"https://aurasystems-spanish-embeddings-api.hf.space/embed/{endpoint}"
        self.endpoint = endpoint

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        response = requests.post(
            self.api_url,
            json={"texts": texts, "normalize": True}
        )
        response.raise_for_status()
        return response.json()["embeddings"]

    def embed_query(self, text: str) -> List[float]:
        return self.embed_documents([text])[0]

# Usage examples
multilingual_embeddings = MultilingualEmbeddings("jina-v3")
catalan_embeddings = MultilingualEmbeddings("roberta-ca")
spanish_legal_embeddings = MultilingualEmbeddings("robertalex")
```
### Semantic Search

```python
import numpy as np
import requests
from typing import List

def semantic_search(query: str, documents: List[str], endpoint: str = "jina-v3", top_k: int = 5):
    """Semantic search using a specific model endpoint"""

    response = requests.post(
        f"https://aurasystems-spanish-embeddings-api.hf.space/embed/{endpoint}",
        json={"texts": [query] + documents, "normalize": True}
    )

    embeddings = np.array(response.json()["embeddings"])
    query_embedding = embeddings[0]
    doc_embeddings = embeddings[1:]

    # Calculate cosine similarities (vectors are already normalized)
    similarities = np.dot(doc_embeddings, query_embedding)
    top_indices = np.argsort(similarities)[::-1][:top_k]

    return [(idx, similarities[idx]) for idx in top_indices]

# Example: Multilingual search
documents = [
    "Python programming language",
    "Lenguaje de programación Python",
    "Llenguatge de programació Python",
    "Language de programmation Python"
]

results = semantic_search("código en Python", documents, "jina-v3")
for idx, score in results:
    print(f"{score:.4f}: {documents[idx]}")
```
## 🚨 Error Handling

### HTTP Status Codes

| Code | Description |
|------|-------------|
| 200 | Success |
| 400 | Bad Request (validation error) |
| 422 | Unprocessable Entity (schema error) |
| 500 | Internal Server Error (model loading failed) |

### Common Errors

```python
import requests

# Handle errors properly
try:
    response = requests.post(
        "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3",
        json={"texts": ["text"], "normalize": True}
    )
    response.raise_for_status()
    result = response.json()
except requests.exceptions.HTTPError as e:
    print(f"HTTP error: {e}")
    print(f"Response: {response.text}")
except requests.exceptions.RequestException as e:
    print(f"Request error: {e}")
```
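The previous README listed the usual causes of 400 errors together with their fixes (use a positive integer or omit `max_length`, batch your requests, filter out empty strings before sending). The sketch below folds those fixes into a small client-side pre-validation step; `prepare_texts` is a hypothetical helper, not something the API exposes.

```python
def prepare_texts(texts, max_length=None, limit=50):
    """Client-side pre-validation mirroring the common 400 errors (illustrative only)."""
    cleaned = [t for t in texts if t and t.strip()]  # filter out empty strings
    if not cleaned:
        raise ValueError("No non-empty texts to embed")
    if max_length is not None and max_length <= 0:
        raise ValueError("max_length must be a positive integer or omitted")
    # Respect the 50-texts-per-request limit by returning ready-to-send batches
    return [cleaned[i:i + limit] for i in range(0, len(cleaned), limit)]

batches = prepare_texts(["Hola", "", "   ", "Bon dia"])
print(batches)  # [['Hola', 'Bon dia']]
```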
## 📊 Model Status Check

```python
import requests

# Check which models are loaded
health = requests.get("https://aurasystems-spanish-embeddings-api.hf.space/health")
status = health.json()

print(f"API Status: {status['status']}")
print(f"Startup model loaded: {status['startup_model_loaded']}")
print(f"Available models: {status['available_models']}")
print(f"Models loaded: {status['models_count']}/5")

# Check endpoint status
for model, endpoint_status in status['endpoints'].items():
    print(f"{model}: {endpoint_status}")
```
## 🔒 Authentication & Rate Limits

- **Authentication**: None required (open API)
- **Rate limits**: Generous limits on Hugging Face Spaces
- **CORS**: Enabled for all origins
- **Usage**: Free for research and commercial use
## 🏗️ Architecture

### Endpoint-Per-Model Design

- **Startup model**: `jina-v3` loads at application startup for fastest response
- **On-demand loading**: Other models load when first requested
- **Memory optimization**: Progressive loading reduces startup time
- **Model caching**: Once loaded, models remain in memory for fast inference

A simplified sketch of this lazy-loading pattern follows this list.
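The deleted `app_endpoints.py` further down in this commit implements the pattern with `ensure_model_loaded()` and a module-level `models_cache`. The fragment below is only an illustrative sketch of the same idea (a cache filled on first use), not the production code; model identifiers are taken from the table above.

```python
from functools import lru_cache
from transformers import AutoModel, AutoTokenizer

# Illustrative sketch of on-demand loading: each model is fetched the first
# time its endpoint is hit and then kept in memory for later requests.
MODEL_IDS = {
    "jina-v3": "jinaai/jina-embeddings-v3",
    "roberta-ca": "projecte-aina/roberta-large-ca-v2",
    "jina": "jinaai/jina-embeddings-v2-base-es",
    "robertalex": "PlanTL-GOB-ES/RoBERTalex",
    "legal-bert": "nlpaueb/legal-bert-base-uncased",
}

@lru_cache(maxsize=None)
def get_model(name: str):
    """Load tokenizer and model once; repeated calls return the cached objects."""
    model_id = MODEL_IDS[name]
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModel.from_pretrained(model_id, trust_remote_code=True)
    model.eval()
    return tokenizer, model
```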
### Technical Stack

- **FastAPI**: Modern async web framework
- **Transformers**: Hugging Face model library
- **PyTorch**: Deep learning backend
- **Docker**: Containerized deployment
- **Hugging Face Spaces**: Cloud hosting platform
## 📄 Model Licenses

- **Jina models**: Apache 2.0
- **RoBERTa models**: MIT/Apache 2.0
- **Legal-BERT**: Apache 2.0
## 🤝 Support & Contributing

- **Issues**: [Hugging Face Space discussions](https://huggingface.co/spaces/AuraSystems/spanish-embeddings-api/discussions)
- **Interactive Docs**: [FastAPI Swagger UI](https://aurasystems-spanish-embeddings-api.hf.space/docs)
- **Model Papers**: Check individual model pages on Hugging Face

---

Built with ❤️ using **FastAPI** and **Hugging Face Transformers**
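The deleted application files below import `load_models` and `get_embeddings` from `utils.helpers`, which is not part of this diff. As context only, here is a minimal, assumption-labeled sketch of what such a helper commonly looks like (mean pooling over the last hidden state plus optional L2 normalization); the real helper in this Space may differ.

```python
from typing import List, Optional
import torch

def get_embeddings_sketch(texts: List[str], tokenizer, model,
                          normalize: bool = True,
                          max_length: Optional[int] = None) -> List[List[float]]:
    """Hypothetical stand-in for utils.helpers.get_embeddings (not the actual code)."""
    inputs = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean-pool token embeddings, ignoring padding positions
    mask = inputs["attention_mask"].unsqueeze(-1).float()
    summed = (outputs.last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)
    embeddings = summed / counts

    if normalize:
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
    return embeddings.tolist()
```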
app_endpoints.py (DELETED)

@@ -1,308 +0,0 @@
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from typing import List
import torch
import uvicorn

from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
from utils.helpers import load_models, get_embeddings, cleanup_memory

# Global model cache
models_cache = {}

# Load jina-v3 at startup (most important model)
STARTUP_MODEL = "jina-v3"

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler for startup and shutdown"""
    # Startup - load jina-v3 model
    try:
        global models_cache
        print(f"Loading startup model: {STARTUP_MODEL}...")
        models_cache = load_models([STARTUP_MODEL])
        print(f"Startup model loaded successfully: {list(models_cache.keys())}")
        yield
    except Exception as e:
        print(f"Failed to load startup model: {str(e)}")
        # Continue anyway - jina-v3 can be loaded on demand if startup fails
        yield
    finally:
        # Shutdown - cleanup resources
        cleanup_memory()

def ensure_model_loaded(model_name: str, max_length_limit: int):
    """Load a specific model on demand if not already loaded"""
    global models_cache
    if model_name not in models_cache:
        try:
            print(f"Loading model on demand: {model_name}...")
            new_models = load_models([model_name])
            models_cache.update(new_models)
            print(f"Model {model_name} loaded successfully!")
        except Exception as e:
            print(f"Failed to load model {model_name}: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")

def validate_request_for_model(request: EmbeddingRequest, model_name: str, max_length_limit: int):
    """Validate request parameters for specific model"""
    if not request.texts:
        raise HTTPException(status_code=400, detail="No texts provided")

    if len(request.texts) > 50:
        raise HTTPException(status_code=400, detail="Maximum 50 texts per request")

    if request.max_length is not None and request.max_length > max_length_limit:
        raise HTTPException(status_code=400, detail=f"Max length for {model_name} is {max_length_limit}")

app = FastAPI(
    title="Multilingual & Legal Embedding API",
    description="Multi-model embedding API with dedicated endpoints per model",
    version="4.0.0",
    lifespan=lifespan
)

# Add CORS middleware to allow cross-origin requests
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/")
async def root():
    return {
        "message": "Multilingual & Legal Embedding API - Endpoint Per Model",
        "version": "4.0.0",
        "status": "running",
        "docs": "/docs",
        "startup_model": STARTUP_MODEL,
        "available_endpoints": {
            "jina-v3": "/embed/jina-v3",
            "roberta-ca": "/embed/roberta-ca",
            "jina": "/embed/jina",
            "robertalex": "/embed/robertalex",
            "legal-bert": "/embed/legal-bert"
        }
    }

# Jina v3 - Multilingual (loads at startup)
@app.post("/embed/jina-v3", response_model=EmbeddingResponse)
async def embed_jina_v3(request: EmbeddingRequest):
    """Generate embeddings using Jina v3 model (multilingual)"""
    try:
        ensure_model_loaded("jina-v3", 8192)
        validate_request_for_model(request, "jina-v3", 8192)

        embeddings = get_embeddings(
            request.texts,
            "jina-v3",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="jina-v3",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

# Catalan RoBERTa
@app.post("/embed/roberta-ca", response_model=EmbeddingResponse)
async def embed_roberta_ca(request: EmbeddingRequest):
    """Generate embeddings using Catalan RoBERTa model"""
    try:
        ensure_model_loaded("roberta-ca", 512)
        validate_request_for_model(request, "roberta-ca", 512)

        embeddings = get_embeddings(
            request.texts,
            "roberta-ca",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="roberta-ca",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

# Jina v2 - Spanish/English
@app.post("/embed/jina", response_model=EmbeddingResponse)
async def embed_jina(request: EmbeddingRequest):
    """Generate embeddings using Jina v2 Spanish/English model"""
    try:
        ensure_model_loaded("jina", 8192)
        validate_request_for_model(request, "jina", 8192)

        embeddings = get_embeddings(
            request.texts,
            "jina",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="jina",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

# RoBERTalex - Spanish Legal
@app.post("/embed/robertalex", response_model=EmbeddingResponse)
async def embed_robertalex(request: EmbeddingRequest):
    """Generate embeddings using RoBERTalex Spanish legal model"""
    try:
        ensure_model_loaded("robertalex", 512)
        validate_request_for_model(request, "robertalex", 512)

        embeddings = get_embeddings(
            request.texts,
            "robertalex",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="robertalex",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

# Legal BERT - English Legal
@app.post("/embed/legal-bert", response_model=EmbeddingResponse)
async def embed_legal_bert(request: EmbeddingRequest):
    """Generate embeddings using Legal BERT English model"""
    try:
        ensure_model_loaded("legal-bert", 512)
        validate_request_for_model(request, "legal-bert", 512)

        embeddings = get_embeddings(
            request.texts,
            "legal-bert",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="legal-bert",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """List available models and their specifications"""
    return [
        ModelInfo(
            model_id="jina-v3",
            name="jinaai/jina-embeddings-v3",
            dimensions=1024,
            max_sequence_length=8192,
            languages=["Multilingual"],
            model_type="multilingual",
            description="Latest Jina v3 with superior multilingual performance - loaded at startup"
        ),
        ModelInfo(
            model_id="roberta-ca",
            name="projecte-aina/roberta-large-ca-v2",
            dimensions=1024,
            max_sequence_length=512,
            languages=["Catalan"],
            model_type="general",
            description="Catalan RoBERTa-large model trained on large corpus"
        ),
        ModelInfo(
            model_id="jina",
            name="jinaai/jina-embeddings-v2-base-es",
            dimensions=768,
            max_sequence_length=8192,
            languages=["Spanish", "English"],
            model_type="bilingual",
            description="Bilingual Spanish-English embeddings with long context support"
        ),
        ModelInfo(
            model_id="robertalex",
            name="PlanTL-GOB-ES/RoBERTalex",
            dimensions=768,
            max_sequence_length=512,
            languages=["Spanish"],
            model_type="legal domain",
            description="Spanish legal domain specialized embeddings"
        ),
        ModelInfo(
            model_id="legal-bert",
            name="nlpaueb/legal-bert-base-uncased",
            dimensions=768,
            max_sequence_length=512,
            languages=["English"],
            model_type="legal domain",
            description="English legal domain BERT model"
        )
    ]

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    startup_loaded = STARTUP_MODEL in models_cache

    return {
        "status": "healthy" if startup_loaded else "partial",
        "startup_model": STARTUP_MODEL,
        "startup_model_loaded": startup_loaded,
        "available_models": list(models_cache.keys()),
        "models_count": len(models_cache),
        "endpoints": {
            "jina-v3": f"/embed/jina-v3 {'(ready)' if 'jina-v3' in models_cache else '(loads on demand)'}",
            "roberta-ca": f"/embed/roberta-ca {'(ready)' if 'roberta-ca' in models_cache else '(loads on demand)'}",
            "jina": f"/embed/jina {'(ready)' if 'jina' in models_cache else '(loads on demand)'}",
            "robertalex": f"/embed/robertalex {'(ready)' if 'robertalex' in models_cache else '(loads on demand)'}",
            "legal-bert": f"/embed/legal-bert {'(ready)' if 'legal-bert' in models_cache else '(loads on demand)'}"
        }
    }

if __name__ == "__main__":
    # Set multi-threading for CPU
    torch.set_num_threads(8)
    torch.set_num_interop_threads(1)

    uvicorn.run(app, host="0.0.0.0", port=7860)
app_hybrid_backup.py (DELETED)

@@ -1,189 +0,0 @@
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from typing import List
import torch
import uvicorn

from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
from utils.helpers import load_models, get_embeddings, cleanup_memory

# Global model cache
models_cache = {}

# Models to load at startup (most frequently used)
STARTUP_MODELS = ["jina-v3", "roberta-ca"]
# Models to load on demand
ON_DEMAND_MODELS = ["jina", "robertalex", "legal-bert"]

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler for startup and shutdown"""
    # Startup - load priority models
    try:
        global models_cache
        print(f"Loading startup models: {STARTUP_MODELS}...")
        models_cache = load_models(STARTUP_MODELS)
        print(f"Startup models loaded successfully: {list(models_cache.keys())}")
        yield
    except Exception as e:
        print(f"Failed to load startup models: {str(e)}")
        # Continue anyway - models can be loaded on demand
        yield
    finally:
        # Shutdown - cleanup resources
        cleanup_memory()

def ensure_model_loaded(model_name: str):
    """Load a specific model on demand if not already loaded"""
    global models_cache
    if model_name not in models_cache:
        if model_name in ON_DEMAND_MODELS:
            try:
                print(f"Loading model on demand: {model_name}...")
                new_models = load_models([model_name])
                models_cache.update(new_models)
                print(f"Model {model_name} loaded successfully!")
            except Exception as e:
                print(f"Failed to load model {model_name}: {str(e)}")
                raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")
        else:
            raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")

app = FastAPI(
    title="Multilingual & Legal Embedding API",
    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
    version="3.0.0",
    lifespan=lifespan
)

# Add CORS middleware to allow cross-origin requests
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/")
async def root():
    return {
        "message": "Multilingual & Legal Embedding API",
        "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
        "status": "running",
        "docs": "/docs",
        "total_models": 5
    }

@app.post("/embed", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
    """Generate embeddings for input texts"""
    try:
        # Load specific model on demand if needed
        ensure_model_loaded(request.model)

        if not request.texts:
            raise HTTPException(status_code=400, detail="No texts provided")

        if len(request.texts) > 50:  # Rate limiting
            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")

        embeddings = get_embeddings(
            request.texts,
            request.model,
            models_cache,
            request.normalize,
            request.max_length
        )

        # Cleanup memory after large batches
        if len(request.texts) > 20:
            cleanup_memory()

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used=request.model,
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """List available models and their specifications"""
    return [
        ModelInfo(
            model_id="jina",
            name="jinaai/jina-embeddings-v2-base-es",
            dimensions=768,
            max_sequence_length=8192,
            languages=["Spanish", "English"],
            model_type="bilingual",
            description="Bilingual Spanish-English embeddings with long context support"
        ),
        ModelInfo(
            model_id="robertalex",
            name="PlanTL-GOB-ES/RoBERTalex",
            dimensions=768,
            max_sequence_length=512,
            languages=["Spanish"],
            model_type="legal domain",
            description="Spanish legal domain specialized embeddings"
        ),
        ModelInfo(
            model_id="jina-v3",
            name="jinaai/jina-embeddings-v3",
            dimensions=1024,
            max_sequence_length=8192,
            languages=["Multilingual"],
            model_type="multilingual",
            description="Latest Jina v3 with superior multilingual performance"
        ),
        ModelInfo(
            model_id="legal-bert",
            name="nlpaueb/legal-bert-base-uncased",
            dimensions=768,
            max_sequence_length=512,
            languages=["English"],
            model_type="legal domain",
            description="English legal domain BERT model"
        ),
        ModelInfo(
            model_id="roberta-ca",
            name="projecte-aina/roberta-large-ca-v2",
            dimensions=1024,
            max_sequence_length=512,
            languages=["Catalan"],
            model_type="general",
            description="Catalan RoBERTa-large model trained on large corpus"
        )
    ]

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    startup_models_loaded = all(model in models_cache for model in STARTUP_MODELS)
    all_models_loaded = len(models_cache) == 5

    return {
        "status": "healthy" if startup_models_loaded else "partial",
        "startup_models_loaded": startup_models_loaded,
        "all_models_loaded": all_models_loaded,
        "available_models": list(models_cache.keys()),
        "startup_models": STARTUP_MODELS,
        "on_demand_models": ON_DEMAND_MODELS,
        "models_count": len(models_cache),
        "note": f"Startup models: {STARTUP_MODELS} | On-demand: {ON_DEMAND_MODELS}"
    }

if __name__ == "__main__":
    # Set multi-threading for CPU
    torch.set_num_threads(8)
    torch.set_num_interop_threads(1)

    uvicorn.run(app, host="0.0.0.0", port=7860)
app_old.py
DELETED
@@ -1,159 +0,0 @@
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from contextlib import asynccontextmanager
-from typing import List
-import torch
-import uvicorn
-
-from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
-from utils.helpers import load_models, get_embeddings, cleanup_memory
-
-# Global model cache
-models_cache = {}
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Application lifespan handler for startup and shutdown"""
-    # Startup
-    try:
-        global models_cache
-        print("Loading models...")
-        models_cache = load_models()
-        print("All models loaded successfully!")
-        yield
-    except Exception as e:
-        print(f"Failed to load models: {str(e)}")
-        raise
-    finally:
-        # Shutdown - cleanup resources
-        cleanup_memory()
-
-app = FastAPI(
-    title="Multilingual & Legal Embedding API",
-    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
-    version="3.0.0",
-    lifespan=lifespan
-)
-
-# Add CORS middleware to allow cross-origin requests
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # In production, specify actual domains
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-@app.get("/")
-async def root():
-    return {
-        "message": "Multilingual & Legal Embedding API",
-        "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
-        "status": "running",
-        "docs": "/docs",
-        "total_models": 5
-    }
-
-@app.post("/embed", response_model=EmbeddingResponse)
-async def create_embeddings(request: EmbeddingRequest):
-    """Generate embeddings for input texts"""
-    try:
-        if not request.texts:
-            raise HTTPException(status_code=400, detail="No texts provided")
-
-        if len(request.texts) > 50:  # Rate limiting
-            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
-
-        embeddings = get_embeddings(
-            request.texts,
-            request.model,
-            models_cache,
-            request.normalize,
-            request.max_length
-        )
-
-        # Cleanup memory after large batches
-        if len(request.texts) > 20:
-            cleanup_memory()
-
-        return EmbeddingResponse(
-            embeddings=embeddings,
-            model_used=request.model,
-            dimensions=len(embeddings[0]) if embeddings else 0,
-            num_texts=len(request.texts)
-        )
-
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
-
-@app.get("/models", response_model=List[ModelInfo])
-async def list_models():
-    """List available models and their specifications"""
-    return [
-        ModelInfo(
-            model_id="jina",
-            name="jinaai/jina-embeddings-v2-base-es",
-            dimensions=768,
-            max_sequence_length=8192,
-            languages=["Spanish", "English"],
-            model_type="bilingual",
-            description="Bilingual Spanish-English embeddings with long context support"
-        ),
-        ModelInfo(
-            model_id="robertalex",
-            name="PlanTL-GOB-ES/RoBERTalex",
-            dimensions=768,
-            max_sequence_length=512,
-            languages=["Spanish"],
-            model_type="legal domain",
-            description="Spanish legal domain specialized embeddings"
-        ),
-        ModelInfo(
-            model_id="jina-v3",
-            name="jinaai/jina-embeddings-v3",
-            dimensions=1024,
-            max_sequence_length=8192,
-            languages=["Multilingual"],
-            model_type="multilingual",
-            description="Latest Jina v3 with superior multilingual performance"
-        ),
-        ModelInfo(
-            model_id="legal-bert",
-            name="nlpaueb/legal-bert-base-uncased",
-            dimensions=768,
-            max_sequence_length=512,
-            languages=["English"],
-            model_type="legal domain",
-            description="English legal domain BERT model"
-        ),
-        ModelInfo(
-            model_id="roberta-ca",
-            name="projecte-aina/roberta-large-ca-v2",
-            dimensions=1024,
-            max_sequence_length=512,
-            languages=["Catalan"],
-            model_type="general",
-            description="Catalan RoBERTa-large model trained on large corpus"
-        )
-    ]
-
-@app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    models_loaded = len(models_cache) == 5
-    return {
-        "status": "healthy" if models_loaded else "degraded",
-        "models_loaded": models_loaded,
-        "available_models": list(models_cache.keys()),
-        "expected_models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
-        "models_count": len(models_cache)
-    }
-
-if __name__ == "__main__":
-    # Set multi-threading for CPU
-    torch.set_num_threads(8)
-    torch.set_num_interop_threads(1)
-
-    uvicorn.run(app, host="0.0.0.0", port=7860)
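
This legacy entrypoint eagerly loaded all five models inside the FastAPI lifespan handler before serving any traffic. For local debugging it could also be launched through uvicorn's import-string form, which enables auto-reload; this is only a sketch, assuming `app_old.py` is importable from the working directory, and was never part of the repository:

```python
# Hypothetical local-development launcher for the legacy app (an assumption,
# not a file from this repository). The import-string form lets uvicorn
# watch app_old.py and restart the server whenever it changes.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app_old:app", host="127.0.0.1", port=7860, reload=True)
```
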
app_old_minimal.py
DELETED
@@ -1,165 +0,0 @@
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from typing import List
-import torch
-import uvicorn
-
-from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
-from utils.helpers import load_models, get_embeddings, cleanup_memory
-
-# Global model cache - completely on-demand loading
-models_cache = {}
-
-# All models load on demand to test deployment
-ON_DEMAND_MODELS = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]
-
-def ensure_model_loaded(model_name: str):
-    """Load a specific model on demand if not already loaded"""
-    global models_cache
-    if model_name not in models_cache:
-        if model_name in ON_DEMAND_MODELS:
-            try:
-                print(f"Loading model on demand: {model_name}...")
-                new_models = load_models([model_name])
-                models_cache.update(new_models)
-                print(f"Model {model_name} loaded successfully!")
-            except Exception as e:
-                print(f"Failed to load model {model_name}: {str(e)}")
-                raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")
-        else:
-            raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")
-
-app = FastAPI(
-    title="Multilingual & Legal Embedding API",
-    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
-    version="3.0.0"
-)
-
-# Add CORS middleware to allow cross-origin requests
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # In production, specify actual domains
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-@app.get("/")
-async def root():
-    return {
-        "message": "Multilingual & Legal Embedding API - Minimal Version",
-        "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
-        "status": "running",
-        "docs": "/docs",
-        "total_models": 5,
-        "note": "All models load on first request"
-    }
-
-@app.post("/embed", response_model=EmbeddingResponse)
-async def create_embeddings(request: EmbeddingRequest):
-    """Generate embeddings for input texts"""
-    try:
-        # Load specific model on demand
-        ensure_model_loaded(request.model)
-
-        if not request.texts:
-            raise HTTPException(status_code=400, detail="No texts provided")
-
-        if len(request.texts) > 50:  # Rate limiting
-            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
-
-        embeddings = get_embeddings(
-            request.texts,
-            request.model,
-            models_cache,
-            request.normalize,
-            request.max_length
-        )
-
-        # Cleanup memory after large batches
-        if len(request.texts) > 20:
-            cleanup_memory()
-
-        return EmbeddingResponse(
-            embeddings=embeddings,
-            model_used=request.model,
-            dimensions=len(embeddings[0]) if embeddings else 0,
-            num_texts=len(request.texts)
-        )
-
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
-
-@app.get("/models", response_model=List[ModelInfo])
-async def list_models():
-    """List available models and their specifications"""
-    return [
-        ModelInfo(
-            model_id="jina",
-            name="jinaai/jina-embeddings-v2-base-es",
-            dimensions=768,
-            max_sequence_length=8192,
-            languages=["Spanish", "English"],
-            model_type="bilingual",
-            description="Bilingual Spanish-English embeddings with long context support"
-        ),
-        ModelInfo(
-            model_id="robertalex",
-            name="PlanTL-GOB-ES/RoBERTalex",
-            dimensions=768,
-            max_sequence_length=512,
-            languages=["Spanish"],
-            model_type="legal domain",
-            description="Spanish legal domain specialized embeddings"
-        ),
-        ModelInfo(
-            model_id="jina-v3",
-            name="jinaai/jina-embeddings-v3",
-            dimensions=1024,
-            max_sequence_length=8192,
-            languages=["Multilingual"],
-            model_type="multilingual",
-            description="Latest Jina v3 with superior multilingual performance"
-        ),
-        ModelInfo(
-            model_id="legal-bert",
-            name="nlpaueb/legal-bert-base-uncased",
-            dimensions=768,
-            max_sequence_length=512,
-            languages=["English"],
-            model_type="legal domain",
-            description="English legal domain BERT model"
-        ),
-        ModelInfo(
-            model_id="roberta-ca",
-            name="projecte-aina/roberta-large-ca-v2",
-            dimensions=1024,
-            max_sequence_length=512,
-            languages=["Catalan"],
-            model_type="general",
-            description="Catalan RoBERTa-large model trained on large corpus"
-        )
-    ]
-
-@app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    all_models_loaded = len(models_cache) == 5
-
-    return {
-        "status": "healthy",
-        "all_models_loaded": all_models_loaded,
-        "available_models": list(models_cache.keys()),
-        "on_demand_models": ON_DEMAND_MODELS,
-        "models_count": len(models_cache),
-        "note": "All models load on first embedding request - minimal deployment version"
-    }
-
-if __name__ == "__main__":
-    # Set multi-threading for CPU
-    torch.set_num_threads(8)
-    torch.set_num_interop_threads(1)
-
-    uvicorn.run(app, host="0.0.0.0", port=7860)
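
A side effect of this fully on-demand design is that the first `/embed` call for every model pays the full download-and-load cost. A possible middle ground, sketched below as an assumption rather than code that ever existed in this Space, would be to pre-warm a couple of frequently used models right after the middleware is configured while leaving the rest on demand:

```python
# Hypothetical pre-warm step for app_old_minimal.py (assumed to be placed after
# the CORS middleware setup). It loads the listed models eagerly so their first
# request is fast; all other models keep the on-demand behaviour.
PREWARM_MODELS = ["jina-v3", "roberta-ca"]  # assumed choice of "hot" models

for model_name in PREWARM_MODELS:
    try:
        ensure_model_loaded(model_name)
    except Exception as exc:
        # Keep the app bootable even if a pre-warm download fails.
        print(f"Pre-warm of {model_name} failed: {exc}")
```
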
test_api.py
DELETED
@@ -1,64 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple test script for the embedding API
-"""
-
-import requests
-import json
-import time
-
-def test_api(base_url="https://aurasystems-spanish-embeddings-api.hf.space"):
-    """Test the API endpoints"""
-
-    print(f"Testing API at {base_url}")
-
-    # Test root endpoint
-    try:
-        response = requests.get(f"{base_url}/")
-        print(f"✓ Root endpoint: {response.status_code}")
-        print(f" Response: {response.json()}")
-    except Exception as e:
-        print(f"✗ Root endpoint failed: {e}")
-        return False
-
-    # Test health endpoint
-    try:
-        response = requests.get(f"{base_url}/health")
-        print(f"✓ Health endpoint: {response.status_code}")
-        health_data = response.json()
-        print(f" Models loaded: {health_data.get('models_loaded', False)}")
-        print(f" Available models: {health_data.get('available_models', [])}")
-    except Exception as e:
-        print(f"✗ Health endpoint failed: {e}")
-
-    # Test models endpoint
-    try:
-        response = requests.get(f"{base_url}/models")
-        print(f"✓ Models endpoint: {response.status_code}")
-        models = response.json()
-        print(f" Found {len(models)} model definitions")
-    except Exception as e:
-        print(f"✗ Models endpoint failed: {e}")
-
-    # Test embedding endpoint
-    try:
-        payload = {
-            "texts": ["Hello world", "Test text"],
-            "model": "jina",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ Embed endpoint: {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Embed endpoint failed: {e}")
-
-    return True
-
-if __name__ == "__main__":
-    test_api()
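
Since `test_api()` takes the base URL as a parameter, the same smoke test can be pointed at a locally running instance instead of the hosted Space. This usage sketch assumes the server is already listening on port 7860:

```python
# Run the smoke test against a local instance (assumes test_api.py is on the
# import path and the API is already serving on localhost:7860).
from test_api import test_api

test_api(base_url="http://localhost:7860")
```
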
test_hybrid.py
DELETED
@@ -1,98 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for hybrid model loading
-"""
-
-import requests
-import json
-import time
-
-def test_hybrid_api(base_url="https://aurasystems-spanish-embeddings-api.hf.space"):
-    """Test the hybrid API"""
-
-    print(f"Testing hybrid API at {base_url}")
-
-    # Test health endpoint first
-    try:
-        response = requests.get(f"{base_url}/health")
-        print(f"✓ Health endpoint: {response.status_code}")
-        if response.status_code == 200:
-            health_data = response.json()
-            print(f" Startup models loaded: {health_data.get('startup_models_loaded', False)}")
-            print(f" Available models: {health_data.get('available_models', [])}")
-            print(f" Note: {health_data.get('note', 'N/A')}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Health endpoint failed: {e}")
-        return False
-
-    # Test startup model (jina-v3)
-    try:
-        payload = {
-            "texts": ["Hola mundo", "Bonjour le monde"],
-            "model": "jina-v3",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ Startup model (jina-v3): {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Startup model test failed: {e}")
-
-    # Test startup model (roberta-ca)
-    try:
-        payload = {
-            "texts": ["Bon dia", "Com estàs?"],
-            "model": "roberta-ca",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ Startup model (roberta-ca): {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Startup model test failed: {e}")
-
-    # Test on-demand model (jina)
-    try:
-        payload = {
-            "texts": ["Texto en español"],
-            "model": "jina",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ On-demand model (jina): {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ On-demand model test failed: {e}")
-
-    # Check health again to see all models
-    try:
-        response = requests.get(f"{base_url}/health")
-        if response.status_code == 200:
-            health_data = response.json()
-            print(f"✓ Final health check:")
-            print(f" All models loaded: {health_data.get('all_models_loaded', False)}")
-            print(f" Available models: {health_data.get('available_models', [])}")
-    except Exception as e:
-        print(f"✗ Final health check failed: {e}")
-
-    return True
-
-if __name__ == "__main__":
-    test_hybrid_api()
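
The hybrid behaviour is easiest to see in response times: the first request to an on-demand model includes the model download and load, while a repeated request hits the in-memory cache. A rough manual probe, sketched here as an assumption rather than part of the original test suite, could look like this:

```python
# Rough cold-vs-warm latency probe for an on-demand model (assumed to be run
# by hand; the long timeout covers the initial model download on the Space).
import time
import requests

BASE_URL = "https://aurasystems-spanish-embeddings-api.hf.space"
payload = {"texts": ["prueba de latencia"], "model": "legal-bert", "normalize": True}

for attempt in ("cold", "warm"):
    start = time.time()
    requests.post(f"{BASE_URL}/embed", json=payload, timeout=300)
    print(f"{attempt} request took {time.time() - start:.1f}s")
```
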