Update app.py
app.py CHANGED
@@ -5,9 +5,14 @@ from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForMaskedLM
 from qdrant_client import models
 import logging
+import json

 # --- Setup Logging ---
-logging.basicConfig(...)
+# Configure logging to be more descriptive
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+)
 logger = logging.getLogger(__name__)

 # --- Configuration ---
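For reference, here is a minimal sketch of what the new format string produces; the timestamp in the sample output is illustrative.

# Minimal sketch: how the new basicConfig format renders a record.
import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logging.getLogger(__name__).info("models loaded")
# Sample output: 2024-05-01 12:00:00,123 - INFO - models loaded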
@@ -53,14 +58,15 @@ async def load_models():
     This ensures models are loaded only once.
     """
     global dense_model, splade_tokenizer, splade_model
-    logger.info(
+    logger.info("Server is starting up... Time to load the ML models.")
+    logger.info(f"I'll be using the '{DEVICE}' for processing.")
     try:
         dense_model = SentenceTransformer(DENSE_MODEL_ID, device=DEVICE)
         splade_tokenizer = AutoTokenizer.from_pretrained(SPLADE_QUERY_MODEL_ID)
         splade_model = AutoModelForMaskedLM.from_pretrained(SPLADE_QUERY_MODEL_ID).to(DEVICE)
-        logger.info("
+        logger.info("Great news! All models have been loaded successfully.")
     except Exception as e:
-        logger.
+        logger.critical(f"Oh no, a critical error occurred while loading models: {e}", exc_info=True)
         # In a real-world scenario, you might want the app to fail startup if models don't load.
         raise e

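The hunk header shows load_models() but not how it is registered with the app. Below is a minimal sketch of the presumed wiring; the @app.on_event("startup") decorator, the DEVICE constant, and the two model ID values are assumptions inferred from names in the diff, not taken from it.

# Presumed surrounding setup for load_models(); decorator, DEVICE, and the
# model IDs are assumptions based on names in the diff, not shown in it.
import torch
from fastapi import FastAPI

app = FastAPI()

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # assumed definition
DENSE_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"  # hypothetical value
SPLADE_QUERY_MODEL_ID = "naver/efficient-splade-VI-BT-large-query"  # hypothetical value

# Module-level globals that load_models() populates once at startup.
dense_model = None
splade_tokenizer = None
splade_model = None

@app.on_event("startup")  # assumed registration; newer FastAPI favors lifespan handlers
async def load_models():
    ...  # body as in the diff above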
@@ -107,27 +113,36 @@ async def vectorize_query(request: QueryRequest):
     Returns:
         A JSON response containing the dense and sparse vectors.
     """
-
-
+    # --- n8n Logging ---
+    logger.info("=========================================================")
+    logger.info("A new request just arrived! Let's see what we've got.")
+    logger.info(f"The incoming search query from n8n is: '{request.query_text}'")
+
     # 1. Generate Dense Vector
-    logger.info("
+    logger.info("First, I'm generating the dense vector for semantic meaning...")
     dense_query_vector = dense_model.encode(request.query_text).tolist()
-    logger.info("
+    logger.info("Done with the dense vector. It has %d dimensions.", len(dense_query_vector))
+    logger.info("Here's a small sample of the dense vector: %s...", str(dense_query_vector[:4]))

     # 2. Generate Sparse Vector
-    logger.info("
+    logger.info("Next up, creating the sparse vector for keyword matching...")
     sparse_query_vector = compute_splade_vector(request.query_text)
-    logger.info("Sparse vector
+    logger.info("Sparse vector is ready. It contains %d important terms.", len(sparse_query_vector.indices))
+    logger.info("Here's a sample of the sparse vector indices: %s...", str(sparse_query_vector.indices[:4]))

     # 3. Construct and return the response
-    return VectorResponse(
+    logger.info("Everything looks good. I'm packaging up the vectors to send back.")
+    logger.info("=========================================================")
+
+    final_response = VectorResponse(
         dense_vector=dense_query_vector,
         sparse_vector=SparseVectorResponse(
             indices=sparse_query_vector.indices,
             values=sparse_query_vector.values
         )
     )
+    return final_response

 @app.get("/", include_in_schema=False)
 async def root():
-    return {"message": "Vector Generation API is running. -- VERSION 2 --"}
+    return {"message": "Vector Generation API is running. -- VERSION 2 --"}
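compute_splade_vector() is called above but its body is not part of this diff. The sketch below is an assumption based on the standard SPLADE query-encoding recipe, log(1 + ReLU(logits)) max-pooled over the sequence, reusing the splade_tokenizer, splade_model, and DEVICE names from the code above.

# Assumed implementation of compute_splade_vector(); splade_tokenizer,
# splade_model, and DEVICE are the module globals loaded at startup.
import torch
from qdrant_client import models

def compute_splade_vector(text: str) -> models.SparseVector:
    tokens = splade_tokenizer(text, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        logits = splade_model(**tokens).logits  # shape (1, seq_len, vocab_size)
    # Standard SPLADE activation: max over tokens of log(1 + relu(logits)),
    # with padded positions masked out.
    weights = torch.max(
        torch.log1p(torch.relu(logits)) * tokens["attention_mask"].unsqueeze(-1),
        dim=1,
    ).values.squeeze(0)
    nonzero = weights.nonzero().squeeze(-1)
    return models.SparseVector(
        indices=nonzero.cpu().tolist(),
        values=weights[nonzero].cpu().tolist(),
    )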
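Once the Space is running, the endpoint can be smoke-tested as below. The route path /vectorize and port 7860 are assumptions (the decorator on vectorize_query is outside this diff); the query_text field and the response shape do match the code above.

# Hypothetical smoke test; "/vectorize" and port 7860 are assumed, not shown in the diff.
import requests

resp = requests.post(
    "http://localhost:7860/vectorize",
    json={"query_text": "hybrid search with dense and sparse vectors"},
)
resp.raise_for_status()
payload = resp.json()
print("dense dims:", len(payload["dense_vector"]))
print("sparse terms:", len(payload["sparse_vector"]["indices"]))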