Spaces:
Running
Running
| import time | |
| from loguru import logger | |
| from fastapi import FastAPI, HTTPException | |
| from contextlib import asynccontextmanager | |
| from models import RerankRequest, RerankResponse, RerankResult | |
| from core import ModelManager | |
# Shared model manager used by the request handlers below. It stays None
# until `lifespan` replaces it with a ModelManager instance at startup.
model_manager = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager with model preloading.

    On startup, builds a ``ModelManager`` from ``config.yaml``, stores it in
    the module-level ``model_manager`` and awaits ``preload_all_models()``.
    A failure during that step is logged and re-raised.

    Args:
        app: The FastAPI application this lifespan is attached to. It is not
            used by the body.

    Yields:
        None, once startup has completed.

    Raises:
        Exception: Whatever ``ModelManager`` construction or preloading raises.
    """
    global model_manager

    # Startup
    logger.info("Starting reranking API...")
    try:
        model_manager = ModelManager("config.yaml")
        await model_manager.preload_all_models()
    except Exception as e:
        logger.error(f"Failed to initialize models: {e}")
        raise
    else:
        logger.success("Reranking API startup complete!")

    try:
        yield
    finally:
        # Shutdown: kept in ``finally`` so the message is still logged when an
        # exception is thrown into the generator at the ``yield``.
        logger.info("Shutting down reranking API...")
# The FastAPI application object. `lifespan` is passed in so that its
# startup/shutdown logic (model preloading) is tied to this app. The
# description is Markdown text (FastAPI renders it in the generated docs).
app = FastAPI(
    title="Reranking API",
    description="""
High-performance API for document reranking using multiple state-of-the-art models.
✅ **Supported Models:**
- **Qwen/Qwen3-Reranker-0.6B**
- **BAAI/bge-reranker-v2-m3**
- **jinaai/jina-reranker-v2-base-multilingual**
🚀 **Features:**
- Multiple reranking models preloaded at startup
- List all available models
- Optional instruction-based reranking (Qwen3)
⚠️ **Warning**: Not for production use!.
""",
    version="1.0.0",
    lifespan=lifespan
)
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm it is registered on `app`.
async def rerank_documents(request: RerankRequest):
    """
    Rerank documents based on relevance to query.

    This endpoint takes a query and list of documents, then returns them
    ranked by relevance using the specified reranking model. Documents that
    are empty after stripping whitespace are dropped before scoring; each
    result's ``index`` is the document's position in the original request
    list.

    Args:
        request: RerankRequest containing query, documents, and model info

    Returns:
        RerankResponse with the results sorted by descending score, the
        stripped query, the model id, the processing time in seconds, the
        number of documents received and the number of results returned.
        When ``top_k`` is a positive number only the first ``top_k`` results
        are kept; any other value leaves the list untruncated.

    Raises:
        HTTPException: 400 when the query is blank, the document list is
            empty, no document survives filtering, or a ``ValueError`` is
            raised while reranking; 500 for any other failure.

    Example:
    ```json
    {
        "query": "machine learning algorithms",
        "documents": [
            "Deep learning uses neural networks",
            "Weather forecast for tomorrow",
            "Supervised learning with labeled data"
        ],
        "model_id": "jina-reranker-v2"
    }
    ```
    """
    query = request.query.strip()
    if not query:
        raise HTTPException(400, "Query cannot be empty")

    if not request.documents:
        raise HTTPException(400, "Documents list cannot be empty")

    # Strip once, then keep (original position, text) for non-blank documents.
    stripped_docs = (doc.strip() for doc in request.documents)
    valid_docs = [(idx, doc) for idx, doc in enumerate(stripped_docs) if doc]
    if not valid_docs:
        raise HTTPException(400, "No valid documents found after filtering empty strings")

    try:
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()

        model = model_manager.get_model(request.model_id)
        original_indices, documents = zip(*valid_docs)

        scores = model.rerank(
            query=query,
            documents=list(documents),
            instruction=request.instruction
        )

        results = [
            RerankResult(text=doc, score=score, index=orig_idx)
            for orig_idx, doc, score in zip(original_indices, documents, scores)
        ]
        results.sort(key=lambda item: item.score, reverse=True)

        # Only a positive top_k truncates. A negative value used as a slice
        # bound would silently drop results from the end of the ranking.
        top_k = request.top_k
        if top_k is not None and top_k > 0:
            results = results[:top_k]

        processing_time = time.perf_counter() - start_time
        logger.info(
            f"Reranked {len(documents)} documents in {processing_time:.3f}s "
            f"using {request.model_id}"
        )

        return RerankResponse(
            results=results,
            query=query,
            model_id=request.model_id,
            processing_time=processing_time,
            total_documents=len(request.documents),
            returned_documents=len(results)
        )
    except HTTPException:
        # Keep the status code of HTTP errors raised while reranking instead
        # of rewrapping them as a generic 500.
        raise
    except ValueError as e:
        raise HTTPException(400, str(e)) from e
    except Exception as e:
        logger.error(f"Reranking failed: {e}")
        raise HTTPException(500, f"Reranking failed: {str(e)}") from e
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm it is registered on `app`.
async def list_models():
    """
    List all available reranking models.

    Returns:
        The value produced by ``model_manager.list_models()``, unchanged.

    Raises:
        HTTPException: 500 carrying the error text when the listing fails.
    """
    try:
        listing = model_manager.list_models()
    except Exception as err:
        logger.error(f"Failed to list models: {err}")
        raise HTTPException(500, str(err))
    return listing
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm it is registered on `app`.
async def health_check():
    """
    Check API health and model status.

    Returns:
        On success, a dictionary with ``status`` set to ``"ok"``, the total
        number of models, the number and ids of the models whose ``loaded``
        entry is truthy, and the full model listing. If anything fails, a
        dictionary with ``status`` set to ``"error"`` and the error text is
        returned instead of raising.
    """
    try:
        all_models = model_manager.list_models()
        ready = [entry for entry in all_models if entry['loaded']]
        report = {
            "status": "ok",
            "total_models": len(all_models),
            "loaded_models": len(ready),
            "available_models": [entry['id'] for entry in ready],
            "models_info": all_models
        }
    except Exception as err:
        logger.error(f"Health check failed: {err}")
        report = {
            "status": "error",
            "error": str(err)
        }
    return report
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm it is registered on `app`.
async def root():
    """Return a welcome message with links to the API docs, plus the version."""
    # Adjacent literals are joined at compile time into a single string.
    welcome = (
        "Welcome to Reranking API. "
        "Visit https://fahmiaziz-api-rerank-model.hf.space/docs for API documentation. "
        "And we also have Embedding API! "
        "Visit https://fahmiaziz-api-embedding.hf.space/docs"
    )
    payload = {"message": welcome}
    payload["version"] = "1.0.0"
    return payload