minhvtt committed
Commit 6c982a7 · verified · 1 Parent(s): adec8cd

Upload 6 files

Files changed (6)
  1. chatbot_rag.py +351 -0
  2. chatbot_rag_api.py +467 -0
  3. embedding_service.py +173 -0
  4. main.py +352 -0
  5. qdrant_service.py +447 -0
  6. requirements.txt +27 -0
chatbot_rag.py ADDED
@@ -0,0 +1,351 @@
+ import gradio as gr
+ from huggingface_hub import InferenceClient
+ from pymongo import MongoClient
+ from datetime import datetime
+ from typing import List, Dict
+ import numpy as np
+
+ from embedding_service import JinaClipEmbeddingService
+ from qdrant_service import QdrantVectorService
+
+
+ class ChatbotRAG:
+     """
+     RAG chatbot with:
+     - LLM: GPT-OSS-20B (Hugging Face)
+     - Embeddings: Jina CLIP v2
+     - Vector DB: Qdrant
+     - Document Store: MongoDB
+     """
+
+     def __init__(
+         self,
+         mongodb_uri: str = "mongodb+srv://truongtn7122003:[email protected]/",
+         db_name: str = "chatbot_rag",
+         collection_name: str = "documents"
+     ):
+         """
+         Initialize ChatbotRAG
+
+         Args:
+             mongodb_uri: MongoDB connection string
+             db_name: Database name
+             collection_name: Collection name for documents
+         """
+         print("Initializing ChatbotRAG...")
+
+         # MongoDB client
+         self.mongo_client = MongoClient(mongodb_uri)
+         self.db = self.mongo_client[db_name]
+         self.documents_collection = self.db[collection_name]
+         self.chat_history_collection = self.db["chat_history"]
+
+         # Embedding service (Jina CLIP v2)
+         self.embedding_service = JinaClipEmbeddingService(
+             model_path="jinaai/jina-clip-v2"
+         )
+
+         # Qdrant vector service
+         self.qdrant_service = QdrantVectorService(
+             collection_name="chatbot_rag_vectors",
+             vector_size=self.embedding_service.get_embedding_dimension()
+         )
+
+         print("✓ ChatbotRAG initialized successfully")
+
+     def add_document(self, text: str, metadata: Dict = None) -> str:
+         """
+         Add a document to MongoDB and Qdrant
+
+         Args:
+             text: Document text
+             metadata: Additional metadata
+
+         Returns:
+             Document ID
+         """
+         # Save to MongoDB
+         doc_data = {
+             "text": text,
+             "metadata": metadata or {},
+             "created_at": datetime.utcnow()
+         }
+         result = self.documents_collection.insert_one(doc_data)
+         doc_id = str(result.inserted_id)
+
+         # Generate embedding
+         embedding = self.embedding_service.encode_text(text)
+
+         # Index into Qdrant
+         self.qdrant_service.index_data(
+             doc_id=doc_id,
+             embedding=embedding,
+             metadata={
+                 "text": text,
+                 "source": "user_upload",
+                 **(metadata or {})
+             }
+         )
+
+         return doc_id
+
+     def retrieve_context(self, query: str, top_k: int = 3) -> List[Dict]:
+         """
+         Retrieve relevant context from the vector DB
+
+         Args:
+             query: User query
+             top_k: Number of results to retrieve
+
+         Returns:
+             List of relevant documents
+         """
+         # Generate query embedding
+         query_embedding = self.embedding_service.encode_text(query)
+
+         # Search in Qdrant
+         results = self.qdrant_service.search(
+             query_embedding=query_embedding,
+             limit=top_k,
+             score_threshold=0.5  # Only keep sufficiently relevant results
+         )
+
+         return results
+
+     def save_chat_history(self, user_message: str, assistant_response: str, context_used: List[Dict]):
+         """
+         Save a chat interaction to MongoDB
+
+         Args:
+             user_message: User's message
+             assistant_response: Assistant's response
+             context_used: Context retrieved via RAG
+         """
+         chat_data = {
+             "user_message": user_message,
+             "assistant_response": assistant_response,
+             "context_used": context_used,
+             "timestamp": datetime.utcnow()
+         }
+         self.chat_history_collection.insert_one(chat_data)
+
+     def respond(
+         self,
+         message: str,
+         history: List[Dict[str, str]],
+         system_message: str,
+         max_tokens: int,
+         temperature: float,
+         top_p: float,
+         use_rag: bool,
+         hf_token: gr.OAuthToken,
+     ):
+         """
+         Generate a response with RAG
+
+         Args:
+             message: User message
+             history: Chat history
+             system_message: System prompt
+             max_tokens: Max tokens to generate
+             temperature: Temperature for generation
+             top_p: Top-p sampling
+             use_rag: Whether to use RAG retrieval
+             hf_token: Hugging Face token
+
+         Yields:
+             Generated response
+         """
+         # Initialize the LLM client
+         client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
+
+         # Prepare context from RAG
+         context_text = ""
+         context_used = []
+
+         if use_rag:
+             # Retrieve relevant context
+             retrieved_docs = self.retrieve_context(message, top_k=3)
+             context_used = retrieved_docs
+
+             if retrieved_docs:
+                 context_text = "\n\n**Relevant Context:**\n"
+                 for i, doc in enumerate(retrieved_docs, 1):
+                     doc_text = doc["metadata"].get("text", "")
+                     confidence = doc["confidence"]
+                     context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
+
+                 # Add the context to the system message
+                 system_message = f"{system_message}\n\n{context_text}\n\nPlease use the above context to answer the user's question when relevant."
+
+         # Build messages for the LLM
+         messages = [{"role": "system", "content": system_message}]
+         messages.extend(history)
+         messages.append({"role": "user", "content": message})
+
+         # Generate the response
+         response = ""
+
+         try:
+             for msg in client.chat_completion(
+                 messages,
+                 max_tokens=max_tokens,
+                 stream=True,
+                 temperature=temperature,
+                 top_p=top_p,
+             ):
+                 choices = msg.choices
+                 token = ""
+                 if len(choices) and choices[0].delta.content:
+                     token = choices[0].delta.content
+
+                 response += token
+                 yield response
+
+             # Save to chat history
+             self.save_chat_history(message, response, context_used)
+
+         except Exception as e:
+             error_msg = f"Error generating response: {str(e)}"
+             yield error_msg
+
+
+ # Initialize ChatbotRAG
+ chatbot_rag = ChatbotRAG()
+
+
+ def respond_wrapper(
+     message,
+     history,
+     system_message,
+     max_tokens,
+     temperature,
+     top_p,
+     use_rag,
+     hf_token,
+ ):
+     """Wrapper for Gradio ChatInterface"""
+     yield from chatbot_rag.respond(
+         message=message,
+         history=history,
+         system_message=system_message,
+         max_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+         use_rag=use_rag,
+         hf_token=hf_token,
+     )
+
+
+ def add_document_to_rag(text: str) -> str:
+     """
+     Add a document to the RAG knowledge base
+
+     Args:
+         text: Document text
+
+     Returns:
+         Status message
+     """
+     try:
+         doc_id = chatbot_rag.add_document(text)
+         return f"✓ Document added successfully! ID: {doc_id}"
+     except Exception as e:
+         return f"✗ Error adding document: {str(e)}"
+
+
+ # Create the Gradio interface
+ with gr.Blocks(title="ChatbotRAG - GPT-OSS-20B + Jina CLIP v2 + MongoDB") as demo:
+     gr.Markdown("""
+     # 🤖 ChatbotRAG
+
+     **Features:**
+     - 💬 LLM: GPT-OSS-20B
+     - 🔍 Embeddings: Jina CLIP v2 (Vietnamese support)
+     - 📊 Vector DB: Qdrant Cloud
+     - 🗄️ Document Store: MongoDB
+
+     **How to use:**
+     1. Add documents to the knowledge base (optional)
+     2. Toggle "Use RAG" to enable context retrieval
+     3. Chat with the bot!
+     """)
+
+     with gr.Sidebar():
+         gr.LoginButton()
+
+         gr.Markdown("### ⚙️ Settings")
+
+         use_rag = gr.Checkbox(
+             label="Use RAG",
+             value=True,
+             info="Enable RAG to retrieve relevant context from the knowledge base"
+         )
+
+         system_message = gr.Textbox(
+             value="You are a helpful AI assistant. Answer questions based on the provided context when available.",
+             label="System message",
+             lines=3
+         )
+
+         max_tokens = gr.Slider(
+             minimum=1,
+             maximum=2048,
+             value=512,
+             step=1,
+             label="Max new tokens"
+         )
+
+         temperature = gr.Slider(
+             minimum=0.1,
+             maximum=4.0,
+             value=0.7,
+             step=0.1,
+             label="Temperature"
+         )
+
+         top_p = gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-p (nucleus sampling)"
+         )
+
+     # Chat interface
+     chatbot = gr.ChatInterface(
+         respond_wrapper,
+         type="messages",
+         additional_inputs=[
+             system_message,
+             max_tokens,
+             temperature,
+             top_p,
+             use_rag,
+         ],
+     )
+
+     # Document management
+     with gr.Accordion("📚 Knowledge Base Management", open=False):
+         gr.Markdown("### Add Documents to Knowledge Base")
+
+         doc_text = gr.Textbox(
+             label="Document Text",
+             placeholder="Enter document text here...",
+             lines=5
+         )
+
+         add_btn = gr.Button("Add Document", variant="primary")
+         output_msg = gr.Textbox(label="Status", interactive=False)
+
+         add_btn.click(
+             fn=add_document_to_rag,
+             inputs=[doc_text],
+             outputs=[output_msg]
+         )
+
+     chatbot.render()
+
+
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
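
For reference, a minimal sketch (not part of the commit) of driving the class outside the Gradio UI. It assumes the hardcoded MongoDB URI and the QDRANT_URL / QDRANT_API_KEY environment variables are valid, and it reuses the module-level chatbot_rag instance created on import:

    from chatbot_rag import chatbot_rag as bot

    # Index a document, then retrieve it by semantic similarity
    doc_id = bot.add_document(
        "Qdrant is a vector database optimized for similarity search.",
        metadata={"topic": "databases"},
    )
    for hit in bot.retrieve_context("What is Qdrant?", top_k=3):
        print(hit["id"], hit["confidence"], hit["metadata"].get("text", "")[:60])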
chatbot_rag_api.py ADDED
@@ -0,0 +1,467 @@
+ from fastapi import FastAPI, HTTPException, File, UploadFile, Form
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from typing import Optional, List, Dict
+ from pymongo import MongoClient
+ from bson import ObjectId
+ from datetime import datetime
+ import numpy as np
+ import os
+ from huggingface_hub import InferenceClient
+
+ from embedding_service import JinaClipEmbeddingService
+ from qdrant_service import QdrantVectorService
+
+
+ # Pydantic models
+ class ChatRequest(BaseModel):
+     message: str
+     use_rag: bool = True
+     top_k: int = 3
+     system_message: Optional[str] = "You are a helpful AI assistant."
+     max_tokens: int = 512
+     temperature: float = 0.7
+     top_p: float = 0.95
+     hf_token: Optional[str] = None  # Hugging Face token (optional; falls back to env if omitted)
+
+
+ class ChatResponse(BaseModel):
+     response: str
+     context_used: List[Dict]
+     timestamp: str
+
+
+ class AddDocumentRequest(BaseModel):
+     text: str
+     metadata: Optional[Dict] = None
+
+
+ class AddDocumentResponse(BaseModel):
+     success: bool
+     doc_id: str
+     message: str
+
+
+ class SearchRequest(BaseModel):
+     query: str
+     top_k: int = 5
+     score_threshold: Optional[float] = 0.5
+
+
+ class SearchResponse(BaseModel):
+     results: List[Dict]
+
+
+ # Initialize FastAPI
+ app = FastAPI(
+     title="ChatbotRAG API",
+     description="API for a RAG chatbot with GPT-OSS-20B + Jina CLIP v2 + MongoDB + Qdrant",
+     version="1.0.0"
+ )
+
+ # CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # Allow all origins (restrict this in production)
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ # ChatbotRAG Service
+ class ChatbotRAGService:
+     """
+     ChatbotRAG service backing the API
+     """
+
+     def __init__(
+         self,
+         mongodb_uri: str = "mongodb+srv://truongtn7122003:[email protected]/",
+         db_name: str = "chatbot_rag",
+         collection_name: str = "documents",
+         hf_token: Optional[str] = None
+     ):
+         print("Initializing ChatbotRAG Service...")
+
+         # MongoDB
+         self.mongo_client = MongoClient(mongodb_uri)
+         self.db = self.mongo_client[db_name]
+         self.documents_collection = self.db[collection_name]
+         self.chat_history_collection = self.db["chat_history"]
+
+         # Embedding service
+         self.embedding_service = JinaClipEmbeddingService(
+             model_path="jinaai/jina-clip-v2"
+         )
+
+         # Qdrant
+         self.qdrant_service = QdrantVectorService(
+             collection_name="chatbot_rag_vectors",
+             vector_size=self.embedding_service.get_embedding_dimension()
+         )
+
+         # Hugging Face token (from env or passed in)
+         self.hf_token = hf_token or os.getenv("HUGGINGFACE_TOKEN")
+         if self.hf_token:
+             print("✓ Hugging Face token configured")
+         else:
+             print("⚠ No Hugging Face token - LLM generation will use a placeholder")
+
+         print("✓ ChatbotRAG Service initialized")
+
+     def add_document(self, text: str, metadata: Dict = None) -> str:
+         """Add a document to the knowledge base"""
+         # Save to MongoDB
+         doc_data = {
+             "text": text,
+             "metadata": metadata or {},
+             "created_at": datetime.utcnow()
+         }
+         result = self.documents_collection.insert_one(doc_data)
+         doc_id = str(result.inserted_id)
+
+         # Generate embedding
+         embedding = self.embedding_service.encode_text(text)
+
+         # Index into Qdrant
+         self.qdrant_service.index_data(
+             doc_id=doc_id,
+             embedding=embedding,
+             metadata={
+                 "text": text,
+                 "source": "api",
+                 **(metadata or {})
+             }
+         )
+
+         return doc_id
+
+     def retrieve_context(self, query: str, top_k: int = 3, score_threshold: float = 0.5) -> List[Dict]:
+         """Retrieve relevant context from the vector DB"""
+         # Generate the query embedding
+         query_embedding = self.embedding_service.encode_text(query)
+
+         # Search in Qdrant
+         results = self.qdrant_service.search(
+             query_embedding=query_embedding,
+             limit=top_k,
+             score_threshold=score_threshold
+         )
+
+         return results
+
+     def generate_response(
+         self,
+         message: str,
+         context: List[Dict],
+         system_message: str,
+         max_tokens: int = 512,
+         temperature: float = 0.7,
+         top_p: float = 0.95,
+         hf_token: Optional[str] = None
+     ) -> str:
+         """
+         Generate a response using the Hugging Face LLM
+         """
+         # Build the context text
+         context_text = ""
+         if context:
+             context_text = "\n\nRelevant Context:\n"
+             for i, doc in enumerate(context, 1):
+                 doc_text = doc["metadata"].get("text", "")
+                 confidence = doc["confidence"]
+                 context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
+
+         # Add the context to the system message
+         system_message = f"{system_message}\n{context_text}\n\nPlease use the above context to answer the user's question when relevant."
+
+         # Use the token from the request, falling back to the service token
+         token = hf_token or self.hf_token
+
+         # If no token is available, return a placeholder
+         if not token:
+             return f"""[LLM Response Placeholder]
+
+ Context retrieved: {len(context)} documents
+ User question: {message}
+
+ To enable actual LLM generation:
+ 1. Set the HUGGINGFACE_TOKEN environment variable, OR
+ 2. Pass hf_token in the request body
+
+ Example:
+ {{
+     "message": "Your question",
+     "hf_token": "hf_xxxxxxxxxxxxx"
+ }}
+ """
+
+         # Initialize the HF Inference Client
+         try:
+             client = InferenceClient(
+                 token=token,
+                 model="openai/gpt-oss-20b"
+             )
+
+             # Build messages
+             messages = [
+                 {"role": "system", "content": system_message},
+                 {"role": "user", "content": message}
+             ]
+
+             # Generate the response (streamed internally, returned as one string)
+             response = ""
+             for msg in client.chat_completion(
+                 messages,
+                 max_tokens=max_tokens,
+                 stream=True,
+                 temperature=temperature,
+                 top_p=top_p,
+             ):
+                 choices = msg.choices
+                 if len(choices) and choices[0].delta.content:
+                     response += choices[0].delta.content
+
+             return response
+
+         except Exception as e:
+             return f"Error generating response with LLM: {str(e)}\n\nContext was retrieved successfully, but LLM generation failed."
+
+     def save_chat_history(self, user_message: str, assistant_response: str, context_used: List[Dict]):
+         """Save a chat interaction to MongoDB"""
+         chat_data = {
+             "user_message": user_message,
+             "assistant_response": assistant_response,
+             "context_used": context_used,
+             "timestamp": datetime.utcnow()
+         }
+         self.chat_history_collection.insert_one(chat_data)
+
+     def get_stats(self) -> Dict:
+         """Get statistics"""
+         return {
+             "documents_count": self.documents_collection.count_documents({}),
+             "chat_history_count": self.chat_history_collection.count_documents({}),
+             "qdrant_info": self.qdrant_service.get_collection_info()
+         }
+
+
+ # Initialize the service
+ rag_service = ChatbotRAGService()
+
+
+ # API Endpoints
+
+ @app.get("/")
+ async def root():
+     """Health check"""
+     return {
+         "status": "running",
+         "service": "ChatbotRAG API",
+         "version": "1.0.0",
+         "endpoints": {
+             "POST /chat": "Chat with RAG",
+             "POST /documents": "Add document to knowledge base",
+             "POST /search": "Search in knowledge base",
+             "GET /stats": "Get statistics",
+             "GET /history": "Get chat history"
+         }
+     }
+
+
+ @app.post("/chat", response_model=ChatResponse)
+ async def chat(request: ChatRequest):
+     """
+     Chat endpoint with RAG
+
+     Body:
+     - message: User message
+     - use_rag: Enable RAG retrieval (default: true)
+     - top_k: Number of documents to retrieve (default: 3)
+     - system_message: System prompt (optional)
+     - max_tokens: Max tokens for the response (default: 512)
+     - temperature: Temperature for generation (default: 0.7)
+
+     Returns:
+     - response: Generated response
+     - context_used: Retrieved context documents
+     - timestamp: Response timestamp
+     """
+     try:
+         # Retrieve context if RAG is enabled
+         context_used = []
+         if request.use_rag:
+             context_used = rag_service.retrieve_context(
+                 query=request.message,
+                 top_k=request.top_k
+             )
+
+         # Generate the response
+         response = rag_service.generate_response(
+             message=request.message,
+             context=context_used,
+             system_message=request.system_message,
+             max_tokens=request.max_tokens,
+             temperature=request.temperature,
+             top_p=request.top_p,
+             hf_token=request.hf_token
+         )
+
+         # Save to history
+         rag_service.save_chat_history(
+             user_message=request.message,
+             assistant_response=response,
+             context_used=context_used
+         )
+
+         return ChatResponse(
+             response=response,
+             context_used=context_used,
+             timestamp=datetime.utcnow().isoformat()
+         )
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+
+
+ @app.post("/documents", response_model=AddDocumentResponse)
+ async def add_document(request: AddDocumentRequest):
+     """
+     Add a document to the knowledge base
+
+     Body:
+     - text: Document text
+     - metadata: Additional metadata (optional)
+
+     Returns:
+     - success: True/False
+     - doc_id: MongoDB document ID
+     - message: Status message
+     """
+     try:
+         doc_id = rag_service.add_document(
+             text=request.text,
+             metadata=request.metadata
+         )
+
+         return AddDocumentResponse(
+             success=True,
+             doc_id=doc_id,
+             message=f"Document added successfully with ID: {doc_id}"
+         )
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+
+
+ @app.post("/search", response_model=SearchResponse)
+ async def search(request: SearchRequest):
+     """
+     Search the knowledge base
+
+     Body:
+     - query: Search query
+     - top_k: Number of results (default: 5)
+     - score_threshold: Minimum score (default: 0.5)
+
+     Returns:
+     - results: List of matching documents
+     """
+     try:
+         results = rag_service.retrieve_context(
+             query=request.query,
+             top_k=request.top_k,
+             score_threshold=request.score_threshold
+         )
+
+         return SearchResponse(results=results)
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+
+
+ @app.get("/stats")
+ async def get_stats():
+     """
+     Get statistics
+
+     Returns:
+     - documents_count: Number of documents in MongoDB
+     - chat_history_count: Number of chat messages
+     - qdrant_info: Qdrant collection info
+     """
+     try:
+         return rag_service.get_stats()
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+
+
+ @app.get("/history")
+ async def get_history(limit: int = 10, skip: int = 0):
+     """
+     Get chat history
+
+     Query params:
+     - limit: Number of messages to return (default: 10)
+     - skip: Number of messages to skip (default: 0)
+
+     Returns:
+     - history: List of chat messages
+     """
+     try:
+         history = list(
+             rag_service.chat_history_collection
+             .find({}, {"_id": 0})
+             .sort("timestamp", -1)
+             .skip(skip)
+             .limit(limit)
+         )
+
+         # Convert datetimes to strings
+         for msg in history:
+             if "timestamp" in msg:
+                 msg["timestamp"] = msg["timestamp"].isoformat()
+
+         return {"history": history, "total": rag_service.chat_history_collection.count_documents({})}
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+
+
+ @app.delete("/documents/{doc_id}")
+ async def delete_document(doc_id: str):
+     """
+     Delete a document from the knowledge base
+
+     Args:
+     - doc_id: Document ID (MongoDB ObjectId)
+
+     Returns:
+     - success: True/False
+     - message: Status message
+     """
+     try:
+         # Delete from MongoDB (convert the string to an ObjectId so _id matches)
+         result = rag_service.documents_collection.delete_one({"_id": ObjectId(doc_id)})
+
+         # Delete from Qdrant
+         if result.deleted_count > 0:
+             rag_service.qdrant_service.delete_by_id(doc_id)
+             return {"success": True, "message": f"Document {doc_id} deleted"}
+         else:
+             raise HTTPException(status_code=404, detail=f"Document {doc_id} not found")
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(
+         app,
+         host="0.0.0.0",
+         port=8000,
+         log_level="info"
+     )
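
A hedged client sketch for this API (not part of the commit). It assumes the server is running locally on port 8000 and that the requests package is installed (it is not pinned in requirements.txt):

    import requests  # assumed installed; not listed in requirements.txt

    BASE = "http://localhost:8000"

    # Add a document to the knowledge base
    print(requests.post(f"{BASE}/documents",
                        json={"text": "Jina CLIP v2 produces 1024-dimensional embeddings."}).json())

    # Chat with RAG enabled; hf_token may be omitted if HUGGINGFACE_TOKEN is set server-side
    reply = requests.post(f"{BASE}/chat",
                          json={"message": "What dimension are the embeddings?", "use_rag": True}).json()
    print(reply["response"])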
embedding_service.py ADDED
@@ -0,0 +1,173 @@
+ import torch
+ import numpy as np
+ from PIL import Image
+ from transformers import AutoModel
+ from typing import Union, List
+ import io
+
+
+ class JinaClipEmbeddingService:
+     """
+     Jina CLIP v2 embedding service with Vietnamese support.
+     Uses AutoModel with trust_remote_code.
+     """
+
+     def __init__(self, model_path: str = "jinaai/jina-clip-v2"):
+         """
+         Initialize the Jina CLIP v2 model
+
+         Args:
+             model_path: Local model path or Hugging Face model name
+         """
+         print(f"Loading Jina CLIP v2 model from {model_path}...")
+
+         # Load the model with trust_remote_code
+         self.model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
+
+         # Switch to eval mode
+         self.model.eval()
+
+         # Use the GPU if available
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model.to(self.device)
+
+         print(f"✓ Loaded Jina CLIP v2 model on: {self.device}")
+
+     def encode_text(
+         self,
+         text: Union[str, List[str]],
+         truncate_dim: int = None,
+         normalize: bool = True
+     ) -> np.ndarray:
+         """
+         Encode text into vector embeddings (Vietnamese supported)
+
+         Args:
+             text: Text or list of texts (Vietnamese supported)
+             truncate_dim: Matryoshka dimension (64-1024, None = full 1024)
+             normalize: Whether to L2-normalize the embeddings
+
+         Returns:
+             numpy array of embeddings
+         """
+         if isinstance(text, str):
+             text = [text]
+
+         # Jina CLIP v2 encode_text method
+         # Handles tokenization internally
+         embeddings = self.model.encode_text(
+             text,
+             truncate_dim=truncate_dim  # Optional: 64, 128, 256, 512, 1024
+         )
+
+         # Convert to numpy
+         if isinstance(embeddings, torch.Tensor):
+             embeddings = embeddings.cpu().detach().numpy()
+
+         # Normalize if requested
+         if normalize:
+             embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
+
+         return embeddings
+
+     def encode_image(
+         self,
+         image: Union[Image.Image, bytes, List, str],
+         truncate_dim: int = None,
+         normalize: bool = True
+     ) -> np.ndarray:
+         """
+         Encode an image into vector embeddings
+
+         Args:
+             image: PIL Image, bytes, URL string, or list of images
+             truncate_dim: Matryoshka dimension (64-1024, None = full 1024)
+             normalize: Whether to L2-normalize the embeddings
+
+         Returns:
+             numpy array of embeddings
+         """
+         # Convert bytes to a PIL Image if needed
+         if isinstance(image, bytes):
+             image = Image.open(io.BytesIO(image)).convert('RGB')
+         elif isinstance(image, list):
+             processed_images = []
+             for img in image:
+                 if isinstance(img, bytes):
+                     processed_images.append(Image.open(io.BytesIO(img)).convert('RGB'))
+                 elif isinstance(img, str):
+                     # URL string - keep as-is, Jina CLIP can handle URLs
+                     processed_images.append(img)
+                 else:
+                     processed_images.append(img)
+             image = processed_images
+         elif not isinstance(image, str):
+             # Single PIL Image - wrap it in a list
+             image = [image]
+
+         # Jina CLIP v2 encode_image method
+         # Supports PIL Images, file paths, or URLs
+         embeddings = self.model.encode_image(
+             image,
+             truncate_dim=truncate_dim  # Optional: 64, 128, 256, 512, 1024
+         )
+
+         # Convert to numpy
+         if isinstance(embeddings, torch.Tensor):
+             embeddings = embeddings.cpu().detach().numpy()
+
+         # Normalize if requested
+         if normalize:
+             embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
+
+         return embeddings
+
+     def encode_multimodal(
+         self,
+         text: Union[str, List[str]] = None,
+         image: Union[Image.Image, bytes, List] = None,
+         truncate_dim: int = None,
+         normalize: bool = True
+     ) -> np.ndarray:
+         """
+         Encode both text and image and return a combined embedding
+
+         Args:
+             text: Text or list of texts (Vietnamese supported)
+             image: PIL Image, bytes, or list of images
+             truncate_dim: Matryoshka dimension (64-1024, None = full 1024)
+             normalize: Whether to L2-normalize the embeddings
+
+         Returns:
+             numpy array of embeddings
+         """
+         embeddings = []
+
+         if text is not None:
+             text_emb = self.encode_text(text, truncate_dim=truncate_dim, normalize=False)
+             embeddings.append(text_emb)
+
+         if image is not None:
+             image_emb = self.encode_image(image, truncate_dim=truncate_dim, normalize=False)
+             embeddings.append(image_emb)
+
+         # Combine the embeddings (average)
+         if len(embeddings) == 2:
+             # Average of the text and image embeddings
+             combined = np.mean(embeddings, axis=0)
+         elif len(embeddings) == 1:
+             combined = embeddings[0]
+         else:
+             raise ValueError("Must provide at least text or an image")
+
+         # Normalize if requested
+         if normalize:
+             combined = combined / np.linalg.norm(combined, axis=1, keepdims=True)
+
+         return combined
+
+     def get_embedding_dimension(self) -> int:
+         """
+         Return the embedding dimension (1024 for Jina CLIP v2)
+         """
+         return 1024
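
Since encode_text returns L2-normalized vectors by default, the dot product of two embeddings is their cosine similarity. A minimal sketch (not part of the commit; downloads the model weights on first run):

    from embedding_service import JinaClipEmbeddingService

    svc = JinaClipEmbeddingService()

    emb = svc.encode_text([
        "Hanoi is the capital of Vietnam.",
        "Hà Nội là thủ đô của Việt Nam.",
    ])
    # Rows are unit-length, so the dot product equals the cosine similarity
    print(float(emb[0] @ emb[1]))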
main.py ADDED
@@ -0,0 +1,352 @@
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+ from fastapi.responses import JSONResponse
+ from pydantic import BaseModel
+ from typing import Optional, List
+ from PIL import Image
+ import io
+ import numpy as np
+
+ from embedding_service import JinaClipEmbeddingService
+ from qdrant_service import QdrantVectorService
+
+ # Initialize the FastAPI app
+ app = FastAPI(
+     title="Event Social Media Embeddings API",
+     description="API for embedding and searching text + images from events & social media with Jina CLIP v2 + Qdrant",
+     version="1.0.0"
+ )
+
+ # Initialize services
+ print("Initializing services...")
+ embedding_service = JinaClipEmbeddingService(model_path="jinaai/jina-clip-v2")
+ qdrant_service = QdrantVectorService(
+     # URL and API key are read from environment variables
+     collection_name="event_social_media",
+     vector_size=embedding_service.get_embedding_dimension()
+ )
+ print("✓ Services initialized successfully")
+
+
+ # Pydantic models
+ class SearchRequest(BaseModel):
+     text: Optional[str] = None
+     limit: int = 10
+     score_threshold: Optional[float] = None
+     text_weight: float = 0.5
+     image_weight: float = 0.5
+
+
+ class SearchResponse(BaseModel):
+     id: str
+     confidence: float
+     metadata: dict
+
+
+ class IndexResponse(BaseModel):
+     success: bool
+     id: str
+     message: str
+
+
+ @app.get("/")
+ async def root():
+     """Health check endpoint"""
+     return {
+         "status": "running",
+         "service": "Event Social Media Embeddings API",
+         "embedding_model": "Jina CLIP v2",
+         "vector_db": "Qdrant",
+         "language_support": "Vietnamese + 88 other languages"
+     }
+
+
+ @app.post("/index", response_model=IndexResponse)
+ async def index_data(
+     id: str = Form(...),
+     text: str = Form(...),
+     image: Optional[UploadFile] = File(None)
+ ):
+     """
+     Index data into the vector database
+
+     Body:
+     - id: Document ID (event ID, post ID, etc.)
+     - text: Text content (Vietnamese supported)
+     - image: Image file (optional)
+
+     Returns:
+     - success: True/False
+     - id: Document ID
+     - message: Status message
+     """
+     try:
+         # Prepare embeddings
+         text_embedding = None
+         image_embedding = None
+
+         # Encode the text (Vietnamese supported)
+         if text and text.strip():
+             text_embedding = embedding_service.encode_text(text)
+
+         # Encode the image if provided
+         if image:
+             image_bytes = await image.read()
+             pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
+             image_embedding = embedding_service.encode_image(pil_image)
+
+         # Combine the embeddings
+         if text_embedding is not None and image_embedding is not None:
+             # Average of the text and image embeddings
+             combined_embedding = np.mean([text_embedding, image_embedding], axis=0)
+         elif text_embedding is not None:
+             combined_embedding = text_embedding
+         elif image_embedding is not None:
+             combined_embedding = image_embedding
+         else:
+             raise HTTPException(status_code=400, detail="Must provide at least text or an image")
+
+         # Normalize
+         combined_embedding = combined_embedding / np.linalg.norm(combined_embedding, axis=1, keepdims=True)
+
+         # Index into Qdrant
+         metadata = {
+             "text": text,
+             "has_image": image is not None,
+             "image_filename": image.filename if image else None
+         }
+
+         result = qdrant_service.index_data(
+             doc_id=id,
+             embedding=combined_embedding,
+             metadata=metadata
+         )
+
+         return IndexResponse(
+             success=True,
+             id=result["original_id"],  # Return the original ID (e.g., MongoDB ObjectId)
+             message=f"Successfully indexed document {result['original_id']} (Qdrant UUID: {result['qdrant_id']})"
+         )
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Indexing error: {str(e)}")
+
+
+ @app.post("/search", response_model=List[SearchResponse])
+ async def search(
+     text: Optional[str] = Form(None),
+     image: Optional[UploadFile] = File(None),
+     limit: int = Form(10),
+     score_threshold: Optional[float] = Form(None),
+     text_weight: float = Form(0.5),
+     image_weight: float = Form(0.5)
+ ):
+     """
+     Search for similar documents by text and/or image
+
+     Body:
+     - text: Query text (Vietnamese supported)
+     - image: Query image (optional)
+     - limit: Number of results (default: 10)
+     - score_threshold: Minimum confidence score (0-1)
+     - text_weight: Weight for the text query (default: 0.5)
+     - image_weight: Weight for the image query (default: 0.5)
+
+     Returns:
+     - List of results with id, confidence, and metadata
+     """
+     try:
+         # Prepare the query embeddings
+         text_embedding = None
+         image_embedding = None
+
+         # Encode the text query
+         if text and text.strip():
+             text_embedding = embedding_service.encode_text(text)
+
+         # Encode the image query
+         if image:
+             image_bytes = await image.read()
+             pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
+             image_embedding = embedding_service.encode_image(pil_image)
+
+         # Validate input
+         if text_embedding is None and image_embedding is None:
+             raise HTTPException(status_code=400, detail="Must provide at least text or an image to search")
+
+         # Hybrid search with Qdrant
+         results = qdrant_service.hybrid_search(
+             text_embedding=text_embedding,
+             image_embedding=image_embedding,
+             text_weight=text_weight,
+             image_weight=image_weight,
+             limit=limit,
+             score_threshold=score_threshold,
+             ef=256  # High-accuracy search
+         )
+
+         # Format the response
+         return [
+             SearchResponse(
+                 id=result["id"],
+                 confidence=result["confidence"],
+                 metadata=result["metadata"]
+             )
+             for result in results
+         ]
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")
+
+
+ @app.post("/search/text", response_model=List[SearchResponse])
+ async def search_by_text(
+     text: str = Form(...),
+     limit: int = Form(10),
+     score_threshold: Optional[float] = Form(None)
+ ):
+     """
+     Search by text only (Vietnamese supported)
+
+     Body:
+     - text: Query text (Vietnamese supported)
+     - limit: Number of results
+     - score_threshold: Minimum confidence score
+
+     Returns:
+     - List of results
+     """
+     try:
+         # Encode the text
+         text_embedding = embedding_service.encode_text(text)
+
+         # Search
+         results = qdrant_service.search(
+             query_embedding=text_embedding,
+             limit=limit,
+             score_threshold=score_threshold,
+             ef=256
+         )
+
+         return [
+             SearchResponse(
+                 id=result["id"],
+                 confidence=result["confidence"],
+                 metadata=result["metadata"]
+             )
+             for result in results
+         ]
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")
+
+
+ @app.post("/search/image", response_model=List[SearchResponse])
+ async def search_by_image(
+     image: UploadFile = File(...),
+     limit: int = Form(10),
+     score_threshold: Optional[float] = Form(None)
+ ):
+     """
+     Search by image only
+
+     Body:
+     - image: Query image
+     - limit: Number of results
+     - score_threshold: Minimum confidence score
+
+     Returns:
+     - List of results
+     """
+     try:
+         # Encode the image
+         image_bytes = await image.read()
+         pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
+         image_embedding = embedding_service.encode_image(pil_image)
+
+         # Search
+         results = qdrant_service.search(
+             query_embedding=image_embedding,
+             limit=limit,
+             score_threshold=score_threshold,
+             ef=256
+         )
+
+         return [
+             SearchResponse(
+                 id=result["id"],
+                 confidence=result["confidence"],
+                 metadata=result["metadata"]
+             )
+             for result in results
+         ]
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")
+
+
+ @app.delete("/delete/{doc_id}")
+ async def delete_document(doc_id: str):
+     """
+     Delete a document by ID (MongoDB ObjectId or UUID)
+
+     Args:
+     - doc_id: Document ID to delete
+
+     Returns:
+     - Success message
+     """
+     try:
+         qdrant_service.delete_by_id(doc_id)
+         return {"success": True, "message": f"Deleted document {doc_id}"}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Delete error: {str(e)}")
+
+
+ @app.get("/document/{doc_id}")
+ async def get_document(doc_id: str):
+     """
+     Get a document by ID (MongoDB ObjectId or UUID)
+
+     Args:
+     - doc_id: Document ID (MongoDB ObjectId)
+
+     Returns:
+     - Document data
+     """
+     try:
+         doc = qdrant_service.get_by_id(doc_id)
+         if doc:
+             return {
+                 "success": True,
+                 "data": doc
+             }
+         raise HTTPException(status_code=404, detail=f"Document {doc_id} not found")
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error getting document: {str(e)}")
+
+
+ @app.get("/stats")
+ async def get_stats():
+     """
+     Get collection statistics
+
+     Returns:
+     - Collection statistics
+     """
+     try:
+         info = qdrant_service.get_collection_info()
+         return info
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error getting stats: {str(e)}")
+
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(
+         app,
+         host="0.0.0.0",
+         port=8000,
+         log_level="info"
+     )
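
A hedged client sketch for the multipart endpoints above (not part of the commit). It assumes a local server on port 8000, the requests package, and a hypothetical poster.jpg on disk:

    import requests  # assumed installed; not listed in requirements.txt

    BASE = "http://localhost:8000"

    # Index a text + image document (the image part is optional)
    with open("poster.jpg", "rb") as f:  # hypothetical example file
        r = requests.post(
            f"{BASE}/index",
            data={"id": "event-123", "text": "Music festival in Da Nang"},
            files={"image": ("poster.jpg", f, "image/jpeg")},
        )
    print(r.json())

    # Text-only search
    for hit in requests.post(f"{BASE}/search/text",
                             data={"text": "festival", "limit": "5"}).json():
        print(hit["id"], hit["confidence"])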
qdrant_service.py ADDED
@@ -0,0 +1,447 @@
+ from qdrant_client import QdrantClient
+ from qdrant_client.models import (
+     Distance, VectorParams, PointStruct,
+     SearchRequest, SearchParams, HnswConfigDiff,
+     OptimizersConfigDiff, ScalarQuantization,
+     ScalarQuantizationConfig, ScalarType,
+     QuantizationSearchParams
+ )
+ from typing import List, Dict, Any, Optional
+ import numpy as np
+ import uuid
+ import os
+
+
+ class QdrantVectorService:
+     """
+     Qdrant Cloud vector database service with a tuned configuration
+     - HNSW with accuracy-oriented parameters
+     - Scalar quantization to cut memory use and speed up search
+     - Supports hybrid search (text + image)
+     """
+
+     def __init__(
+         self,
+         url: Optional[str] = None,
+         api_key: Optional[str] = None,
+         collection_name: str = "event_social_media",
+         vector_size: int = 1024,  # Jina CLIP v2 dimension
+     ):
+         """
+         Initialize the Qdrant Cloud client
+
+         Args:
+             url: Qdrant Cloud URL (from env or passed in)
+             api_key: Qdrant API key (from env or passed in)
+             collection_name: Collection name
+             vector_size: Vector dimension (1024 for Jina CLIP v2)
+         """
+         # Read credentials from env if not passed in
+         self.url = url or os.getenv("QDRANT_URL")
+         self.api_key = api_key or os.getenv("QDRANT_API_KEY")
+
+         if not self.url or not self.api_key:
+             raise ValueError("QDRANT_URL and QDRANT_API_KEY are required (via env or parameters)")
+
+         print("Connecting to Qdrant Cloud...")
+
+         # Initialize the Qdrant Cloud client
+         self.client = QdrantClient(
+             url=self.url,
+             api_key=self.api_key,
+         )
+
+         self.collection_name = collection_name
+         self.vector_size = vector_size
+
+         # Create the collection if it does not exist yet
+         self._ensure_collection()
+
+         print(f"✓ Connected to Qdrant collection: {collection_name}")
+
+     def _ensure_collection(self):
+         """
+         Create the collection with a tuned HNSW config
+         """
+         # Check whether the collection already exists
+         collections = self.client.get_collections().collections
+         collection_exists = any(c.name == self.collection_name for c in collections)
+
+         if not collection_exists:
+             print(f"Creating collection {self.collection_name} with tuned HNSW config...")
+
+             self.client.create_collection(
+                 collection_name=self.collection_name,
+                 vectors_config=VectorParams(
+                     size=self.vector_size,
+                     distance=Distance.COSINE,  # Cosine similarity for embeddings
+                     hnsw_config=HnswConfigDiff(
+                         m=64,  # Edges per node - higher favors accuracy
+                         ef_construct=512,  # Search breadth during index build - higher favors quality
+                         full_scan_threshold=10000,  # Threshold for switching to a full scan
+                         max_indexing_threads=0,  # 0 = auto-detect thread count
+                         on_disk=False,  # Keep the index in RAM for speed (if memory allows)
+                     )
+                 ),
+                 optimizers_config=OptimizersConfigDiff(
+                     deleted_threshold=0.2,
+                     vacuum_min_vector_number=1000,
+                     default_segment_number=2,
+                     max_segment_size=200000,
+                     memmap_threshold=50000,
+                     indexing_threshold=10000,
+                     flush_interval_sec=5,
+                     max_optimization_threads=0,  # 0 = auto-detect
+                 ),
+                 # Use scalar quantization to cut memory use and speed up search
+                 quantization_config=ScalarQuantization(
+                     scalar=ScalarQuantizationConfig(
+                         type=ScalarType.INT8,
+                         quantile=0.99,
+                         always_ram=True,  # Keep quantized vectors in RAM
+                     )
+                 )
+             )
+             print("✓ Collection created with tuned configuration")
+         else:
+             print("✓ Collection already exists")
+
+     def _convert_to_valid_id(self, doc_id: str) -> str:
+         """
+         Convert any string ID into a valid Qdrant UUID
+
+         Args:
+             doc_id: Original ID (may be a MongoDB ObjectId, arbitrary string, etc.)
+
+         Returns:
+             A valid UUID string
+         """
+         if not doc_id:
+             return str(uuid.uuid4())
+
+         # If it is already a valid UUID, keep it
+         try:
+             uuid.UUID(doc_id)
+             return doc_id
+         except ValueError:
+             pass
+
+         # Deterministically convert the string to a UUID (same input -> same UUID)
+         # using UUID v5 with the DNS namespace
+         return str(uuid.uuid5(uuid.NAMESPACE_DNS, doc_id))
+
+     def index_data(
+         self,
+         doc_id: str,
+         embedding: np.ndarray,
+         metadata: Dict[str, Any]
+     ) -> Dict[str, str]:
+         """
+         Index data into Qdrant
+
+         Args:
+             doc_id: Document ID (MongoDB ObjectId, string, etc.)
+             embedding: Vector embedding from Jina CLIP
+             metadata: Metadata (text, image_url, event_info, etc.)
+
+         Returns:
+             Dict with original_id and qdrant_id
+         """
+         # Convert the ID to a valid UUID
+         qdrant_id = self._convert_to_valid_id(doc_id)
+
+         # Store the original ID in the metadata
+         metadata['original_id'] = doc_id
+
+         # Ensure the embedding is a 1D array
+         if len(embedding.shape) > 1:
+             embedding = embedding.flatten()
+
+         # Create the point
+         point = PointStruct(
+             id=qdrant_id,
+             vector=embedding.tolist(),
+             payload=metadata
+         )
+
+         # Upsert into the collection
+         self.client.upsert(
+             collection_name=self.collection_name,
+             points=[point]
+         )
+
+         return {
+             "original_id": doc_id,
+             "qdrant_id": qdrant_id
+         }
+
+     def batch_index(
+         self,
+         doc_ids: List[str],
+         embeddings: np.ndarray,
+         metadata_list: List[Dict[str, Any]]
+     ) -> List[Dict[str, str]]:
+         """
+         Batch-index multiple documents at once
+
+         Args:
+             doc_ids: List of document IDs (MongoDB ObjectId, string, etc.)
+             embeddings: Numpy array of embeddings (n_samples, embedding_dim)
+             metadata_list: List of metadata dicts
+
+         Returns:
+             List of dicts with original_id and qdrant_id
+         """
+         points = []
+         id_mappings = []
+
+         for doc_id, embedding, metadata in zip(doc_ids, embeddings, metadata_list):
+             # Convert to a valid UUID
+             qdrant_id = self._convert_to_valid_id(doc_id)
+
+             # Store the original ID in the metadata
+             metadata['original_id'] = doc_id
+
+             # Ensure the embedding is 1D
+             if len(embedding.shape) > 1:
+                 embedding = embedding.flatten()
+
+             points.append(PointStruct(
+                 id=qdrant_id,
+                 vector=embedding.tolist(),
+                 payload=metadata
+             ))
+
+             id_mappings.append({
+                 "original_id": doc_id,
+                 "qdrant_id": qdrant_id
+             })
+
+         # Batch upsert
+         self.client.upsert(
+             collection_name=self.collection_name,
+             points=points,
+             wait=True  # Wait for indexing to complete
+         )
+
+         return id_mappings
+
+     def search(
+         self,
+         query_embedding: np.ndarray,
+         limit: int = 10,
+         score_threshold: Optional[float] = None,
+         filter_conditions: Optional[Dict] = None,
+         ef: int = 256  # Search quality parameter - higher = more accurate
+     ) -> List[Dict[str, Any]]:
+         """
+         Search for similar vectors in Qdrant
+
+         Args:
+             query_embedding: Query embedding from Jina CLIP
+             limit: Number of results to return
+             score_threshold: Minimum similarity score (0-1)
+             filter_conditions: Qdrant filter conditions
+             ef: HNSW search parameter (128-512; higher = more accurate)
+
+         Returns:
+             List of search results with id, score, and metadata
+         """
+         # Ensure the query embedding is 1D
+         if len(query_embedding.shape) > 1:
+             query_embedding = query_embedding.flatten()
+
+         # Search with tuned HNSW parameters
+         search_result = self.client.search(
+             collection_name=self.collection_name,
+             query_vector=query_embedding.tolist(),
+             limit=limit,
+             score_threshold=score_threshold,
+             query_filter=filter_conditions,
+             search_params=SearchParams(
+                 hnsw_ef=ef,  # Higher ef = more accurate search
+                 exact=False,  # Use HNSW (not exact search)
+                 quantization=QuantizationSearchParams(
+                     ignore=False,  # Use quantization
+                     rescore=True,  # Rescore with the original vectors
+                     oversampling=2.0  # Oversampling factor
+                 )
+             ),
+             with_payload=True,
+             with_vectors=False  # No need to return vectors
+         )
+
+         # Format results - return original_id instead of the UUID
+         results = []
+         for hit in search_result:
+             # Get original_id from the metadata (e.g., MongoDB ObjectId)
+             original_id = hit.payload.get('original_id', hit.id)
+
+             results.append({
+                 "id": original_id,  # Original (e.g., MongoDB) ID
+                 "qdrant_id": hit.id,  # UUID inside Qdrant
+                 "confidence": float(hit.score),  # Cosine similarity score
+                 "metadata": hit.payload
+             })
+
+         return results
+
+     def hybrid_search(
+         self,
+         text_embedding: Optional[np.ndarray] = None,
+         image_embedding: Optional[np.ndarray] = None,
+         text_weight: float = 0.5,
+         image_weight: float = 0.5,
+         limit: int = 10,
+         score_threshold: Optional[float] = None,
+         ef: int = 256
+     ) -> List[Dict[str, Any]]:
+         """
+         Hybrid search with both text and image embeddings
+
+         Args:
+             text_embedding: Text query embedding
+             image_embedding: Image query embedding
+             text_weight: Weight for the text query (0-1)
+             image_weight: Weight for the image query (0-1)
+             limit: Number of results
+             score_threshold: Minimum score
+             ef: HNSW search parameter
+
+         Returns:
+             Combined search results
+         """
+         # Combine the embeddings with weights
+         combined_embedding = np.zeros(self.vector_size)
+
+         if text_embedding is not None:
+             if len(text_embedding.shape) > 1:
+                 text_embedding = text_embedding.flatten()
+             combined_embedding += text_weight * text_embedding
+
+         if image_embedding is not None:
+             if len(image_embedding.shape) > 1:
+                 image_embedding = image_embedding.flatten()
+             combined_embedding += image_weight * image_embedding
+
+         # Normalize the combined embedding
+         norm = np.linalg.norm(combined_embedding)
+         if norm > 0:
+             combined_embedding = combined_embedding / norm
+
+         # Search with the combined embedding
+         return self.search(
+             query_embedding=combined_embedding,
+             limit=limit,
+             score_threshold=score_threshold,
+             ef=ef
+         )
+
+     def delete_by_id(self, doc_id: str) -> bool:
+         """
+         Delete a document by ID (supports both MongoDB ObjectId and UUID)
+
+         Args:
+             doc_id: Document ID to delete (MongoDB ObjectId or UUID)
+
+         Returns:
+             Success status
+         """
+         # Convert to a UUID if it is a MongoDB ObjectId
+         qdrant_id = self._convert_to_valid_id(doc_id)
+
+         self.client.delete(
+             collection_name=self.collection_name,
+             points_selector=[qdrant_id]
+         )
+         return True
+
+     def get_by_id(self, doc_id: str) -> Optional[Dict[str, Any]]:
+         """
+         Get a document by ID (supports both MongoDB ObjectId and UUID)
+
+         Args:
+             doc_id: Document ID (MongoDB ObjectId or UUID)
+
+         Returns:
+             Document data, or None if not found
+         """
+         # Convert to a UUID if it is a MongoDB ObjectId
+         qdrant_id = self._convert_to_valid_id(doc_id)
+
+         try:
+             result = self.client.retrieve(
+                 collection_name=self.collection_name,
+                 ids=[qdrant_id],
+                 with_payload=True,
+                 with_vectors=False
+             )
+
+             if result:
+                 point = result[0]
+                 original_id = point.payload.get('original_id', point.id)
+                 return {
+                     "id": original_id,  # Original (e.g., MongoDB) ID
+                     "qdrant_id": point.id,  # UUID inside Qdrant
+                     "metadata": point.payload
+                 }
+             return None
+         except Exception as e:
+             print(f"Error retrieving document: {e}")
+             return None
+
+     def search_by_metadata(
+         self,
+         filter_conditions: Dict,
+         limit: int = 100
+     ) -> List[Dict[str, Any]]:
+         """
+         Search documents by metadata conditions (no embedding needed)
+
+         Args:
+             filter_conditions: Qdrant filter conditions
+             limit: Maximum number of results
+
+         Returns:
+             List of matching documents
+         """
+         try:
+             result = self.client.scroll(
+                 collection_name=self.collection_name,
+                 scroll_filter=filter_conditions,
+                 limit=limit,
+                 with_payload=True,
+                 with_vectors=False
+             )
+
+             documents = []
+             for point in result[0]:  # result is a tuple (points, next_page_offset)
+                 original_id = point.payload.get('original_id', point.id)
+                 documents.append({
+                     "id": original_id,  # Original (e.g., MongoDB) ID
+                     "qdrant_id": point.id,  # UUID inside Qdrant
+                     "metadata": point.payload
+                 })
+
+             return documents
+         except Exception as e:
+             print(f"Error searching by metadata: {e}")
+             return []
+
+     def get_collection_info(self) -> Dict[str, Any]:
+         """
+         Get collection info
+
+         Returns:
+             Collection info
+         """
+         info = self.client.get_collection(collection_name=self.collection_name)
+         return {
+             "vectors_count": info.vectors_count,
+             "points_count": info.points_count,
+             "status": info.status,
+             "config": {
+                 "distance": info.config.params.vectors.distance,
+                 "size": info.config.params.vectors.size,
+             }
+         }
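
A minimal end-to-end sketch of this service (not part of the commit). It assumes QDRANT_URL and QDRANT_API_KEY are exported and uses a small throwaway collection so it runs quickly:

    import numpy as np
    from qdrant_service import QdrantVectorService

    svc = QdrantVectorService(collection_name="demo_vectors", vector_size=4)

    # Two unit-norm vectors standing in for real Jina CLIP embeddings
    vecs = np.random.rand(2, 4).astype(np.float32)
    vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)

    mappings = svc.batch_index(
        doc_ids=["demo-id-1", "demo-id-2"],  # non-UUID IDs are mapped via uuid5
        embeddings=vecs,
        metadata_list=[{"text": "doc one"}, {"text": "doc two"}],
    )
    print(mappings)
    print(svc.search(query_embedding=vecs[0], limit=1))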
requirements.txt ADDED
@@ -0,0 +1,27 @@
+ # FastAPI and web server
+ fastapi==0.115.5
+ uvicorn[standard]==0.32.1
+ python-multipart==0.0.20
+
+ # Gradio for Hugging Face Spaces
+ gradio>=4.0.0
+
+ # Machine learning & embeddings
+ torch>=2.0.0
+ transformers>=4.50.0
+ onnxruntime==1.20.1
+ torchvision>=0.15.0
+ pillow>=10.0.0
+ numpy>=1.24.0
+
+ # Vector database
+ qdrant-client>=1.12.1
+ grpcio>=1.60.0
+
+ # Utilities
+ pydantic>=2.0.0
+ python-dotenv==1.0.0
+
+ # MongoDB
+ pymongo>=4.6.0
+ huggingface-hub>=0.20.0
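
With the pins above, pip install -r requirements.txt should set up either entry point: python chatbot_rag.py for the Gradio UI, or python chatbot_rag_api.py / python main.py for the FastAPI servers, provided the MongoDB, Qdrant, and Hugging Face credentials are configured.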