Update chat_handler.py

chat_handler.py  CHANGED  (+40 -32)
@@ -75,7 +75,7 @@ class ChatHandler:
         user_text = ""
         user_files = []
         uploaded_file_urls = []  # Store uploaded file URLs
-        self.file_url_mapping = {} #
+        self.file_url_mapping = {}  # Map local paths to uploaded URLs
 
         try:
             # Handle multimodal input - message is a dict with 'text' and 'files'
@@ -124,6 +124,7 @@ class ChatHandler:
                     uploaded_url = self._upload_file_to_gradio_server(file_path)
                     # Store the mapping
                     self.file_url_mapping[file_path] = uploaded_url
+                    uploaded_file_urls.append(uploaded_url)
                     logger.info(f" ✅ Uploaded File URL: {uploaded_url}")
 
                     # Add to history with public URL
@@ -169,8 +170,8 @@ class ChatHandler:
         return history, gr.MultimodalTextbox(value=None, interactive=False)
 
     def _prepare_hf_messages(self, history: List, uploaded_file_urls: List[str] = None) -> List[Dict[str, Any]]:
-        """Convert history (ChatMessage or dict) to
-        messages = []
+        """Convert history (ChatMessage or dict) to HF OpenAI-compatible format with multimodal support"""
+        messages: List[Dict[str, Any]] = []
 
         # Get optimal context settings for current model/provider
         if self.mcp_client.current_model and self.mcp_client.current_provider:
@@ -186,6 +187,7 @@ class ChatHandler:
         # Convert history to HF API format (text only for context)
         recent_history = history[-max_history:] if len(history) > max_history else history
 
+        last_role = None
         for msg in recent_history:
             # Handle both ChatMessage objects and dictionary format for backward compatibility
             if hasattr(msg, 'role'):  # ChatMessage object
@@ -197,39 +199,45 @@ class ChatHandler:
             else:
                 continue  # Skip invalid messages
 
-            if role
-
-
-                if isinstance(content, dict):
-
-
-
-                        if file_path.startswith('http'):
-                            # It's already a public URL
-                            if AppConfig.is_image_file(file_path):
-                                content = f"[User uploaded an image: {file_path}]"
-                            elif AppConfig.is_audio_file(file_path):
-                                content = f"[User uploaded an audio file: {file_path}]"
-                            elif AppConfig.is_video_file(file_path):
-                                content = f"[User uploaded a video file: {file_path}]"
-                            else:
-                                content = f"[User uploaded a file: {file_path}]"
-                        else:
-                            # Local path - mention it's not accessible to remote servers
-                            content = f"[User uploaded a file (local path, not accessible to remote servers): {file_path}]"
+            if role == "user":
+                # Build multimodal user messages with parts
+                part = None
+                if isinstance(content, dict) and "path" in content:
+                    file_path = content.get("path", "")
+                    if isinstance(file_path, str) and file_path.startswith("http") and AppConfig.is_image_file(file_path):
+                        part = {"type": "image_url", "image_url": {"url": file_path}}
                     else:
-
+                        # Non-image or non-URL: fallback to text description
+                        part = {"type": "text", "text": f"[File: {file_path}]"}
                 elif isinstance(content, (list, tuple)):
-
+                    part = {"type": "text", "text": f"[List: {str(content)[:50]}...]"}
                 elif content is None:
-
+                    part = {"type": "text", "text": "[Empty]"}
                 else:
-
-
-                messages.
-                "
-
-
+                    part = {"type": "text", "text": str(content)}
+
+                if messages and last_role == "user" and isinstance(messages[-1].get("content"), list):
+                    messages[-1]["content"].append(part)
+                elif messages and last_role == "user" and isinstance(messages[-1].get("content"), str):
+                    # Convert existing string content to parts and append
+                    existing_text = messages[-1]["content"]
+                    messages[-1]["content"] = [{"type": "text", "text": existing_text}, part]
+                else:
+                    messages.append({"role": "user", "content": [part]})
+                last_role = "user"
+
+            elif role == "assistant":
+                # Assistant content remains text for chat.completions API
+                if isinstance(content, dict):
+                    text = f"[Object: {str(content)[:50]}...]"
+                elif isinstance(content, (list, tuple)):
+                    text = f"[List: {str(content)[:50]}...]"
+                elif content is None:
+                    text = "[Empty]"
+                else:
+                    text = str(content)
+                messages.append({"role": "assistant", "content": text})
+                last_role = "assistant"
 
         return messages
 
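For reference, below is a minimal, self-contained sketch of the message shape this change targets: consecutive user turns are merged into a single message whose content is a list of OpenAI-style parts (text or image_url), while assistant turns stay plain strings. The (role, content) tuple history and the suffix-based is_image_url helper are simplifications for illustration only; they stand in for the handler's ChatMessage/dict history handling and for AppConfig.is_image_file, and are not part of the repository.

from typing import Any, Dict, List, Tuple

IMAGE_SUFFIXES = (".png", ".jpg", ".jpeg", ".gif", ".webp")


def is_image_url(path: Any) -> bool:
    # Simplified stand-in for the http check + AppConfig.is_image_file used in the diff.
    return isinstance(path, str) and path.startswith("http") and path.lower().endswith(IMAGE_SUFFIXES)


def build_messages(history: List[Tuple[str, Any]]) -> List[Dict[str, Any]]:
    messages: List[Dict[str, Any]] = []
    last_role = None
    for role, content in history:
        if role == "user":
            # Each user turn becomes one OpenAI-style content part.
            if isinstance(content, dict) and "path" in content:
                file_path = content.get("path", "")
                if is_image_url(file_path):
                    part = {"type": "image_url", "image_url": {"url": file_path}}
                else:
                    part = {"type": "text", "text": f"[File: {file_path}]"}
            elif content is None:
                part = {"type": "text", "text": "[Empty]"}
            else:
                part = {"type": "text", "text": str(content)}

            # Merge consecutive user turns into one message with several parts.
            if messages and last_role == "user" and isinstance(messages[-1]["content"], list):
                messages[-1]["content"].append(part)
            else:
                messages.append({"role": "user", "content": [part]})
            last_role = "user"
        elif role == "assistant":
            # Assistant turns stay plain text for the chat.completions API.
            messages.append({"role": "assistant", "content": "" if content is None else str(content)})
            last_role = "assistant"
    return messages


if __name__ == "__main__":
    history = [
        ("user", "What is in this picture?"),
        ("user", {"path": "https://example.com/files/cat.png"}),  # illustrative URL only
        ("assistant", "It looks like a cat."),
    ]
    for msg in build_messages(history):
        print(msg)

Note that image_url parts are only emitted for http(s) URLs, which is why the first two hunks record the URLs returned by _upload_file_to_gradio_server: as the removed comment points out, local paths are not accessible to remote inference servers.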