Spaces:

Omniscient001
/

Omniscient

Sleeping

App Files Files Community

Andy Lee committed on Jun 11

Commit

5e47334

1 Parent(s): 1c04950

fix: qwen 2.5 not supported

Browse files

Files changed (2) hide show

app.py +28 -25
config.py +6 -6

app.py CHANGED Viewed

@@ -163,43 +163,44 @@ if start_button:
                                 key=f"history_{i}_{step}",
                             )
-                            # Get AI response
-                            with st.spinner("AI thinking..."):
                                 message = bot._create_message_with_history(
                                     prompt, [h["image_b64"] for h in history]
                                 )
                                 response = bot.model.invoke(message)
                                 decision = bot._parse_agent_response(response)
-                            if not decision:
-                                decision = {
-                                    "action_details": {"action": "PAN_RIGHT"},
-                                    "reasoning": "Fallback",
-                                }
-                            action = decision.get("action_details", {}).get("action")
-                            history[-1]["action"] = action
-                            # Show AI decision
-                            st.write("**AI Reasoning:**")
-                            st.info(decision.get("reasoning", "N/A"))
-                            st.write("**AI Action:**")
-                            st.success(f"`{action}`")
-                            # Show raw response
-                            with st.expander("Raw AI Response"):
-                                st.text(response.content)
-                        # Force guess on last step
-                        if step_num == steps_per_sample and action != "GUESS":
-                            st.warning("Max steps reached. Forcing GUESS.")
-                            action = "GUESS"
                         # Execute action
                         if action == "GUESS":
-                            lat = decision.get("action_details", {}).get("lat")
-                            lon = decision.get("action_details", {}).get("lon")
                             if lat is not None and lon is not None:
                                 final_guess = (lat, lon)
                                 st.success(f"Final Guess: {lat:.4f}, {lon:.4f}")
@@ -213,6 +214,8 @@ if start_button:
                         elif action == "PAN_RIGHT":
                             bot.controller.pan_view("right")
                         time.sleep(1)
                 # Sample Results

                                 key=f"history_{i}_{step}",
                             )
+                            # Force guess on last step or get AI decision
+                            if step_num == steps_per_sample:
+                                action = "GUESS"
+                                st.warning("Max steps reached. Forcing GUESS.")
+                            else:
+                                # Get AI response
                                 message = bot._create_message_with_history(
                                     prompt, [h["image_b64"] for h in history]
                                 )
                                 response = bot.model.invoke(message)
                                 decision = bot._parse_agent_response(response)
+                                action = decision.get("action_details", {}).get(
+                                    "action"
+                                )
+                                history[-1]["action"] = action
+                                # Show AI decision
+                                st.write("**AI Reasoning:**")
+                                st.info(decision.get("reasoning", "N/A"))
+                                st.write("**AI Action:**")
+                                st.success(f"`{action}`")
+                                # Show raw response
+                                with st.expander("Raw AI Response"):
+                                    st.text(response.content)
                         # Execute action
                         if action == "GUESS":
+                            if step_num == steps_per_sample:
+                                # Forced guess - use fallback coordinates
+                                lat, lon = 0.0, 0.0
+                                st.error("Forced guess with fallback coordinates")
+                            else:
+                                lat = decision.get("action_details", {}).get("lat")
+                                lon = decision.get("action_details", {}).get("lon")
                             if lat is not None and lon is not None:
                                 final_guess = (lat, lon)
                                 st.success(f"Final Guess: {lat:.4f}, {lon:.4f}")
                         elif action == "PAN_RIGHT":
                             bot.controller.pan_view("right")
+                        # Auto scroll to bottom
+                        st.empty()  # Force refresh to show latest content
                         time.sleep(1)
                 # Sample Results

config.py CHANGED Viewed

@@ -48,15 +48,15 @@ MODELS_CONFIG = {
         "model_name": "gemini-1.5-pro-latest",
         "description": "Google Gemini 1.5 Pro",
     },
-    "qwen2.5-vl-7b": {
         "class": "HuggingFaceChat",
-        "model_name": "Qwen/Qwen2.5-VL-7B-Instruct",
-        "description": "Qwen2.5-VL 7B Vision-Language",
     },
-    "qwen2.5-vl-3b": {
         "class": "HuggingFaceChat",
-        "model_name": "Qwen/Qwen2.5-VL-3B-Instruct",
-        "description": "Qwen2.5-VL 3B Vision-Language",
     },
 }

         "model_name": "gemini-1.5-pro-latest",
         "description": "Google Gemini 1.5 Pro",
     },
+    "qwen2-vl-7b": {
         "class": "HuggingFaceChat",
+        "model_name": "Qwen/Qwen2-VL-7B-Instruct",
+        "description": "Qwen2-VL 7B (older but API supported)",
     },
+    "qwen2-vl-2b": {
         "class": "HuggingFaceChat",
+        "model_name": "Qwen/Qwen2-VL-2B-Instruct",
+        "description": "Qwen2-VL 2B (faster, API supported)",
     },
 }