Spaces:
Sleeping
Sleeping
Add debug message to benchmark mode in the HF UI
Browse files
- When the model's output can't be parsed successfully, the original model response is displayed.
- Benchmark mode: add a failure signal.
- app.py +3 -1
- benchmark.py +6 -1
- geo_bot.py +2 -0
app.py
CHANGED
|
@@ -306,7 +306,9 @@ if start_button:
|
|
| 306 |
|
| 307 |
st.write("**AI Reasoning:**")
|
| 308 |
st.info(step_info.get("reasoning", "N/A"))
|
| 309 |
-
|
|
|
|
|
|
|
| 310 |
st.write("**AI Action:**")
|
| 311 |
if action == "GUESS":
|
| 312 |
lat = step_info.get("action_details", {}).get("lat")
|
|
|
|
| 306 |
|
| 307 |
st.write("**AI Reasoning:**")
|
| 308 |
st.info(step_info.get("reasoning", "N/A"))
|
| 309 |
+
if step_info.get("debug_message") != "N/A":
|
| 310 |
+
st.write("**AI Debug Message:**")
|
| 311 |
+
st.code(step_info.get("debug_message"), language="json")
|
| 312 |
st.write("**AI Action:**")
|
| 313 |
if action == "GUESS":
|
| 314 |
lat = step_info.get("action_details", {}).get("lat")
|
benchmark.py
CHANGED
|
@@ -99,6 +99,9 @@ class MapGuesserBenchmark:
|
|
| 99 |
print(f"π Sample {i + 1}/{len(test_samples)}")
|
| 100 |
try:
|
| 101 |
result = self.run_single_test_with_bot(bot, sample)
|
|
|
|
|
|
|
|
|
|
| 102 |
all_results.append(result)
|
| 103 |
|
| 104 |
status = (
|
|
@@ -154,6 +157,8 @@ class MapGuesserBenchmark:
|
|
| 154 |
}
|
| 155 |
|
| 156 |
predicted_lat_lon = bot.analyze_image(screenshot)
|
|
|
|
|
|
|
| 157 |
inference_time = time.time() - start_time
|
| 158 |
|
| 159 |
true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
|
|
@@ -163,7 +168,7 @@ class MapGuesserBenchmark:
|
|
| 163 |
print(f"π True coords: {true_coords}")
|
| 164 |
print(f"π Predicted coords: {predicted_lat_lon}")
|
| 165 |
distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
|
| 166 |
-
|
| 167 |
is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
|
| 168 |
|
| 169 |
return {
|
|
|
|
| 99 |
print(f"π Sample {i + 1}/{len(test_samples)}")
|
| 100 |
try:
|
| 101 |
result = self.run_single_test_with_bot(bot, sample)
|
| 102 |
+
if result is None:
|
| 103 |
+
print(f"β Sample_{i+1} test failed: No predicted coords")
|
| 104 |
+
continue
|
| 105 |
all_results.append(result)
|
| 106 |
|
| 107 |
status = (
|
|
|
|
| 157 |
}
|
| 158 |
|
| 159 |
predicted_lat_lon = bot.analyze_image(screenshot)
|
| 160 |
+
if predicted_lat_lon is None:
|
| 161 |
+
return None
|
| 162 |
inference_time = time.time() - start_time
|
| 163 |
|
| 164 |
true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
|
|
|
|
| 168 |
print(f"π True coords: {true_coords}")
|
| 169 |
print(f"π Predicted coords: {predicted_lat_lon}")
|
| 170 |
distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
|
| 171 |
+
|
| 172 |
is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
|
| 173 |
|
| 174 |
return {
|
geo_bot.py
CHANGED
|
@@ -250,6 +250,7 @@ class GeoBot:
|
|
| 250 |
decision = {
|
| 251 |
"reasoning": "Recovery due to parsing failure or model error.",
|
| 252 |
"action_details": {"action": "PAN_RIGHT"},
|
|
|
|
| 253 |
}
|
| 254 |
|
| 255 |
return decision
|
|
@@ -347,6 +348,7 @@ class GeoBot:
|
|
| 347 |
"reasoning": decision.get("reasoning", "N/A"),
|
| 348 |
"action_details": decision.get("action_details", {"action": "N/A"}),
|
| 349 |
"history": history.copy(), # History up to current step (excluding current)
|
|
|
|
| 350 |
}
|
| 351 |
|
| 352 |
action_details = decision.get("action_details", {})
|
|
|
|
| 250 |
decision = {
|
| 251 |
"reasoning": "Recovery due to parsing failure or model error.",
|
| 252 |
"action_details": {"action": "PAN_RIGHT"},
|
| 253 |
+
"debug_message": f"{response.content.strip()}",
|
| 254 |
}
|
| 255 |
|
| 256 |
return decision
|
|
|
|
| 348 |
"reasoning": decision.get("reasoning", "N/A"),
|
| 349 |
"action_details": decision.get("action_details", {"action": "N/A"}),
|
| 350 |
"history": history.copy(), # History up to current step (excluding current)
|
| 351 |
+
"debug_message": decision.get("debug_message", "N/A"),
|
| 352 |
}
|
| 353 |
|
| 354 |
action_details = decision.get("action_details", {})
|