Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
| 2 |
"""
|
| 3 |
GAIA Benchmark AI Agent - Hugging Face Space
|
| 4 |
============================================
|
| 5 |
-
|
| 6 |
A Gradio-based web interface for running GAIA benchmark evaluations
|
| 7 |
on Hugging Face Spaces with GPU acceleration.
|
| 8 |
"""
|
|
@@ -45,16 +44,6 @@ logging.basicConfig(level=logging.INFO)
|
|
| 45 |
logger = logging.getLogger(__name__)
|
| 46 |
|
| 47 |
# ================================
|
| 48 |
-
# MAIN APPLICATION
|
| 49 |
-
# ================================
|
| 50 |
-
|
| 51 |
-
if __name__ == "__main__":
|
| 52 |
-
app = create_gaia_app()
|
| 53 |
-
app.launch(
|
| 54 |
-
server_name="0.0.0.0",
|
| 55 |
-
server_port=7860,
|
| 56 |
-
share=False
|
| 57 |
-
)
|
| 58 |
# CORE DATA STRUCTURES
|
| 59 |
# ================================
|
| 60 |
|
|
@@ -90,9 +79,7 @@ class GAIAPromptManager:
|
|
| 90 |
"""Manages GAIA-specific prompting and formatting"""
|
| 91 |
|
| 92 |
GAIA_SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
|
| 93 |
-
|
| 94 |
FINAL ANSWER: [YOUR FINAL ANSWER]
|
| 95 |
-
|
| 96 |
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
|
| 97 |
|
| 98 |
@staticmethod
|
|
@@ -453,18 +440,15 @@ class GAIASpaceAgent:
|
|
| 453 |
|
| 454 |
summary = f"""
|
| 455 |
# 📊 GAIA Evaluation Summary
|
| 456 |
-
|
| 457 |
## Overall Statistics
|
| 458 |
- **Total Questions**: {total}
|
| 459 |
- **Successful**: {successful}
|
| 460 |
- **Errors**: {errors}
|
| 461 |
- **Success Rate**: {(successful/total*100):.1f}%
|
| 462 |
-
|
| 463 |
## Performance Metrics
|
| 464 |
- **Average Processing Time**: {avg_time:.2f}s
|
| 465 |
- **Total Processing Time**: {total_time:.2f}s
|
| 466 |
- **Questions per Minute**: {(total/(total_time/60)):.1f}
|
| 467 |
-
|
| 468 |
## Model Information
|
| 469 |
- **Model**: {self.current_model}
|
| 470 |
- **Device**: {self.model_manager.device.upper() if self.model_manager else 'Unknown'}
|
|
@@ -480,17 +464,11 @@ class GAIASpaceAgent:
|
|
| 480 |
|
| 481 |
detailed += f"""
|
| 482 |
## Question {i}: {question.task_id} {status}
|
| 483 |
-
|
| 484 |
**Question**: {question.question}
|
| 485 |
-
|
| 486 |
**Model Answer**: {result.final_answer}
|
| 487 |
-
|
| 488 |
**Expected Answer**: {question.final_answer if question.final_answer else 'N/A'}
|
| 489 |
-
|
| 490 |
**Processing Time**: {result.processing_time:.2f}s
|
| 491 |
-
|
| 492 |
**Level**: {question.level}
|
| 493 |
-
|
| 494 |
---
|
| 495 |
"""
|
| 496 |
|
|
@@ -850,3 +828,14 @@ def create_gaia_app():
|
|
| 850 |
|
| 851 |
return app
|
| 852 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
"""
|
| 3 |
GAIA Benchmark AI Agent - Hugging Face Space
|
| 4 |
============================================
|
|
|
|
| 5 |
A Gradio-based web interface for running GAIA benchmark evaluations
|
| 6 |
on Hugging Face Spaces with GPU acceleration.
|
| 7 |
"""
|
|
|
|
| 44 |
logger = logging.getLogger(__name__)
|
| 45 |
|
| 46 |
# ================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
# CORE DATA STRUCTURES
|
| 48 |
# ================================
|
| 49 |
|
|
|
|
| 79 |
"""Manages GAIA-specific prompting and formatting"""
|
| 80 |
|
| 81 |
GAIA_SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
|
|
|
|
| 82 |
FINAL ANSWER: [YOUR FINAL ANSWER]
|
|
|
|
| 83 |
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
|
| 84 |
|
| 85 |
@staticmethod
|
|
|
|
| 440 |
|
| 441 |
summary = f"""
|
| 442 |
# 📊 GAIA Evaluation Summary
|
|
|
|
| 443 |
## Overall Statistics
|
| 444 |
- **Total Questions**: {total}
|
| 445 |
- **Successful**: {successful}
|
| 446 |
- **Errors**: {errors}
|
| 447 |
- **Success Rate**: {(successful/total*100):.1f}%
|
|
|
|
| 448 |
## Performance Metrics
|
| 449 |
- **Average Processing Time**: {avg_time:.2f}s
|
| 450 |
- **Total Processing Time**: {total_time:.2f}s
|
| 451 |
- **Questions per Minute**: {(total/(total_time/60)):.1f}
|
|
|
|
| 452 |
## Model Information
|
| 453 |
- **Model**: {self.current_model}
|
| 454 |
- **Device**: {self.model_manager.device.upper() if self.model_manager else 'Unknown'}
|
|
|
|
| 464 |
|
| 465 |
detailed += f"""
|
| 466 |
## Question {i}: {question.task_id} {status}
|
|
|
|
| 467 |
**Question**: {question.question}
|
|
|
|
| 468 |
**Model Answer**: {result.final_answer}
|
|
|
|
| 469 |
**Expected Answer**: {question.final_answer if question.final_answer else 'N/A'}
|
|
|
|
| 470 |
**Processing Time**: {result.processing_time:.2f}s
|
|
|
|
| 471 |
**Level**: {question.level}
|
|
|
|
| 472 |
---
|
| 473 |
"""
|
| 474 |
|
|
|
|
| 828 |
|
| 829 |
return app
|
| 830 |
|
| 831 |
+
# ================================
|
| 832 |
+
# MAIN APPLICATION
|
| 833 |
+
# ================================
|
| 834 |
+
|
| 835 |
+
if __name__ == "__main__":
|
| 836 |
+
app = create_gaia_app()
|
| 837 |
+
app.launch(
|
| 838 |
+
server_name="0.0.0.0",
|
| 839 |
+
server_port=7860,
|
| 840 |
+
share=False
|
| 841 |
+
)
|