Spaces:

Adilbai
/

Gemini-GAIA-Agent

Running

App Files Files Community

Gemini-GAIA-Agent / app.py

Adilbai

Update app.py

2a38c5a verified 5 months ago

raw

history blame contribute delete

26.2 kB

	import os
	import json
	import tempfile
	import logging
	from typing import Dict, List, Any, Optional, Tuple
	from datetime import datetime
	import asyncio

	import gradio as gr
	import pandas as pd
	from agent import GeminiGAIAAgent, GAIAQuestion

	# Configure the root logger once, at import time, so INFO-and-above records are emitted.
	# NOTE: any later logging.basicConfig() call is ignored unless it passes force=True.
	logging.basicConfig(level=logging.INFO)
	# Module-level logger shared by the app class and main() below.
	logger = logging.getLogger(__name__)

	class GeminiGAIAApp:
	"""
	Gradio application for Gemini-powered GAIA Benchmark Agent
	Hugging Face Agents Course - Unit 4 Final Assignment
	"""

	def __init__(self):
	self.agent = None
	self.conversation_history = []
	self.current_question_id = 0

	# Agent metadata
	self.agent_info = {
	"name": "Gemini GAIA Benchmark Agent",
	"author": "AdilzhanB",
	"course": "Hugging Face Agents Course - Unit 4",
	"model": "Google Gemini 2.0-flash-001",
	"version": "1.0",
	"created": "2025-06-17 15:32:22",
	"capabilities": [
	"Complex multi-step reasoning",
	"Advanced mathematical calculations",
	"Real-time web search",
	"Multimodal file analysis",
	"Natural language understanding"
	]
	}

	# Huggingface repository link
	self.agent_code_link = "https://huggingface.co/spaces/AdilzhanB/Gemini-GAIA-Agent"

	def _initialize_agent(self, api_key: Optional[str] = None):
	    """Construct a new GeminiGAIAAgent and store it on ``self.agent``.

	    Args:
	        api_key: Value forwarded unchanged as the agent's ``api_key`` argument.

	    Returns:
	        A status string starting with "✅" on success, or with "❌" followed by
	        the error text on failure. On failure ``self.agent`` is reset to ``None``.
	    """
	    agent_settings = {
	        "model_name": "gemini-2.0-flash-001",
	        "api_key": api_key,
	        "temperature": 0.1,
	        "verbose": False,
	    }
	    try:
	        self.agent = GeminiGAIAAgent(**agent_settings)
	        logger.info("Gemini agent initialized successfully")
	        return "✅ Agent initialized successfully!"
	    except Exception as exc:
	        failure = f"Failed to initialize agent: {str(exc)}"
	        logger.error(failure)
	        # Never leave a half-built agent behind.
	        self.agent = None
	        return f"❌ {failure}"

	def solve_question(self,
	question_text: str,
	difficulty_level: int,
	uploaded_file,
	api_key: Optional[str] = None) -> Tuple[str, str, str, str, str, str]:
	"""
	Main function to solve GAIA questions

	Returns: (reasoning, tools_used, confidence, processing_time, final_answer, status)
	"""
	try:
	# Initialize agent if needed or API key changed
	if not self.agent or (api_key and api_key.strip()):
	init_status = self._initialize_agent(api_key.strip() if api_key else None)
	if "❌" in init_status:
	return "", "", "", "", "", init_status

	if not self.agent:
	return "", "", "", "", "", "❌ Agent not initialized. Please provide a valid Google API key."

	if not question_text.strip():
	return "", "", "", "", "", "❌ Please enter a question."

	# Handle file upload
	file_path = None
	file_name = None
	if uploaded_file is not None:
	file_path = uploaded_file.name
	file_name = os.path.basename(file_path)

	# Create GAIA question
	self.current_question_id += 1
	gaia_question = GAIAQuestion(
	question_id=f"user_question_{self.current_question_id}",
	question=question_text,
	level=difficulty_level,
	file_path=file_path,
	file_name=file_name
	)

	# Solve the question
	logger.info(f"Solving question: {question_text[:50]}...")
	result = self.agent.solve_gaia_question(gaia_question)

	# Store in conversation history
	self.conversation_history.append({
	"timestamp": datetime.now().isoformat(),
	"question": question_text,
	"result": result
	})

	# Extract results
	if result.get("error"):
	return "", "", "", "", "", f"❌ Error: {result.get('agent_response', 'Unknown error')}"

	# Format reasoning steps
	reasoning_steps = "\n".join([
	f"{i+1}. {step}" for i, step in enumerate(result.get("reasoning_steps", []))
	])
	if not reasoning_steps:
	reasoning_steps = "Gemini processed the question using its internal reasoning."

	# Format tools used
	tools_used = ", ".join(result.get("tools_used", ["None"]))
	if not tools_used or tools_used == "None":
	tools_used = "Gemini's built-in capabilities"

	# Get other metrics
	confidence = f"{result.get('confidence_score', 0.0):.2f}"
	processing_time = f"{result.get('processing_time_seconds', 0):.2f}s"
	final_answer = result.get("agent_response", "No answer generated")

	# Success status
	status = f"✅ Question solved successfully! (Model: {result.get('model_used', 'Gemini')})"

	logger.info(f"Question solved successfully. Tools: {tools_used}, Confidence: {confidence}")

	return (
	reasoning_steps,
	tools_used,
	confidence,
	processing_time,
	final_answer,
	status
	)

	except Exception as e:
	error_msg = f"❌ Error solving question: {str(e)}"
	logger.error(error_msg)
	return "", "", "", "", "", error_msg

	def get_conversation_history(self) -> str:
	"""Get formatted conversation history"""
	if not self.conversation_history:
	return "No questions solved yet. Try asking a GAIA-style question!"

	history_text = "## 📚 Recent Conversation History\n\n"

	for i, entry in enumerate(self.conversation_history[-5:], 1): # Show last 5
	result = entry['result']

	history_text += f"### Question {i}\n"
	history_text += f"Asked: {entry['question'][:150]}...\n"
	history_text += f"Level: {result.get('level', 'N/A')}\n"
	history_text += f"Tools Used: {', '.join(result.get('tools_used', ['None']))}\n"
	history_text += f"Confidence: {result.get('confidence_score', 0):.2f}\n"
	history_text += f"Answer Preview: {result.get('agent_response', 'No answer')[:200]}...\n"
	history_text += f"Time: {entry['timestamp'][:19]}\n\n"
	history_text += "---\n\n"

	return history_text

	def clear_history(self) -> str:
	"""Clear conversation history"""
	self.conversation_history = []
	self.current_question_id = 0
	return "🗑️ History cleared successfully!"

	def test_agent_capabilities(self, api_key: Optional[str] = None) -> str:
	"""Test agent and tool capabilities"""
	try:
	# Initialize agent if needed
	if not self.agent or (api_key and api_key.strip()):
	init_status = self._initialize_agent(api_key.strip() if api_key else None)
	if "❌" in init_status:
	return init_status

	if not self.agent:
	return "❌ Agent not initialized. Please provide a valid Google API key."

	# Test tools
	tool_results = self.agent.test_tools()

	result_text = "## 🔧 Agent Capability Test Results\n\n"
	result_text += f"Model: {self.agent.model_name}\n"
	result_text += f"Status: {'✅ Initialized' if self.agent.model else '❌ Not initialized'}\n\n"

	result_text += "### Tool Test Results\n"

	for tool_name, result in tool_results.items():
	status_icon = "✅" if "✅" in result else "❌"
	result_text += f"{status_icon} {tool_name.title()}: {result}\n"

	result_text += "\n### Available Capabilities\n"
	for capability in self.agent_info["capabilities"]:
	result_text += f"- ✅ {capability}\n"

	return result_text

	except Exception as e:
	return f"❌ Error testing agent: {str(e)}"

	def get_example_question(self, level: int, example_type: str) -> Tuple[str, int]:
	"""Get example questions based on level and type"""
	examples = {
	1: {
	"math": "What is the square root of 144?",
	"factual": "What is the capital of Japan?",
	"conversion": "Convert 100 degrees Fahrenheit to Celsius"
	},
	2: {
	"financial": "If I invest $1000 at 5% annual compound interest, how much will I have after 3 years?",
	"current": "What is the current population of Tokyo according to the latest data?",
	"analysis": "Calculate the average temperature if the daily temperatures were 72°F, 75°F, 68°F, and 71°F"
	},
	3: {
	"complex": "Based on current economic indicators, what are the main recession risks for 2024?",
	"research": "Compare the GDP growth rates of the top 5 economies in 2023 and identify key trends",
	"multimodal": "Analyze any uploaded data file and provide insights about patterns and trends"
	}
	}

	question = examples.get(level, {}).get(example_type, "What is 2 + 2?")
	return question, level

	def create_interface(self):
	    """Build the Gradio Blocks UI and wire its events to this app's methods.

	    The layout is: header, agent info box, API-key row, question input with
	    quick-example buttons, answer/metrics outputs, a collapsible reasoning
	    panel, three tabs (capabilities, history, about) and a footer.

	    Returns:
	        The assembled ``gr.Blocks`` object. It is not launched here.
	    """
	    # Single source of truth for the model name shown anywhere in the UI.
	    model_label = self.agent_info['model']

	    def example_loader(level, example_type):
	        """Return a zero-argument callback that loads one fixed example."""
	        def load_example():
	            return self.get_example_question(level, example_type)
	        return load_example

	    # Custom CSS for professional styling
	    custom_css = """
	    .gradio-container {
	    max-width: 1400px !important;
	    margin: 0 auto;
	    }
	    .main-header {
	    text-align: center;
	    background: linear-gradient(90deg, #4285f4, #34a853, #fbbc05, #ea4335);
	    -webkit-background-clip: text;
	    -webkit-text-fill-color: transparent;
	    background-clip: text;
	    margin-bottom: 20px;
	    }
	    .info-box {
	    background-color: #c0c7cf;
	    border-left: 4px solid #4285f4;
	    padding: 15px;
	    margin: 10px 0;
	    border-radius: 5px;
	    }
	    """

	    with gr.Blocks(css=custom_css, title="Gemini GAIA Agent", theme=gr.themes.Soft()) as interface:

	        # Main Header
	        gr.HTML("""
	        <div class="main-header">
	        <h1>🚀 Gemini GAIA Benchmark Agent</h1>
	        </div>
	        """)

	        # Agent Information
	        with gr.Row():
	            gr.Markdown(f"""
	            <div class="info-box">
	            <h3>🤖 Agent Information</h3>
	            <ul>
	            <li><strong>Created by:</strong> {self.agent_info['author']}</li>
	            <li><strong>Course:</strong> {self.agent_info['course']}</li>
	            <li><strong>Model:</strong> {model_label}</li>
	            <li><strong>Version:</strong> {self.agent_info['version']}</li>
	            <li><strong>Date:</strong> {self.agent_info['created']}</li>
	            </ul>
	            </div>
	            """)

	        # API Key Configuration
	        with gr.Row():
	            with gr.Column():
	                api_key_input = gr.Textbox(
	                    label="🔑 Google API Key (Required)",
	                    placeholder="Enter your Google AI API key here...",
	                    type="password",
	                    info="Get your free API key from: https://makersuite.google.com/app/apikey"
	                )
	                test_agent_btn = gr.Button("🧪 Test Agent & Tools", variant="secondary")

	        # Main Question Interface
	        gr.Markdown("## 💭 Ask Your GAIA Question")

	        with gr.Row():
	            # Left Panel - Input
	            with gr.Column(scale=2):
	                question_input = gr.Textbox(
	                    label="📝 Your Question",
	                    placeholder="Enter your GAIA-style question here...\n\nExamples:\n- What is the compound interest on $1000 at 5% for 3 years?\n- What is the current population of Tokyo?\n- Analyze the uploaded CSV data and find patterns",
	                    lines=4,
	                    max_lines=8
	                )

	                # NOTE(review): slider and file picker are assumed to share
	                # this row - confirm against the intended layout.
	                with gr.Row():
	                    difficulty_slider = gr.Slider(
	                        label="🎯 Difficulty Level",
	                        minimum=1,
	                        maximum=3,
	                        value=2,
	                        step=1,
	                        info="1=Basic | 2=Intermediate | 3=Advanced"
	                    )

	                    file_upload = gr.File(
	                        label="📎 Upload File (Optional)",
	                        file_types=[".txt", ".csv", ".json", ".xlsx", ".png", ".jpg", ".jpeg", ".gif", ".pdf"],
	                    )

	                solve_button = gr.Button(
	                    "🚀 Solve with Gemini",
	                    variant="primary",
	                    size="lg",
	                    scale=2
	                )

	            # Right Panel - Quick Examples
	            with gr.Column(scale=1):
	                gr.Markdown("### 📚 Quick Examples")

	                # Level 1 Examples
	                gr.Markdown("Level 1 (Basic)")
	                with gr.Row():
	                    math_btn = gr.Button("🧮 Math", size="sm")
	                    factual_btn = gr.Button("🌍 Factual", size="sm")
	                    convert_btn = gr.Button("🔄 Convert", size="sm")

	                # Level 2 Examples
	                gr.Markdown("Level 2 (Intermediate)")
	                with gr.Row():
	                    finance_btn = gr.Button("💰 Finance", size="sm")
	                    current_btn = gr.Button("📊 Current", size="sm")
	                    analysis_btn = gr.Button("📈 Analysis", size="sm")

	                # Level 3 Examples
	                gr.Markdown("Level 3 (Advanced)")
	                with gr.Row():
	                    complex_btn = gr.Button("🧠 Complex", size="sm")
	                    research_btn = gr.Button("🔬 Research", size="sm")
	                    multimodal_btn = gr.Button("🖼️ Multimodal", size="sm")

	        # Output Section
	        gr.Markdown("## 🎯 Agent Response")

	        with gr.Row():
	            # Main Answer
	            with gr.Column(scale=2):
	                final_answer_output = gr.Textbox(
	                    label="🤖 Gemini's Answer",
	                    lines=8,
	                    max_lines=15,
	                    show_copy_button=True,
	                    info="Complete response with reasoning and solution"
	                )

	            # Metrics
	            with gr.Column(scale=1):
	                confidence_output = gr.Textbox(
	                    label="📊 Confidence Score",
	                    max_lines=1,
	                    info="Agent's confidence in the answer"
	                )

	                processing_time_output = gr.Textbox(
	                    label="⏱️ Processing Time",
	                    max_lines=1,
	                    info="Time taken to solve"
	                )

	                tools_used_output = gr.Textbox(
	                    label="🔧 Tools Used",
	                    max_lines=3,
	                    info="Which capabilities were utilized"
	                )

	                status_output = gr.Textbox(
	                    label="✅ Status",
	                    max_lines=2,
	                    info="Execution status and model info"
	                )

	        # Detailed Reasoning (Expandable)
	        with gr.Accordion("🔍 Detailed Reasoning Steps", open=False):
	            reasoning_output = gr.Textbox(
	                label="Step-by-Step Reasoning",
	                lines=10,
	                show_copy_button=True,
	                info="Detailed breakdown of the solution process"
	            )

	        # Additional Features Tabs
	        with gr.Tabs():
	            # Tool Testing Tab
	            with gr.TabItem("🛠️ Agent Capabilities"):
	                tool_test_output = gr.Markdown(
	                    "Click 'Test Agent & Tools' above to check all capabilities.",
	                    elem_classes=["info-box"]
	                )

	                gr.Markdown("""
	                ### 🎯 GAIA Benchmark Capabilities

	                This agent is designed to excel at:

	                - 🧠 Complex Reasoning: Multi-step logical problem solving
	                - 🧮 Mathematical Operations: Advanced calculations and financial modeling
	                - 🔍 Web Search: Real-time information retrieval using DuckDuckGo
	                - 📄 File Analysis: Processing text, CSV, JSON, and image files
	                - 🖼️ Multimodal Understanding: Analyzing images with Gemini's vision capabilities
	                - 📊 Data Processing: Statistical analysis and pattern recognition
	                """)

	            # History Tab
	            with gr.TabItem("📚 Conversation History"):
	                with gr.Row():
	                    refresh_history_btn = gr.Button("🔄 Refresh History", variant="secondary")
	                    clear_history_btn = gr.Button("🗑️ Clear History", variant="stop")

	                history_output = gr.Markdown(
	                    "No questions solved yet. Start by asking a GAIA question!",
	                    elem_classes=["info-box"]
	                )

	            # Documentation Tab
	            with gr.TabItem("📖 About GAIA"):
	                gr.Markdown(f"""
	                ### 🎯 What is GAIA?

	                GAIA (General AI Assistants) is a comprehensive benchmark designed to evaluate AI assistants on real-world tasks that require:

	                #### 🧠 Core Capabilities Tested
	                - Reasoning: Complex multi-step problem solving and logical inference
	                - Multimodal Understanding: Processing text, images, documents, and data files
	                - Web Browsing: Searching for and utilizing current information
	                - Tool Use: Effective integration and use of various computational tools

	                #### 📊 Difficulty Levels
	                - Level 1: Basic factual questions and simple reasoning tasks
	                - Level 2: Multi-step problems requiring tool integration
	                - Level 3: Complex tasks requiring advanced reasoning and multiple tools

	                #### 🚀 This Agent's Approach
	                This implementation uses {model_label} for its:
	                - Superior multimodal capabilities (text + images)
	                - Advanced reasoning and problem-solving
	                - Large context window for complex tasks
	                - Built-in safety and reliability features

	                #### 🔗 Technical Details
	                - Model: {model_label}
	                - Framework: Custom Python implementation
	                - Tools: Calculator, Web Search, File Analyzer
	                - Interface: Gradio 4.0+
	                - Author: {self.agent_info['author']}

	                #### 📚 Resources
	                - [GAIA Benchmark Paper](https://arxiv.org/abs/2311.12983)
	                - [GAIA Dataset](https://huggingface.co/datasets/gaia-benchmark/GAIA)
	                - [Google AI Studio](https://makersuite.google.com/)
	                - [Course Repository]({self.agent_code_link})
	                """)

	        # Wire up all the interactions

	        # Main solve function; output order must match solve_question's tuple.
	        solve_button.click(
	            self.solve_question,
	            inputs=[question_input, difficulty_slider, file_upload, api_key_input],
	            outputs=[reasoning_output, tools_used_output, confidence_output,
	                     processing_time_output, final_answer_output, status_output]
	        )

	        # Tool testing
	        test_agent_btn.click(
	            self.test_agent_capabilities,
	            inputs=[api_key_input],
	            outputs=[tool_test_output]
	        )

	        # History management
	        refresh_history_btn.click(
	            self.get_conversation_history,
	            outputs=[history_output]
	        )

	        clear_history_btn.click(
	            self.clear_history,
	            outputs=[history_output]
	        )

	        # Example buttons: each fills the question box and sets the slider.
	        example_buttons = [
	            (math_btn, 1, "math"),
	            (factual_btn, 1, "factual"),
	            (convert_btn, 1, "conversion"),
	            (finance_btn, 2, "financial"),
	            (current_btn, 2, "current"),
	            (analysis_btn, 2, "analysis"),
	            (complex_btn, 3, "complex"),
	            (research_btn, 3, "research"),
	            (multimodal_btn, 3, "multimodal"),
	        ]
	        for button, level, example_type in example_buttons:
	            # example_loader binds level/type per button, avoiding the
	            # late-binding closure pitfall of a lambda defined in a loop.
	            button.click(
	                example_loader(level, example_type),
	                outputs=[question_input, difficulty_slider]
	            )

	        # Footer
	        gr.HTML(f"""
	        <div style="text-align: center; margin-top: 40px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
	        <h3>🎓 Hugging Face Agents Course - Unit 4 Final Assignment</h3>
	        <p><strong>Gemini GAIA Benchmark Agent</strong> | Created with ❤️ by {self.agent_info['author']}</p>
	        <p>🔗 <a href="{self.agent_code_link}" target="_blank">View Source Code</a> |
	        📚 <a href="https://huggingface.co/learn/agents-course" target="_blank">Course Materials</a> |
	        🤖 <a href="https://makersuite.google.com/" target="_blank">Google AI Studio</a></p>
	        <p><em>Powered by {model_label} • Built with Gradio • Created (UTC): {self.agent_info['created']}</em></p>
	        </div>
	        """)

	    return interface

	def main():
	    """Build the Gemini GAIA app and launch its Gradio server.

	    Launch failures are logged with a traceback and reported on stdout; they
	    are not re-raised.
	    """

	    # Configure logging. force=True is required because the root logger is
	    # already configured at import time; without it this call is a silent
	    # no-op and the format below would never be applied.
	    logging.basicConfig(
	        level=logging.INFO,
	        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	        force=True
	    )

	    logger.info("🚀 Starting Gemini GAIA Benchmark Agent Application...")

	    # Create the application
	    app = GeminiGAIAApp()
	    interface = app.create_interface()

	    # Launch configuration for Hugging Face Spaces
	    # NOTE(review): share=True together with auth=None exposes the app on a
	    # public URL when run outside Spaces - confirm this is intended.
	    launch_kwargs = {
	        "share": True,  # Create public shareable link
	        "server_name": "0.0.0.0",  # Allow external connections
	        "server_port": 7860,  # Default Gradio port
	        "show_error": True,  # Show errors in UI
	        "quiet": False,  # Show startup logs
	        "favicon_path": None,  # Custom favicon
	        "auth": None,  # No authentication required
	    }

	    logger.info("🌐 Launching Gradio interface...")
	    logger.info("🔗 The app will be available at http://localhost:7860")

	    try:
	        interface.launch(**launch_kwargs)
	    except Exception as e:
	        # logger.exception records the traceback that logger.error dropped.
	        logger.exception("❌ Failed to launch application: %s", e)
	        print("Please check your environment setup and try again.")

	# Script entry point: launch the app only when this file is executed directly,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()