# Hugging Face Space: Adilbai/Gemini-GAIA-Agent (status: Running)
# File size: 26,185 bytes

import os
import json
import tempfile
import logging
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime
import asyncio

import gradio as gr
import pandas as pd
from agent import GeminiGAIAAgent, GAIAQuestion

# Configure the root logger at import time so INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by GeminiGAIAApp and main() in this file.
logger = logging.getLogger(__name__)

class GeminiGAIAApp:
    """
    Gradio application for Gemini-powered GAIA Benchmark Agent
    Hugging Face Agents Course - Unit 4 Final Assignment
    """
    
    def __init__(self):
        self.agent = None
        self.conversation_history = []
        self.current_question_id = 0
        
        # Agent metadata
        self.agent_info = {
            "name": "Gemini GAIA Benchmark Agent",
            "author": "AdilzhanB",
            "course": "Hugging Face Agents Course - Unit 4",
            "model": "Google Gemini 2.0-flash-001",
            "version": "1.0",
            "created": "2025-06-17 15:32:22",
            "capabilities": [
                "Complex multi-step reasoning",
                "Advanced mathematical calculations", 
                "Real-time web search",
                "Multimodal file analysis",
                "Natural language understanding"
            ]
        }
        
        # Huggingface repository link
        self.agent_code_link = "https://huggingface.co/spaces/AdilzhanB/Gemini-GAIA-Agent"
        
    def _initialize_agent(self, api_key: Optional[str] = None):
        """
        Create a new GeminiGAIAAgent and store it on ``self.agent``.

        Args:
            api_key: Key forwarded to the agent constructor; may be None.

        Returns:
            A status string beginning with "✅" on success, or "❌" followed
            by the failure reason. On failure ``self.agent`` is reset to None.
        """
        agent_settings = {
            "model_name": "gemini-2.0-flash-001",
            "api_key": api_key,
            "temperature": 0.1,
            "verbose": False,
        }
        try:
            self.agent = GeminiGAIAAgent(**agent_settings)
            logger.info("Gemini agent initialized successfully")
            return "✅ Agent initialized successfully!"
        except Exception as exc:
            failure = f"Failed to initialize agent: {exc!s}"
            logger.error(failure)
            # Drop any half-built or previously working agent so callers see a clean "not initialized" state
            self.agent = None
            return f"❌ {failure}"
    
    def solve_question(self, 
                      question_text: str, 
                      difficulty_level: int, 
                      uploaded_file, 
                      api_key: Optional[str] = None) -> Tuple[str, str, str, str, str, str]:
        """
        Main function to solve GAIA questions
        
        Returns: (reasoning, tools_used, confidence, processing_time, final_answer, status)
        """
        try:
            # Initialize agent if needed or API key changed
            if not self.agent or (api_key and api_key.strip()):
                init_status = self._initialize_agent(api_key.strip() if api_key else None)
                if "❌" in init_status:
                    return "", "", "", "", "", init_status
            
            if not self.agent:
                return "", "", "", "", "", "❌ Agent not initialized. Please provide a valid Google API key."
            
            if not question_text.strip():
                return "", "", "", "", "", "❌ Please enter a question."
            
            # Handle file upload
            file_path = None
            file_name = None
            if uploaded_file is not None:
                file_path = uploaded_file.name
                file_name = os.path.basename(file_path)
            
            # Create GAIA question
            self.current_question_id += 1
            gaia_question = GAIAQuestion(
                question_id=f"user_question_{self.current_question_id}",
                question=question_text,
                level=difficulty_level,
                file_path=file_path,
                file_name=file_name
            )
            
            # Solve the question
            logger.info(f"Solving question: {question_text[:50]}...")
            result = self.agent.solve_gaia_question(gaia_question)
            
            # Store in conversation history
            self.conversation_history.append({
                "timestamp": datetime.now().isoformat(),
                "question": question_text,
                "result": result
            })
            
            # Extract results
            if result.get("error"):
                return "", "", "", "", "", f"❌ Error: {result.get('agent_response', 'Unknown error')}"
            
            # Format reasoning steps
            reasoning_steps = "\n".join([
                f"{i+1}. {step}" for i, step in enumerate(result.get("reasoning_steps", []))
            ])
            if not reasoning_steps:
                reasoning_steps = "Gemini processed the question using its internal reasoning."
            
            # Format tools used
            tools_used = ", ".join(result.get("tools_used", ["None"]))
            if not tools_used or tools_used == "None":
                tools_used = "Gemini's built-in capabilities"
            
            # Get other metrics
            confidence = f"{result.get('confidence_score', 0.0):.2f}"
            processing_time = f"{result.get('processing_time_seconds', 0):.2f}s"
            final_answer = result.get("agent_response", "No answer generated")
            
            # Success status
            status = f"✅ Question solved successfully! (Model: {result.get('model_used', 'Gemini')})"
            
            logger.info(f"Question solved successfully. Tools: {tools_used}, Confidence: {confidence}")
            
            return (
                reasoning_steps,
                tools_used,
                confidence,
                processing_time,
                final_answer,
                status
            )
            
        except Exception as e:
            error_msg = f"❌ Error solving question: {str(e)}"
            logger.error(error_msg)
            return "", "", "", "", "", error_msg
    
    def get_conversation_history(self) -> str:
        """Get formatted conversation history"""
        if not self.conversation_history:
            return "No questions solved yet. Try asking a GAIA-style question!"
        
        history_text = "## 📚 Recent Conversation History\n\n"
        
        for i, entry in enumerate(self.conversation_history[-5:], 1):  # Show last 5
            result = entry['result']
            
            history_text += f"### Question {i}\n"
            history_text += f"**Asked:** {entry['question'][:150]}...\n"
            history_text += f"**Level:** {result.get('level', 'N/A')}\n"
            history_text += f"**Tools Used:** {', '.join(result.get('tools_used', ['None']))}\n"
            history_text += f"**Confidence:** {result.get('confidence_score', 0):.2f}\n"
            history_text += f"**Answer Preview:** {result.get('agent_response', 'No answer')[:200]}...\n"
            history_text += f"**Time:** {entry['timestamp'][:19]}\n\n"
            history_text += "---\n\n"
        
        return history_text
    
    def clear_history(self) -> str:
        """Clear conversation history"""
        self.conversation_history = []
        self.current_question_id = 0
        return "🗑️ History cleared successfully!"
    
    def test_agent_capabilities(self, api_key: Optional[str] = None) -> str:
        """Test agent and tool capabilities"""
        try:
            # Initialize agent if needed
            if not self.agent or (api_key and api_key.strip()):
                init_status = self._initialize_agent(api_key.strip() if api_key else None)
                if "❌" in init_status:
                    return init_status
            
            if not self.agent:
                return "❌ Agent not initialized. Please provide a valid Google API key."
            
            # Test tools
            tool_results = self.agent.test_tools()
            
            result_text = "## 🔧 Agent Capability Test Results\n\n"
            result_text += f"**Model:** {self.agent.model_name}\n"
            result_text += f"**Status:** {'✅ Initialized' if self.agent.model else '❌ Not initialized'}\n\n"
            
            result_text += "### Tool Test Results\n"
            
            for tool_name, result in tool_results.items():
                status_icon = "✅" if "✅" in result else "❌"
                result_text += f"{status_icon} **{tool_name.title()}**: {result}\n"
            
            result_text += "\n### Available Capabilities\n"
            for capability in self.agent_info["capabilities"]:
                result_text += f"- ✅ {capability}\n"
            
            return result_text
            
        except Exception as e:
            return f"❌ Error testing agent: {str(e)}"
    
    def get_example_question(self, level: int, example_type: str) -> Tuple[str, int]:
        """Get example questions based on level and type"""
        examples = {
            1: {
                "math": "What is the square root of 144?",
                "factual": "What is the capital of Japan?",
                "conversion": "Convert 100 degrees Fahrenheit to Celsius"
            },
            2: {
                "financial": "If I invest $1000 at 5% annual compound interest, how much will I have after 3 years?",
                "current": "What is the current population of Tokyo according to the latest data?",
                "analysis": "Calculate the average temperature if the daily temperatures were 72°F, 75°F, 68°F, and 71°F"
            },
            3: {
                "complex": "Based on current economic indicators, what are the main recession risks for 2024?",
                "research": "Compare the GDP growth rates of the top 5 economies in 2023 and identify key trends",
                "multimodal": "Analyze any uploaded data file and provide insights about patterns and trends"
            }
        }
        
        question = examples.get(level, {}).get(example_type, "What is 2 + 2?")
        return question, level
    
    def create_interface(self):
        """
        Create the comprehensive Gradio interface

        Builds the page top to bottom: header and agent info, API key box,
        question form with quick-example buttons, result fields, a reasoning
        accordion, and tabs for capability tests, history and background
        documentation. All button callbacks are wired to methods of this
        instance.

        The model name shown in the About tab and footer is taken from
        ``self.agent_info['model']`` so the text cannot drift from the
        configured model.

        Returns:
            The assembled ``gr.Blocks`` object (not yet launched).
        """
        
        # Custom CSS for professional styling
        custom_css = """
        .gradio-container {
            max-width: 1400px !important;
            margin: 0 auto;
        }
        .main-header {
            text-align: center;
            background: linear-gradient(90deg, #4285f4, #34a853, #fbbc05, #ea4335);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
            margin-bottom: 20px;
        }
        .info-box {
            background-color: #c0c7cf;
            border-left: 4px solid #4285f4;
            padding: 15px;
            margin: 10px 0;
            border-radius: 5px;
        }
        """
        
        with gr.Blocks(css=custom_css, title="Gemini GAIA Agent", theme=gr.themes.Soft()) as interface:
            
            # Main Header
            gr.HTML("""
                <div class="main-header">
                    <h1>🚀 Gemini GAIA Benchmark Agent</h1>
                </div>
            """)
            
            # Agent Information
            with gr.Row():
                gr.Markdown(f"""
                <div class="info-box">
                    <h3>🤖 Agent Information</h3>
                    <ul>
                        <li><strong>Created by:</strong> {self.agent_info['author']}</li>
                        <li><strong>Course:</strong> {self.agent_info['course']}</li>
                        <li><strong>Model:</strong> {self.agent_info['model']}</li>
                        <li><strong>Version:</strong> {self.agent_info['version']}</li>
                        <li><strong>Date:</strong> {self.agent_info['created']}</li>
                    </ul>
                </div>
                """)
        
            # API Key Configuration
            with gr.Row():
                with gr.Column():
                    api_key_input = gr.Textbox(
                        label="🔑 Google API Key (Required)",
                        placeholder="Enter your Google AI API key here...",
                        type="password",
                        info="Get your free API key from: https://makersuite.google.com/app/apikey"
                    )
                    test_agent_btn = gr.Button("🧪 Test Agent & Tools", variant="secondary")
            
            # Main Question Interface
            gr.Markdown("## 💭 Ask Your GAIA Question")
            
            with gr.Row():
                # Left Panel - Input
                with gr.Column(scale=2):
                    question_input = gr.Textbox(
                        label="📝 Your Question",
                        placeholder="Enter your GAIA-style question here...\n\nExamples:\n- What is the compound interest on $1000 at 5% for 3 years?\n- What is the current population of Tokyo?\n- Analyze the uploaded CSV data and find patterns",
                        lines=4,
                        max_lines=8
                    )
                    
                    with gr.Row():
                        difficulty_slider = gr.Slider(
                            label="🎯 Difficulty Level",
                            minimum=1,
                            maximum=3,
                            value=2,
                            step=1,
                            info="1=Basic | 2=Intermediate | 3=Advanced"
                        )
                        
                        file_upload = gr.File(
                            label="📎 Upload File (Optional)",
                            file_types=[".txt", ".csv", ".json", ".xlsx", ".png", ".jpg", ".jpeg", ".gif", ".pdf"],
                        )
                    
                    solve_button = gr.Button(
                        "🚀 Solve with Gemini",
                        variant="primary",
                        size="lg",
                        scale=2
                    )
                
                # Right Panel - Quick Examples
                with gr.Column(scale=1):
                    gr.Markdown("### 📚 Quick Examples")
                    
                    # Level 1 Examples
                    gr.Markdown("**Level 1 (Basic)**")
                    with gr.Row():
                        math_btn = gr.Button("🧮 Math", size="sm")
                        factual_btn = gr.Button("🌍 Factual", size="sm")
                        convert_btn = gr.Button("🔄 Convert", size="sm")
                    
                    # Level 2 Examples  
                    gr.Markdown("**Level 2 (Intermediate)**")
                    with gr.Row():
                        finance_btn = gr.Button("💰 Finance", size="sm")
                        current_btn = gr.Button("📊 Current", size="sm")
                        analysis_btn = gr.Button("📈 Analysis", size="sm")
                    
                    # Level 3 Examples
                    gr.Markdown("**Level 3 (Advanced)**")
                    with gr.Row():
                        complex_btn = gr.Button("🧠 Complex", size="sm")
                        research_btn = gr.Button("🔬 Research", size="sm")
                        multimodal_btn = gr.Button("🖼️ Multimodal", size="sm")
            
            # Output Section
            gr.Markdown("## 🎯 Agent Response")
            
            with gr.Row():
                # Main Answer
                with gr.Column(scale=2):
                    final_answer_output = gr.Textbox(
                        label="🤖 Gemini's Answer",
                        lines=8,
                        max_lines=15,
                        show_copy_button=True,
                        info="Complete response with reasoning and solution"
                    )
                
                # Metrics
                with gr.Column(scale=1):
                    confidence_output = gr.Textbox(
                        label="📊 Confidence Score",
                        max_lines=1,
                        info="Agent's confidence in the answer"
                    )
                    
                    processing_time_output = gr.Textbox(
                        label="⏱️ Processing Time",
                        max_lines=1,
                        info="Time taken to solve"
                    )
                    
                    tools_used_output = gr.Textbox(
                        label="🔧 Tools Used",
                        max_lines=3,
                        info="Which capabilities were utilized"
                    )
                    
                    status_output = gr.Textbox(
                        label="✅ Status",
                        max_lines=2,
                        info="Execution status and model info"
                    )
            
            # Detailed Reasoning (Expandable)
            with gr.Accordion("🔍 Detailed Reasoning Steps", open=False):
                reasoning_output = gr.Textbox(
                    label="Step-by-Step Reasoning",
                    lines=10,
                    show_copy_button=True,
                    info="Detailed breakdown of the solution process"
                )
            
            # Additional Features Tabs
            with gr.Tabs():
                # Tool Testing Tab
                with gr.TabItem("🛠️ Agent Capabilities"):
                    tool_test_output = gr.Markdown(
                        "Click 'Test Agent & Tools' above to check all capabilities.",
                        elem_classes=["info-box"]
                    )
                    
                    gr.Markdown("""
                    ### 🎯 GAIA Benchmark Capabilities
                    
                    This agent is designed to excel at:
                    
                    - **🧠 Complex Reasoning**: Multi-step logical problem solving
                    - **🧮 Mathematical Operations**: Advanced calculations and financial modeling  
                    - **🔍 Web Search**: Real-time information retrieval using DuckDuckGo
                    - **📄 File Analysis**: Processing text, CSV, JSON, and image files
                    - **🖼️ Multimodal Understanding**: Analyzing images with Gemini's vision capabilities
                    - **📊 Data Processing**: Statistical analysis and pattern recognition
                    """)
                
                # History Tab
                with gr.TabItem("📚 Conversation History"):
                    with gr.Row():
                        refresh_history_btn = gr.Button("🔄 Refresh History", variant="secondary")
                        clear_history_btn = gr.Button("🗑️ Clear History", variant="stop")
                    
                    history_output = gr.Markdown(
                        "No questions solved yet. Start by asking a GAIA question!",
                        elem_classes=["info-box"]
                    )
                
                # Documentation Tab
                with gr.TabItem("📖 About GAIA"):
                    gr.Markdown(f"""
                    ### 🎯 What is GAIA?
                    
                    **GAIA (General AI Assistants)** is a comprehensive benchmark designed to evaluate AI assistants on real-world tasks that require:
                    
                    #### 🧠 Core Capabilities Tested
                    - **Reasoning**: Complex multi-step problem solving and logical inference
                    - **Multimodal Understanding**: Processing text, images, documents, and data files
                    - **Web Browsing**: Searching for and utilizing current information
                    - **Tool Use**: Effective integration and use of various computational tools
                    
                    #### 📊 Difficulty Levels
                    - **Level 1**: Basic factual questions and simple reasoning tasks
                    - **Level 2**: Multi-step problems requiring tool integration
                    - **Level 3**: Complex tasks requiring advanced reasoning and multiple tools
                    
                    #### 🚀 This Agent's Approach
                    This implementation uses **{self.agent_info['model']}** for its:
                    - Superior multimodal capabilities (text + images)
                    - Advanced reasoning and problem-solving
                    - Large context window for complex tasks
                    - Built-in safety and reliability features
                    
                    #### 🔗 Technical Details
                    - **Model**: {self.agent_info['model']}
                    - **Framework**: Custom Python implementation
                    - **Tools**: Calculator, Web Search, File Analyzer
                    - **Interface**: Gradio 4.0+
                    - **Author**: {self.agent_info['author']}
                    
                    #### 📚 Resources
                    - [GAIA Benchmark Paper](https://arxiv.org/abs/2311.12983)
                    - [GAIA Dataset](https://huggingface.co/datasets/gaia-benchmark/GAIA)
                    - [Google AI Studio](https://makersuite.google.com/)
                    - [Course Repository]({self.agent_code_link})
                    """)
            
            # Wire up all the interactions
            
            # Main solve function; output order matches solve_question's return tuple
            solve_button.click(
                self.solve_question,
                inputs=[question_input, difficulty_slider, file_upload, api_key_input],
                outputs=[reasoning_output, tools_used_output, confidence_output, 
                        processing_time_output, final_answer_output, status_output]
            )
            
            # Tool testing
            test_agent_btn.click(
                self.test_agent_capabilities,
                inputs=[api_key_input],
                outputs=[tool_test_output]
            )
            
            # History management
            refresh_history_btn.click(
                self.get_conversation_history,
                outputs=[history_output]
            )
            
            clear_history_btn.click(
                self.clear_history,
                outputs=[history_output]
            )
            
            # Example buttons: each fills the question box and sets the slider
            example_buttons = (
                (math_btn, 1, "math"),
                (factual_btn, 1, "factual"),
                (convert_btn, 1, "conversion"),
                (finance_btn, 2, "financial"),
                (current_btn, 2, "current"),
                (analysis_btn, 2, "analysis"),
                (complex_btn, 3, "complex"),
                (research_btn, 3, "research"),
                (multimodal_btn, 3, "multimodal"),
            )
            for button, level, example_type in example_buttons:
                button.click(
                    # Bind the loop values as defaults so each button keeps
                    # its own level/type instead of the last loop iteration's
                    lambda level=level, example_type=example_type: self.get_example_question(level, example_type),
                    outputs=[question_input, difficulty_slider]
                )
            
            # Footer
            gr.HTML(f"""
                <div style="text-align: center; margin-top: 40px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
                    <h3>🎓 Hugging Face Agents Course - Unit 4 Final Assignment</h3>
                    <p><strong>Gemini GAIA Benchmark Agent</strong> | Created with ❤️ by {self.agent_info['author']}</p>
                    <p>🔗 <a href="{self.agent_code_link}" target="_blank">View Source Code</a> | 
                       📚 <a href="https://huggingface.co/learn/agents-course" target="_blank">Course Materials</a> |
                       🤖 <a href="https://makersuite.google.com/" target="_blank">Google AI Studio</a></p>
                    <p><em>Powered by {self.agent_info['model']} • Built with Gradio • Created (UTC): {self.agent_info['created']}</em></p>
                </div>
            """)
        
        return interface

def main():
    """
    Main function to launch the Gemini GAIA application

    Configures log formatting, builds the Gradio interface and launches it on
    0.0.0.0:7860. A public share link is requested only when the ``SPACE_ID``
    environment variable is absent (i.e. presumably not running on Hugging
    Face Spaces, where the Space URL is already public).

    Launch failures are logged with their traceback and reported on stdout;
    they are not re-raised.
    """
    
    # Configure logging. The module already called basicConfig at import time,
    # which makes this call a no-op once the root logger has handlers, so the
    # format is also applied to the existing root handlers explicitly.
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format)
    formatter = logging.Formatter(log_format)
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)
    
    logger.info("🚀 Starting Gemini GAIA Benchmark Agent Application...")
    
    # Create the application
    app = GeminiGAIAApp()
    interface = app.create_interface()
    
    # NOTE(review): SPACE_ID is assumed to be set by the Spaces runtime — confirm
    running_on_spaces = bool(os.environ.get("SPACE_ID"))
    
    # Launch configuration for Hugging Face Spaces
    launch_kwargs = {
        "share": not running_on_spaces,  # Public share link only off-Spaces
        "server_name": "0.0.0.0",  # Allow external connections  
        "server_port": 7860,  # Default Gradio port
        "show_error": True,  # Show errors in UI
        "quiet": False,  # Show startup logs
        "favicon_path": None,  # Custom favicon
        "auth": None,  # No authentication required
    }
    
    logger.info("🌐 Launching Gradio interface...")
    logger.info("🔗 The app will be available at http://localhost:7860")
    
    try:
        interface.launch(**launch_kwargs)
    except Exception as e:
        # logger.exception records the traceback alongside the message
        logger.exception("❌ Failed to launch application: %s", e)
        print("Please check your environment setup and try again.")

# Launch the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()