AdilzhanB committed on
Commit
abfc6f8
·
1 Parent(s): 4f6bec9
Files changed (4) hide show
  1. agent.py +829 -0
  2. app.py +603 -0
  3. config.yaml +0 -0
  4. requirements.txt +28 -0
agent.py ADDED
@@ -0,0 +1,829 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import logging
4
+ from typing import Dict, List, Any, Optional, Union
5
+ from datetime import datetime
6
+ import asyncio
7
+ import base64
8
+ from io import BytesIO
9
+
10
+ import google.generativeai as genai
11
+ from google.generativeai.types import HarmCategory, HarmBlockThreshold
12
+ from PIL import Image
13
+ import pandas as pd
14
+ import numpy as np
15
+ import requests
16
+ from duckduckgo_search import DDGS
17
+ import tempfile
18
+ from pathlib import Path
19
+
20
+ # Configure logging
21
+ logging.basicConfig(level=logging.INFO)
22
+ logger = logging.getLogger(__name__)
23
+
24
class GAIAQuestion:
    """Plain record describing one GAIA benchmark question.

    Every constructor argument is stored unchanged on the attribute of the
    same name; no validation or conversion is performed.
    """

    def __init__(self, question_id: str, question: str, level: int,
                 final_answer: Optional[str] = None, file_name: Optional[str] = None,
                 file_path: Optional[str] = None, annotator_metadata: Optional[Dict] = None):
        # Required identification and content.
        self.question_id, self.question, self.level = question_id, question, level
        # Optional reference answer.
        self.final_answer = final_answer
        # Optional attachment (display name and on-disk location).
        self.file_name, self.file_path = file_name, file_path
        # Optional free-form annotator metadata.
        self.annotator_metadata = annotator_metadata
37
+
38
class GeminiTool:
    """Common interface for the tools the agent can invoke.

    Subclasses supply a ``name``/``description`` pair and override
    ``execute``.
    """

    def __init__(self, name: str, description: str):
        self.description = description
        self.name = name

    def execute(self, input_data: str) -> str:
        """Run the tool on ``input_data``; subclasses must override this.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError
47
+
48
+ class CalculatorTool(GeminiTool):
49
+ """Advanced calculator tool for mathematical operations"""
50
+
51
def __init__(self):
    """Register the calculator under the name ``calculator``.

    The description is free text handed to the base class. Its examples
    are kept in line with the names ``execute`` actually exposes: ``log``
    is the natural logarithm (``math.log``), base-10 is ``log10``, and the
    sample standard deviation is ``stdev``.
    """
    super().__init__(
        name="calculator",
        description="""
Performs mathematical calculations including:
- Basic arithmetic (+, -, *, /, %)
- Advanced math (sqrt, log, log10, sin, cos, tan, exp, etc.)
- Financial calculations (compound_interest, simple_interest, present_value, future_value)
- Statistical operations (mean, median, mode, stdev)

Examples:
- "sqrt(144)" → 12
- "log10(100)" → 2.0 (base 10; "log" is the natural logarithm)
- "sin(pi/2)" → 1.0
- "compound_interest(1000, 0.05, 3)" → compound interest calculation
"""
    )
68
+
69
def execute(self, expression: str) -> str:
    """Evaluate a calculator expression and describe the outcome.

    Args:
        expression: Either a Python-style expression built from the
            whitelisted names below (e.g. ``"sqrt(16)"`` or
            ``"compound_interest(1000, 0.05, 3)"``), or a free-text
            compound-interest request such as
            ``"compound interest on $1000 at 5% for 3 years"``.

    Returns:
        ``"Calculation result: <value>"`` on success, the report produced
        by ``_handle_financial_calculation`` for free-text requests, or a
        ``"Calculation error: ..."`` message. Exceptions are not propagated.
    """
    try:
        import math
        import re
        import statistics

        # Names visible to the evaluated expression; builtins are removed.
        safe_dict = {
            "__builtins__": {},
            # Basic operations
            "abs": abs, "round": round, "min": min, "max": max,
            "sum": sum, "pow": pow, "divmod": divmod,

            # Math functions
            "sqrt": math.sqrt, "log": math.log, "log10": math.log10,
            "ln": math.log, "exp": math.exp,
            "sin": math.sin, "cos": math.cos, "tan": math.tan,
            "asin": math.asin, "acos": math.acos, "atan": math.atan,
            "sinh": math.sinh, "cosh": math.cosh, "tanh": math.tanh,
            "pi": math.pi, "e": math.e,
            "floor": math.floor, "ceil": math.ceil,
            "factorial": math.factorial, "gcd": math.gcd,

            # Statistical functions
            "mean": statistics.mean, "median": statistics.median,
            "mode": statistics.mode, "stdev": statistics.stdev,

            # Financial functions
            "compound_interest": self._compound_interest,
            "simple_interest": self._simple_interest,
            "present_value": self._present_value,
            "future_value": self._future_value,
        }

        # Only free-text requests go to the natural-language parser. A real
        # call such as "compound_interest(1000, 0.05, 3)" must be evaluated,
        # otherwise the documented call form can never succeed.
        lowered = expression.lower()
        is_call = re.search(r"compound_interest\s*\(", lowered) is not None
        mentions_compound = (
            "compound_interest" in lowered or "compound interest" in lowered
        )
        if mentions_compound and not is_call:
            return self._handle_financial_calculation(expression)

        # SECURITY NOTE(review): eval() with emptied builtins is not a real
        # sandbox (attribute access on literals can still reach arbitrary
        # objects), and this input can originate from model output. Consider
        # replacing it with an AST-based evaluator.
        result = eval(expression, safe_dict)

        return f"Calculation result: {result}"

    except Exception as e:
        return f"Calculation error: {str(e)}. Please check your mathematical expression."
113
+
114
def _compound_interest(self, principal: float, rate: float, time: float, n: int = 1) -> float:
    """Return the accumulated amount A = P * (1 + r/n) ** (n * t).

    The value is the final balance (principal plus interest), not the
    interest alone. ``n`` is the number of compounding periods per unit
    of ``time``.
    """
    growth_per_period = 1 + rate / n
    period_count = n * time
    return principal * growth_per_period ** period_count
117
+
118
def _simple_interest(self, principal: float, rate: float, time: float) -> float:
    """Return the amount after simple interest: A = P * (1 + r * t)."""
    multiplier = 1 + rate * time
    return principal * multiplier
121
+
122
def _present_value(self, future_value: float, rate: float, time: float) -> float:
    """Discount ``future_value`` back to today: PV = FV / (1 + r) ** t."""
    discount_factor = (1 + rate) ** time
    return future_value / discount_factor
125
+
126
def _future_value(self, present_value: float, rate: float, time: float) -> float:
    """Grow ``present_value`` forward in time: FV = PV * (1 + r) ** t."""
    growth_factor = (1 + rate) ** time
    return present_value * growth_factor
129
+
130
def _handle_financial_calculation(self, expression: str) -> str:
    """Parse a free-text compound-interest request and format a report.

    Recognises phrases such as ``"$1000 at 5% for 3 years"``. The word
    "monthly" anywhere in the text switches from annual to monthly
    compounding.

    Args:
        expression: Free text containing a principal, a percentage rate
            (``N%``) and a duration (``N years``).

    Returns:
        A multi-line report on success, a hint message when the text cannot
        be parsed, or ``"Financial calculation error: ..."`` if anything
        raises. Exceptions are not propagated.
    """
    try:
        import re

        lowered = expression.lower()
        if "compound" in lowered:
            number = r'(\d+(?:\.\d+)?)'
            rate_match = re.search(number + r'%', expression)
            time_match = re.search(number + r'\s*years?', expression)

            # Prefer an explicit dollar amount. Otherwise take the first
            # number that is not already claimed as the rate or duration,
            # so "at 5% on 1000 for 3 years" does not read 5 as principal.
            money_match = re.search(r'\$' + number, expression)
            if money_match is None:
                claimed = {m.start(1) for m in (rate_match, time_match) if m}
                money_match = next(
                    (m for m in re.finditer(number, expression)
                     if m.start(1) not in claimed),
                    None,
                )

            if money_match and rate_match and time_match:
                principal = float(money_match.group(1))
                rate_percent = float(rate_match.group(1))
                rate = rate_percent / 100  # Convert percentage to a fraction
                time = float(time_match.group(1))

                # Default to annual compounding
                n = 12 if "monthly" in lowered else 1

                result = self._compound_interest(principal, rate, time, n)

                # The rate is shown as parsed; multiplying the fraction back
                # by 100 can print float noise such as 10.000000000000002.
                return f"""
Financial Calculation - Compound Interest:
- Principal: ${principal:,.2f}
- Interest Rate: {rate_percent:g}% per year
- Time Period: {time} years
- Compounding: {'Monthly' if n == 12 else 'Annually'}
- Final Amount: ${result:,.2f}
- Interest Earned: ${result - principal:,.2f}
"""

        return "Unable to parse financial calculation. Please use format like: compound_interest(1000, 0.05, 3)"

    except Exception as e:
        return f"Financial calculation error: {str(e)}"
172
+
173
+ class WebSearchTool(GeminiTool):
174
+ """Web search tool using DuckDuckGo"""
175
+
176
def __init__(self):
    """Register the ``web_search`` tool and create its DuckDuckGo client."""
    summary = """
Searches the web for current information using DuckDuckGo.
Returns relevant, up-to-date search results with summaries.

Best for:
- Current events and news
- Recent statistics and data
- Current prices, populations, etc.
- Latest information on any topic

Example: "current population of Tokyo 2024"
"""
    super().__init__(name="web_search", description=summary)
    # One client instance is kept for every later search.
    self.ddgs = DDGS()
193
+
194
def execute(self, query: str) -> str:
    """Search the web and return up to five results as formatted text.

    Args:
        query: The search string passed to ``self.ddgs.text``.

    Returns:
        A numbered list of results (title, snippet, source URL), a
        "No search results found" message, or a "Web search error" message
        if the search raises. Exceptions are not propagated.
    """
    try:
        # Perform web search
        results = list(self.ddgs.text(query, max_results=5))

        if not results:
            return f"No search results found for: {query}"

        max_snippet_chars = 200
        entries = []
        for i, result in enumerate(results, 1):
            # `or` also covers keys that are present but None/empty, which
            # would otherwise break the slicing below.
            title = result.get('title') or 'No title'
            snippet = result.get('body') or 'No description'
            url = result.get('href') or 'No URL'

            # Only mark the snippet as shortened when it actually was.
            if len(snippet) > max_snippet_chars:
                snippet = snippet[:max_snippet_chars] + "..."

            entries.append(f"{i}. **{title}**\n {snippet}\n Source: {url}\n\n")

        return f"Web search results for '{query}':\n\n" + "".join(entries)

    except Exception as e:
        return f"Web search error: {str(e)}. Unable to perform search at this time."
217
+
218
+ class FileAnalyzerTool(GeminiTool):
219
+ """Tool for analyzing various file types"""
220
+
221
def __init__(self):
    """Register the ``file_analyzer`` tool with its capability summary."""
    summary = """
Analyzes various file types including:
- Text files (.txt, .md, .json, .csv)
- Data files (CSV, Excel, JSON)
- Image files (PNG, JPG, GIF, etc.)
- Documents and structured data

Provides summaries, statistics, and insights from file contents.
"""
    super().__init__(name="file_analyzer", description=summary)
234
+
235
def execute(self, file_path: str) -> str:
    """Analyze the file at ``file_path`` with the handler for its extension.

    Returns the handler's report, a "File not found" / "Unsupported file
    type" message, or an "Error analyzing file" message if anything raises.
    """
    try:
        if not os.path.exists(file_path):
            return f"File not found: {file_path}"

        suffix = Path(file_path).suffix.lower()

        # Extension -> name of the method that handles it.
        handler_names = {
            **dict.fromkeys(('.txt', '.md', '.py', '.js', '.html', '.css'),
                            '_analyze_text_file'),
            '.json': '_analyze_json_file',
            '.csv': '_analyze_csv_file',
            **dict.fromkeys(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp'),
                            '_analyze_image_file'),
        }

        handler_name = handler_names.get(suffix)
        if handler_name is None:
            return f"Unsupported file type: {suffix}"
        return getattr(self, handler_name)(file_path)

    except Exception as e:
        return f"Error analyzing file: {str(e)}"
255
+
256
def _analyze_text_file(self, file_path: str) -> str:
    """Summarise a text file: counts, averages and a 500-character preview.

    The file is read as UTF-8 with undecodable bytes ignored. Lines are
    counted with ``str.splitlines`` so a trailing newline does not add a
    phantom empty line and an empty file reports zero lines.

    Args:
        file_path: Path of the file to read.

    Returns:
        A multi-line report string.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        content = f.read()

    lines = content.splitlines()
    words = content.split()
    chars = len(content)

    # Basic text statistics (guards cover the empty-file case).
    avg_line_length = sum(len(line) for line in lines) / len(lines) if lines else 0
    avg_word_length = sum(len(word) for word in words) / len(words) if words else 0

    preview = content[:500] + ('...' if len(content) > 500 else '')

    return f"""
📄 Text File Analysis:
- File: {Path(file_path).name}
- Lines: {len(lines):,}
- Words: {len(words):,}
- Characters: {chars:,}
- Average line length: {avg_line_length:.1f} characters
- Average word length: {avg_word_length:.1f} characters

📝 Content Preview:
{preview}
"""
282
+
283
def _analyze_json_file(self, file_path: str) -> str:
    """Summarise a JSON file: top-level type, keys or length, and a preview.

    Args:
        file_path: Path of a UTF-8 encoded JSON document.

    Returns:
        A multi-line report string. The preview is the first 500 characters
        of the pretty-printed document, followed by "..." when it was cut.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    data_type = type(data).__name__

    if isinstance(data, dict):
        keys_info = f"Keys ({len(data)}): {list(data.keys())[:10]}"
        if len(data) > 10:
            keys_info += "..."
    elif isinstance(data, list):
        keys_info = f"List with {len(data)} items"
    else:
        keys_info = f"Single {data_type} value"

    # Decide on the ellipsis from the text that is actually truncated;
    # str(data) has a different length than the indented serialization.
    serialized = json.dumps(data, indent=2)
    preview = serialized[:500]
    if len(serialized) > 500:
        preview += "..."

    return f"""
🔧 JSON File Analysis:
- File: {Path(file_path).name}
- Data type: {data_type}
- {keys_info}
- File size: {os.path.getsize(file_path):,} bytes

📊 Content Preview:
{preview}
"""
312
+
313
def _analyze_csv_file(self, file_path: str) -> str:
    """Summarise a CSV file loaded with pandas.

    Reports the shape, numeric and text columns, total missing values,
    per-column statistics for the first five numeric columns and a preview
    of the first three rows. Any failure is returned as an
    "Error analyzing CSV file" message.
    """
    try:
        frame = pd.read_csv(file_path)

        # Basic statistics
        rows, cols = frame.shape
        numeric_cols = frame.select_dtypes(include=[np.number]).columns.tolist()
        text_cols = frame.select_dtypes(include=['object']).columns.tolist()
        missing_data = frame.isnull().sum()

        # One summary line per numeric column (first five only).
        numeric_summary = ""
        if numeric_cols:
            stat_lines = []
            for col in numeric_cols[:5]:
                series = frame[col]
                stat_lines.append(
                    f" {col}: mean={series.mean():.2f}, std={series.std():.2f}, "
                    f"min={series.min()}, max={series.max()}\n"
                )
            numeric_summary = "\n📊 Numeric Columns Summary:\n" + "".join(stat_lines)

        preview = frame.head(3).to_string(max_cols=6)

        return f"""
📊 CSV File Analysis:
- File: {Path(file_path).name}
- Dimensions: {rows:,} rows × {cols} columns
- Numeric columns: {len(numeric_cols)} ({numeric_cols[:5]})
- Text columns: {len(text_cols)} ({text_cols[:5]})
- Missing values: {missing_data.sum()} total
- File size: {os.path.getsize(file_path):,} bytes

{numeric_summary}

📋 Data Preview (first 3 rows):
{preview}
"""
    except Exception as e:
        return f"Error analyzing CSV file: {str(e)}"
349
+
350
def _analyze_image_file(self, file_path: str) -> str:
    """Report basic metadata (format, size, mode, orientation) of an image.

    Only header-level properties are read here; any failure is returned as
    an "Error analyzing image" message.
    """
    try:
        with Image.open(file_path) as picture:
            width, height = picture.size
            mode = picture.mode
            format_name = picture.format
            file_size = os.path.getsize(file_path)

            aspect_ratio = width / height

            # Within 10% of 1:1 counts as square.
            if abs(aspect_ratio - 1) < 0.1:
                orientation = "Square"
            elif aspect_ratio > 1:
                orientation = "Landscape"
            else:
                orientation = "Portrait"

            return f"""
🖼️ Image File Analysis:
- File: {Path(file_path).name}
- Format: {format_name}
- Dimensions: {width} × {height} pixels
- Color mode: {mode}
- Aspect ratio: {aspect_ratio:.2f} ({orientation})
- File size: {file_size:,} bytes ({file_size/1024:.1f} KB)

Note: For detailed image content analysis, the image will be processed by Gemini's vision capabilities.
"""
    except Exception as e:
        return f"Error analyzing image: {str(e)}"
377
+
378
+ class GeminiGAIAAgent:
379
+ """
380
+ Advanced GAIA benchmark agent using Google Gemini
381
+ Optimized for multimodal understanding and complex reasoning
382
+ """
383
+
384
def __init__(self,
             model_name: str = "gemini-2.5-flash",
             api_key: Optional[str] = None,
             temperature: float = 0.1,
             max_tokens: int = 2048,
             verbose: bool = True):
    """Store the settings, configure the API, then build model and tools.

    Args:
        model_name: Gemini model identifier.
        api_key: Explicit API key; passed to ``_configure_gemini``.
        temperature: Sampling temperature stored for the generation config.
        max_tokens: Output token limit stored for the generation config.
        verbose: Stored on the instance as ``self.verbose``.
    """
    # Settings must exist before _initialize_model reads them.
    self.model_name = model_name
    self.temperature, self.max_tokens = temperature, max_tokens
    self.verbose = verbose

    self._configure_gemini(api_key)
    self.model = self._initialize_model()
    self.tools = self._initialize_tools()

    # Results of solved questions, oldest first.
    self.conversation_history = []

    logger.info(f"Gemini GAIA Agent initialized with model: {model_name}")
409
+
410
def _configure_gemini(self, api_key: Optional[str]):
    """Configure the Gemini client with the first available API key.

    An explicit ``api_key`` wins over the ``GOOGLE_API_KEY`` environment
    variable; when neither is set only a warning is logged.
    """
    chosen_key = api_key or os.getenv("GOOGLE_API_KEY")
    if not chosen_key:
        logger.warning("No Google API key provided. Please set GOOGLE_API_KEY environment variable or pass api_key parameter.")
        return
    genai.configure(api_key=chosen_key)
418
+
419
def _initialize_model(self):
    """Build the ``GenerativeModel`` used for every request.

    Returns:
        The configured model, or ``None`` (after logging the error) when
        construction fails.
    """
    try:
        # All four harm categories are set to BLOCK_NONE.
        unblocked_categories = (
            HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            HarmCategory.HARM_CATEGORY_HARASSMENT,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        )
        safety_settings = dict.fromkeys(unblocked_categories, HarmBlockThreshold.BLOCK_NONE)

        generation_config = genai.types.GenerationConfig(
            temperature=self.temperature,
            max_output_tokens=self.max_tokens,
            top_p=0.8,
            top_k=40
        )

        return genai.GenerativeModel(
            model_name=self.model_name,
            generation_config=generation_config,
            safety_settings=safety_settings
        )

    except Exception as e:
        logger.error(f"Failed to initialize Gemini model: {str(e)}")
        return None
449
+
450
def _initialize_tools(self) -> Dict[str, GeminiTool]:
    """Create one instance of every tool, keyed by its tool name."""
    tool_factories = (
        ("calculator", CalculatorTool),
        ("web_search", WebSearchTool),
        ("file_analyzer", FileAnalyzerTool),
    )
    return {tool_name: factory() for tool_name, factory in tool_factories}
459
+
460
def _create_system_prompt(self) -> str:
    """Return the system prompt, stamped with the current UTC time.

    The timestamp is taken from a timezone-aware clock because
    ``datetime.utcnow()`` is deprecated since Python 3.12; the rendered
    ``YYYY-MM-DD HH:MM:SS`` text is the same as before.
    """
    from datetime import timezone

    current_time = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    return f"""You are an advanced AI assistant designed to solve GAIA benchmark questions with exceptional accuracy and reasoning.

GAIA (General AI Assistants) benchmark tests your ability to:
1. 🧠 **Complex Reasoning**: Multi-step problem solving and logical inference
2. 🔧 **Tool Usage**: Effective use of calculators, web search, and file analysis
3. 🖼️ **Multimodal Understanding**: Processing text, images, data files, and documents
4. 🎯 **Accuracy**: Providing precise, well-researched answers

AVAILABLE TOOLS:
- **calculator**: Advanced mathematical operations, financial calculations, statistics
- **web_search**: Current information from the web using DuckDuckGo
- **file_analyzer**: Analysis of text files, CSV data, JSON, and images

INSTRUCTIONS:
1. **Think Step-by-Step**: Break down complex problems into logical steps
2. **Use Tools Strategically**: Choose the right tools for each task
3. **Verify Information**: Double-check calculations and search for current data when needed
4. **Be Precise**: Provide exact, accurate answers with proper reasoning
5. **Show Your Work**: Explain your thought process clearly
6. **Handle Files**: Analyze uploaded files as part of your solution process

RESPONSE FORMAT:
When using tools, clearly indicate:
- Which tool you're using and why
- The input you're providing to the tool
- How the tool's output contributes to your final answer

Current Date/Time (UTC): {current_time}
User: AdilzhanB

Remember: Your goal is to provide the most accurate and well-reasoned answer possible for each GAIA question."""
495
+
496
def _identify_required_tools(self, question: str, file_path: Optional[str] = None) -> List[str]:
    """Guess which tools a question needs from keyword matches.

    Matching is a case-insensitive substring test, so short keywords also
    hit inside longer words. The result is ordered calculator,
    web_search, file_analyzer; a truthy ``file_path`` always selects the
    file analyzer.
    """
    text = question.lower()

    def mentions_any(keywords) -> bool:
        return any(keyword in text for keyword in keywords)

    math_keywords = ['calculate', 'compute', 'math', 'formula', 'equation',
                     'interest', 'percentage', 'average', 'sum', 'multiply',
                     'divide', 'square root', 'logarithm', 'statistics']
    freshness_keywords = ['current', 'latest', 'recent', 'today', '2024', '2025',
                          'now', 'present', 'up-to-date', 'newest']
    lookup_keywords = ['population', 'price', 'news', 'event', 'happening']
    file_keywords = ['file', 'document', 'image', 'data', 'csv', 'analyze', 'uploaded']

    selected = []
    if mentions_any(math_keywords):
        selected.append('calculator')
    if mentions_any(freshness_keywords + lookup_keywords):
        selected.append('web_search')
    if file_path or mentions_any(file_keywords):
        selected.append('file_analyzer')
    return selected
521
+
522
def _use_tool(self, tool_name: str, input_data: str) -> str:
    """Run the named tool and return its output.

    Returns a "not available" message for unknown names and an
    "Error using ..." message when the tool raises.
    """
    try:
        tool = self.tools[tool_name]
    except KeyError:
        return f"Tool '{tool_name}' not available."

    try:
        return tool.execute(input_data)
    except Exception as e:
        return f"Error using {tool_name}: {str(e)}"
532
+
533
def _process_image_for_gemini(self, file_path: str) -> Optional[dict]:
    """Load an image file as a ``{'mime_type', 'data'}`` part for Gemini.

    The MIME type is guessed from the file name and falls back to
    ``image/jpeg``. Returns ``None`` (after logging) if reading fails.
    """
    try:
        import mimetypes

        with open(file_path, 'rb') as handle:
            raw_bytes = handle.read()

        guessed_type = mimetypes.guess_type(file_path)[0]
        if not guessed_type:
            guessed_type = 'image/jpeg'

        return {'mime_type': guessed_type, 'data': raw_bytes}
    except Exception as e:
        logger.error(f"Error processing image: {str(e)}")
        return None
550
+
551
def solve_gaia_question(self, gaia_question: GAIAQuestion) -> Dict[str, Any]:
    """Answer one GAIA question using tool context and one Gemini call.

    Pipeline: choose tools by keyword, run the file analyzer and web
    search, build a single prompt (attaching the raw bytes for image
    files), call the model, execute any ``USE_TOOL:`` lines found in the
    reply, then score the answer.

    Args:
        gaia_question: The question to solve. ``file_path`` is analyzed
            only when it points at an existing file.

    Returns:
        A result dict. On success it holds the answer, reasoning steps,
        tool outputs, confidence score and timing, and is also appended to
        ``self.conversation_history``. If anything raises while solving,
        the dict holds ``"error": True`` with the message in
        ``agent_response``. If no model is available, it holds only
        ``question_id``, an ``error`` message and ``timestamp``.
    """
    from datetime import timezone

    def utc_now():
        # datetime.utcnow() is deprecated since Python 3.12. tzinfo is
        # dropped so isoformat() keeps the naive form emitted before.
        return datetime.now(timezone.utc).replace(tzinfo=None)

    start_time = utc_now()
    logger.info(f"Solving GAIA Question {gaia_question.question_id} (Level {gaia_question.level})")

    if not self.model:
        return {
            "question_id": gaia_question.question_id,
            "error": "Model not initialized. Please check your Google API key.",
            "timestamp": start_time.isoformat()
        }

    try:
        # Step 1: Analyze question and identify required tools
        required_tools = self._identify_required_tools(gaia_question.question, gaia_question.file_path)

        # Step 2: Gather context from tools
        tool_results = {}
        reasoning_steps = []

        # File analysis first (if applicable)
        if gaia_question.file_path and os.path.exists(gaia_question.file_path):
            reasoning_steps.append(f"📎 Analyzing uploaded file: {gaia_question.file_name}")
            file_analysis = self._use_tool("file_analyzer", gaia_question.file_path)
            tool_results["file_analyzer"] = file_analysis
            reasoning_steps.append(f"✅ File analysis completed")

        # Use other tools as needed
        for tool_name in required_tools:
            if tool_name == "file_analyzer":  # Already handled above
                continue

            reasoning_steps.append(f"🔧 Using {tool_name} tool")

            if tool_name == "calculator":
                # The calculator is only advertised here; it runs later if
                # the model emits a USE_TOOL line.
                tool_result = "Calculator tool available for mathematical operations"
            else:
                # web_search (and any other tool) gets the raw question.
                tool_result = self._use_tool(tool_name, gaia_question.question)

            tool_results[tool_name] = tool_result
            reasoning_steps.append(f"✅ {tool_name} completed")

        # Step 3: Prepare content for Gemini
        content_parts = []

        # System prompt and question
        prompt = f"""{self._create_system_prompt()}

GAIA BENCHMARK QUESTION (Level {gaia_question.level}):
Question ID: {gaia_question.question_id}
Question: {gaia_question.question}

AVAILABLE TOOL RESULTS:
{json.dumps(tool_results, indent=2) if tool_results else "No tools used yet."}

TASK:
Solve this GAIA question step by step. You may request specific tool usage if needed by clearly stating:
"USE_TOOL: [tool_name] with input: [input_data]"

Provide your complete reasoning and final answer."""

        content_parts.append(prompt)

        # Add image if it's an image file
        if (gaia_question.file_path and
                Path(gaia_question.file_path).suffix.lower() in ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp']):

            image_data = self._process_image_for_gemini(gaia_question.file_path)
            if image_data:
                content_parts.append(image_data)
                reasoning_steps.append("🖼️ Image included for visual analysis")

        # Step 4: Generate response with Gemini
        reasoning_steps.append("🤖 Generating response with Gemini...")

        response = self.model.generate_content(content_parts)

        if not response or not response.text:
            raise Exception("Empty response from Gemini model")

        agent_response = response.text
        reasoning_steps.append("✅ Response generated successfully")

        # Step 5: Process any additional tool requests
        if "USE_TOOL:" in agent_response:
            reasoning_steps.append("🔧 Processing additional tool requests...")
            agent_response = self._process_tool_requests(agent_response, reasoning_steps)

        # Step 6: Calculate confidence and metrics
        confidence_score = self._calculate_confidence(agent_response, tool_results)
        end_time = utc_now()
        processing_time = (end_time - start_time).total_seconds()

        # Step 7: Prepare final result
        result = {
            "question_id": gaia_question.question_id,
            "question": gaia_question.question,
            "level": gaia_question.level,
            "agent_response": agent_response,
            "reasoning_steps": reasoning_steps,
            "tools_used": list(tool_results.keys()),
            "tool_results": tool_results,
            "confidence_score": confidence_score,
            "processing_time_seconds": processing_time,
            "timestamp": end_time.isoformat(),
            "model_used": self.model_name,
            "agent_version": "1.0-gemini"
        }

        # Add to conversation history
        self.conversation_history.append(result)

        logger.info(f"Question {gaia_question.question_id} solved successfully in {processing_time:.2f}s")
        return result

    except Exception as e:
        error_msg = f"Error solving question: {str(e)}"
        logger.error(error_msg)

        return {
            "question_id": gaia_question.question_id,
            "question": gaia_question.question,
            "level": gaia_question.level,
            "agent_response": f"Error: {error_msg}",
            "error": True,
            "timestamp": utc_now().isoformat(),
            "model_used": self.model_name
        }
684
+
685
def _process_tool_requests(self, response: str, reasoning_steps: List[str]) -> str:
    """Execute ``USE_TOOL:`` lines in a model reply and splice in results.

    A request line has the form ``USE_TOOL: <tool> with input: <input>``.
    The prompt shows the syntax with square-bracket placeholders, so a
    reply that echoes them (``USE_TOOL: [calculator] with input: [2+2]``)
    is accepted too: brackets around the tool name are removed, and then
    also a matching pair around the input.

    Args:
        response: The model's full reply text.
        reasoning_steps: Log of steps; one entry is appended per executed
            or failed request (mutated in place).

    Returns:
        The reply with each request line replaced by
        ``Tool Result (<tool>): <output>`` or ``Tool Error: <reason>``.
        All other lines are kept unchanged.
    """
    marker = "USE_TOOL:"
    separator = "with input:"
    processed_lines = []

    for line in response.split('\n'):
        if not line.strip().startswith(marker):
            processed_lines.append(line)
            continue

        try:
            request = line.split(marker, 1)[1].strip()
            tool_name, found, tool_input = request.partition(separator)
            if not found:
                # Explicit reason instead of an opaque IndexError message.
                raise ValueError(
                    "malformed tool request, expected "
                    "'USE_TOOL: <tool> with input: <input>'"
                )

            tool_name = tool_name.strip()
            tool_input = tool_input.strip()

            # Undo placeholder brackets copied from the prompt template.
            if len(tool_name) >= 2 and tool_name[0] == "[" and tool_name[-1] == "]":
                tool_name = tool_name[1:-1].strip()
                if len(tool_input) >= 2 and tool_input[0] == "[" and tool_input[-1] == "]":
                    tool_input = tool_input[1:-1].strip()

            reasoning_steps.append(f"🔧 Executing {tool_name} with input: {tool_input}")

            # Execute the tool
            tool_result = self._use_tool(tool_name, tool_input)

            # Replace the tool request with the result
            processed_lines.append(f"Tool Result ({tool_name}): {tool_result}")
            reasoning_steps.append(f"✅ {tool_name} executed successfully")

        except Exception as e:
            processed_lines.append(f"Tool Error: {str(e)}")
            reasoning_steps.append(f"❌ Tool execution failed: {str(e)}")

    return '\n'.join(processed_lines)
714
+
715
def _calculate_confidence(self, response: str, tool_results: Dict) -> float:
    """Score a response heuristically on a 0.0-1.0 scale.

    Starts at 0.5 and applies fixed adjustments, in order: +0.1 for more
    than 200 characters, +0.2 when any tool result exists, +0.1 for
    structure markers, -0.1 for hedging words, +0.1 when a digit appears.
    The result is clamped to [0.0, 1.0].
    """
    lowered = response.lower()
    structure_markers = ('Step', 'Analysis:', 'Result:', 'Conclusion:')
    hedging_words = ('uncertain', 'unclear', 'might', 'possibly', 'approximately', 'estimate')

    # (condition, adjustment) pairs, applied in this exact order.
    adjustments = (
        (len(response) > 200, 0.1),
        (bool(tool_results), 0.2),
        (any(marker in response for marker in structure_markers), 0.1),
        (any(word in lowered for word in hedging_words), -0.1),
        (any(ch.isdigit() for ch in response), 0.1),
    )

    score = 0.5  # Base confidence
    for applies, delta in adjustments:
        if applies:
            score += delta

    return max(0.0, min(1.0, score))
741
+
742
def get_available_tools(self) -> List[str]:
    """Return the names of all registered tools, in registration order."""
    return [tool_name for tool_name in self.tools.keys()]
745
+
746
def test_tools(self) -> Dict[str, str]:
    """Smoke-test every registered tool with a fixed sample input.

    The file analyzer is exercised against a temporary text file, which is
    removed even when the tool raises.

    Returns:
        A dict mapping each tool name to ``"✅ Working: <first 100 chars>..."``
        or ``"❌ Error: <message>"``.
    """
    test_results = {}

    for tool_name, tool in self.tools.items():
        try:
            if tool_name == "calculator":
                result = tool.execute("sqrt(16)")
            elif tool_name == "web_search":
                result = tool.execute("test search query")
            elif tool_name == "file_analyzer":
                # Create a temporary test file
                with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
                    f.write("Test file content")
                    temp_path = f.name

                try:
                    result = tool.execute(temp_path)
                finally:
                    # Clean up even if the tool raised, so no file leaks.
                    os.unlink(temp_path)
            else:
                result = "Tool available"

            test_results[tool_name] = f"✅ Working: {result[:100]}..."

        except Exception as e:
            test_results[tool_name] = f"❌ Error: {str(e)}"

    return test_results
773
+
774
def get_conversation_history(self, limit: int = 5) -> List[Dict]:
    """Return the most recent ``limit`` results, oldest first.

    Args:
        limit: Maximum number of entries to return. Zero or a negative
            value yields an empty list; a plain ``[-limit:]`` slice would
            return the whole history for ``limit=0``.

    Returns:
        A new list holding at most ``limit`` entries from the end of
        ``self.conversation_history``.
    """
    if limit <= 0:
        return []
    return self.conversation_history[-limit:]
777
+
778
# Example usage and testing
if __name__ == "__main__":
    import sys

    # Without an API key nothing below can work, so stop right away.
    if not os.getenv("GOOGLE_API_KEY"):
        print("⚠️ Please set your GOOGLE_API_KEY environment variable")
        print("You can get one from: https://makersuite.google.com/app/apikey")
        sys.exit(1)

    # Initialize agent
    print("🚀 Initializing Gemini GAIA Agent...")
    agent = GeminiGAIAAgent(verbose=True)

    # Test tools
    print("\n🔧 Testing tools...")
    tool_results = agent.test_tools()
    for tool, result in tool_results.items():
        print(f" {tool}: {result}")

    # Sample questions as (id, text, level) triples.
    sample_specs = (
        ("test_001", "What is the square root of 144?", 1),
        ("test_002", "If I invest $1000 at 5% annual compound interest, how much will I have after 3 years?", 2),
        ("test_003", "What is the current population of Tokyo according to the latest data?", 2),
    )
    sample_questions = [
        GAIAQuestion(question_id=qid, question=text, level=lvl)
        for qid, text, lvl in sample_specs
    ]

    print("\n📝 Testing sample questions...")
    for question in sample_questions:
        print(f"\n{'='*60}")
        result = agent.solve_gaia_question(question)

        print(f"Question: {result['question']}")
        print(f"Level: {result['level']}")
        print(f"Tools Used: {result.get('tools_used', [])}")
        print(f"Confidence: {result.get('confidence_score', 0):.2f}")
        print(f"Answer: {result['agent_response'][:300]}...")

        if result.get('error'):
            print(f"❌ Error occurred: {result.get('agent_response')}")
app.py ADDED
@@ -0,0 +1,603 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import tempfile
4
+ import logging
5
+ from typing import Dict, List, Any, Optional, Tuple
6
+ from datetime import datetime
7
+ import asyncio
8
+
9
+ import gradio as gr
10
+ import pandas as pd
11
+ from agent import GeminiGAIAAgent, GAIAQuestion
12
+
13
+ # Configure logging
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
+
17
class GeminiGAIAApp:
    """Gradio front end for the Gemini-powered GAIA benchmark agent.

    Hugging Face Agents Course - Unit 4 Final Assignment.

    The object owns one lazily created ``GeminiGAIAAgent``, a bounded log of
    solved questions, and the static metadata rendered in the UI.

    NOTE(review): the event handlers are bound methods of a single instance,
    so the agent, the API key it was built with and the history are shared by
    every browser session. Per-user isolation would need ``gr.State``.
    """

    # Single source of truth for the model, so the UI text cannot drift away
    # from what is actually instantiated.
    MODEL_NAME = "gemini-1.5-pro"
    MODEL_DISPLAY_NAME = "Google Gemini 1.5 Pro"

    # Number of entries rendered by get_conversation_history().
    HISTORY_DISPLAY_LIMIT = 5
    # Upper bound on stored entries so a long-running app does not grow forever.
    MAX_HISTORY_ENTRIES = 100

    # Example prompts keyed by difficulty level, then by example type.
    EXAMPLE_QUESTIONS = {
        1: {
            "math": "What is the square root of 144?",
            "factual": "What is the capital of Japan?",
            "conversion": "Convert 100 degrees Fahrenheit to Celsius",
        },
        2: {
            "financial": "If I invest $1000 at 5% annual compound interest, how much will I have after 3 years?",
            "current": "What is the current population of Tokyo according to the latest data?",
            "analysis": "Calculate the average temperature if the daily temperatures were 72°F, 75°F, 68°F, and 71°F",
        },
        3: {
            "complex": "Based on current economic indicators, what are the main recession risks for 2024?",
            "research": "Compare the GDP growth rates of the top 5 economies in 2023 and identify key trends",
            "multimodal": "Analyze any uploaded data file and provide insights about patterns and trends",
        },
    }
    DEFAULT_EXAMPLE_QUESTION = "What is 2 + 2?"

    def __init__(self):
        self.agent = None
        # Key the current agent was built with; lets us skip needless rebuilds.
        self._agent_api_key = None
        self.conversation_history = []
        self.current_question_id = 0

        # Agent metadata
        self.agent_info = {
            "name": "Gemini GAIA Benchmark Agent",
            "author": "AdilzhanB",
            "course": "Hugging Face Agents Course - Unit 4",
            "model": self.MODEL_DISPLAY_NAME,
            "version": "1.0",
            "created": "2025-06-17 15:32:22",
            "capabilities": [
                "Complex multi-step reasoning",
                "Advanced mathematical calculations",
                "Real-time web search",
                "Multimodal file analysis",
                "Natural language understanding",
            ],
        }

        # Huggingface repository link
        self.agent_code_link = "https://huggingface.co/spaces/AdilzhanB/Gemini-GAIA-Agent"

    def _initialize_agent(self, api_key: Optional[str] = None):
        """Create the Gemini GAIA agent and return a status message.

        Args:
            api_key: Google API key forwarded to the agent, or ``None``.

        Returns:
            A string starting with "✅" on success or "❌" on failure. On
            failure ``self.agent`` is reset to ``None``.
        """
        try:
            self.agent = GeminiGAIAAgent(
                model_name=self.MODEL_NAME,
                api_key=api_key,
                temperature=0.1,
                verbose=False,
            )
        except Exception as e:
            error_msg = f"Failed to initialize agent: {str(e)}"
            logger.exception(error_msg)
            self.agent = None
            self._agent_api_key = None
            return f"❌ {error_msg}"

        self._agent_api_key = api_key
        logger.info("Gemini agent initialized successfully")
        return "✅ Agent initialized successfully!"

    def _ensure_agent(self, api_key: Optional[str] = None) -> Optional[str]:
        """Make sure an agent exists, rebuilding it only when the key changed.

        Returns:
            ``None`` when an agent is ready, otherwise the "❌ ..." message
            describing why it is not.
        """
        key = (api_key.strip() if api_key else None) or None
        key_changed = key is not None and key != self._agent_api_key
        if self.agent is None or key_changed:
            init_status = self._initialize_agent(key)
            if "❌" in init_status:
                return init_status

        if not self.agent:
            return "❌ Agent not initialized. Please provide a valid Google API key."
        return None

    @staticmethod
    def _resolve_upload(uploaded_file) -> Tuple[Optional[str], Optional[str]]:
        """Return ``(path, basename)`` for an uploaded file, or ``(None, None)``.

        Accepts either a plain path or an object exposing the path as
        ``.name``, so the handler does not depend on one upload type.
        """
        if uploaded_file is None:
            return None, None
        file_path = os.fspath(getattr(uploaded_file, "name", uploaded_file))
        return file_path, os.path.basename(file_path)

    @staticmethod
    def _preview(text, limit: int) -> str:
        """Return ``text`` cut to ``limit`` characters, with "..." only if cut."""
        text = str(text)
        return text if len(text) <= limit else f"{text[:limit]}..."

    def solve_question(self,
                       question_text: str,
                       difficulty_level: int,
                       uploaded_file,
                       api_key: Optional[str] = None) -> Tuple[str, str, str, str, str, str]:
        """
        Main function to solve GAIA questions

        Args:
            question_text: The question typed by the user.
            difficulty_level: GAIA difficulty level passed on to the agent.
            uploaded_file: Optional upload from the file widget, or ``None``.
            api_key: Optional Google API key; a new key rebuilds the agent.

        Returns: (reasoning, tools_used, confidence, processing_time, final_answer, status)
            On any failure the first five items are empty strings and
            ``status`` carries a "❌ ..." message.
        """
        try:
            # Initialize agent if needed or API key changed
            init_error = self._ensure_agent(api_key)
            if init_error:
                return "", "", "", "", "", init_error

            if not question_text or not question_text.strip():
                return "", "", "", "", "", "❌ Please enter a question."

            # Handle file upload
            file_path, file_name = self._resolve_upload(uploaded_file)

            # Create GAIA question
            self.current_question_id += 1
            gaia_question = GAIAQuestion(
                question_id=f"user_question_{self.current_question_id}",
                question=question_text,
                level=difficulty_level,
                file_path=file_path,
                file_name=file_name,
            )

            # Solve the question
            logger.info(f"Solving question: {question_text[:50]}...")
            result = self.agent.solve_gaia_question(gaia_question)

            # Store in conversation history, dropping the oldest entries
            # beyond the cap (a no-op while the list is short enough).
            self.conversation_history.append({
                "timestamp": datetime.now().isoformat(),
                "question": question_text,
                "result": result,
            })
            del self.conversation_history[:-self.MAX_HISTORY_ENTRIES]

            # Extract results
            if result.get("error"):
                return "", "", "", "", "", f"❌ Error: {result.get('agent_response', 'Unknown error')}"

            # Format reasoning steps
            reasoning_steps = "\n".join(
                f"{i}. {step}"
                for i, step in enumerate(result.get("reasoning_steps", []), 1)
            )
            if not reasoning_steps:
                reasoning_steps = "Gemini processed the question using its internal reasoning."

            # Format tools used
            tools_used = ", ".join(result.get("tools_used", ["None"]))
            if not tools_used or tools_used == "None":
                tools_used = "Gemini's built-in capabilities"

            # Get other metrics
            confidence = f"{result.get('confidence_score', 0.0):.2f}"
            processing_time = f"{result.get('processing_time_seconds', 0):.2f}s"
            final_answer = result.get("agent_response", "No answer generated")

            # Success status
            status = f"✅ Question solved successfully! (Model: {result.get('model_used', 'Gemini')})"

            logger.info(f"Question solved successfully. Tools: {tools_used}, Confidence: {confidence}")

            return (
                reasoning_steps,
                tools_used,
                confidence,
                processing_time,
                final_answer,
                status,
            )

        except Exception as e:
            error_msg = f"❌ Error solving question: {str(e)}"
            # logger.exception keeps the traceback that logger.error dropped.
            logger.exception(error_msg)
            return "", "", "", "", "", error_msg

    def get_conversation_history(self) -> str:
        """Get formatted conversation history

        Returns:
            Markdown describing the most recent ``HISTORY_DISPLAY_LIMIT``
            entries, or a placeholder sentence when nothing was solved yet.
        """
        if not self.conversation_history:
            return "No questions solved yet. Try asking a GAIA-style question!"

        sections = ["## 📚 Recent Conversation History\n\n"]

        recent = self.conversation_history[-self.HISTORY_DISPLAY_LIMIT:]
        for i, entry in enumerate(recent, 1):
            result = entry['result']
            tools = ', '.join(result.get('tools_used') or ['None'])
            answer = result.get('agent_response', 'No answer')

            # Bullet items: single newlines alone do not break lines in
            # Markdown, so plain "key: value" lines would run together.
            sections.append(
                f"### Question {i}\n"
                f"- **Asked:** {self._preview(entry['question'], 150)}\n"
                f"- **Level:** {result.get('level', 'N/A')}\n"
                f"- **Tools Used:** {tools}\n"
                f"- **Confidence:** {result.get('confidence_score', 0):.2f}\n"
                f"- **Answer Preview:** {self._preview(answer, 200)}\n"
                f"- **Time:** {entry['timestamp'][:19]}\n\n"
                "---\n\n"
            )

        return "".join(sections)

    def clear_history(self) -> str:
        """Clear conversation history and reset the question counter."""
        self.conversation_history = []
        self.current_question_id = 0
        return "🗑️ History cleared successfully!"

    def test_agent_capabilities(self, api_key: Optional[str] = None) -> str:
        """Test agent and tool capabilities

        Args:
            api_key: Optional Google API key; a new key rebuilds the agent.

        Returns:
            A Markdown report of the agent's ``test_tools()`` results, or a
            "❌ ..." message when the agent is unavailable or the test fails.
        """
        try:
            # Initialize agent if needed
            init_error = self._ensure_agent(api_key)
            if init_error:
                return init_error

            # Test tools
            tool_results = self.agent.test_tools()

            status = '✅ Initialized' if self.agent.model else '❌ Not initialized'
            parts = [
                "## 🔧 Agent Capability Test Results\n\n",
                f"- **Model:** {self.agent.model_name}\n",
                f"- **Status:** {status}\n\n",
                "### Tool Test Results\n",
            ]

            for tool_name, result in tool_results.items():
                status_icon = "✅" if "✅" in result else "❌"
                parts.append(f"- {status_icon} **{tool_name.title()}**: {result}\n")

            parts.append("\n### Available Capabilities\n")
            parts.extend(
                f"- ✅ {capability}\n" for capability in self.agent_info["capabilities"]
            )

            return "".join(parts)

        except Exception as e:
            logger.exception("Agent capability test failed")
            return f"❌ Error testing agent: {str(e)}"

    def get_example_question(self, level: int, example_type: str) -> Tuple[str, int]:
        """Get example questions based on level and type

        Returns:
            ``(question, level)``; unknown combinations fall back to
            ``DEFAULT_EXAMPLE_QUESTION``.
        """
        by_type = self.EXAMPLE_QUESTIONS.get(level, {})
        question = by_type.get(example_type, self.DEFAULT_EXAMPLE_QUESTION)
        return question, level

    def _example_loader(self, level: int, example_type: str):
        """Return a zero-argument click handler that loads one example."""
        def load_example():
            return self.get_example_question(level, example_type)
        return load_example

    def create_interface(self):
        """Create the comprehensive Gradio interface

        Returns:
            The ``gr.Blocks`` object with every component and event wired up;
            the caller is responsible for launching it.
        """

        # Custom CSS for professional styling
        custom_css = """
        .gradio-container {
            max-width: 1400px !important;
            margin: 0 auto;
        }
        .main-header {
            text-align: center;
            background: linear-gradient(90deg, #4285f4, #34a853, #fbbc05, #ea4335);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
            margin-bottom: 20px;
        }
        .info-box {
            background-color: #f8f9fa;
            border-left: 4px solid #4285f4;
            padding: 15px;
            margin: 10px 0;
            border-radius: 5px;
        }
        """

        with gr.Blocks(css=custom_css, title="Gemini GAIA Agent", theme=gr.themes.Soft()) as interface:

            # Main Header
            gr.HTML("""
            <div class="main-header">
                <h1>🚀 Gemini GAIA Benchmark Agent</h1>
            </div>
            """)

            # Agent Information
            with gr.Row():
                gr.Markdown(f"""
                <div class="info-box">
                <h3>🤖 Agent Information</h3>
                <ul>
                <li><strong>Created by:</strong> {self.agent_info['author']}</li>
                <li><strong>Course:</strong> {self.agent_info['course']}</li>
                <li><strong>Model:</strong> {self.agent_info['model']}</li>
                <li><strong>Version:</strong> {self.agent_info['version']}</li>
                <li><strong>Date:</strong> {self.agent_info['created']}</li>
                </ul>
                </div>
                """)

            # API Key Configuration
            with gr.Row():
                with gr.Column():
                    api_key_input = gr.Textbox(
                        label="🔑 Google API Key (Required)",
                        placeholder="Enter your Google AI API key here...",
                        type="password",
                        info="Get your free API key from: https://makersuite.google.com/app/apikey",
                    )
                    test_agent_btn = gr.Button("🧪 Test Agent & Tools", variant="secondary")

            # Main Question Interface
            gr.Markdown("## 💭 Ask Your GAIA Question")

            with gr.Row():
                # Left Panel - Input
                with gr.Column(scale=2):
                    question_input = gr.Textbox(
                        label="📝 Your Question",
                        placeholder="Enter your GAIA-style question here...\n\nExamples:\n- What is the compound interest on $1000 at 5% for 3 years?\n- What is the current population of Tokyo?\n- Analyze the uploaded CSV data and find patterns",
                        lines=4,
                        max_lines=8,
                    )

                    with gr.Row():
                        difficulty_slider = gr.Slider(
                            label="🎯 Difficulty Level",
                            minimum=1,
                            maximum=3,
                            value=2,
                            step=1,
                            info="1=Basic | 2=Intermediate | 3=Advanced",
                        )

                        # gr.File takes no ``info`` keyword, so the hint
                        # lives in the label instead.
                        file_upload = gr.File(
                            label="📎 Upload File (Optional: Text, Data, Images)",
                            file_types=[".txt", ".csv", ".json", ".xlsx", ".png", ".jpg", ".jpeg", ".gif", ".pdf"],
                        )

                    solve_button = gr.Button(
                        "🚀 Solve with Gemini",
                        variant="primary",
                        size="lg",
                        scale=2,
                    )

                # Right Panel - Quick Examples
                with gr.Column(scale=1):
                    gr.Markdown("### 📚 Quick Examples")

                    # Level 1 Examples
                    gr.Markdown("**Level 1 (Basic)**")
                    with gr.Row():
                        math_btn = gr.Button("🧮 Math", size="sm")
                        factual_btn = gr.Button("🌍 Factual", size="sm")
                        convert_btn = gr.Button("🔄 Convert", size="sm")

                    # Level 2 Examples
                    gr.Markdown("**Level 2 (Intermediate)**")
                    with gr.Row():
                        finance_btn = gr.Button("💰 Finance", size="sm")
                        current_btn = gr.Button("📊 Current", size="sm")
                        analysis_btn = gr.Button("📈 Analysis", size="sm")

                    # Level 3 Examples
                    gr.Markdown("**Level 3 (Advanced)**")
                    with gr.Row():
                        complex_btn = gr.Button("🧠 Complex", size="sm")
                        research_btn = gr.Button("🔬 Research", size="sm")
                        multimodal_btn = gr.Button("🖼️ Multimodal", size="sm")

            # Output Section
            gr.Markdown("## 🎯 Agent Response")

            with gr.Row():
                # Main Answer
                with gr.Column(scale=2):
                    final_answer_output = gr.Textbox(
                        label="🤖 Gemini's Answer",
                        lines=8,
                        max_lines=15,
                        show_copy_button=True,
                        info="Complete response with reasoning and solution",
                    )

                # Metrics
                with gr.Column(scale=1):
                    confidence_output = gr.Textbox(
                        label="📊 Confidence Score",
                        max_lines=1,
                        info="Agent's confidence in the answer",
                    )

                    processing_time_output = gr.Textbox(
                        label="⏱️ Processing Time",
                        max_lines=1,
                        info="Time taken to solve",
                    )

                    tools_used_output = gr.Textbox(
                        label="🔧 Tools Used",
                        max_lines=3,
                        info="Which capabilities were utilized",
                    )

                    status_output = gr.Textbox(
                        label="✅ Status",
                        max_lines=2,
                        info="Execution status and model info",
                    )

            # Detailed Reasoning (Expandable)
            with gr.Accordion("🔍 Detailed Reasoning Steps", open=False):
                reasoning_output = gr.Textbox(
                    label="Step-by-Step Reasoning",
                    lines=10,
                    show_copy_button=True,
                    info="Detailed breakdown of the solution process",
                )

            # Additional Features Tabs
            with gr.Tabs():
                # Tool Testing Tab
                with gr.TabItem("🛠️ Agent Capabilities"):
                    tool_test_output = gr.Markdown(
                        "Click 'Test Agent & Tools' above to check all capabilities.",
                        elem_classes=["info-box"],
                    )

                    gr.Markdown("""
                    ### 🎯 GAIA Benchmark Capabilities

                    This agent is designed to excel at:

                    - **🧠 Complex Reasoning**: Multi-step logical problem solving
                    - **🧮 Mathematical Operations**: Advanced calculations and financial modeling
                    - **🔍 Web Search**: Real-time information retrieval using DuckDuckGo
                    - **📄 File Analysis**: Processing text, CSV, JSON, and image files
                    - **🖼️ Multimodal Understanding**: Analyzing images with Gemini's vision capabilities
                    - **📊 Data Processing**: Statistical analysis and pattern recognition
                    """)

                # History Tab
                with gr.TabItem("📚 Conversation History"):
                    with gr.Row():
                        refresh_history_btn = gr.Button("🔄 Refresh History", variant="secondary")
                        clear_history_btn = gr.Button("🗑️ Clear History", variant="stop")

                    history_output = gr.Markdown(
                        "No questions solved yet. Start by asking a GAIA question!",
                        elem_classes=["info-box"],
                    )

                # Documentation Tab
                with gr.TabItem("📖 About GAIA"):
                    gr.Markdown(f"""
                    ### 🎯 What is GAIA?

                    **GAIA (General AI Assistants)** is a comprehensive benchmark designed to evaluate AI assistants on real-world tasks that require:

                    #### 🧠 Core Capabilities Tested
                    - **Reasoning**: Complex multi-step problem solving and logical inference
                    - **Multimodal Understanding**: Processing text, images, documents, and data files
                    - **Web Browsing**: Searching for and utilizing current information
                    - **Tool Use**: Effective integration and use of various computational tools

                    #### 📊 Difficulty Levels
                    - **Level 1**: Basic factual questions and simple reasoning tasks
                    - **Level 2**: Multi-step problems requiring tool integration
                    - **Level 3**: Complex tasks requiring advanced reasoning and multiple tools

                    #### 🚀 This Agent's Approach
                    This implementation uses **{self.MODEL_DISPLAY_NAME}** for its:
                    - Superior multimodal capabilities (text + images)
                    - Advanced reasoning and problem-solving
                    - Large context window for complex tasks
                    - Built-in safety and reliability features

                    #### 🔗 Technical Details
                    - **Model**: {self.MODEL_DISPLAY_NAME}
                    - **Framework**: Custom Python implementation
                    - **Tools**: Calculator, Web Search, File Analyzer
                    - **Interface**: Gradio 4.0+
                    - **Author**: {self.agent_info['author']}

                    #### 📚 Resources
                    - [GAIA Benchmark Paper](https://arxiv.org/abs/2311.12983)
                    - [GAIA Dataset](https://huggingface.co/datasets/gaia-benchmark/GAIA)
                    - [Google AI Studio](https://makersuite.google.com/)
                    - [Course Repository]({self.agent_code_link})
                    """)

            # Wire up all the interactions

            # Main solve function
            solve_button.click(
                self.solve_question,
                inputs=[question_input, difficulty_slider, file_upload, api_key_input],
                outputs=[reasoning_output, tools_used_output, confidence_output,
                         processing_time_output, final_answer_output, status_output],
            )

            # Tool testing
            test_agent_btn.click(
                self.test_agent_capabilities,
                inputs=[api_key_input],
                outputs=[tool_test_output],
            )

            # History management
            refresh_history_btn.click(
                self.get_conversation_history,
                outputs=[history_output],
            )

            clear_history_btn.click(
                self.clear_history,
                outputs=[history_output],
            )

            # Example buttons: each one fills the question box and the slider.
            example_buttons = (
                (math_btn, 1, "math"),
                (factual_btn, 1, "factual"),
                (convert_btn, 1, "conversion"),
                (finance_btn, 2, "financial"),
                (current_btn, 2, "current"),
                (analysis_btn, 2, "analysis"),
                (complex_btn, 3, "complex"),
                (research_btn, 3, "research"),
                (multimodal_btn, 3, "multimodal"),
            )
            for button, level, example_type in example_buttons:
                button.click(
                    self._example_loader(level, example_type),
                    outputs=[question_input, difficulty_slider],
                )

            # Footer. The timestamp is the static creation date, so it is
            # labelled as such rather than as the current time.
            gr.HTML(f"""
            <div style="text-align: center; margin-top: 40px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
                <h3>🎓 Hugging Face Agents Course - Unit 4 Final Assignment</h3>
                <p><strong>Gemini GAIA Benchmark Agent</strong> | Created with ❤️ by {self.agent_info['author']}</p>
                <p>🔗 <a href="{self.agent_code_link}" target="_blank">View Source Code</a> |
                📚 <a href="https://huggingface.co/learn/agents-course" target="_blank">Course Materials</a> |
                🤖 <a href="https://makersuite.google.com/" target="_blank">Google AI Studio</a></p>
                <p><em>Powered by {self.MODEL_DISPLAY_NAME} • Built with Gradio • Created (UTC): {self.agent_info['created']}</em></p>
            </div>
            """)

        return interface
566
+
567
def main():
    """Main function to launch the Gemini GAIA application

    Configures logging, builds the Gradio interface and serves it.

    Raises:
        SystemExit: with status 1 when the interface fails to launch, so the
            failure is visible to the calling shell or process supervisor.
    """

    # Configure logging. The module already calls basicConfig() at import
    # time, which makes a second plain call a no-op; force=True replaces the
    # existing root handler so this format actually takes effect.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        force=True,
    )

    logger.info("🚀 Starting Gemini GAIA Benchmark Agent Application...")

    # Create the application
    app = GeminiGAIAApp()
    interface = app.create_interface()

    # Launch configuration for Hugging Face Spaces
    launch_kwargs = {
        "share": True,  # Create public shareable link
        "server_name": "0.0.0.0",  # Allow external connections
        "server_port": 7860,  # Default Gradio port
        "show_error": True,  # Show errors in UI
        "quiet": False,  # Show startup logs
        "favicon_path": None,  # Custom favicon
        "auth": None,  # No authentication required
    }

    logger.info("🌐 Launching Gradio interface...")
    logger.info(f"🔗 The app will be available at http://localhost:{launch_kwargs['server_port']}")

    try:
        interface.launch(**launch_kwargs)
    except Exception as e:
        # Keep the traceback and exit non-zero instead of returning as if
        # the launch had succeeded.
        logger.exception(f"❌ Failed to launch application: {str(e)}")
        print("Please check your environment setup and try again.")
        raise SystemExit(1) from e


if __name__ == "__main__":
    main()
config.yaml ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Core dependencies for Gemini GAIA Agent
gradio>=4.36.0
google-generativeai>=0.7.0

# Data processing and analysis
pandas>=2.0.0
numpy>=1.24.0
Pillow>=10.0.0

# Web search capabilities
duckduckgo-search>=5.0.0
requests>=2.31.0

# Utilities and environment
python-dotenv>=1.0.0
# NOTE: asyncio and pathlib are part of the Python 3 standard library.
# The PyPI packages with those names are obsolete backports that can shadow
# the stdlib modules, so they must not be listed here.

# Optional: Enhanced functionality
beautifulsoup4>=4.12.0
lxml>=4.9.0
openpyxl>=3.1.0
markdown>=3.5.0

# Development and testing (optional)
pytest>=7.4.0
black>=23.0.0
flake8>=6.0.0