Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| GAIA Benchmark AI Agent - With HF Token Input Interface | |
| ===================================================== | |
| Enhanced version with user token input for GAIA dataset access | |
| """ | |
| import gradio as gr | |
| import torch | |
| import json | |
| import os | |
| import logging | |
| import time | |
| import re | |
| from datetime import datetime | |
| from typing import Dict, List, Optional, Tuple, Any | |
| from dataclasses import dataclass | |
| import pandas as pd | |
| from pathlib import Path | |
| # Core ML libraries | |
| from transformers import ( | |
| AutoTokenizer, | |
| AutoModelForCausalLM, | |
| BitsAndBytesConfig, | |
| pipeline | |
| ) | |
| from datasets import load_dataset | |
| from huggingface_hub import HfApi, hf_hub_download, list_repo_files | |
# Setup logging: configure the root logger at INFO level and create the
# module-scoped logger used throughout this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
| # ================================ | |
| # ENHANCED AUTHENTICATION SETUP | |
| # ================================ | |
class HFTokenManager:
    """Manages HuggingFace token for GAIA dataset access.

    Attributes:
        current_token: Last token accepted by ``set_token``, or ``None``.
        token_status: Human-readable outcome of the last validation attempt.
        gaia_access_status: Human-readable outcome of the last GAIA probe.
    """

    def __init__(self):
        self.current_token = None
        self.token_status = "No token set"
        self.gaia_access_status = "Not tested"

    def _reject(self, status: str) -> Tuple[str, str]:
        """Clear the stored token, record ``status`` and return both statuses."""
        self.current_token = None
        self.token_status = status
        self.gaia_access_status = "Not tested"
        return self.token_status, self.gaia_access_status

    def _probe_gaia_access(self, api, token: str) -> str:
        """Query GAIA dataset metadata through ``api`` and return a status line.

        Never raises: any failure is folded into the returned message.
        """
        try:
            dataset_info = api.dataset_info("gaia-benchmark/GAIA", token=token)
            # The info object returned by the hub client is not guaranteed to
            # expose ``splits``; read it defensively so a missing attribute is
            # not misreported as an access failure.
            splits = getattr(dataset_info, "splits", None)
            available_splits = list(splits.keys()) if splits else []
            return f"✅ GAIA access confirmed (splits: {', '.join(available_splits)})"
        except Exception as e:
            if "401" in str(e) or "403" in str(e):
                return "❌ GAIA access denied - request access at: https://huggingface.co/datasets/gaia-benchmark/GAIA"
            return f"⚠️ GAIA access test failed: {str(e)}"

    def set_token(self, token: str) -> Tuple[str, str]:
        """Set and validate HF token.

        Args:
            token: Raw token text; surrounding whitespace is ignored.

        Returns:
            ``(token_status, gaia_access_status)`` after the attempt.
        """
        if not token or not token.strip():
            return self._reject("❌ No token provided")
        token = token.strip()
        # Basic token format validation
        if not token.startswith('hf_'):
            return self._reject("❌ Invalid token format (should start with 'hf_')")
        try:
            # Test token validity
            api = HfApi(token=token)
            user_info = api.whoami()
            self.current_token = token
            self.token_status = f"✅ Valid token for user: {user_info['name']}"
            # Test GAIA dataset access
            self.gaia_access_status = self._probe_gaia_access(api, token)
            return self.token_status, self.gaia_access_status
        except Exception as e:
            if "401" in str(e):
                return self._reject("❌ Invalid token - check your token is correct")
            return self._reject(f"❌ Token validation failed: {str(e)}")

    def get_token(self) -> Optional[str]:
        """Get current valid token"""
        return self.current_token

    def test_gaia_access(self) -> Tuple[bool, str]:
        """Test GAIA dataset access with current token.

        Returns:
            ``(ok, message)``; ``ok`` is False when no token is stored, the
            load fails, or the loaded split is empty.
        """
        if not self.current_token:
            return False, "No valid token set"
        try:
            # Loads the validation split with the stored token
            dataset = load_dataset(
                "gaia-benchmark/GAIA",
                split="validation",
                token=self.current_token,
                trust_remote_code=True
            )
            if len(dataset) > 0:
                return True, f"✅ GAIA dataset accessible ({len(dataset)} validation questions)"
            else:
                return False, "Dataset appears empty"
        except Exception as e:
            return False, f"Access failed: {str(e)}"
# Global token manager: single shared instance read by the dataset loader
# and the Gradio interface functions in this file.
token_manager = HFTokenManager()
| # Legacy HF_TOKEN setup with fallback | |
def setup_hf_authentication():
    """Setup HuggingFace authentication with environment fallback.

    Sources are tried in order: the ``HF_TOKEN`` environment variable, the
    HuggingFace CLI token store, then ``~/.cache/huggingface/token``. The
    first non-empty token is handed to ``token_manager.set_token``.

    Returns:
        The discovered token string, or ``None`` when no source yields one.
        The token is returned even if ``token_manager`` rejected it.
    """
    env_token = os.environ.get('HF_TOKEN')
    if env_token:
        token_manager.set_token(env_token)
        logger.info("✅ Found HF_TOKEN in environment")
        return env_token

    # Try HuggingFace CLI token
    try:
        from huggingface_hub import HfFolder
        cli_token = HfFolder.get_token()
    except Exception as exc:
        # Best-effort lookup: record why it failed instead of hiding it.
        logger.debug("HuggingFace CLI token lookup failed: %s", exc)
        cli_token = None
    if cli_token:
        token_manager.set_token(cli_token)
        logger.info("✅ Found token from HuggingFace CLI")
        return cli_token

    # Try manual token file
    token_path = os.path.expanduser("~/.cache/huggingface/token")
    if os.path.exists(token_path):
        try:
            with open(token_path, 'r', encoding='utf-8') as f:
                file_token = f.read().strip()
        except (OSError, UnicodeError) as exc:
            logger.debug("Could not read token file %s: %s", token_path, exc)
            file_token = ""
        if file_token:
            token_manager.set_token(file_token)
            logger.info("✅ Found token in cache file")
            return file_token

    logger.warning("⚠️ No HuggingFace token found - use interface to set token")
    return None
# Initialize with environment token if available.
# Runs at import time; the value is None when no token source was found.
INITIAL_TOKEN = setup_hf_authentication()
| # ================================ | |
| # CORE DATA STRUCTURES (unchanged) | |
| # ================================ | |
@dataclass
class GAIAQuestion:
    """Structure for GAIA benchmark questions.

    Declared as a dataclass so instances can be built with keyword
    arguments, which is how the rest of this file constructs them.
    """
    task_id: str
    question: str
    level: int
    final_answer: Optional[str] = None
    file_name: Optional[str] = None
    annotator_metadata: Optional[Dict] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build a question from ``data``, ignoring keys that are not fields."""
        return cls(**{k: v for k, v in data.items() if k in cls.__annotations__})
@dataclass
class GAIAResponse:
    """Structure for GAIA responses.

    Declared as a dataclass so instances can be built with keyword
    arguments, which is how the evaluation code constructs them.
    """
    task_id: str
    model_answer: str
    reasoning_trace: str
    final_answer: str
    processing_time: float = 0.0
    confidence_score: float = 0.0
| # ================================ | |
| # GAIA PROMPT MANAGEMENT (unchanged) | |
| # ================================ | |
class GAIAPromptManager:
    """Manages GAIA-specific prompting and formatting.

    Both helpers are static so they work whether called on the class or on
    an instance (the agent in this file calls them on an instance).
    """

    GAIA_SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER]
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""

    @staticmethod
    def create_gaia_prompt(question: str) -> str:
        """Create properly formatted GAIA prompt"""
        return f"{GAIAPromptManager.GAIA_SYSTEM_PROMPT}\n\nQuestion: {question}\n\nLet me think step by step:"

    @staticmethod
    def extract_final_answer(response: str) -> Tuple[str, str]:
        """Extract final answer and reasoning from model response.

        Returns:
            ``(final_answer, reasoning)``. When a ``FINAL ANSWER:`` marker is
            found, the answer is the text after the first marker up to the
            end of that line and the reasoning is everything before the
            marker. Otherwise the last line is taken as the answer and the
            preceding lines (or the whole response, if single-line) as the
            reasoning.
        """
        final_answer_pattern = r"FINAL ANSWER:\s*(.+?)(?:\n|$)"
        match = re.search(final_answer_pattern, response, re.IGNORECASE | re.DOTALL)
        if match:
            final_answer = match.group(1).strip()
            reasoning = response[:match.start()].strip()
        else:
            lines = response.strip().split('\n')
            final_answer = lines[-1].strip() if lines else ""
            reasoning = '\n'.join(lines[:-1]) if len(lines) > 1 else response
        return final_answer, reasoning
| # ================================ | |
| # MODEL MANAGER (unchanged) | |
| # ================================ | |
class HFSpaceModelManager:
    """Hugging Face Spaces optimized model manager.

    Wraps tokenizer/model loading and text generation for one of the
    presets in ``SPACE_MODELS``.
    """

    SPACE_MODELS = {
        "Fast & Light": {
            "name": "microsoft/DialoGPT-medium",
            "size": "~345MB",
            "speed": "Fast",
            "quality": "Good",
            "gpu_required": False
        },
        "Balanced": {
            "name": "stabilityai/stablelm-zephyr-3b",
            "size": "~3GB",
            "speed": "Medium",
            "quality": "Better",
            "gpu_required": True
        },
        "High Quality": {
            "name": "HuggingFaceH4/zephyr-7b-beta",
            "size": "~7GB",
            "speed": "Slower",
            "quality": "Best",
            "gpu_required": True
        },
        "Instruction Following": {
            "name": "mistralai/Mistral-7B-Instruct-v0.1",
            "size": "~7GB",
            "speed": "Medium",
            "quality": "Excellent",
            "gpu_required": True
        }
    }

    def __init__(self, model_choice: str = "Fast & Light"):
        """Select a preset; raises ``KeyError`` for an unknown ``model_choice``."""
        self.model_config = self.SPACE_MODELS[model_choice]
        self.model_name = self.model_config["name"]
        self.tokenizer = None
        self.model = None
        self.pipeline = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def _pipeline_device_kwargs(self) -> dict:
        """Return the device arguments to pass to ``pipeline``.

        On CUDA the model is loaded with ``device_map="auto"``, so placement
        is already decided and no explicit ``device`` must be passed; on CPU
        the pipeline is pinned with ``device=-1``.
        """
        if self.device == "cuda":
            return {}
        return {"device": -1}

    def load_model(self, progress_callback=None) -> str:
        """Load model with progress updates.

        Args:
            progress_callback: Optional ``callable(fraction, message)``.

        Returns:
            A status string; failures are logged and reported in the string
            rather than raised.
        """
        def report(fraction, message):
            if progress_callback:
                progress_callback(fraction, message)

        try:
            report(0.1, "Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            report(0.3, "Configuring model...")
            quantization_config = None
            # 4-bit quantization only for the 7B presets when a GPU is present
            if self.device == "cuda" and "7b" in self.model_name.lower():
                quantization_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_compute_dtype=torch.float16,
                    bnb_4bit_use_double_quant=True,
                    bnb_4bit_quant_type="nf4"
                )

            report(0.6, "Loading model weights...")
            # NOTE(review): trust_remote_code=True executes code shipped with
            # the model repository; acceptable only for the vetted presets.
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                quantization_config=quantization_config,
                device_map="auto" if self.device == "cuda" else None,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                trust_remote_code=True
            )

            report(0.9, "Creating pipeline...")
            self.pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                max_new_tokens=384,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                **self._pipeline_device_kwargs()
            )

            report(1.0, "Model loaded successfully!")
            return f"✅ Model '{self.model_name}' loaded successfully on {self.device.upper()}"
        except Exception as e:
            error_msg = f"❌ Error loading model: {str(e)}"
            logger.error(error_msg)
            return error_msg

    def generate_response(self, prompt: str, max_tokens: int = 384) -> str:
        """Generate response with error handling.

        Returns:
            The generated continuation (prompt excluded), or a string
            starting with ``❌`` when no model is loaded or generation fails.
        """
        if self.pipeline is None:
            return "❌ Model not loaded. Please load a model first."
        try:
            # NOTE(review): this is a character cut from the end of the prompt,
            # so long prompts lose their tail (the question text) - confirm
            # this limit is intended.
            max_input_length = 1000
            if len(prompt) > max_input_length:
                prompt = prompt[:max_input_length] + "..."
            outputs = self.pipeline(
                prompt,
                max_new_tokens=max_tokens,
                temperature=0.7,
                do_sample=True,
                return_full_text=False,
                pad_token_id=self.tokenizer.eos_token_id
            )
            return outputs[0]['generated_text'].strip()
        except Exception as e:
            return f"❌ Error generating response: {str(e)}"
| # ================================ | |
| # ENHANCED DATASET MANAGEMENT WITH TOKEN SUPPORT | |
| # ================================ | |
class GAIADatasetManager:
    """Manages GAIA dataset loading with user token support"""

    @staticmethod
    def _sample_fallback(max_questions, status: str) -> Tuple[List[GAIAQuestion], str]:
        """Return the bundled sample questions (optionally capped) with ``status``."""
        questions = GAIADatasetManager.get_sample_questions()
        return questions[:max_questions] if max_questions else questions, status

    @staticmethod
    def _first_truthy(item, keys, default=None):
        """Return the first truthy ``item.get(key)`` over ``keys``, else ``default``."""
        for key in keys:
            value = item.get(key)
            if value:
                return value
        return default

    @staticmethod
    def _question_from_item(item, split: str, index: int) -> GAIAQuestion:
        """Convert one dataset row into a ``GAIAQuestion``.

        Several alternative field names are tried for each attribute because
        the exact column naming of the dataset is not fixed here.
        """
        pick = GAIADatasetManager._first_truthy
        task_id = pick(item, ('task_id', 'Task ID', 'id'), f'gaia_{split}_{index:03d}')
        question_text = pick(item, ('Question', 'question', 'input'), 'No question text available')
        level = pick(item, ('Level', 'level', 'difficulty'), 1)
        final_answer = pick(item, ('Final answer', 'final_answer', 'answer', 'target'))
        file_name = pick(item, ('file_name', 'File name', 'files'))
        annotator_metadata = pick(item, ('Annotator Metadata', 'annotator_metadata', 'metadata'))
        return GAIAQuestion(
            task_id=str(task_id),
            question=str(question_text),
            level=int(level),
            final_answer=str(final_answer) if final_answer else None,
            file_name=str(file_name) if file_name else None,
            annotator_metadata=annotator_metadata
        )

    @staticmethod
    def load_gaia_dataset(split: str = "validation", max_questions: int = None, use_token: bool = True) -> Tuple[List[GAIAQuestion], str]:
        """Load GAIA dataset with token support.

        Args:
            split: Dataset split name passed to ``load_dataset``.
            max_questions: Optional cap on returned questions; falsy = no cap.
            use_token: When True, require and use the token held by
                ``token_manager``.

        Returns:
            ``(questions, status)``. Any failure (missing token, failed
            access test, load error) falls back to the sample questions and
            explains why in ``status``.
        """
        try:
            logger.info(f"Attempting to load GAIA dataset split: {split}")
            current_token = token_manager.get_token() if use_token else None
            if use_token and not current_token:
                logger.warning("No valid token found, falling back to sample questions")
                return GAIADatasetManager._sample_fallback(
                    max_questions, "⚠️ No authentication token - using sample questions")

            # Test access first if using token
            if use_token:
                has_access, access_msg = token_manager.test_gaia_access()
                if not has_access:
                    logger.warning(f"GAIA access test failed: {access_msg}")
                    return GAIADatasetManager._sample_fallback(
                        max_questions, f"⚠️ {access_msg} - using sample questions")

            # Load the actual dataset
            # NOTE(review): the GAIA repo may require an explicit config name
            # (e.g. "2023_all") - confirm against the dataset card.
            dataset = load_dataset(
                "gaia-benchmark/GAIA",
                split=split,
                token=current_token,
                trust_remote_code=True
            )
            logger.info(f"Successfully loaded GAIA dataset: {len(dataset)} items")

            questions = []
            # Iterate rows and stop at the cap. Slicing the dataset
            # (dataset[:n]) yields a dict of columns rather than rows, which
            # previously broke the per-row field lookup below.
            for i, item in enumerate(dataset):
                if max_questions and i >= max_questions:
                    break
                questions.append(GAIADatasetManager._question_from_item(item, split, i))

            status = f"✅ Loaded {len(questions)} questions from GAIA {split} split"
            logger.info(status)
            return questions, status
        except Exception as e:
            error_msg = f"❌ Error loading GAIA dataset: {str(e)}"
            logger.error(error_msg)
            # Fallback to sample questions
            logger.info("Falling back to sample questions")
            return GAIADatasetManager._sample_fallback(
                max_questions, f"{error_msg} (Using sample questions instead)")

    @staticmethod
    def get_sample_questions() -> List[GAIAQuestion]:
        """Get sample questions for testing when GAIA dataset is not accessible"""
        # (task_id, question, level, final_answer)
        samples = [
            ("sample_001", "What is the capital of France?", 1, "Paris"),
            ("sample_002", "Calculate 144 divided by 12.", 1, "12"),
            ("sample_003", "What is the largest planet in our solar system?", 1, "Jupiter"),
            ("sample_004", "Convert 100 degrees Celsius to Fahrenheit.", 2, "212"),
            ("sample_005", "List the first three even numbers greater than zero.", 1, "2, 4, 6"),
            ("sample_006", "What year did the Berlin Wall fall?", 1, "1989"),
            ("sample_007", "What is the chemical symbol for water?", 1, "H2O"),
            ("sample_008", "How many continents are there?", 1, "7"),
            ("sample_009", "What is 25% of 200?", 1, "50"),
            ("sample_010", "In which year did World War II end?", 1, "1945"),
            ("sample_011", "What is the square root of 144?", 2, "12"),
            ("sample_012", "Name the three primary colors.", 1, "red, blue, yellow"),
        ]
        return [
            GAIAQuestion.from_dict({
                "task_id": task_id,
                "question": question,
                "level": level,
                "final_answer": final_answer,
            })
            for task_id, question, level, final_answer in samples
        ]
| # ================================ | |
| # MAIN GAIA AGENT (updated with token support) | |
| # ================================ | |
class GAIASpaceAgent:
    """Main GAIA agent with token support.

    Owns the currently loaded model manager and accumulates every response
    produced by ``batch_evaluate`` in ``evaluation_results``.
    """

    def __init__(self):
        self.model_manager = None
        self.prompt_manager = GAIAPromptManager()
        self.current_model = None
        self.evaluation_results: List[GAIAResponse] = []

    def initialize_model(self, model_choice: str, progress=gr.Progress()) -> str:
        """Initialize model with progress tracking.

        Returns the status string from the model manager; clears previously
        accumulated evaluation results after the load attempt.
        """
        try:
            progress(0, desc="Initializing model manager...")
            self.model_manager = HFSpaceModelManager(model_choice)
            self.current_model = model_choice

            def progress_callback(value, desc):
                progress(value, desc=desc)

            result = self.model_manager.load_model(progress_callback)
            self.evaluation_results = []
            return result
        except Exception as e:
            return f"❌ Failed to initialize model: {str(e)}"

    def process_single_question(self, question_text: str, progress=gr.Progress()) -> Tuple[str, str, str, float]:
        """Process a single question with detailed output.

        Returns:
            ``(final_answer, raw_response, reasoning, processing_time)``;
            on failure the first element carries the error message.
        """
        if self.model_manager is None or self.model_manager.pipeline is None:
            return "❌ No model loaded", "", "", 0.0
        start_time = time.time()
        try:
            progress(0.2, desc="Creating GAIA prompt...")
            prompt = self.prompt_manager.create_gaia_prompt(question_text)
            progress(0.4, desc="Generating response...")
            raw_response = self.model_manager.generate_response(prompt)
            progress(0.8, desc="Extracting final answer...")
            final_answer, reasoning = self.prompt_manager.extract_final_answer(raw_response)
            processing_time = time.time() - start_time
            progress(1.0, desc="Complete!")
            return final_answer, raw_response, reasoning, processing_time
        except Exception as e:
            processing_time = time.time() - start_time
            error_msg = f"❌ Error processing question: {str(e)}"
            return error_msg, "", "", processing_time

    def batch_evaluate(self, questions: List[GAIAQuestion], progress=gr.Progress()) -> Tuple[str, str, str]:
        """Evaluate multiple questions with progress tracking.

        Returns:
            ``(summary_markdown, detailed_markdown, jsonl_text)``. A question
            that raises is recorded with ``final_answer == "ERROR"``.
        """
        if self.model_manager is None:
            return "❌ No model loaded", "", ""
        results = []
        total_questions = len(questions)
        progress(0, desc=f"Starting evaluation of {total_questions} questions...")
        for i, question in enumerate(questions):
            try:
                progress((i + 1) / total_questions,
                         desc=f"Processing question {i + 1}/{total_questions}: {question.task_id}")
                start_time = time.time()
                prompt = self.prompt_manager.create_gaia_prompt(question.question)
                raw_response = self.model_manager.generate_response(prompt)
                final_answer, reasoning = self.prompt_manager.extract_final_answer(raw_response)
                processing_time = time.time() - start_time
                response = GAIAResponse(
                    task_id=question.task_id,
                    model_answer=raw_response,
                    reasoning_trace=reasoning,
                    final_answer=final_answer,
                    processing_time=processing_time
                )
            except Exception as e:
                logger.error(f"Error processing {question.task_id}: {e}")
                response = GAIAResponse(
                    task_id=question.task_id,
                    model_answer=f"Error: {str(e)}",
                    reasoning_trace="Processing failed",
                    final_answer="ERROR",
                    processing_time=0.0
                )
            results.append(response)
            self.evaluation_results.append(response)
        summary = self._generate_summary(results)
        detailed_results = self._generate_detailed_results(results, questions)
        jsonl_content = self._generate_jsonl(results)
        return summary, detailed_results, jsonl_content

    def _generate_summary(self, results: List[GAIAResponse]) -> str:
        """Generate evaluation summary.

        Rates are reported as 0 when there are no results or no measured
        processing time, instead of dividing by zero.
        """
        total = len(results)
        errors = sum(1 for r in results if r.final_answer == "ERROR")
        successful = total - errors
        total_time = sum(r.processing_time for r in results)
        avg_time = total_time / total if total > 0 else 0
        # Guard both ratios: an empty run has total == 0, and a run in which
        # every question failed has total_time == 0.
        success_rate = (successful / total * 100) if total > 0 else 0.0
        questions_per_minute = (total / (total_time / 60)) if total_time > 0 else 0.0
        auth_status = "✅ GAIA Access" if token_manager.get_token() else "⚠️ Sample Data Only"
        summary = f"""
# 📊 GAIA Evaluation Summary
## Overall Statistics
- **Total Questions**: {total}
- **Successful**: {successful}
- **Errors**: {errors}
- **Success Rate**: {success_rate:.1f}%
## Performance Metrics
- **Average Processing Time**: {avg_time:.2f}s
- **Total Processing Time**: {total_time:.2f}s
- **Questions per Minute**: {questions_per_minute:.1f}
## Model Information
- **Model**: {self.current_model}
- **Device**: {self.model_manager.device.upper() if self.model_manager else 'Unknown'}
- **Authentication**: {auth_status}
"""
        return summary

    def _generate_detailed_results(self, results: List[GAIAResponse], questions: List[GAIAQuestion]) -> str:
        """Generate detailed results breakdown (one markdown section per question)."""
        sections = ["# 📋 Detailed Results\n\n"]
        for i, (result, question) in enumerate(zip(results, questions), 1):
            status = "✅" if result.final_answer != "ERROR" else "❌"
            sections.append(f"""
## Question {i}: {question.task_id} {status}
**Question**: {question.question}
**Model Answer**: {result.final_answer}
**Expected Answer**: {question.final_answer if question.final_answer else 'N/A'}
**Processing Time**: {result.processing_time:.2f}s
**Level**: {question.level}
---
""")
        return "".join(sections)

    def _generate_jsonl(self, results: List[GAIAResponse]) -> str:
        """Generate JSONL format for download (one JSON object per result)."""
        return '\n'.join(
            json.dumps({
                "task_id": result.task_id,
                "model_answer": result.model_answer,
                "reasoning_trace": result.reasoning_trace
            })
            for result in results
        )
# ================================
# GLOBAL AGENT INSTANCE
# ================================
# Single shared agent used by the Gradio interface functions below.
gaia_agent = GAIASpaceAgent()
| # ================================ | |
| # ENHANCED GRADIO INTERFACE FUNCTIONS | |
| # ================================ | |
def set_hf_token_interface(token: str):
    """Validate ``token`` via the global token manager for the UI.

    Returns a 3-tuple: token status, GAIA access status, and the refreshed
    authentication status markdown.
    """
    statuses = token_manager.set_token(token)
    auth_markdown = update_auth_status()
    return statuses[0], statuses[1], auth_markdown
def update_auth_status():
    """Build the markdown shown in the authentication status panel.

    The text depends on whether the global token manager currently holds a
    token.
    """
    if not token_manager.get_token():
        return """### 🔐 Authentication Status
❌ No valid HF token set
### 📊 GAIA Dataset Access
❌ Cannot access GAIA dataset - using sample questions
### 💡 To Access GAIA Dataset:
1. **Get Access**: Visit https://huggingface.co/datasets/gaia-benchmark/GAIA
2. **Get Token**: Visit https://huggingface.co/settings/tokens
3. **Set Token**: Enter your token in the field above"""

    return f"""### 🔐 Authentication Status
{token_manager.token_status}
### 📊 GAIA Dataset Access
{token_manager.gaia_access_status}
### 💡 Usage
- ✅ Can access GAIA validation/test sets
- ✅ Can download official benchmark data
- ✅ Results suitable for leaderboard submission"""
def load_model_interface(model_choice: str, progress=gr.Progress()):
    """Interface function for model loading.

    Delegates to ``gaia_agent.initialize_model`` and returns its status
    string unchanged.
    """
    return gaia_agent.initialize_model(model_choice, progress)
def single_question_interface(question: str, progress=gr.Progress()):
    """Run one question through the global agent for the UI.

    Returns a 4-tuple: final answer, full response, reasoning, and the
    processing time formatted as seconds with two decimals.
    """
    if question.strip():
        answer, raw, trace, elapsed = gaia_agent.process_single_question(question, progress)
        elapsed_text = f"{elapsed:.2f}s"
        return answer, raw, trace, elapsed_text
    return "Please enter a question", "", "", "0.00s"
def batch_evaluate_interface(dataset_choice: str, max_questions: int, progress=gr.Progress()):
    """Interface function for batch evaluation.

    Args:
        dataset_choice: "Sample Questions", "GAIA Validation Set" or
            "GAIA Test Set"; any other value loads the validation split
            without a token.
        max_questions: Cap on evaluated questions; falsy means no cap.
        progress: Gradio progress reporter.

    Returns:
        ``(summary, detailed_results, jsonl)`` from the agent, or an error
        message and two empty strings when no model is loaded.
    """
    if gaia_agent.model_manager is None:
        return "❌ Please load a model first", "", ""

    # Numeric UI widgets may deliver a float; slicing requires an int.
    limit = int(max_questions) if max_questions else None

    progress(0.1, desc="Loading dataset...")
    if dataset_choice == "Sample Questions":
        questions = GAIADatasetManager.get_sample_questions()
        status_msg = f"✅ Loaded {len(questions)} sample questions"
    else:
        use_token = dataset_choice in ["GAIA Validation Set", "GAIA Test Set"]
        split = "test" if dataset_choice == "GAIA Test Set" else "validation"
        questions, status_msg = GAIADatasetManager.load_gaia_dataset(split, limit, use_token)

    if limit and len(questions) > limit:
        questions = questions[:limit]

    progress(0.2, desc=f"{status_msg}. Starting evaluation...")
    summary, detailed, jsonl = gaia_agent.batch_evaluate(questions, progress)
    return summary, detailed, jsonl
def get_model_info(model_choice: str):
    """Return a markdown description of the chosen model preset.

    Falls back to a fixed message when ``model_choice`` is not a key of
    ``HFSpaceModelManager.SPACE_MODELS``.
    """
    if model_choice not in HFSpaceModelManager.SPACE_MODELS:
        return "Model information not available"

    preset = HFSpaceModelManager.SPACE_MODELS[model_choice]
    return f"""
**Model**: {preset['name']}
**Size**: {preset['size']}
**Speed**: {preset['speed']}
**Quality**: {preset['quality']}
**GPU Required**: {'Yes' if preset['gpu_required'] else 'No'}
"""
def preview_gaia_interface():
    """Produce a markdown preview of the GAIA validation split.

    Without a stored token, returns setup instructions. With a token, runs
    the access test, loads the validation split, and reports the question
    count, the per-level distribution and up to three truncated sample
    questions. Any exception is reported as markdown instead of being raised.
    """
    active_token = token_manager.get_token()
    if not active_token:
        return """
## ⚠️ GAIA Dataset Preview - Authentication Required
To access the GAIA dataset, you need:
1. **Request Access**: https://huggingface.co/datasets/gaia-benchmark/GAIA
2. **Get Token**: https://huggingface.co/settings/tokens
3. **Set Token**: Enter your token in the Authentication tab above
### 📋 Sample Questions Available:
We provide 12 sample questions for testing your setup without authentication.
Use "Sample Questions" in the evaluation tabs to get started!
"""
    try:
        # Test access and get basic info
        has_access, access_msg = token_manager.test_gaia_access()
        if not has_access:
            return f"""
## ❌ GAIA Dataset Access Failed
**Error**: {access_msg}
### 🔧 Troubleshooting:
1. Check your HF_TOKEN is valid
2. Ensure you have access to GAIA dataset
3. Try refreshing your token
### 🔄 Alternative:
Use "Sample Questions" for testing without authentication.
"""
        # Try to get some preview data
        dataset = load_dataset(
            "gaia-benchmark/GAIA",
            split="validation",
            token=token_manager.get_token(),
            trust_remote_code=True
        )
        total_questions = len(dataset)

        # Tally questions per level and keep the first three as examples
        level_counts = {}
        examples = []
        for row in dataset:
            level = row.get('Level', 1)
            level_counts[level] = level_counts.get(level, 0) + 1
            if len(examples) >= 3:
                continue
            question_text = row.get('Question', 'No question')
            if len(question_text) > 100:
                question_text = question_text[:100] + "..."
            examples.append(f"- **Level {level}**: {question_text}")

        level_lines = [f"- **Level {k}**: {v} questions" for k, v in sorted(level_counts.items())]
        level_dist = "\n".join(level_lines)
        sample_text = "\n".join(examples)
        return f"""
## ✅ GAIA Dataset Preview - Access Confirmed
### 📊 Dataset Statistics:
- **Total Questions**: {total_questions}
- **Available Split**: validation (development set)
### 📈 Level Distribution:
{level_dist}
### 📋 Sample Questions:
{sample_text}
### 🎯 Ready for Evaluation!
You can now use "GAIA Validation Set" or "GAIA Test Set" in the evaluation tabs to test your model on real GAIA questions.
"""
    except Exception as e:
        return f"""
## ❌ Error Previewing GAIA Dataset
**Error**: {str(e)}
### 🔄 Recommendations:
1. Use "Sample Questions" for immediate testing
2. Check your authentication setup
3. Try again in a few minutes
### 📞 Need Help?
- GAIA Dataset: https://huggingface.co/datasets/gaia-benchmark/GAIA
- HF Authentication: https://huggingface.co/docs/hub/security-tokens
"""
| # ================================ | |
| # ENHANCED GRADIO APP CREATION WITH TOKEN INPUT | |
| # ================================ | |
| def create_gaia_app(): | |
| """Create the main Gradio application with token input""" | |
| with gr.Blocks( | |
| title="GAIA Benchmark AI Agent", | |
| theme=gr.themes.Soft(), | |
| css=""" | |
| .gradio-container { | |
| font-family: 'Arial', sans-serif; | |
| } | |
| .main-header { | |
| text-align: center; | |
| background: linear-gradient(45deg, #2196F3, #21CBF3); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| font-size: 2.5em; | |
| font-weight: bold; | |
| margin-bottom: 20px; | |
| } | |
| .auth-section { | |
| background: #f8f9fa; | |
| padding: 15px; | |
| border-radius: 10px; | |
| border-left: 4px solid #2196F3; | |
| margin: 10px 0; | |
| } | |
| """ | |
| ) as app: | |
| # Header | |
| gr.HTML(""" | |
| <div class="main-header"> | |
| 🧠 GAIA Benchmark AI Agent | |
| </div> | |
| <p style="text-align: center; font-size: 1.2em; color: #666;"> | |
| Evaluate AI models on the GAIA benchmark with step-by-step reasoning | |
| </p> | |
| """) | |
| with gr.Tabs(): | |
| # =============================== | |
| # TAB 1: AUTHENTICATION | |
| # =============================== | |
| with gr.Tab("🔐 Authentication"): | |
| gr.HTML('<div class="auth-section">') | |
| gr.Markdown("## HuggingFace Token Setup") | |
| gr.Markdown(""" | |
| **To access the GAIA dataset, you need:** | |
| 1. **Request access** to GAIA dataset | |
| 2. **Get your HuggingFace token** | |
| 3. **Enter token below** | |
| """) | |
| gr.HTML('</div>') | |
| with gr.Row(): | |
| with gr.Column(scale=2): | |
| gr.Markdown("### 🔑 Enter Your HuggingFace Token") | |
| hf_token_input = gr.Textbox( | |
| label="HuggingFace Token", | |
| placeholder="hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", | |
| type="password", | |
| info="Get your token from https://huggingface.co/settings/tokens", | |
| value="" | |
| ) | |
| set_token_btn = gr.Button("🔓 Set Token & Test Access", variant="primary") | |
| with gr.Row(): | |
| token_status = gr.Textbox( | |
| label="Token Status", | |
| value="No token set", | |
| interactive=False, | |
| lines=1 | |
| ) | |
| gaia_access_status = gr.Textbox( | |
| label="GAIA Access Status", | |
| value="Not tested", | |
| interactive=False, | |
| lines=1 | |
| ) | |
| with gr.Column(scale=1): | |
| auth_status_display = gr.Markdown( | |
| value=update_auth_status(), | |
| label="Authentication Status" | |
| ) | |
| gr.Markdown(""" | |
| ### 📋 Step-by-Step Setup Guide | |
| #### 1. Request GAIA Dataset Access | |
| - Visit: https://huggingface.co/datasets/gaia-benchmark/GAIA | |
| - Click **"Request Access"** button | |
| - Fill out the form explaining your use case | |
| - Wait for approval (usually within 24 hours) | |
| #### 2. Get Your HuggingFace Token | |
| - Go to: https://huggingface.co/settings/tokens | |
| - Click **"New token"** | |
| - Choose **"Read"** permissions | |
| - Copy the token (starts with `hf_`) | |
| #### 3. Enter Token Above | |
| - Paste your token in the field above | |
| - Click **"Set Token & Test Access"** | |
| - Verify both token validity and GAIA access | |
| ### ⚠️ Token Security | |
| - Your token is only stored in memory during this session | |
| - Never share your token publicly | |
| - You can revoke tokens at any time from HuggingFace settings | |
| ### 🔄 Without Authentication | |
| - You can still use **12 sample questions** for testing | |
| - All features work except real GAIA dataset access | |
| - Perfect for getting familiar with the interface | |
| """) | |
| # Set token event | |
| set_token_btn.click( | |
| fn=set_hf_token_interface, | |
| inputs=[hf_token_input], | |
| outputs=[token_status, gaia_access_status, auth_status_display] | |
| ) | |
| # =============================== | |
| # TAB 2: MODEL SETUP | |
| # =============================== | |
| with gr.Tab("🔧 Model Setup"): | |
| gr.Markdown("## Choose and Load Your Model") | |
| with gr.Row(): | |
| with gr.Column(scale=2): | |
| model_dropdown = gr.Dropdown( | |
| choices=list(HFSpaceModelManager.SPACE_MODELS.keys()), | |
| value="Fast & Light", | |
| label="Select Model", | |
| info="Choose based on your quality vs speed preference" | |
| ) | |
| model_info = gr.Markdown( | |
| value=get_model_info("Fast & Light"), | |
| label="Model Information" | |
| ) | |
| load_btn = gr.Button("🚀 Load Model", variant="primary", size="lg") | |
| with gr.Column(scale=1): | |
| gpu_info = gr.Markdown(f""" | |
| ### 🖥️ System Info | |
| **CUDA Available**: {torch.cuda.is_available()} | |
| {f"**GPU**: {torch.cuda.get_device_name(0)}" if torch.cuda.is_available() else "**Device**: CPU"} | |
| ### 🔐 Authentication Status | |
| {"✅ Token Set" if token_manager.get_token() else "⚠️ No Token - Go to Authentication tab"} | |
| """) | |
| model_status = gr.Textbox( | |
| label="Model Status", | |
| value="No model loaded", | |
| interactive=False | |
| ) | |
| # Update model info when selection changes | |
| model_dropdown.change( | |
| fn=get_model_info, | |
| inputs=[model_dropdown], | |
| outputs=[model_info] | |
| ) | |
| # Load model when button clicked | |
| load_btn.click( | |
| fn=load_model_interface, | |
| inputs=[model_dropdown], | |
| outputs=[model_status] | |
| ) | |
| # =============================== | |
| # TAB 3: SINGLE QUESTION | |
| # =============================== | |
| with gr.Tab("❓ Single Question"): | |
| gr.Markdown("## Test Individual Questions") | |
| with gr.Row(): | |
| with gr.Column(): | |
| question_input = gr.Textbox( | |
| label="Enter your question", | |
| placeholder="e.g., What is the capital of France?", | |
| lines=3 | |
| ) | |
| process_btn = gr.Button("🤔 Process Question", variant="primary") | |
| # Example questions | |
| gr.Markdown("### 💡 Example Questions:") | |
| example_questions = [ | |
| "What is the capital of France?", | |
| "Calculate 144 divided by 12", | |
| "What is the largest planet in our solar system?", | |
| "Convert 100 degrees Celsius to Fahrenheit" | |
| ] | |
| for example in example_questions: | |
| gr.Button(f"📝 {example}", size="sm").click( | |
| lambda x=example: x, | |
| outputs=[question_input] | |
| ) | |
| with gr.Column(): | |
| final_answer_output = gr.Textbox( | |
| label="🎯 Final Answer", | |
| interactive=False | |
| ) | |
| processing_time = gr.Textbox( | |
| label="⏱️ Processing Time", | |
| interactive=False | |
| ) | |
| with gr.Accordion("🧠 Full Response", open=False): | |
| full_response = gr.Textbox( | |
| label="Complete Model Response", | |
| lines=8, | |
| interactive=False | |
| ) | |
| with gr.Accordion("🔍 Reasoning Trace", open=False): | |
| reasoning_trace = gr.Textbox( | |
| label="Step-by-step Reasoning", | |
| lines=6, | |
| interactive=False | |
| ) | |
| # Process single question | |
| process_btn.click( | |
| fn=single_question_interface, | |
| inputs=[question_input], | |
| outputs=[final_answer_output, full_response, reasoning_trace, processing_time] | |
| ) | |
| # =============================== | |
| # TAB 4: BATCH EVALUATION | |
| # =============================== | |
| with gr.Tab("📊 Batch Evaluation"): | |
| gr.Markdown("## Evaluate Multiple Questions") | |
| with gr.Row(): | |
| dataset_choice = gr.Radio( | |
| choices=["Sample Questions", "GAIA Validation Set", "GAIA Test Set"], | |
| value="Sample Questions", | |
| label="Dataset Choice", | |
| info="Sample Questions work without authentication" | |
| ) | |
| max_questions = gr.Slider( | |
| minimum=1, | |
| maximum=300, | |
| value=10, | |
| step=1, | |
| label="Max Questions", | |
| info="Number of questions to evaluate" | |
| ) | |
| evaluate_btn = gr.Button("🚀 Start Batch Evaluation", variant="primary", size="lg") | |
| # Dataset info display | |
| with gr.Row(): | |
| gr.Markdown(""" | |
| ### 📊 Dataset Information | |
| **Sample Questions (No Auth Required)**: | |
| - 12 curated questions for testing | |
| - Works without HuggingFace token | |
| - Perfect for setup verification | |
| **GAIA Validation Set (Auth Required)**: | |
| - ~165 official validation questions | |
| - Good for model development | |
| - May include reference answers | |
| **GAIA Test Set (Auth Required)**: | |
| - ~450 official test questions | |
| - Used for leaderboard submissions | |
| - Answers typically hidden (blind evaluation) | |
| """) | |
| with gr.Row(): | |
| with gr.Column(): | |
| summary_output = gr.Markdown( | |
| label="📊 Evaluation Summary", | |
| value="No evaluation completed yet" | |
| ) | |
| with gr.Column(): | |
| download_output = gr.File( | |
| label="💾 Download Results (JSONL)", | |
| visible=False | |
| ) | |
| with gr.Accordion("📋 Detailed Results", open=False): | |
| detailed_output = gr.Markdown( | |
| value="Run an evaluation to see detailed results" | |
| ) | |
| # Batch evaluation with download | |
def batch_eval_with_download(*args):
    """Run the batch evaluation and write its JSONL output to a file.

    All positional arguments are forwarded unchanged to
    ``batch_evaluate_interface``. The first argument must be a string (it is
    lower-cased and has spaces replaced by underscores) and is used to build
    the output filename.

    Returns:
        A ``(summary, detailed, filename)`` tuple. ``summary`` and
        ``detailed`` are passed through from ``batch_evaluate_interface``;
        ``filename`` is the name of the JSONL file written relative to the
        current working directory.
    """
    summary, detailed, jsonl_content = batch_evaluate_interface(*args)
    # Save JSONL for download
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    dataset_name = args[0].lower().replace(" ", "_")
    filename = f"gaia_{dataset_name}_{timestamp}.jsonl"
    # Write as UTF-8 explicitly: the JSONL may contain non-ASCII text, and the
    # platform's default encoding is not guaranteed to be able to encode it.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(jsonl_content)
    return summary, detailed, filename
| evaluate_btn.click( | |
| fn=batch_eval_with_download, | |
| inputs=[dataset_choice, max_questions], | |
| outputs=[summary_output, detailed_output, download_output] | |
| ).then( | |
| lambda: gr.update(visible=True), | |
| outputs=[download_output] | |
| ) | |
| # =============================== | |
| # TAB 5: DATASET PREVIEW | |
| # =============================== | |
| with gr.Tab("📋 Dataset Preview"): | |
| gr.Markdown("## GAIA Dataset Information") | |
| preview_btn = gr.Button("🔍 Preview GAIA Dataset", variant="primary") | |
| preview_output = gr.Markdown( | |
| value="Click above to preview the GAIA dataset structure and your access status" | |
| ) | |
| gr.Markdown(""" | |
| ## 🎯 About GAIA Benchmark | |
| **GAIA (General AI Assistant)** is a comprehensive benchmark for evaluating AI assistants on real-world tasks that require: | |
| ### 🧠 Key Capabilities Tested: | |
| - **Multi-step reasoning**: Complex logical thinking and problem decomposition | |
| - **Tool use**: Web browsing, calculations, file processing | |
| - **Multi-modality**: Text, images, PDFs, spreadsheets, audio files | |
| - **Real-world knowledge**: Current events, specialized domains | |
| - **Following instructions**: Precise output formatting | |
| ### 📊 Dataset Structure: | |
| - **Total Questions**: ~450 in test set, ~165 in validation set | |
| - **Difficulty Levels**: | |
| - Level 1: Basic questions (≤30 seconds for humans) | |
| - Level 2: Intermediate (≤5 minutes for humans) | |
| - Level 3: Advanced (≤30 minutes for humans) | |
| - **Question Types**: Factual, mathematical, reasoning, research tasks | |
| ### 🏆 Current Leaderboard (Top Performers): | |
| 1. **GPT-4 + plugins**: ~20% accuracy | |
| 2. **Claude-3 Opus**: ~15% accuracy | |
| 3. **Gemini Pro**: ~12% accuracy | |
| 4. **Human Performance**: ~92% accuracy | |
| ### 📁 File Types in GAIA: | |
| - Text documents, PDFs | |
| - Images (charts, diagrams, photos) | |
| - Spreadsheets (CSV, Excel) | |
| - Audio files | |
| - Web pages and URLs | |
| ### 🎯 Evaluation Criteria: | |
| - **Exact Match**: Final answer must match exactly | |
| - **Case Sensitive**: Proper formatting required | |
| - **No Partial Credit**: Binary scoring (correct/incorrect) | |
| - **Format Specific**: Numbers vs strings vs lists handled differently | |
| ### 🔬 Research Impact: | |
| - Used in 50+ research papers | |
| - Standard benchmark for assistant evaluation | |
| - Drives development of reasoning capabilities | |
| - Identifies gaps in current AI systems | |
| """) | |
| preview_btn.click( | |
| fn=preview_gaia_interface, | |
| outputs=[preview_output] | |
| ) | |
| # =============================== | |
| # TAB 6: HELP & INFO | |
| # =============================== | |
| with gr.Tab("ℹ️ Help & Info"): | |
| gr.Markdown(""" | |
| # 🧠 GAIA Benchmark AI Agent - Complete Guide | |
| ## 🎯 Quick Start Guide | |
| ### 1. **Authentication** (For GAIA Dataset Access) | |
| - Go to "Authentication" tab | |
| - Get access to GAIA dataset: https://huggingface.co/datasets/gaia-benchmark/GAIA | |
| - Get HF token: https://huggingface.co/settings/tokens | |
| - Enter token and test access | |
| ### 2. **Model Setup** (Required!) | |
| - Go to "Model Setup" tab | |
| - Choose a model based on your needs: | |
| - **Fast & Light**: Good for testing, works on CPU | |
| - **High Quality**: Best results, requires GPU | |
| - Click "Load Model" and wait for success message | |
| ### 3. **Test Your Setup** | |
| - Go to "Single Question" tab | |
| - Try example questions like "What is the capital of France?" | |
| - Verify your model responds correctly | |
| ### 4. **Batch Evaluation** | |
| - Go to "Batch Evaluation" tab | |
| - Start with "Sample Questions" (no auth needed) | |
| - Try 5-10 questions first | |
| - Download results for analysis | |
| ### 5. **GAIA Dataset** | |
| - Use "Dataset Preview" to check access | |
| - Try "GAIA Validation Set" for development | |
| - Use "GAIA Test Set" for leaderboard submission | |
| ## 📊 Dataset Options Explained | |
| ### Sample Questions (Always Available) | |
| - **12 curated questions** for testing | |
| - **No authentication required** | |
| - Perfect for verifying your setup | |
| - Good for debugging and development | |
| ### GAIA Validation Set (Requires Auth) | |
| - **~165 official questions** from GAIA | |
| - Good for **model development** and tuning | |
| - May include reference answers for comparison | |
| - Faster to evaluate than full test set | |
| ### GAIA Test Set (Requires Auth) | |
| - **~450 official questions** from GAIA | |
| - Used for **official leaderboard** submissions | |
| - Answers typically hidden (blind evaluation) | |
| - Takes longer but gives official ranking | |
| ## 🏆 Performance Expectations | |
| | Model Type | Expected Accuracy | Use Case | | |
| |------------|------------------|----------| | |
| | **Top Commercial** | 15-20% | GPT-4 + plugins, research | | |
| | **Strong Models** | 10-15% | Claude-3, Gemini Pro | | |
| | **Good Open Source** | 5-10% | Llama-2-70B, Mixtral | | |
| | **Smaller Models** | 1-5% | 7B parameter models | | |
| | **Humans** | ~92% | Reference performance | | |
| ## 🔧 Troubleshooting | |
| ### Authentication Issues | |
| - **"Invalid token"**: Check token format (starts with `hf_`) | |
| - **"Access denied"**: Request GAIA dataset access first | |
| - **"Token not found"**: Get token from HF settings | |
| ### Model Issues | |
| - **Out of Memory**: Try "Fast & Light" model | |
| - **CUDA Errors**: Restart and use CPU mode | |
| - **Slow loading**: Normal for large models, be patient | |
| ### Evaluation Issues | |
| - **No responses**: Ensure model is loaded first | |
| - **All errors**: Check model compatibility | |
| - **Slow evaluation**: Normal for complex questions | |
| ## 📁 Output Files | |
| ### JSONL Format (Leaderboard Ready) | |
| ```json | |
| {"task_id": "gaia_001", "model_answer": "Complete response...", "reasoning_trace": "Step by step..."} | |
| {"task_id": "gaia_002", "model_answer": "Complete response...", "reasoning_trace": "Step by step..."} | |
| ``` | |
| ### Key Fields: | |
| - **task_id**: Unique question identifier | |
| - **model_answer**: Full model response | |
| - **reasoning_trace**: Step-by-step thinking process | |
| ## 🚀 Best Practices | |
| ### For Accuracy: | |
| 1. **Use best model**: Don't compromise on model quality | |
| 2. **Test prompts**: Verify prompt format works | |
| 3. **Check reasoning**: Review step-by-step traces | |
| 4. **Analyze failures**: Learn from incorrect answers | |
| ### For Efficiency: | |
| 1. **Start small**: Test with 5-10 questions first | |
| 2. **Monitor resources**: Watch GPU/CPU usage | |
| 3. **Save progress**: Download results frequently | |
| 4. **Use appropriate model**: Match model to available hardware | |
| ### For Leaderboard: | |
| 1. **Use test set**: Official ranking requires test set | |
| 2. **Validate format**: Check JSONL is properly formatted | |
| 3. **Document approach**: Note any special techniques | |
| 4. **Submit promptly**: Upload to official leaderboard | |
| ## 🔗 Important Links | |
| - **GAIA Dataset**: https://huggingface.co/datasets/gaia-benchmark/GAIA | |
| - **GAIA Leaderboard**: https://huggingface.co/spaces/gaia-benchmark/leaderboard | |
| - **GAIA Paper**: https://arxiv.org/abs/2311.12983 | |
| - **HuggingFace Tokens**: https://huggingface.co/settings/tokens | |
| - **Authentication Guide**: https://huggingface.co/docs/hub/security-tokens | |
| """) | |
| return app | |
| # ================================ | |
| # MAIN APPLICATION | |
| # ================================ | |
if __name__ == "__main__":
    # Startup banner: report token and hardware status before building the UI.
    print("🧠 GAIA Benchmark AI Agent Starting...")

    env_token_state = "✅ Found" if INITIAL_TOKEN else "⚠️ Not found"
    print(f"🔐 Environment Token: {env_token_state}")

    # Probe CUDA once and reuse the answer for both the status line and the
    # optional GPU name line.
    has_cuda = torch.cuda.is_available()
    cuda_state = "✅ Yes" if has_cuda else "❌ No (CPU only)"
    print(f"🖥️ CUDA Available: {cuda_state}")
    if has_cuda:
        gpu_name = torch.cuda.get_device_name(0)
        print(f"🎮 GPU: {gpu_name}")

    print("""
💡 Token Setup Options:
1. Environment: export HF_TOKEN=hf_your_token
2. Interface: Enter token in Authentication tab
3. CLI: huggingface-cli login
""")

    # Build the Gradio app and serve it on all interfaces at port 7860,
    # without creating a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    app = create_gaia_app()
    app.launch(**launch_options)