Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from typing import List, Dict, Any, Union | |
| from datasets import load_dataset | |
| import random | |
| import os | |
app = FastAPI()

# Load the GAIA "2023_level1" configuration (validation split) once, at
# import time.
# NOTE(review): trust_remote_code=True presumably allows code shipped with the
# dataset repository to run during loading - confirm the source is trusted.
ds = load_dataset(
    "gaia-benchmark/GAIA",
    "2023_level1",
    split="validation",
    trust_remote_code=True,
)

# QUESTIONS holds the payload served to clients (task id + question text);
# GROUND_TRUTH maps each task id to its expected final answer as a string.
QUESTIONS = []
GROUND_TRUTH: Dict[str, str] = {}
for item in ds:
    task_id = str(item["task_id"])
    QUESTIONS.append(dict(task_id=task_id, question=item["Question"]))
    GROUND_TRUTH[task_id] = str(item["Final answer"])
# One answer inside a submission: the task it refers to and the value given.
class AnswerItem(BaseModel):
    # Task identifier; looked up in GROUND_TRUTH when the submission is scored.
    task_id: str
    # Submitted value; converted with str() and stripped before comparison.
    submitted_answer: Union[str, int, float]
# Request body accepted by submit(): who is submitting plus their answers.
class Submission(BaseModel):
    # Echoed back unchanged in the ScoreResponse.
    username: str
    # Not read by the visible scoring code.
    # NOTE(review): presumably a reference to the agent's source - confirm.
    agent_code: str
    # Answers to score; each entry contributes one attempt to the total.
    answers: List[AnswerItem]
# Result returned by submit() after scoring a submission.
class ScoreResponse(BaseModel):
    # Username copied from the submission.
    username: str
    # Percentage of attempted answers that were correct (0.0 when none given).
    score: float
    # Number of answers that matched the ground truth.
    correct_count: int
    # Number of answers contained in the submission.
    total_attempted: int
    # Human-readable summary of the score.
    message: str
def get_questions():
    """Return a fresh random selection of up to 20 level-1 questions.

    Each call draws a new sample without replacement from ``QUESTIONS``; when
    fewer than 20 questions are loaded, all of them are returned (in random
    order).
    """
    # NOTE(review): no route decorator is visible on this function in this
    # view - confirm it is registered with ``app``.
    sample_size = min(20, len(QUESTIONS))
    return random.sample(QUESTIONS, k=sample_size)
def submit(sub: Submission):
    """Score a submission against the stored ground-truth answers.

    An answer is correct when its ``task_id`` is a known task and the
    submitted value, converted with ``str()`` and stripped of surrounding
    whitespace, equals the stored ground-truth string exactly (the comparison
    is case-sensitive and the ground truth itself is not stripped).

    Args:
        sub: The submission holding the username and the list of answers.

    Returns:
        A ``ScoreResponse`` with the percentage score (``0.0`` when the
        submission contains no answers), the number of correct answers, the
        number of answers attempted, and a summary message.
    """
    # NOTE(review): no route decorator is visible on this function in this
    # view - confirm it is registered with ``app``.
    correct = 0
    for ans in sub.answers:
        expected = GROUND_TRUTH.get(ans.task_id)
        # Unknown task ids must never score. Falling back to "" for a missing
        # id would make any empty or whitespace-only answer compare equal, so
        # made-up ids with blank answers would be counted as correct.
        if expected is None:
            continue
        if expected == str(ans.submitted_answer).strip():
            correct += 1

    total = len(sub.answers)
    score = correct / total * 100 if total > 0 else 0.0
    return ScoreResponse(
        username=sub.username,
        score=score,
        correct_count=correct,
        total_attempted=total,
        message=f"Puntuación: {correct}/{total} = {score:.1f}%",
    )