agent-course-gaia

Sleeping

App Files Files Community

kirbah committed on May 29

Commit

77c5529

1 Parent(s): 136bf60

Add file handling functionality

Browse files

Files changed (3) hide show

.gitignore +1 -0
basic_agent.py +101 -21
file_handler.py +144 -0

.gitignore CHANGED Viewed

	@@ -1 +1,2 @@
1	__pycache__/**


1	__pycache__/**
2	+ files/**

basic_agent.py CHANGED Viewed

@@ -1,37 +1,117 @@
 import os
-from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, VisitWebpageTool, WikipediaSearchTool, PythonInterpreterTool, InferenceClientModel, LiteLLMModel
 class BasicAgent:
-    # Removed (pre-change) side of this diff: a callable that answers a question with a tool-calling agent.
     def __init__(self):
         print("BasicAgent initialized.")
     def __call__(self, task_id: str, question: str) -> str:
-        # task_id is accepted but never referenced in this version; only the question text is used.
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        # The model is rebuilt on every call; the API key is read from the GROQ_API_KEY environment variable.
-        model = LiteLLMModel(model_id="groq/deepseek-r1-distill-llama-70b",
-                             api_key=os.getenv("GROQ_API_KEY"))
         agent = ToolCallingAgent(
-            tools=[DuckDuckGoSearchTool(), VisitWebpageTool(),
-                   WikipediaSearchTool(), PythonInterpreterTool()],
             model=model,
-            max_steps=5,
-            name="web_agent",
-            description="Web Search Agent",
-            # NOTE(review): authorized_imports is passed to the agent here, while the added side of this
-            # diff passes it to PythonInterpreterTool instead - confirm which one the library honors.
-            authorized_imports=['statistics', 'unicodedata', 'collections', 'queue',
-                                'time', 'pandas', 'stat', 'random', 'datetime', 're', 'math', 'itertools'],
             verbosity_level=2
         )
-        # The prompt asks for the final answer only, or the literal 'I don't know' when nothing is found.
-        answer = agent.run(
-            f"""
-            Answer the question using the tools provided. If you need to search the web, use the DuckDuckGoSearchTool.
-            If you find relevant information, use it to answer the question. Do not make up answers.
-            If you cannot find an answer, respond with 'I don't know'. Provide only the final answer, not the steps taken.
-            Question: {question}
-            """
-        )
-        print(f"Agent returning answer: {answer}")
         return answer

 import os
+# This assumes 'file_handler.py' is in the same directory or accessible via PYTHONPATH
+from file_handler import get_task_file_path, DEFAULT_FILES_DIR
+from smolagents import (
+    ToolCallingAgent,
+    DuckDuckGoSearchTool,
+    VisitWebpageTool,
+    WikipediaSearchTool,
+    PythonInterpreterTool,
+    LiteLLMModel
+)
 class BasicAgent:
+    """Callable agent that answers one task question, optionally using a task file.
+
+    Each call resolves the task's file through ``get_task_file_path``, appends a
+    "File Information" section to the question when a file is available, and
+    runs a ``ToolCallingAgent`` over the combined prompt.
+    """
     def __init__(self):
         print("BasicAgent initialized.")
+        # Absolute form of DEFAULT_FILES_DIR; only used to build the example path
+        # shown in the Python tool description text.
+        self.files_dir_for_prompt_context = os.path.abspath(DEFAULT_FILES_DIR)
     def __call__(self, task_id: str, question: str) -> str:
+        """Answer ``question`` for the task identified by ``task_id``.
+
+        Args:
+            task_id: Task identifier, used to look up or download the task file.
+            question: The question text to answer.
+
+        Returns:
+            Whatever ``agent.run`` returns for the assembled prompt.
+        """
+        print(f"\nProcessing Task ID: {task_id}")
+        print(f"Original Question (first 70 chars): {question[:70]}...")
+        # get_task_file_path checks the local files directory first and downloads
+        # otherwise; it returns a path string, or None when no file is available.
+        local_file_path_str = get_task_file_path(task_id)
+        if local_file_path_str:
+            file_context_for_prompt = (
+                f"--- File Information for Task ID '{task_id}' ---\n"
+                "A file relevant to this task has been made available to the agent. "
+                f"Its local path is: '{local_file_path_str}'. "
+                "If the question requires information from this file, you MUST pass the full path to the proper tool "
+                # Newline keeps the closing marker on its own line instead of fused to the sentence.
+                "to access and process its content from this exact local path.\n"
+                "--- End File Information ---"
+            )
+        else:
+            file_context_for_prompt = ""
+        question_to_llm = (
+            f"{question}\n\n"
+            f"{file_context_for_prompt}\n"
+        )
+        print(
+            f"\nContext for LLM (first 400 chars of question part):\n{question_to_llm[:400]}...")
+        # The model is rebuilt on every call; the key comes from the GROQ_API_KEY environment variable.
+        model = LiteLLMModel(
+            model_id="groq/deepseek-r1-distill-llama-70b",
+            api_key=os.getenv("GROQ_API_KEY")
+        )
+        # Built outside the f-string: reusing double quotes inside a double-quoted
+        # f-string replacement field is a SyntaxError before Python 3.12.
+        example_file_path = os.path.join(
+            self.files_dir_for_prompt_context, "example_task_file.ext")
+        # NOTE(review): this description is assembled but never passed to any tool
+        # or agent below - wire it up or remove it.
+        python_tool_description = (
+            "Executes Python code. This is vital for calculations, data manipulation, "
+            "or accessing content of local files. If 'File Information' (provided with the question) "
+            f"gives a local path for a file (e.g., '{example_file_path}'), "
+            "you MUST use this tool to open and process that file using its full, exact path. "
+            "Output from `print()` in your code will be returned. Ensure code is self-contained."
+        )
+        agent_tools = [
+            DuckDuckGoSearchTool(),
+            VisitWebpageTool(),
+            WikipediaSearchTool(),
+            # SECURITY NOTE(review): 'os', 'sys' and 'subprocess' let model-written code
+            # touch the filesystem and spawn processes; confirm this is acceptable for
+            # the environment the agent runs in.
+            PythonInterpreterTool(authorized_imports=[
+                'statistics', 'unicodedata', 'collections', 'queue', 'time', 'pandas',
+                'stat', 'random', 'datetime', 're', 'math', 'itertools', 'os', 'sys',
+                'io', 'csv', 'json', 'pathlib', 'subprocess', 'base64'
+            ])
+        ]
         agent = ToolCallingAgent(
+            tools=agent_tools,
             model=model,
+            max_steps=8,
+            name="TaskProcessorAgent",
+            description="An agent designed to answer questions by searching the web, processing local files (if a path is provided in 'File Information'), and executing Python code.",
             verbosity_level=2
         )
+        # Instructions first, then the question and file section between explicit markers.
+        prompt_template = f"""
+Your primary goal is to accurately and concisely answer the provided question using your available tools and any supplied information.
+Key Instructions:
+1.  **Understand the Task**: Carefully read the entire question. Crucially, examine the "File Information" section that follows the question. This section will state if a local file is available for this task and provide its exact path if so.
+2.  **Select Tools Strategically**:
+    *   Choose the best tool(s) based on the question and any file details. Tool descriptions will help guide your choice.
+    *   For web research, use search tools. Use `VisitWebpageTool` for exploring specific URLs.
+    *   **Working with Local Files**: If the "File Information" section provides a local file path:
+        *   You **MUST** use the `PythonInterpreterTool` if the file's content is needed to answer the question.
+        *   Your Python code for the tool should open this file using the full, exact local path given in "File Information". Then, read, process, or execute its content as appropriate.
+    *   For general calculations, data analysis (not from a local file mentioned in "File Information"), or other Python tasks, use `PythonInterpreterTool`.
+3.  **Verify and Combine Information**:
+    *   If multiple steps or tools are used, synthesize the information.
+    *   Always evaluate information for accuracy and direct relevance to the question asked.
+    *   Base your final answer *only* on the information you have gathered.
+4.  **Formulate Your Response**:
+    *   Provide only the final, concise answer to the question.
+    *   Do not include your reasoning steps, apologies, self-correction narratives, or any conversational filler in the final answer.
+    *   If, after a thorough investigation with all relevant tools, you cannot determine a definitive answer, respond with the exact phrase 'I don't know'.
+--- Start of Question & File Information ---
+{question_to_llm}
+--- End of Question & File Information ---
+"""
+        answer = agent.run(prompt_template)
+        print(f"Agent returning answer for Task ID {task_id}: {answer}")
         return answer

file_handler.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import os
+import re
+import requests
+DEFAULT_FILES_DIR = "files"  # Subdirectory for task-related files
+FILE_API_BASE_URL = "https://agents-course-unit4-scoring.hf.space/files/"
+def _extract_filename_from_cd(cd_header: str | None) -> str | None:
+    """Extract the filename from a Content-Disposition header value.
+
+    Tries, in order: the extended form ``filename*=charset'lang'value``
+    (UTF-8 or ISO-8859-1, percent-decoded, language tag optional), the quoted
+    form ``filename="..."``, and the bare token form ``filename=...``.
+
+    Args:
+        cd_header: Raw header value, or None/empty when the header is absent.
+
+    Returns:
+        The filename, or None when the header is missing or has no filename.
+    """
+    # Stdlib percent-decoder, used in place of requests.utils.unquote.
+    from urllib.parse import unquote
+    if not cd_header:
+        return None
+    # Extended form first: it carries the non-ASCII name when both forms are sent.
+    extended = re.search(
+        r"filename\*\s*=\s*(UTF-8|ISO-8859-1)'[^']*'([^;\s]+)", cd_header, re.IGNORECASE)
+    if extended:
+        return unquote(extended.group(2), encoding=extended.group(1))
+    quoted = re.search(r'filename\s*=\s*"([^"]+)"', cd_header, re.IGNORECASE)
+    if quoted:
+        return quoted.group(1)
+    bare = re.search(r'filename\s*=\s*([^;"]+)', cd_header, re.IGNORECASE)
+    if bare:
+        # Drop padding around the token, e.g. "filename=a.txt ; size=3".
+        return bare.group(1).strip() or None
+    return None
+def _get_extension_from_content_type(content_type: str | None) -> str | None:
+    """Map a Content-Type header value to a suggested file extension.
+
+    Anything after the first ';' is ignored and matching is case-insensitive.
+    Returns the extension with its leading dot, '' for
+    application/octet-stream, or None when the value is missing/empty or the
+    MIME type is not in the table.
+    """
+    if not content_type:
+        return None
+    # Keep only "type/subtype"; parameters such as charset follow the ';'.
+    mime_part, _, _ = content_type.partition(';')
+    normalized = mime_part.strip().lower()
+    # Small lookup table; extend as more types are needed.
+    extension_by_mime = {
+        'text/plain': '.txt',
+        'text/csv': '.csv',
+        'text/x-python': '.py',
+        'application/json': '.json',
+        'application/pdf': '.pdf',
+        'image/jpeg': '.jpg',
+        'image/png': '.png',
+        # Generic binary type: any extension has to come from the filename instead.
+        'application/octet-stream': '',
+    }
+    return extension_by_mime.get(normalized)
+def get_task_file_path(task_id: str, local_files_dir: str = DEFAULT_FILES_DIR) -> str | None:
+    """Return the absolute path of the file for a task, downloading it if needed.
+
+    Looks in ``local_files_dir`` for a regular file whose name starts with
+    ``task_id``. If none exists, requests ``FILE_API_BASE_URL + task_id`` and
+    saves a non-empty 200 response under a name that starts with ``task_id``.
+    Progress and errors are printed to stdout.
+
+    Args:
+        task_id: Task identifier; must be non-empty and contain no path separator.
+        local_files_dir: Directory used as the local cache; created if missing.
+
+    Returns:
+        Absolute path of the cached or downloaded file, or None when the id is
+        invalid, no file exists for the task, the response is empty, or the
+        download or save fails.
+
+    Raises:
+        OSError: If ``local_files_dir`` cannot be created.
+    """
+    # An empty id would match every cached file via startswith(""), and an id
+    # with a path component could make the download land outside local_files_dir.
+    if not task_id or os.path.basename(task_id) != task_id:
+        print(
+            f"FileHandler: Invalid task_id {task_id!r}; skipping file lookup.")
+        return None
+    os.makedirs(local_files_dir, exist_ok=True)
+    # 1. Check for an existing local file whose name starts with the task_id
+    try:
+        # sorted() makes the choice deterministic when several names share the prefix.
+        for filename in sorted(os.listdir(local_files_dir)):
+            candidate = os.path.join(local_files_dir, filename)
+            # Skip directories: only a regular file can be the task's file.
+            if filename.startswith(task_id) and os.path.isfile(candidate):
+                full_path = os.path.abspath(candidate)
+                print(
+                    f"FileHandler: Found existing local file for task {task_id}: {full_path}")
+                return full_path
+    except OSError as e:
+        print(
+            f"FileHandler: Notice - Error listing files in {local_files_dir} (will attempt download): {e}")
+    # 2. If not found locally, attempt to download
+    file_api_url = f"{FILE_API_BASE_URL}{task_id}"
+    print(
+        f"FileHandler: Local file for task {task_id} not found. Attempting download from: {file_api_url}")
+    try:
+        with requests.Session() as session:
+            response = session.get(
+                file_api_url, timeout=15, allow_redirects=True)
+        if response.status_code == 200:
+            if not response.content:  # Nothing to save
+                print(
+                    f"FileHandler: File indicated for task {task_id} but server sent no content (empty file). Not saving.")
+                return None
+            cd_header = response.headers.get('Content-Disposition')
+            original_filename = _extract_filename_from_cd(cd_header)
+            # Keep only the final path component of the server-supplied name;
+            # backslashes are normalised so Windows-style paths are reduced too.
+            sane_filename_base = ""
+            if original_filename:
+                sane_filename_base = os.path.basename(
+                    original_filename.replace("\\", "/"))
+            if not sane_filename_base:  # No usable name: derive one from the Content-Type
+                content_type = response.headers.get('Content-Type')
+                extension = _get_extension_from_content_type(
+                    content_type) or ''
+                sane_filename_base = f"{task_id}_downloaded{extension}"
+                print(
+                    f"FileHandler: No filename in Content-Disposition for {task_id}. Using fallback: {sane_filename_base}")
+            # Ensure the filename starts with task_id so the local lookup finds it later
+            if not sane_filename_base.startswith(task_id):
+                sane_filename = f"{task_id}_{sane_filename_base}"
+            else:
+                sane_filename = sane_filename_base
+            file_path = os.path.join(local_files_dir, sane_filename)
+            # Write under a temporary name that does not start with task_id, then
+            # rename: an interrupted write must not leave a truncated file that the
+            # lookup above would later treat as a valid cached copy.
+            tmp_path = os.path.join(
+                local_files_dir, f".partial_{sane_filename}")
+            try:
+                with open(tmp_path, 'wb') as f:
+                    f.write(response.content)
+                os.replace(tmp_path, file_path)
+            except OSError:
+                if os.path.exists(tmp_path):
+                    os.remove(tmp_path)
+                raise  # Reported by the IOError handler below
+            abs_path = os.path.abspath(file_path)
+            print(
+                f"FileHandler: File '{sane_filename}' for task {task_id} downloaded to '{abs_path}'. Size: {len(response.content)} bytes.")
+            return abs_path
+        elif response.status_code == 404:
+            print(
+                f"FileHandler: No file found for task_id {task_id} at API (HTTP 404 Not Found).")
+            return None
+        else:
+            print(
+                f"FileHandler: Failed to download file for task {task_id}. Server responded with HTTP status {response.status_code}.")
+            return None
+    except requests.exceptions.Timeout:
+        print(
+            f"FileHandler: Request timed out while trying to download file for task ID '{task_id}'.")
+        return None
+    except requests.exceptions.RequestException as e:
+        print(
+            f"FileHandler: An error occurred during file download for task ID '{task_id}': {type(e).__name__} - {e}.")
+        return None
+    except IOError as e:  # Errors while saving the file
+        print(
+            f"FileHandler: An IO error occurred while saving the file for task ID '{task_id}': {e}")
+        return None