Final_Assignment_Template

Runtime error

App Files Files Community

Prabhjotschugh committed on Apr 30

Commit

a5da8b4

verified ·

1 Parent(s): 81917a3

Update app.py

Browse files

Files changed (1) hide show

app.py +302 -17

app.py CHANGED Viewed

@@ -1,23 +1,270 @@
-import os
 import gradio as gr
-import requests
-import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
@@ -40,11 +287,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
@@ -76,11 +328,46 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -146,11 +433,9 @@ with gr.Blocks() as demo:
     gr.Markdown(
         """
         **Instructions:**
         1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
         2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
         3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).

 import gradio as gr
 import pandas as pd
+from smolagents import CodeAgent, OpenAIServerModel, tool
+import os, subprocess
+from bs4 import BeautifulSoup
+from duckduckgo_search import DDGS
+import csv
+import json
+import requests
+import whisper
+from typing import Optional
+import openpyxl
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
+# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+def download_file(file_name: str) -> None:
+    if not os.path.exists(file_name):
+        url = f"{DEFAULT_API_URL}/files/{file_name.split('.')[0]}"
+        r = requests.get(url)
+        with open(file_name, "wb") as f:
+            f.write(r.content)
+@tool
+def open_file_as_text(file_name: str, filetype: Optional[str] = "txt") -> str:
+    """
+    Opens a file and returns its content as readable text.
+    Supports 'txt', 'json', 'csv', 'xlsx', and 'mp3' (transcribes speech to text).
+    Args:
+        file_name (str): The path or name of the file.
+        filetype (Optional[str]): Type of file ('txt', 'json', 'csv', 'xlsx', 'mp3'). Defaults to 'txt'.
+    Returns:
+        str: The content of the file as text, or transcribed speech if 'mp3'.
+    """
+    download_file(file_name)
+    try:
+        if filetype == "txt":
+            with open(file_name, "r", encoding="utf-8") as f:
+                return f.read()
+        elif filetype == "json":
+            with open(file_name, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            return json.dumps(data, indent=2)
+        elif filetype == "csv":
+            with open(file_name, "r", encoding="utf-8") as f:
+                reader = csv.reader(f)
+                rows = list(reader)
+            return "\n".join([", ".join(row) for row in rows])
+        elif filetype == "xlsx":
+            wb = openpyxl.load_workbook(file_name, data_only=True)
+            sheet = wb.active
+            content = []
+            for row in sheet.iter_rows(values_only=True):
+                content.append(", ".join(str(cell) if cell is not None else "" for cell in row))
+            return "\n".join(content)
+        elif filetype == "mp3":
+            w = whisper.load_model("base")
+            res = w.transcribe(file_name)
+            return res["text"]
+        else:
+            return f"Unsupported filetype '{filetype}'. Supported types are 'txt', 'json', 'csv', 'xlsx', and 'mp3'."
+    except FileNotFoundError:
+        return f"File '{file_name}' not found."
+    except Exception as e:
+        return f"Error opening file '{file_name}': {str(e)}"
+@tool
+def web_search(query: str) -> str:
+    """
+    Searches the web using DuckDuckGo and returns top search snippets.
+    Args:
+        query (str): The search query string.
+    Returns:
+        str: A list of top search results with title, snippet, and URL.
+    """
+    try:
+        with DDGS() as ddgs:
+            results = ddgs.text(query, max_results=3)
+            if not results:
+                return "No results found."
+            return "\n\n".join([f"Title: {r['title']}\nSnippet: {r['body']}\nURL: {r['href']}" for r in results])
+    except Exception as e:
+        return f"Error during search: {str(e)}"
+def parse_wikipedia_table(table) -> str:
+    """
+    Parses a Wikipedia table into a clean, readable text format.
+    Args:
+        table (Tag): BeautifulSoup Tag for the table.
+    Returns:
+        str: Formatted table as readable text.
+    """
+    rows = []
+    headers = []
+    # Try to get headers
+    thead = table.find('thead')
+    if thead:
+        for th in thead.find_all('th'):
+            header_text = th.get_text(separator=" ", strip=True)
+            headers.append(header_text)
+        if headers:
+            rows.append(" | ".join(headers))
+    # Parse table body rows
+    tbody = table.find('tbody')
+    if not tbody:
+        tbody = table  # fallback: some tables have no tbody explicitly
+    for tr in tbody.find_all('tr'):
+        cells = tr.find_all(['th', 'td'])
+        cell_texts = []
+        for cell in cells:
+            # Clean references like [7], [note 1], etc.
+            for sup in cell.find_all('sup', class_='reference'):
+                sup.decompose()
+            text = cell.get_text(separator=" ", strip=True)
+            cell_texts.append(text)
+        if cell_texts:
+            row_text = " | ".join(cell_texts)
+            rows.append(row_text)
+    return "\n".join(rows)
+@tool
+def read_wikipedia_page(url: str) -> str:
+    """
+    Fetches a Wikipedia article and extracts clean sectioned text around the relevant query.
+    Args:
+        url (str): The Wikipedia page URL.
+    Returns:
+        str: Sectioned and readable snippet focused around the query.
+    """
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
+    }
+    resp = requests.get(url, headers=headers, timeout=10)
+    resp.raise_for_status()
+    soup = BeautifulSoup(resp.text, "html.parser")
+    content_div = soup.find('div', id='mw-content-text')
+    if not content_div:
+        return "Content not found."
+    parts = []
+    for elem in content_div.find_all(['h2', 'h3', 'p', 'ul', 'ol', 'table']):
+        if elem.name in ['h2', 'h3']:
+            parts.append("\n\n" + elem.get_text(strip=True) + "\n")
+        elif elem.name in ['p', 'ul', 'ol']:
+            parts.append(elem.get_text(strip=True))
+        elif elem.name == 'table':
+            parts.append(parse_wikipedia_table(elem))
+    full_text = "\n".join(parts)
+    return full_text
+@tool
+def smart_paginate_around_query(full_text: str, query: str) -> list:
+    """
+    Splits text into windows around each occurrence of the query.
+    Args:
+        full_text (str): The full text to search within.
+        query (str): The search query.
+    Returns:
+        list: List of relevant text windows (pages).
+    """
+    before_chars = 1000
+    after_chars = 3000
+    full_text_lower = full_text.lower()
+    query_lower = query.lower()
+    query_len = len(query_lower)
+    pages = []
+    search_pos = 0
+    text_len = len(full_text)
+    while True:
+        match_pos = full_text_lower.find(query_lower, search_pos)
+        if match_pos == -1:
+            break  # no more matches
+        # Define window around match
+        start = max(0, match_pos - before_chars)
+        end = min(text_len, match_pos + query_len + after_chars)
+        page = full_text[start:end]
+        pages.append(page)
+        # Move search pointer to AFTER current window
+        search_pos = end
+    return pages
+@tool
+def reverse_sentence(text: str) -> str:
+    """
+    Reverses the input text.
+    Args:
+        text (str): The input string to be reversed.
+    Returns:
+        str: The reversed string.
+    """
+    return text[::-1]
+@tool
+def run_python_code(file_name: str) -> str:
+    """
+    Executes a Python file and returns its printed final output.
+    Args:
+        file_name (str): Name of the Python file.
+    Returns:
+        str: The final printed output.
+    """
+    download_file(file_name)
+    try:
+        # Run in subprocess with timeout
+        result = subprocess.run(
+            ["python", file_name],
+            capture_output=True,
+            text=True,
+            timeout=10  # seconds
+        )
+        if result.returncode != 0:
+            return f"Error running code: {result.stderr.strip()}"
+        output = result.stdout.strip()
+        return output
+    except subprocess.TimeoutExpired:
+        return "Execution timed out."
+    except Exception as e:
+        return f"Error: {str(e)}"
+tools = [
+    open_file_as_text,
+    web_search,
+    read_wikipedia_page,
+    smart_paginate_around_query,
+    reverse_sentence,
+]
+model = OpenAIServerModel(
+    model_id="gpt-4o",
+    api_key=os.getenv("OPENAI_API_KEY"),
+    temperature=0
+)
+agent = CodeAgent(
+    model=model,
+    tools=tools,
+    additional_authorized_imports=["pandas", "numpy", "datetime", "json", "re", "math", "os", "requests", "csv", "urllib"]
+)
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
+        agent = CodeAgent(
+            model=model,
+            tools=tools,
+            additional_authorized_imports=["pandas", "numpy", "datetime", "json", "re", "math", "os", "requests", "csv",
+                                           "urllib"]
+        )
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    # In the case of an app running as a hugging Face space, this link points toward your codebase (useful for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            full_prompt = f"""You are a highly precise answering agent.
+When given a question:
+- If necessary, perform a web search using the tool `web_search` to find possible sources of information.
+- If the web search only returns titles and short snippets, you MUST visit the actual webpage to read the full content before answering.
+- Use the `read_wikipedia_page` tool to fetch and read the Wikipedia page when necessary.
+- You just have the ability to read Wikipedia pages only.
+- You MUST paginate the content using `smart_paginate_around_query`.
+- When using `smart_paginate_around_query`, you must select a short, general query based on the main keywords only. Avoid using full questions or long phrases. Use 1–3 essential words.
+- If the task requires reversing the order of words, letters, phrases, or any text, you must use the `reverse_sentence` tool to perform the operation.
+- Never reverse text manually inside your code. Always call the tool instead.
+- If the task requires reading, listening, or analyzing a file, you must use the file specified in the `file_name` field of the task metadata, not the file name mentioned casually inside the question text.
+- Comma separated lists MUST contain a single space after each comma.
+- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+- Only answer after you have gathered enough information by reading the actual page contents.
+- Once you have the final answer, you must call `final_answer("your_answer")` immediately after printing it.
+- Do not retry or execute anything else after calling `final_answer`.
+- `final_answer` must wrap the exact printed value.
+Provide ONLY the precise answer requested.
+Do not include explanations, steps, reasoning, or additional text.
+Be direct and specific. GAIA benchmark requires exact matching answers.
+Example: if asked "What is the capital of France?", respond exactly:
+Thoughts: I need to retrieve the capital of France from Wikipedia and output it directly.
+Code:
+```py
+print("Paris")
+```<end_code>
+Based on the above guidelines, answer the following question:
+--begin of question--
+{question_text}
+--end of question--
+If the questions mentions the need to use a file, use the following `file_name` value as the `file_name` parameter in any function calls:
+file_name: {file_name}"""
+            submitted_answer = agent.run(full_prompt)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
     gr.Markdown(
         """
         **Instructions:**
         1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
         2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
         3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).