gaia_final_assignment

Sleeping

App Files Files Community

Dkapsis committed on May 20

Commit

a06b3f8

1 Parent(s): 81917a3

final answer manager and web agents

Browse files

Files changed (7) hide show

.env.example +4 -0
.gitignore +1 -0
__pycache__/agents.cpython-310.pyc +0 -0
agents.py +182 -0
app.py +33 -8
data/gaia_validation.jsonl +0 -0
requirements.txt +5 -1

.env.example ADDED Viewed

	@@ -0,0 +1,4 @@

+SPACE_ID=
+HF_TOKEN=
+OPENAI_API_KEY=
+SERPAPI_API_KEY=

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


+.env

__pycache__/agents.cpython-310.pyc ADDED Viewed

Binary file (6.08 kB). View file

agents.py ADDED Viewed

	@@ -0,0 +1,182 @@

+import os
+import pandas as pd
+import requests
+from smolagents import OpenAIServerModel, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, VisitWebpageTool
+from smolagents.tools import tool
+import markdownify
+MANAGER_MODEL = "deepseek-ai/DeepSeek-R1"  # model id given to the manager agent's InferenceClientModel
+AGENT_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"  # not referenced in the code shown here
+FINAL_ANSWER_MODEL = "deepseek-ai/DeepSeek-R1" # model id for the final-answer agent; NOTE(review): earlier note said "OpenAIServerModel", but orchestrate builds an InferenceClientModel with it
+WEB_SEARCH_MODEL        = "Qwen/Qwen2.5-Coder-32B-Instruct"  # model id shared by both web search agents
+IMAGE_ANALYSIS_MODEL    = "Qwen/Qwen2.5-Coder-32B-Instruct"  # not referenced in the code shown here
+AUDIO_ANALYSIS_MODEL    = "Qwen/Qwen2.5-Coder-32B-Instruct"  # not referenced in the code shown here
+VIDEO_ANALYSIS_MODEL    = "Qwen/Qwen2.5-Coder-32B-Instruct"  # not referenced in the code shown here
+YOUTUBE_ANALYSIS_MODEL  = "Qwen/Qwen2.5-Coder-32B-Instruct"  # not referenced in the code shown here
+DOCUMENT_ANALYSIS_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"  # not referenced in the code shown here
+ARITHMETIC_MODEL        = "Qwen/Qwen2.5-Coder-32B-Instruct"  # not referenced in the code shown here
+CODE_GENERATION_MODEL   = "Qwen/Qwen2.5-Coder-32B-Instruct"  # not referenced in the code shown here
+CODE_EXECUTION_MODEL    = "Qwen/Qwen2.5-Coder-32B-Instruct"  # not referenced in the code shown here
+def orchestrate(message, file_path):
+    # Tools
+    simple_web_search_tool = DuckDuckGoSearchTool()
+    visit_web_page_tool = VisitWebpageTool()
+    @tool
+    def web_search_tool(query: str) -> str:
+        """
+        Given a question, search the web and return a summary answer.
+        Args:
+            query (str): The search query to look up.
+        Returns:
+            str: A relevant summary or result from DuckDuckGo.
+        """
+        try:
+            url = "https://api.duckduckgo.com/"
+            params = {"q": query, "format": "json", "no_html": 1}
+            response = requests.get(url, params=params)
+            data = response.json()
+            if abstract := data.get("AbstractText"):
+                return abstract
+            elif related := data.get("RelatedTopics"):
+                return related[0]["Text"] if related else "No result found."
+            else:
+                return "No relevant information found via DuckDuckGo."
+        except Exception as e:
+            raise RuntimeError(f"DuckDuckGo search failed: {str(e)}")
+    # Promts
+    def get_manager_prompt(message, file_path=None):
+        prompt = f"""Your job is to answer the following question.
+            Answer the following question. If needed, delegate to one of your coworkers:\n
+            - Web Search Agent: Use when the question requires current information. Web Search Agent requires a question only.\n
+            Format the prompt like:
+            "You are an expert web search assistant. Your task is to search the web and provide accurate answers to the following question: [INSERT QUESTION]"
+            ...
+            In case you cannot answer the question and there is not a good coworker, delegate to the Code Generation Agent.\n.
+            Question: {message}
+            """
+        return prompt
+    def run_manager_workflow(message, file_path=None):
+        final_prompt = get_manager_prompt(message, file_path)
+        initial_answer = manager_agent.run(message)
+        final_answer = get_final_answer(final_answer_agent, message, str(initial_answer))
+        print(f"=> Initial question: {message}")
+        print(f"=> Final prompt: {final_prompt}")
+        print(f"=> Initial answer: {initial_answer}")
+        print(f"=> Final answer: {final_answer}")
+        return final_answer
+    def get_final_answer(agent, question: str, initial_answer: str) -> str:
+        prompt = f"""
+            You are an expert question answering assistant. Given a question and an initial answer, your task is to provide the final answer.
+        Your final answer must be a number and/or string OR as few words as possible OR a comma-separated list of numbers and/or strings.
+        If you are asked for a number, don't use comma to write your number neither use units such as USD, $, percent, or % unless specified otherwise.
+        If you are asked for a string, don't use articles, neither abbreviations (for example cities), and write the digits in plain text unless specified otherwise.
+        If you are asked for a comma-separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+        If the final answer is a number, use a number not a word.
+        If the final answer is a string, start with an uppercase character.
+        If the final answer is a comma-separated list of numbers, use a space character after each comma.
+        If the final answer is a comma-separated list of strings, use a space character after each comma and start with a lowercase character.
+        Do not add any content to the final answer that is not in the initial answer.
+        **Question:** """ + question + """
+        **Initial answer:** """ + initial_answer + """
+        **Example 1:** What is the biggest city in California? Los Angeles
+        **Example 2:** How many 'r's are in strawberry? 3
+        **Example 3:** What is the opposite of black? White
+        **Example 4:** What are the first 5 numbers in the Fibonacci sequence? 0, 1, 1, 2, 3
+        **Example 5:** What is the opposite of bad, worse, worst? good, better, best
+        **Final answer:**
+        """
+        return agent.run(prompt)
+    # Agents
+    web_search_agent = CodeAgent(
+        name="web_search_agent",
+        description="As an expert web search assistant, you search the web to answer the question. Your task is to search the web and provide accurate answers to the question: {message}",
+        model=InferenceClientModel(WEB_SEARCH_MODEL),
+        max_steps=2,
+        tools=[web_search_tool],
+    )
+    simple_web_search_agent = CodeAgent(
+        name="simple_web_search_agent",
+        description="As an expert web search assistant, you search the web to answer the question. Your task is to search the web and provide accurate answers to the question: {message}",
+        # system_message="As an expert web search assistant, you search the web to answer the question. Your task is to search the web and provide accurate answers to the question: {message}",
+        model=InferenceClientModel(WEB_SEARCH_MODEL),
+        max_steps=2,
+        tools=[simple_web_search_tool, visit_web_page_tool],
+    )
+    manager_prompt = get_manager_prompt(message)
+    manager_agent = CodeAgent(
+        name="manager_agent",
+        model=InferenceClientModel(MANAGER_MODEL, provider="together", max_tokens=8096),
+        description=manager_prompt,
+        tools=[],
+        planning_interval=4,
+        verbosity_level=2,
+        managed_agents=[simple_web_search_agent],
+        max_steps=10,
+        additional_authorized_imports=[
+            "requests",
+            "zipfile",
+            "os",
+            "pandas",
+            "numpy",
+            "sympy",
+            "json",
+            "bs4",
+            "pubchempy",
+            "xml",
+            "yahoo_finance",
+            "Bio",
+            "sklearn",
+            "scipy",
+            "pydub",
+            "io",
+            "PIL",
+            "chess",
+            "PyPDF2",
+            "pptx",
+            "torch",
+            "datetime",
+            "csv",
+            "fractions",
+        ],
+    )
+    final_answer_agent = CodeAgent(
+        name="final_answer_agent",
+        description="Given a question and an initial answer, return the final refined answer following strict formatting rules.",
+        model=InferenceClientModel(FINAL_ANSWER_MODEL),
+        max_steps=1,
+        tools=[],
+    )
+    final_answer =  run_manager_workflow(message)
+    # final_answer =  manager_agent.run(message)
+    return final_answer

app.py CHANGED Viewed

@@ -3,10 +3,16 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
@@ -139,6 +145,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
@@ -150,6 +166,7 @@ with gr.Blocks() as demo:
         1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
         2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
         3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
@@ -159,19 +176,27 @@ with gr.Blocks() as demo:
     )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")

 import requests
 import inspect
 import pandas as pd
+from huggingface_hub import login
+from dotenv import load_dotenv
+from agents import orchestrate
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+QUESTION_FILE_PATH = "data/gaia_validation.jsonl"
+QUESTION_LEVEL     = 1
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
+def test_init_agent_for_chat(text_input, history, file_name = ""):
+    if file_name:
+        file_name = f"data/{file_name}"
+    submitted_answer = orchestrate(text_input, file_name)
+    print(submitted_answer)
+    return submitted_answer
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
         1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
         2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
         3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        4.  who is in the final of champions league this year?
         ---
         **Disclaimers:**
     )
     gr.LoginButton()
+    gr.ChatInterface(test_init_agent_for_chat, type="messages")
+    # run_button = gr.Button("Run Evaluation & Submit All Answers")
+    # status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    # # Removed max_rows=10 from DataFrame constructor
+    # results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # run_button.click(
+    #     fn=run_and_submit_all,
+    #     outputs=[status_output, results_table]
+    # )
 if __name__ == "__main__":
+    load_dotenv()
+    hf_token = os.getenv("HF_TOKEN")
+    if hf_token:
+        login(hf_token)
+    else:
+        print("ℹ️  HF_TOKEN environment variable not found (running locally?).")
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")

data/gaia_validation.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -1,2 +1,6 @@
 gradio
-requests

 gradio
+requests
+smolagents
+pandas
+duckduckgo-search
+markdownify