Spaces:
Sleeping
Sleeping
image analysis agent
Browse files- __pycache__/agents.cpython-310.pyc +0 -0
- __pycache__/prompts.cpython-310.pyc +0 -0
- __pycache__/tools.cpython-310.pyc +0 -0
- agents.py +18 -3
- prompts.py +10 -3
- tools.py +36 -1
__pycache__/agents.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/agents.cpython-310.pyc and b/__pycache__/agents.cpython-310.pyc differ
|
|
|
__pycache__/prompts.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/prompts.cpython-310.pyc and b/__pycache__/prompts.cpython-310.pyc differ
|
|
|
__pycache__/tools.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/tools.cpython-310.pyc and b/__pycache__/tools.cpython-310.pyc differ
|
|
|
agents.py
CHANGED
|
@@ -8,7 +8,7 @@ MANAGER_MODEL = "deepseek-ai/DeepSeek-R1"
|
|
| 8 |
AGENT_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
| 9 |
FINAL_ANSWER_MODEL = "deepseek-ai/DeepSeek-R1" # OpenAIServerModel
|
| 10 |
WEB_SEARCH_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
| 11 |
-
IMAGE_ANALYSIS_MODEL = "
|
| 12 |
AUDIO_ANALYSIS_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
| 13 |
VIDEO_ANALYSIS_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
| 14 |
YOUTUBE_ANALYSIS_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
|
@@ -37,7 +37,19 @@ def create_simple_web_search_agent(message):
|
|
| 37 |
tools=[tools.simple_web_search_tool, tools.visit_web_page_tool],
|
| 38 |
)
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
def create_manager_agent(message):
|
|
|
|
|
|
|
|
|
|
| 41 |
return CodeAgent(
|
| 42 |
name="manager_agent",
|
| 43 |
model=InferenceClientModel(MANAGER_MODEL, provider="together", max_tokens=8096),
|
|
@@ -45,7 +57,10 @@ def create_manager_agent(message):
|
|
| 45 |
tools=[],
|
| 46 |
planning_interval=4,
|
| 47 |
verbosity_level=2,
|
| 48 |
-
managed_agents=[
|
|
|
|
|
|
|
|
|
|
| 49 |
max_steps=10,
|
| 50 |
additional_authorized_imports=[
|
| 51 |
"requests",
|
|
@@ -80,6 +95,6 @@ def create_final_answer_agent(message):
|
|
| 80 |
name="final_answer_agent",
|
| 81 |
description="Given a question and an initial answer, return the final refined answer following strict formatting rules.",
|
| 82 |
model=InferenceClientModel(FINAL_ANSWER_MODEL),
|
| 83 |
-
max_steps=
|
| 84 |
tools=[],
|
| 85 |
)
|
|
|
|
| 8 |
AGENT_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
| 9 |
FINAL_ANSWER_MODEL = "deepseek-ai/DeepSeek-R1" # OpenAIServerModel
|
| 10 |
WEB_SEARCH_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
| 11 |
+
IMAGE_ANALYSIS_MODEL = "HuggingFaceM4/idefics2-8b"
|
| 12 |
AUDIO_ANALYSIS_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
| 13 |
VIDEO_ANALYSIS_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
| 14 |
YOUTUBE_ANALYSIS_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
|
|
|
| 37 |
tools=[tools.simple_web_search_tool, tools.visit_web_page_tool],
|
| 38 |
)
|
| 39 |
|
| 40 |
+
def create_image_analysis_agent(message):
    """Create the managed agent that answers questions about an image.

    Args:
        message: The user question; it is embedded in the agent description
            through ``prompts.get_image_analysis_prompt``.

    Returns:
        CodeAgent: An agent named ``image_analysis_agent`` that runs on
        ``IMAGE_ANALYSIS_MODEL`` and is limited to two steps.
    """
    return CodeAgent(
        name="image_analysis_agent",
        description=prompts.get_image_analysis_prompt(message),
        model=InferenceClientModel(IMAGE_ANALYSIS_MODEL),
        # Reference the tool through the ``tools`` module, like the web search
        # agent does; the bare name ``image_analysis_tool`` is not imported
        # into this module and would raise NameError when the agent is built.
        tools=[tools.image_analysis_tool],
        max_steps=2,
    )
|
| 48 |
+
|
| 49 |
def create_manager_agent(message):
|
| 50 |
+
simple_web_search_agent = create_simple_web_search_agent(message)
|
| 51 |
+
image_analysis_agent = create_image_analysis_agent(message)
|
| 52 |
+
|
| 53 |
return CodeAgent(
|
| 54 |
name="manager_agent",
|
| 55 |
model=InferenceClientModel(MANAGER_MODEL, provider="together", max_tokens=8096),
|
|
|
|
| 57 |
tools=[],
|
| 58 |
planning_interval=4,
|
| 59 |
verbosity_level=2,
|
| 60 |
+
managed_agents=[
|
| 61 |
+
simple_web_search_agent,
|
| 62 |
+
image_analysis_agent,
|
| 63 |
+
],
|
| 64 |
max_steps=10,
|
| 65 |
additional_authorized_imports=[
|
| 66 |
"requests",
|
|
|
|
| 95 |
name="final_answer_agent",
|
| 96 |
description="Given a question and an initial answer, return the final refined answer following strict formatting rules.",
|
| 97 |
model=InferenceClientModel(FINAL_ANSWER_MODEL),
|
| 98 |
+
max_steps=2,
|
| 99 |
tools=[],
|
| 100 |
)
|
prompts.py
CHANGED
|
@@ -7,13 +7,20 @@ def get_web_search_prompt(message, file_path=None):
|
|
| 7 |
|
| 8 |
return prompt
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def get_manager_prompt(message, file_path=None):
|
| 11 |
prompt = f"""Your job is to answer the following question.
|
| 12 |
Answer the following question. If needed, delegate to one of your coworkers:\n
|
| 13 |
|
| 14 |
-
- Web Search Agent:
|
| 15 |
-
|
| 16 |
-
|
| 17 |
|
| 18 |
...
|
| 19 |
|
|
|
|
| 7 |
|
| 8 |
return prompt
|
| 9 |
|
| 10 |
+
def get_image_analysis_prompt(message, file_path=None):
    """Build the instruction prompt for the image analysis agent.

    Args:
        message: The question to answer about the image.
        file_path: Optional path to the image file. When provided, it is
            appended to the prompt so the agent knows which file to analyze.
            When omitted, the prompt is the same as before this parameter
            was honored.

    Returns:
        str: The prompt text.
    """
    prompt = f"""
As an expert image analysis assistant, you analyze the image to answer the question. Given a question and image file, analyze the image and answer the question: {message}
"""
    # file_path used to be accepted but ignored, leaving the agent without any
    # reference to the image it is asked to analyze.
    if file_path:
        prompt += f"Image file: {file_path}\n"

    return prompt
|
| 16 |
+
|
| 17 |
def get_manager_prompt(message, file_path=None):
|
| 18 |
prompt = f"""Your job is to answer the following question.
|
| 19 |
Answer the following question. If needed, delegate to one of your coworkers:\n
|
| 20 |
|
| 21 |
+
- Web Search Agent: requires a question only.\n
|
| 22 |
+
|
| 23 |
+
- Image Analysis Agent: requires a question and **.png, .jpeg, .webp, .heic, or .heif image file**.\n"
|
| 24 |
|
| 25 |
...
|
| 26 |
|
tools.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
|
|
|
|
|
|
|
| 2 |
from smolagents import DuckDuckGoSearchTool, VisitWebpageTool
|
| 3 |
from smolagents.tools import tool
|
| 4 |
|
|
@@ -31,4 +33,37 @@ def web_search_tool(query: str) -> str:
|
|
| 31 |
else:
|
| 32 |
return "No relevant information found via DuckDuckGo."
|
| 33 |
except Exception as e:
|
| 34 |
-
raise RuntimeError(f"DuckDuckGo search failed: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
|
| 2 |
+
import os
|
| 3 |
+
import base64
|
| 4 |
from smolagents import DuckDuckGoSearchTool, VisitWebpageTool
|
| 5 |
from smolagents.tools import tool
|
| 6 |
|
|
|
|
| 33 |
else:
|
| 34 |
return "No relevant information found via DuckDuckGo."
|
| 35 |
except Exception as e:
|
| 36 |
+
raise RuntimeError(f"DuckDuckGo search failed: {str(e)}")
|
| 37 |
+
|
| 38 |
+
@tool
def image_analysis_tool(question: str, file_path: str) -> str:
    """
    Given a question and an image file path, analyze the image to answer the question.

    Args:
        question (str): A question about the image.
        file_path (str): Path to the image file.

    Returns:
        str: JSON string of the form {"inputs": {"image": <base64>, "question": <question>}}.

    Raises:
        RuntimeError: If processing fails.
    """
    # Local import so this block does not depend on a file-level import that
    # may not exist.
    import json

    # NOTE(review): this function only packages the image and question; it
    # does not call a vision model itself. Confirm that the consuming agent or
    # model actually processes this payload.
    try:
        # Read and encode image to base64
        with open(file_path, "rb") as img_file:
            img_data = base64.b64encode(img_file.read()).decode("utf-8")

        # Format the content in a typical vision+text prompt format
        payload = {
            "inputs": {
                "image": img_data,
                "question": question,
            }
        }

        # The signature and docstring promise a str, so serialize the payload
        # instead of returning the raw dict.
        return json.dumps(payload)
    except Exception as e:
        # Chain the original exception so the root cause (missing file,
        # permission error, ...) stays visible in the traceback.
        raise RuntimeError(f"Image analysis failed: {str(e)}") from e
|