gaia_final_assignment

Sleeping

App Files Files Community

gaia_final_assignment / tools.py

Dkapsis

audio agent

a9182c5 6 months ago

raw

history blame

3.05 kB


	import os
	import base64
	from smolagents import DuckDuckGoSearchTool, VisitWebpageTool
	from smolagents.tools import tool

	# Tools

	# Ready-made tool instances from smolagents, constructed once at import time.
	# NOTE(review): presumably imported by the agent setup elsewhere — confirm.
	simple_web_search_tool = DuckDuckGoSearchTool()
	visit_web_page_tool = VisitWebpageTool()

	@tool
	def web_search_tool(query: str) -> str:
	    """
	    Given a question, search the web and return a summary answer.

	    Queries the DuckDuckGo Instant Answer API and returns the abstract when one
	    is available, otherwise the text of the first related topic.

	    Args:
	        query (str): The search query to look up.

	    Returns:
	        str: A relevant summary or result from DuckDuckGo, or a fallback
	        message when the API has nothing to offer for the query.

	    Raises:
	        RuntimeError: If the request fails or the response cannot be parsed.
	    """
	    # Imported locally so the tool is self-contained: this module does not
	    # import an HTTP client at file level, so the standard library is used.
	    import json
	    import urllib.parse
	    import urllib.request

	    try:
	        encoded = urllib.parse.urlencode({"q": query, "format": "json", "no_html": 1})
	        # A timeout keeps a stalled connection from hanging the calling agent.
	        with urllib.request.urlopen(
	            f"https://api.duckduckgo.com/?{encoded}", timeout=10
	        ) as response:
	            data = json.load(response)

	        if abstract := data.get("AbstractText"):
	            return abstract

	        # RelatedTopics mixes plain results ({"Text": ...}) with category
	        # groups ({"Name": ..., "Topics": [...]}); walk both in document order
	        # and return the first entry that actually carries text.
	        pending = list(data.get("RelatedTopics") or [])
	        while pending:
	            topic = pending.pop(0)
	            if not isinstance(topic, dict):
	                continue
	            if text := topic.get("Text"):
	                return text
	            pending[:0] = topic.get("Topics") or []

	        return "No relevant information found via DuckDuckGo."
	    except Exception as e:
	        # Tool boundary: surface every failure as one error type, keeping the
	        # original exception attached as the cause.
	        raise RuntimeError(f"DuckDuckGo search failed: {str(e)}") from e

	@tool
	def image_analysis_tool(question: str, file_path: str) -> dict:
	    """
	    Given a question and an image file path, analyze the image to answer the question.

	    This function does not run a model itself: it reads the file, base64-encodes
	    it, and returns the image together with the question as a structured
	    payload for a downstream model to process.

	    Args:
	        question (str): A question about the image.
	        file_path (str): Path to the image file.

	    Returns:
	        dict: A payload shaped as {"inputs": {"image": base64_text, "question": question}}.

	    Raises:
	        RuntimeError: If processing fails.
	    """
	    try:
	        # Read the raw bytes and encode them as base64 text.
	        with open(file_path, "rb") as img_file:
	            encoded_image = base64.b64encode(img_file.read()).decode("utf-8")

	        # Returned as a dictionary, matching the declared return type.
	        return {
	            "inputs": {
	                "image": encoded_image,
	                "question": question,
	            }
	        }
	    except Exception as e:
	        # Keep the original exception attached as the cause for debugging.
	        raise RuntimeError(f"Image analysis failed: {str(e)}") from e

	@tool
	def audio_analysis_tool(question: str, file_path: str) -> dict:
	    """
	    Given a question and an audio file path, analyze the audio to answer the question.

	    This function does not run a model itself: it reads the file, base64-encodes
	    it, and returns the audio together with the question as a structured
	    payload for a downstream model to process.

	    Args:
	        question (str): A question about the audio.
	        file_path (str): Path to the audio file.

	    Returns:
	        dict: A payload shaped as {"inputs": {"audio": base64_text, "question": question}}.

	    Raises:
	        RuntimeError: If processing fails.
	    """
	    try:
	        # Read the raw bytes and encode them as base64 text.
	        with open(file_path, "rb") as audio_file:
	            encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")

	        # Returned as a dictionary, matching the declared return type.
	        return {
	            "inputs": {
	                "audio": encoded_audio,
	                "question": question,
	            }
	        }
	    except Exception as e:
	        # Keep the original exception attached as the cause for debugging.
	        raise RuntimeError(f"Audio analysis failed: {str(e)}") from e