gaia_final_assignment

Sleeping

App Files Files Community

gaia_final_assignment / tools.py

Dkapsis

pipes

5037c4d 5 months ago

raw

history blame contribute delete

10.7 kB


	import os
	import base64
	from smolagents import DuckDuckGoSearchTool, VisitWebpageTool, GoogleSearchTool
	from smolagents.tools import tool

	from config import config

	# Tools

	# Pick the search backend once, at import time: the Serper-backed Google
	# search when SERPER_API_KEY is set, DuckDuckGo otherwise.
	# The banners are startup diagnostics (they appear to be transliterated
	# Greek: "VRIKA KEY" ~ "found key", "DEN VRIKA KEY" ~ "did not find key").
	if os.environ.get("SERPER_API_KEY"):
	    print(
	        "!!!!!!!!!!!!! VRIKA KEY !!!!!!!!!!!!!!!!",
	        "!!!!!!!!!!!!! VRIKA KEY !!!!!!!!!!!!!!!!",
	        sep="\n",
	    )
	    simple_web_search_tool = GoogleSearchTool("serper")
	else:
	    print(
	        "---------------DEN VRIKA KEY-----------",
	        "---------------DEN VRIKA KEY-----------",
	        sep="\n",
	    )
	    simple_web_search_tool = DuckDuckGoSearchTool()

	# Ready-made smolagents tool instance for visiting web pages.
	visit_web_page_tool = VisitWebpageTool()

	@tool
	def web_search_tool(query: str) -> str:
	    """
	    Given a question, search the web and return a summary answer.

	    Queries the DuckDuckGo Instant Answer API and returns its abstract when one
	    is available, otherwise the text of the first related topic that has one.

	    Args:
	        query (str): The search query to look up.

	    Returns:
	        str: A relevant summary or result from DuckDuckGo, or a fallback message when nothing is found.

	    Raises:
	        RuntimeError: If the request fails or the response cannot be processed.
	    """
	    # `requests` is used below but is not imported in the visible module
	    # header; importing it here keeps every call from failing on a NameError.
	    import requests

	    try:
	        response = requests.get(
	            "https://api.duckduckgo.com/",
	            params={"q": query, "format": "json", "no_html": 1},
	            timeout=10,  # a stalled connection must not hang the agent forever
	        )
	        response.raise_for_status()
	        data = response.json()

	        if abstract := data.get("AbstractText"):
	            return abstract

	        # RelatedTopics entries may be plain results carrying a "Text" key or
	        # category groups carrying a nested "Topics" list; walk both in order
	        # instead of assuming the first entry has "Text".
	        pending = list(data.get("RelatedTopics") or [])
	        while pending:
	            topic = pending.pop(0)
	            if not isinstance(topic, dict):
	                continue
	            if text := topic.get("Text"):
	                return text
	            pending[:0] = topic.get("Topics") or []

	        return "No relevant information found via DuckDuckGo."
	    except Exception as e:
	        # Chain the original error so the root cause stays in the traceback.
	        raise RuntimeError(f"DuckDuckGo search failed: {str(e)}") from e

	@tool
	def image_analysis_tool(question: str, file_path: str) -> dict:
	    """
	    Given a question and an image file path, package the image and the question for analysis.

	    The image is not interpreted here. The file is read, base64-encoded and
	    returned together with the question for the agent model to process.

	    Args:
	        question (str): A question about the image.
	        file_path (str): Path to the image file.

	    Returns:
	        dict: Structured prompt holding the base64 image under "image" and the question under "question", both nested in "inputs".

	    Raises:
	        RuntimeError: If the file cannot be read or encoded.
	    """
	    try:
	        # Binary mode: the bytes are encoded as-is, no text decoding involved.
	        with open(file_path, "rb") as img_file:
	            img_data = base64.b64encode(img_file.read()).decode("utf-8")
	    except Exception as e:
	        # Chain the original error so the root cause stays in the traceback.
	        raise RuntimeError(f"Image analysis failed: {str(e)}") from e

	    return {"inputs": {"image": img_data, "question": question}}

	@tool
	def audio_analysis_tool(question: str, file_path: str) -> dict:
	    """
	    Given a question and an audio file path, package the audio and the question for analysis.

	    The audio is not transcribed or interpreted here. The file is read,
	    base64-encoded and returned with the question for the agent model to process.

	    Args:
	        question (str): A question about the audio.
	        file_path (str): Path to the audio file.

	    Returns:
	        dict: Structured prompt holding the base64 audio under "audio" and the question under "question", both nested in "inputs".

	    Raises:
	        RuntimeError: If the file cannot be read or encoded.
	    """
	    try:
	        # Binary mode: the bytes are encoded as-is, no text decoding involved.
	        with open(file_path, "rb") as audio_file:
	            audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
	    except Exception as e:
	        # Chain the original error so the root cause stays in the traceback.
	        raise RuntimeError(f"Audio analysis failed: {str(e)}") from e

	    return {"inputs": {"audio": audio_data, "question": question}}

	@tool
	def video_analysis_tool(question: str, file_path: str) -> dict:
	    """
	    Given a question and a video file path, package the video and the question for analysis.

	    The video is not decoded or interpreted here. The file is read,
	    base64-encoded and returned with the question for the agent model to process.

	    Args:
	        question (str): A question about the video.
	        file_path (str): Path to the video file.

	    Returns:
	        dict: Structured prompt holding the base64 video under "video" and the question under "question", both nested in "inputs".

	    Raises:
	        RuntimeError: If the file cannot be read or encoded.
	    """
	    try:
	        # Binary mode: the bytes are encoded as-is, no text decoding involved.
	        with open(file_path, "rb") as video_file:
	            video_data = base64.b64encode(video_file.read()).decode("utf-8")
	    except Exception as e:
	        # Chain the original error so the root cause stays in the traceback.
	        raise RuntimeError(f"Video analysis failed: {str(e)}") from e

	    return {"inputs": {"video": video_data, "question": question}}

	@tool
	def youtube_analysis_tool(question: str, url: str) -> dict:
	    """
	    Given a question and a YouTube video URL, package both for analysis.

	    Nothing is downloaded or analyzed here. The URL and the question are
	    wrapped in a structured prompt for the agent model to handle.

	    Args:
	        question (str): A question about the YouTube video.
	        url (str): The YouTube URL.

	    Returns:
	        dict: Structured prompt holding the URL under "youtube_url" and the question under "question", both nested in "inputs".
	    """
	    # Building this literal cannot fail, so no error wrapping is needed.
	    return {"inputs": {"youtube_url": url, "question": question}}

	@tool
	def document_analysis_tool(question: str, file_path: str) -> dict:
	    """
	    Given a question and a document file path, package the document and the question for analysis.

	    Text is extracted from .docx and .pptx files. Any other file is passed
	    along base64-encoded. The document is not interpreted here.

	    Args:
	        question (str): A question about the document.
	        file_path (str): Path to the document file.

	    Returns:
	        dict: Structured prompt with the document type, its content (extracted text or base64 data) and the question, nested in "inputs".

	    Raises:
	        RuntimeError: If reading, extracting or encoding the document fails.
	    """
	    # NOTE(review): is_ext, read_docx_text and read_pptx_text are neither
	    # defined nor imported in the part of this module visible here; confirm
	    # they exist, otherwise every call ends in the RuntimeError below.
	    try:
	        if is_ext(file_path, ".docx"):
	            document = {
	                "document_type": "docx",
	                "document_content": read_docx_text(file_path),
	            }
	        elif is_ext(file_path, ".pptx"):
	            document = {
	                "document_type": "pptx",
	                "document_content": read_pptx_text(file_path),
	            }
	        else:
	            # PDFs and any other format are forwarded as raw base64 data.
	            with open(file_path, "rb") as file:
	                encoded_data = base64.b64encode(file.read()).decode("utf-8")
	            document = {
	                "document_type": "binary",
	                "document_base64": encoded_data,
	            }

	        # The question goes last so the key order matches the original payload.
	        return {"inputs": {**document, "question": question}}
	    except Exception as e:
	        # Chain the original error so the root cause stays in the traceback.
	        raise RuntimeError(f"Document analysis failed: {str(e)}") from e

	@tool
	def arithmetic_tool(question: str, a: float, b: float) -> dict:
	    """
	    Given a question and two numbers, package them for the model to compute the answer.

	    No arithmetic is performed here. The question and both operands are
	    returned unchanged inside a structured prompt.

	    Args:
	        question (str): A natural language arithmetic question.
	        a (float): First number.
	        b (float): Second number.

	    Returns:
	        dict: Structured input holding "question", "a" and "b" nested in "inputs".
	    """
	    # Building this literal cannot fail, so no error wrapping is needed.
	    return {"inputs": {"question": question, "a": a, "b": b}}

	@tool
	def code_generation_tool(question: str, json_data: str) -> dict:
	    """
	    Given a question and JSON data, package them for a code-generating agent to answer.

	    No code is generated or executed here, and the JSON string is not parsed.
	    Both values are returned unchanged inside a structured prompt.

	    Args:
	        question (str): The question to be answered.
	        json_data (str): Input JSON data as a string.

	    Returns:
	        dict: Structured input holding "question" and "json_data" nested in "inputs".
	    """
	    # Building this literal cannot fail, so no error wrapping is needed.
	    return {"inputs": {"question": question, "json_data": json_data}}

	@tool
	def code_execution_tool(question: str, file_path: str) -> dict:
	    """
	    Given a question and a Python file, bundle the file and the question for execution.

	    The script is not run here. Its bytes are base64-encoded and returned with
	    the question and the file's base name for the model or agent to handle.

	    Args:
	        question (str): The question to be answered.
	        file_path (str): Path to the Python file.

	    Returns:
	        dict: Structured input with the question, the base64-encoded file and the file name.

	    Raises:
	        RuntimeError: If encoding or file handling fails.
	    """
	    try:
	        # Pull the raw bytes first, then turn them into base64 text.
	        with open(file_path, "rb") as source:
	            raw_bytes = source.read()
	        encoded_source = base64.b64encode(raw_bytes).decode("utf-8")

	        payload = dict(
	            question=question,
	            python_file=encoded_source,
	            file_name=os.path.basename(file_path),
	        )
	        return {"inputs": payload}
	    except Exception as err:
	        raise RuntimeError(f"Code execution processing failed: {str(err)}")

	@tool
	def add(a: float, b: float) -> float:
	    """Return the sum of two numbers.

	    Args:
	        a: First number
	        b: Second number
	    Returns:
	        The value of a plus b
	    """
	    total = a + b
	    return total

	@tool
	def subtract(a: float, b: float) -> float:
	    """Return the difference of two numbers.

	    Args:
	        a: First number
	        b: Second number
	    Returns:
	        The value of a minus b
	    """
	    difference = a - b
	    return difference

	@tool
	def multiply(a: float, b: float) -> float:
	    """Return the product of two numbers.

	    Args:
	        a: First number
	        b: Second number
	    Returns:
	        The value of a times b
	    """
	    product = a * b
	    return product

	@tool
	def divide(a: float, b: float) -> float:
	    """Return the quotient of two numbers.

	    Args:
	        a: First number
	        b: Second number
	    Returns:
	        The value of a divided by b
	    Raises:
	        ValueError: If b is zero.
	    """
	    # Happy path first; a zero divisor falls through to the error below.
	    if b != 0:
	        return a / b
	    raise ValueError("Cannot divide by zero.")

	@tool
	def modulus(a: float, b: float) -> float:
	    """Return the remainder of dividing one number by another.

	    Uses Python's modulo operator, so the result takes the sign of b.

	    Args:
	        a: First number
	        b: Second number
	    Returns:
	        The value of a modulo b
	    Raises:
	        ZeroDivisionError: If b is zero.
	    """
	    remainder = a % b
	    return remainder