import os

from smolagents import tool  # @tool decorator from smolagents (applied below)


@tool
def transcribe_mp3_audio_file(mp3_file_path: str) -> str:
| """ | |
| Transcribes an MP3 audio file using OpenAI's Whisper 'base' model. | |
| Args: | |
| mp3_file_path (str): The absolute local path to the MP3 audio file. | |
| This path should be obtained from the 'File Information' section | |
| if the file was downloaded by the agent. | |
| Returns: | |
| str: The transcribed text from the audio file, or an error message if transcription fails. | |
| """ | |
    try:
        import whisper  # Import whisper lazily, only when the tool is actually called
    except ImportError:
        return ("Error: The 'openai-whisper' library is required but not installed. "
                "Please install it using 'pip install openai-whisper' and ensure ffmpeg is also installed.")

    if not os.path.exists(mp3_file_path):
        return f"Error: Audio file not found at the specified path: '{mp3_file_path}'. Please verify the path."

    if not mp3_file_path.lower().endswith(".mp3"):
        return (f"Error: The provided file path '{mp3_file_path}' does not appear to be an MP3 file. "
                "This tool currently only supports .mp3 files.")

    try:
        print(f"AudioTool: Loading Whisper 'base' model to transcribe '{mp3_file_path}'...")
        # Available model sizes: "tiny", "base", "small", "medium", "large".
        # "base" is a good balance of speed and accuracy for many use cases;
        # larger models are more accurate but slower and require more resources.
        model = whisper.load_model("base")

        print(f"AudioTool: Transcribing audio from '{mp3_file_path}'...")
        # fp16=False improves compatibility when running on CPU.
        result = model.transcribe(mp3_file_path, fp16=False)
        transcribed_text = result.get("text", "")

        if transcribed_text:
            print(f"AudioTool: Transcription successful for '{mp3_file_path}'.")
            return transcribed_text
        else:
            # This can occur if the audio is silent or Whisper could not detect any speech.
            return (f"Notice: Transcription resulted in empty text for '{mp3_file_path}'. "
                    "The audio might be silent or contain no clear speech.")
    except FileNotFoundError:
        # Should already be caught by os.path.exists above; kept as a fallback for Whisper's internal handling.
        return f"Error: Whisper could not find the audio file at path: '{mp3_file_path}' (even though it was initially detected)."
    except Exception as e:
        # Check whether the error is due to ffmpeg not being found.
        if "ffmpeg" in str(e).lower() and ("not found" in str(e).lower() or "not installed" in str(e).lower()):
            return ("Error during transcription: ffmpeg not found. "
                    "OpenAI Whisper requires ffmpeg to be installed and on your system's PATH. "
                    f"Details: {type(e).__name__} - {str(e)}")
        return f"Error during audio transcription for '{mp3_file_path}': {type(e).__name__} - {str(e)}"
if __name__ == '__main__':
    # Placeholder for local testing; a real MP3 file is needed for a full run.
    # For example:
    # test_mp3_path = "path/to/your/test_audio.mp3"
    # if os.path.exists(test_mp3_path):
    #     print(f"--- Testing with MP3 file: {test_mp3_path} ---")
    #     transcript = transcribe_mp3_audio_file(test_mp3_path)
    #     print("Transcription Result:")
    #     print(transcript)
    # else:
    #     print(f"Test MP3 file not found at: {test_mp3_path}. Cannot run local test.")
    print("Audio transcription tool defined. To test, provide a path to an MP3 file in the example block.")

    # Test with a non-existent file
    print("\n--- Testing with non-existent file ---")
    print(transcribe_mp3_audio_file("non_existent_file.mp3"))

    # Test with a non-MP3 file extension (create a dummy .txt file for this test)
    print("\n--- Testing with non-MP3 file extension ---")
    dummy_file = "dummy.txt"
    with open(dummy_file, "w") as f:
        f.write("this is not an mp3")
    print(transcribe_mp3_audio_file(dummy_file))
    os.remove(dummy_file)
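
# Example (hedged sketch, not part of the tested code above): how this tool could be
# wired into a smolagents agent. The class names CodeAgent / InferenceClientModel and
# the agent.run(...) call are assumptions about the smolagents API that may differ
# between versions, and the MP3 path below is purely hypothetical. Kept commented out
# so importing this module never triggers an agent run or requires API credentials.
#
# from smolagents import CodeAgent, InferenceClientModel
#
# agent = CodeAgent(
#     tools=[transcribe_mp3_audio_file],  # the @tool-decorated function defined above
#     model=InferenceClientModel(),       # assumed default Hugging Face inference wrapper
# )
# print(agent.run("Transcribe the audio file at /path/to/your/audio.mp3 and summarize it."))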