kirbah committed on
Commit
77c5529
·
1 Parent(s): 136bf60

Add file handling functionality

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. basic_agent.py +101 -21
  3. file_handler.py +144 -0
.gitignore CHANGED
@@ -1 +1,2 @@
1
  __pycache__/**
 
 
1
  __pycache__/**
2
+ files/**
basic_agent.py CHANGED
@@ -1,37 +1,117 @@
1
  import os
2
- from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, VisitWebpageTool, WikipediaSearchTool, PythonInterpreterTool, InferenceClientModel, LiteLLMModel
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  class BasicAgent:
6
  def __init__(self):
7
  print("BasicAgent initialized.")
 
 
8
 
9
  def __call__(self, task_id: str, question: str) -> str:
10
- print(f"Agent received question (first 50 chars): {question[:50]}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- model = LiteLLMModel(model_id="groq/deepseek-r1-distill-llama-70b",
13
- api_key=os.getenv("GROQ_API_KEY"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  agent = ToolCallingAgent(
16
- tools=[DuckDuckGoSearchTool(), VisitWebpageTool(),
17
- WikipediaSearchTool(), PythonInterpreterTool()],
18
  model=model,
19
- max_steps=5,
20
- name="web_agent",
21
- description="Web Search Agent",
22
- authorized_imports=['statistics', 'unicodedata', 'collections', 'queue',
23
- 'time', 'pandas', 'stat', 'random', 'datetime', 're', 'math', 'itertools'],
24
  verbosity_level=2
25
  )
26
- answer = agent.run(
27
- f"""
28
- Answer the question using the tools provided. If you need to search the web, use the DuckDuckGoSearchTool.
29
- If you find relevant information, use it to answer the question. Do not make up answers.
30
- If you cannot find an answer, respond with 'I don't know'. Provide only the final answer, not the steps taken.
31
-
32
- Question: {question}
33
- """
34
- )
35
 
36
- print(f"Agent returning answer: {answer}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  return answer
 
1
  import os
2
+ # This assumes 'file_handler.py' is in the same directory or accessible via PYTHONPATH
3
+ from file_handler import get_task_file_path, DEFAULT_FILES_DIR
4
+
5
+ from smolagents import (
6
+ ToolCallingAgent,
7
+ DuckDuckGoSearchTool,
8
+ VisitWebpageTool,
9
+ WikipediaSearchTool,
10
+ PythonInterpreterTool,
11
+ LiteLLMModel
12
+ )
13
 
14
 
15
class BasicAgent:
    """Tool-calling agent that answers a question, optionally using a task file.

    Each call looks up (or downloads) the file associated with the task id via
    ``get_task_file_path``, embeds the resulting local path in the prompt, and
    runs a ``ToolCallingAgent`` equipped with web-search and Python tools.
    """

    def __init__(self):
        print("BasicAgent initialized.")
        # Absolute form of DEFAULT_FILES_DIR, kept on the instance for prompt context.
        self.files_dir_for_prompt_context = os.path.abspath(DEFAULT_FILES_DIR)

    def __call__(self, task_id: str, question: str) -> str:
        """Answer ``question`` for the task identified by ``task_id``.

        Args:
            task_id: Identifier used to locate or download the task's file.
            question: The question text forwarded to the agent.

        Returns:
            Whatever ``agent.run`` produces for the assembled prompt.
        """
        print(f"\nProcessing Task ID: {task_id}")
        print(f"Original Question (first 70 chars): {question[:70]}...")

        # Checks the local files directory first and downloads on a miss;
        # returns None when the task has no file.
        local_file_path_str = get_task_file_path(task_id)

        if local_file_path_str:
            file_context_for_prompt = (
                f"--- File Information for Task ID '{task_id}' ---\n"
                "A file relevant to this task has been made available to the agent. "
                f"Its local path is: '{local_file_path_str}'. "
                "If the question requires information from this file, you MUST pass the full path to the proper tool "
                # The trailing newline keeps the end marker on its own line
                # instead of being glued to the sentence above.
                "to access and process its content from this exact local path.\n"
                "--- End File Information ---"
            )
        else:
            file_context_for_prompt = ""

        question_to_llm = (
            f"{question}\n\n"
            f"{file_context_for_prompt}\n"
        )

        print(
            f"\nContext for LLM (first 400 chars of question part):\n{question_to_llm[:400]}...")

        # The API key is read from the GROQ_API_KEY environment variable.
        model = LiteLLMModel(
            model_id="groq/deepseek-r1-distill-llama-70b",
            api_key=os.getenv("GROQ_API_KEY")
        )

        agent_tools = [
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            WikipediaSearchTool(),
            # NOTE(security): 'subprocess', 'os' and 'sys' let model-written
            # code run arbitrary commands and touch the host filesystem.
            # Review whether these are really needed outside a sandbox.
            PythonInterpreterTool(authorized_imports=[
                'statistics', 'unicodedata', 'collections', 'queue', 'time', 'pandas',
                'stat', 'random', 'datetime', 're', 'math', 'itertools', 'os', 'sys',
                'io', 'csv', 'json', 'pathlib', 'subprocess', 'base64'
            ])
        ]

        agent = ToolCallingAgent(
            tools=agent_tools,
            model=model,
            max_steps=8,
            name="TaskProcessorAgent",
            description="An agent designed to answer questions by searching the web, processing local files (if a path is provided in 'File Information'), and executing Python code.",
            verbosity_level=2
        )

        prompt_template = f"""
        Your primary goal is to accurately and concisely answer the provided question using your available tools and any supplied information.

        Key Instructions:
        1. **Understand the Task**: Carefully read the entire question. Crucially, examine the "File Information" section that follows the question. This section will state if a local file is available for this task and provide its exact path if so.
        2. **Select Tools Strategically**:
            * Choose the best tool(s) based on the question and any file details. Tool descriptions will help guide your choice.
            * For web research, use search tools. Use `VisitWebpageTool` for exploring specific URLs.
            * **Working with Local Files**: If the "File Information" section provides a local file path:
                * You **MUST** use the `PythonInterpreterTool` if the file's content is needed to answer the question.
                * Your Python code for the tool should open this file using the full, exact local path given in "File Information". Then, read, process, or execute its content as appropriate.
            * For general calculations, data analysis (not from a local file mentioned in "File Information"), or other Python tasks, use `PythonInterpreterTool`.
        3. **Verify and Combine Information**:
            * If multiple steps or tools are used, synthesize the information.
            * Always evaluate information for accuracy and direct relevance to the question asked.
            * Base your final answer *only* on the information you have gathered.
        4. **Formulate Your Response**:
            * Provide only the final, concise answer to the question.
            * Do not include your reasoning steps, apologies, self-correction narratives, or any conversational filler in the final answer.
            * If, after a thorough investigation with all relevant tools, you cannot determine a definitive answer, respond with the exact phrase 'I don't know'.

        --- Start of Question & File Information ---
        {question_to_llm}
        --- End of Question & File Information ---
        """
        answer = agent.run(prompt_template)

        print(f"Agent returning answer for Task ID {task_id}: {answer}")
        return answer
file_handler.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import requests
4
+
5
+ DEFAULT_FILES_DIR = "files" # Subdirectory for task-related files
6
+ FILE_API_BASE_URL = "https://agents-course-unit4-scoring.hf.space/files/"
7
+
8
+
9
+ def _extract_filename_from_cd(cd_header: str | None) -> str | None:
10
+ """Extracts filename from Content-Disposition header."""
11
+ if not cd_header:
12
+ return None
13
+
14
+ # Check for filename*=UTF-8''<encoded_filename>
15
+ fname_star_match = re.search(
16
+ r"filename\*=UTF-8''([^';\s]+)", cd_header, re.IGNORECASE)
17
+ if fname_star_match:
18
+ return requests.utils.unquote(fname_star_match.group(1))
19
+
20
+ # Check for filename="<filename>"
21
+ fname_match = re.search(r'filename="([^"]+)"', cd_header, re.IGNORECASE)
22
+ if fname_match:
23
+ return fname_match.group(1)
24
+
25
+ # Check for plain filename=<filename>
26
+ fname_plain_match = re.search(
27
+ r'filename=([^;"]+)', cd_header, re.IGNORECASE)
28
+ if fname_plain_match:
29
+ return fname_plain_match.group(1).strip('"')
30
+ return None
31
+
32
+
33
+ def _get_extension_from_content_type(content_type: str | None) -> str | None:
34
+ """Suggests a file extension based on MIME type."""
35
+ if not content_type:
36
+ return None
37
+ # Simple mapping, can be expanded
38
+ mime_to_ext = {
39
+ 'text/plain': '.txt',
40
+ 'application/json': '.json',
41
+ 'text/csv': '.csv',
42
+ 'application/pdf': '.pdf',
43
+ 'image/jpeg': '.jpg',
44
+ 'image/png': '.png',
45
+ 'text/x-python': '.py',
46
+ # Often used as a generic, extension might be in filename
47
+ 'application/octet-stream': ''
48
+ }
49
+ # Get the main type/subtype part
50
+ main_type = content_type.split(';')[0].strip().lower()
51
+ return mime_to_ext.get(main_type)
52
+
53
+
54
def get_task_file_path(task_id: str, local_files_dir: str = DEFAULT_FILES_DIR) -> str | None:
    """Return the absolute path of the file belonging to ``task_id``.

    The storage directory is created if needed and searched for a regular
    file whose name starts with ``task_id``. On a miss, the file is requested
    from ``FILE_API_BASE_URL + task_id`` and saved under a name that starts
    with ``task_id`` so later calls find it locally.

    Args:
        task_id: Task identifier; must be non-empty and free of path separators.
        local_files_dir: Directory used to store and look up task files.

    Returns:
        The absolute path of the cached or freshly downloaded file, or None
        when the task id is unusable, the server has no file (or an empty
        one), or the download/save fails. Progress and errors are printed to
        stdout.
    """
    # An empty id would match every cached file via startswith(""), and an id
    # containing a separator could point outside local_files_dir.
    if not task_id or os.path.basename(task_id.replace("\\", "/")) != task_id:
        print(
            f"FileHandler: Invalid task_id {task_id!r}; cannot look up or download a task file.")
        return None

    os.makedirs(local_files_dir, exist_ok=True)

    # 1. Check for an existing local file whose name starts with the task_id.
    #    Sorting makes the pick deterministic; isfile() skips directories.
    try:
        for filename in sorted(os.listdir(local_files_dir)):
            candidate = os.path.join(local_files_dir, filename)
            if filename.startswith(task_id) and os.path.isfile(candidate):
                full_path = os.path.abspath(candidate)
                print(
                    f"FileHandler: Found existing local file for task {task_id}: {full_path}")
                return full_path
    except OSError as e:
        print(
            f"FileHandler: Notice - Error listing files in {local_files_dir} (will attempt download): {e}")

    # 2. If not found locally, attempt to download.
    file_api_url = f"{FILE_API_BASE_URL}{task_id}"
    print(
        f"FileHandler: Local file for task {task_id} not found. Attempting download from: {file_api_url}")

    file_path = None  # Set once a target path is chosen; used for cleanup.
    try:
        with requests.Session() as session:
            response = session.get(
                file_api_url, timeout=15, allow_redirects=True)

            if response.status_code == 200:
                if not response.content:  # Nothing to save.
                    print(
                        f"FileHandler: File indicated for task {task_id} but server sent no content (empty file). Not saving.")
                    return None

                cd_header = response.headers.get('Content-Disposition')
                original_filename = _extract_filename_from_cd(cd_header)

                # Keep only the final path component of the server-supplied
                # name; backslashes are normalised so Windows-style paths are
                # reduced as well.
                sane_filename_base = ""
                if original_filename:
                    sane_filename_base = os.path.basename(
                        original_filename.replace("\\", "/")).strip()

                if not sane_filename_base:
                    # No usable name from Content-Disposition: derive one
                    # from the task id and the Content-Type.
                    content_type = response.headers.get('Content-Type')
                    extension = _get_extension_from_content_type(
                        content_type) or ''
                    sane_filename_base = f"{task_id}_downloaded{extension}"
                    print(
                        f"FileHandler: No filename in Content-Disposition for {task_id}. Using fallback: {sane_filename_base}")

                # The name must start with task_id so step 1 finds it later.
                if not sane_filename_base.startswith(task_id):
                    sane_filename = f"{task_id}_{sane_filename_base}"
                else:
                    sane_filename = sane_filename_base

                file_path = os.path.join(local_files_dir, sane_filename)

                with open(file_path, 'wb') as f:
                    f.write(response.content)

                abs_path = os.path.abspath(file_path)
                print(
                    f"FileHandler: File '{sane_filename}' for task {task_id} downloaded to '{abs_path}'. Size: {len(response.content)} bytes.")
                return abs_path

            elif response.status_code == 404:
                print(
                    f"FileHandler: No file found for task_id {task_id} at API (HTTP 404 Not Found).")
                return None
            else:
                print(
                    f"FileHandler: Failed to download file for task {task_id}. Server responded with HTTP status {response.status_code}.")
                return None

    # Order matters: RequestException derives from IOError, so the requests
    # handlers must come before the generic IOError handler.
    except requests.exceptions.Timeout:
        print(
            f"FileHandler: Request timed out while trying to download file for task ID '{task_id}'.")
        return None
    except requests.exceptions.RequestException as e:
        print(
            f"FileHandler: An error occurred during file download for task ID '{task_id}': {type(e).__name__} - {e}.")
        return None
    except IOError as e:  # Errors while writing the file.
        print(
            f"FileHandler: An IO error occurred while saving the file for task ID '{task_id}': {e}")
        # Remove a partially written file so the next call does not mistake
        # it for a valid cached copy.
        if file_path is not None:
            try:
                os.remove(file_path)
            except OSError:
                pass  # Nothing was created, or it cannot be removed.
        return None