from typing import Optional, Union

import pandas as pd
from smolagents import tool


@tool
def get_csv_data_summary(file_path: str) -> str:
    """
    Reads a CSV file from the given file path and returns a summary of its content.
    The summary includes the number of rows and columns, column names, and basic
    descriptive statistics.

    Args:
        file_path (str): The absolute local path to the CSV file. This path should be
            obtained from the 'File Information' section if the file was downloaded
            by the agent.

    Returns:
        str: A string containing the data summary (shape, columns, descriptive
            statistics) or an error message if processing fails.
    """
    try:
        df = pd.read_csv(file_path)

        summary = f"Successfully read CSV file: '{file_path}'\n"
        summary += f"Number of rows: {len(df)}\n"
        summary += f"Number of columns: {len(df.columns)}\n"
        summary += f"Column names: {', '.join(df.columns.astype(str))}\n\n"
        summary += "Descriptive statistics:\n"
        # include='all' so that non-numeric columns are described as well.
        summary += df.describe(include='all').to_string()
        # NOTE: for very wide dataframes (e.g. more than 15 columns) appending
        # df.head().to_string() may be more useful than the full describe output
        # when the consumer has a limited context.
        return summary
    except ImportError:
        return (
            "Error: The 'pandas' library is required but not installed. "
            "Please ensure it is available in the agent's environment."
        )
    except FileNotFoundError:
        return (
            f"Error: The CSV file was not found at the specified path: '{file_path}'. "
            "Please verify the path."
        )
    except pd.errors.EmptyDataError:
        return f"Error: The CSV file at '{file_path}' is empty."
    except Exception as e:
        # Tool boundary: report any other failure as text instead of raising.
        return f"Error processing CSV file '{file_path}': {type(e).__name__} - {str(e)}"


@tool
def get_excel_data_summary(file_path: str, sheet_name: Optional[str] = "0") -> str:
    """
    Reads an Excel file (supports .xls and .xlsx) from the given file path and returns
    a summary of the specified sheet's content.
    The summary includes the number of rows and columns, column names, and basic
    descriptive statistics for that sheet.

    Args:
        file_path (str): The absolute local path to the Excel file. This path should be
            obtained from the 'File Information' section if the file was downloaded
            by the agent.
        sheet_name (str | int | None, optional): The name of the sheet to read
            (e.g., "Sheet1") or its 0-indexed position, given either as an integer
            (e.g., 0) or as a string of digits (e.g., "0"). If None, the first sheet
            is read. Defaults to "0" (the first sheet).

    Returns:
        str: A string containing the data summary from the specified sheet (shape,
            columns, descriptive statistics) or an error message.
    """
    try:
        # Pin the engine from the file extension so that a missing engine produces
        # a clearer error; any other extension lets pandas pick the engine itself.
        engine = None
        if file_path.endswith('.xlsx'):
            engine = 'openpyxl'
        elif file_path.endswith('.xls'):
            engine = 'xlrd'

        actual_sheet_name_for_pandas: Union[str, int]
        if sheet_name is None:
            actual_sheet_name_for_pandas = 0  # Default to the first sheet
        elif isinstance(sheet_name, int):
            # The documented contract allows a plain integer index; without this
            # branch the .isdigit() call below would fail with AttributeError.
            actual_sheet_name_for_pandas = sheet_name
        elif sheet_name.isdigit():
            # A string made only of digits is treated as a 0-indexed position.
            actual_sheet_name_for_pandas = int(sheet_name)
        else:
            actual_sheet_name_for_pandas = sheet_name

        df = pd.read_excel(
            file_path,
            sheet_name=actual_sheet_name_for_pandas,
            engine=engine,
        )

        # An empty sheet is read as a DataFrame with no columns, and describe()
        # refuses such a frame; report it as an empty sheet instead.
        if df.columns.empty:
            return (
                f"Error: The specified sheet in Excel file '{file_path}' is empty "
                "or could not be parsed as data."
            )

        sheet_identifier = (
            f"sheet '{sheet_name}'" if sheet_name is not None else "the first sheet"
        )
        summary = f"Successfully read {sheet_identifier} from Excel file: '{file_path}'\n"
        summary += f"Number of rows: {len(df)}\n"
        summary += f"Number of columns: {len(df.columns)}\n"
        summary += f"Column names: {', '.join(df.columns.astype(str))}\n\n"
        summary += "Descriptive statistics:\n"
        summary += df.describe(include='all').to_string()
        return summary
    except ImportError:
        return (
            "Error: The 'pandas' library and an Excel engine ('openpyxl' for .xlsx, 'xlrd' for .xls) "
            "are required. Please ensure they are available in the agent's environment."
        )
    except FileNotFoundError:
        return (
            f"Error: The Excel file was not found at the specified path: '{file_path}'. "
            "Please verify the path."
        )
    except pd.errors.EmptyDataError:
        # Though less common for Excel sheets than CSVs
        return (
            f"Error: The specified sheet in Excel file '{file_path}' is empty "
            "or could not be parsed as data."
        )
    except ValueError as ve:
        # Recent pandas versions report a bad sheet name or index with a message
        # that starts with "Worksheet ..."; the older "sheet_name" probe is kept
        # so that any message it used to match is still recognised.
        ve_text = str(ve).lower()
        if "worksheet" in ve_text or "sheet_name" in ve_text:
            return (
                f"Error: Sheet '{sheet_name}' not found in Excel file '{file_path}'. "
                "Please check the sheet name or index."
            )
        return f"Error processing Excel file '{file_path}': ValueError - {str(ve)}"
    except Exception as e:
        # Specific check for missing engines, in case the failure surfaces as
        # something other than ImportError.
        err_str = str(e).lower()
        if "openpyxl" in err_str and "install openpyxl" in err_str:
            return f"Error: Missing 'openpyxl' engine for Excel file '{file_path}'. Please install it."
        if "xlrd" in err_str and ("install xlrd" in err_str or "support for .xls files" in err_str):
            return (
                f"Error: Missing 'xlrd' engine for .xls Excel file '{file_path}'. "
                "Please install it or try 'openpyxl' if compatible."
            )
        return f"Error processing Excel file '{file_path}': {type(e).__name__} - {str(e)}"