Spaces:
Runtime error
Runtime error
Upload 28 files
Browse files- alitaDiagram.svg +1 -0
- components/Web_agent_related/README.md +81 -0
- components/Web_agent_related/__pycache__/api_keys.cpython-313.pyc +0 -0
- components/Web_agent_related/__pycache__/config.cpython-313.pyc +0 -0
- components/Web_agent_related/__pycache__/utils.cpython-313.pyc +0 -0
- components/Web_agent_related/__pycache__/web_agent.cpython-313.pyc +0 -0
- components/Web_agent_related/api_keys.py +25 -0
- components/Web_agent_related/config.py +83 -0
- components/Web_agent_related/requirements.txt +15 -0
- components/Web_agent_related/utils.py +198 -0
- components/Web_agent_related/web_agent.py +1326 -0
- components/__init__.py +17 -0
- components/code_generator_loop/__pycache__/code_runner.cpython-313.pyc +0 -0
- components/code_generator_loop/__pycache__/mcp_brainstormer.cpython-313.pyc +0 -0
- components/code_generator_loop/__pycache__/script_generator.cpython-313.pyc +0 -0
- components/code_generator_loop/code_runner.py +251 -0
- components/code_generator_loop/mcp_brainstormer.py +153 -0
- components/code_generator_loop/script_generator.py +91 -0
- components/mcp_registry/__pycache__/registry.cpython-313.pyc +0 -0
- components/mcp_registry/__pycache__/use_registry_tool.cpython-313.pyc +0 -0
- components/mcp_registry/mcp_registry.py +117 -0
- components/mcp_registry/registry.py +235 -0
- components/mcp_registry/use_registry_tool.py +91 -0
- manager_agent.py +663 -0
- models/__init__.py +3 -0
- models/mcp_execution_result.py +21 -0
- models/mcp_tool_spec.py +11 -0
- models/task_prompt.py +26 -0
alitaDiagram.svg
ADDED
|
|
components/Web_agent_related/README.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# WebAgent
|
| 2 |
+
|
| 3 |
+
A modular, LLM-powered autonomous web research agent for integration into larger systems.
|
| 4 |
+
|
| 5 |
+
## Core Functionality
|
| 6 |
+
|
| 7 |
+
- Performs autonomous web research using LLMs (Claude, OpenAI, etc.)
|
| 8 |
+
- Provides unified access to web browsing, search, and document parsing
|
| 9 |
+
- Returns structured research reports
|
| 10 |
+
|
| 11 |
+
## Integration
|
| 12 |
+
|
| 13 |
+
```python
|
| 14 |
+
from web_agent import WebAgent
|
| 15 |
+
from llama_index.llms.anthropic import Anthropic
|
| 16 |
+
|
| 17 |
+
# 1. Initialize agent with preferred LLM
|
| 18 |
+
agent = WebAgent(
|
| 19 |
+
llm=Anthropic(model="claude-opus-4-20250514", api_key="your_anthropic_key"),
|
| 20 |
+
google_api_key="your_google_key", # Optional: pass directly or via config
|
| 21 |
+
github_token="your_github_token", # Optional: pass directly or via config
|
| 22 |
+
    max_research_iterations=10,       # Customize research depth (max reasoning steps)
|
| 23 |
+
verbose=False # Toggle debugging output
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
# 2. Execute research and get report
|
| 27 |
+
report = agent.research("Your research query")
|
| 28 |
+
|
| 29 |
+
# 3. Use the report in your application
|
| 30 |
+
process_research_results(report)
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
## Example Research Output
|
| 34 |
+
|
| 35 |
+
```python
|
| 36 |
+
# Query: "what is alita generalist agent"
|
| 37 |
+
report = agent.research("what is alita generalist agent")
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
The agent will autonomously conduct research and produce a structured research report:
|
| 41 |
+
|
| 42 |
+
```json
|
| 43 |
+
{
|
| 44 |
+
"summary": "ALITA (Artificial Learning Intelligence Training Assistant) represents a significant advancement in the field of generalist AI agents. It is capable of performing diverse tasks across multiple domains without requiring task-specific training, marking a departure from traditional narrow AI systems.",
|
| 45 |
+
"detailed_findings": "ALITA is a multi-modal AI system designed to process and understand various types of input (text, images, audio, video), generate contextually appropriate responses across different domains, learn and adapt from interactions without explicit retraining, and transfer knowledge between disparate tasks. Key characteristics include versatility, adaptability, and integration capabilities.",
|
| 46 |
+
"sources": [
|
| 47 |
+
"https://example.com/alita-overview",
|
| 48 |
+
"https://example.com/generalist-ai"
|
| 49 |
+
]
|
| 50 |
+
}
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
## Configuration Options
|
| 54 |
+
|
| 55 |
+
Configure via constructor parameters or `config.py`:
|
| 56 |
+
|
| 57 |
+
- **LLM Integration**: Pass any LlamaIndex-compatible LLM
|
| 58 |
+
- **API Keys**: Set via constructor, environment variables, or `api_keys.py`
|
| 59 |
+
- **Behavior Control**: Adjust `max_research_iterations`, `verbose`, etc.
|
| 60 |
+
|
| 61 |
+
## Available Tools
|
| 62 |
+
|
| 63 |
+
The agent exposes methods that can be called directly or used by the LLM:
|
| 64 |
+
|
| 65 |
+
- Web browsing: `visit_url`, `page_up/down`, `get_full_page_text`
|
| 66 |
+
- Search: `search_google`, `search_github`
|
| 67 |
+
- Document handling: `download_file`, extraction for PDF/DOCX/PPTX
|
| 68 |
+
|
| 69 |
+
## Requirements
|
| 70 |
+
|
| 71 |
+
Python 3.8+, LlamaIndex core & plugins, Anthropic or OpenAI client
|
| 72 |
+
|
| 73 |
+
```
|
| 74 |
+
pip install -r requirements.txt
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
## CLI Usage
|
| 78 |
+
|
| 79 |
+
```bash
|
| 80 |
+
python web_agent.py --query "Research topic" --model "claude-opus-4-20250514" --verbose
|
| 81 |
+
```
|
components/Web_agent_related/__pycache__/api_keys.cpython-313.pyc
ADDED
|
Binary file (853 Bytes). View file
|
|
|
components/Web_agent_related/__pycache__/config.cpython-313.pyc
ADDED
|
Binary file (2.28 kB). View file
|
|
|
components/Web_agent_related/__pycache__/utils.cpython-313.pyc
ADDED
|
Binary file (7.86 kB). View file
|
|
|
components/Web_agent_related/__pycache__/web_agent.cpython-313.pyc
ADDED
|
Binary file (59.3 kB). View file
|
|
|
components/Web_agent_related/api_keys.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
API Keys Configuration for WebAgent
|
| 3 |
+
|
| 4 |
+
This module contains API keys and sensitive configuration data.
|
| 5 |
+
IMPORTANT: Do not commit this file to version control with real keys.
|
| 6 |
+
Consider using environment variables in production environments.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
from api_keys import API_KEYS
|
| 10 |
+
|
| 11 |
+
google_api_key = API_KEYS['GOOGLE_API_KEY']
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
# API keys dictionary
|
| 15 |
+
API_KEYS = {
|
| 16 |
+
# Google Search API
|
| 17 |
+
"GOOGLE_API_KEY": "AIzaSyAngSEKuKwpBPapYwPnGBbUlsbT8y6p0KQ",
|
| 18 |
+
"GOOGLE_CX_ID": "97c907b98a73a41df",
|
| 19 |
+
|
| 20 |
+
# GitHub API
|
| 21 |
+
"GITHUB_TOKEN": "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",
|
| 22 |
+
|
| 23 |
+
# Anthropic API
|
| 24 |
+
"ANTHROPIC_API_KEY": "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"
|
| 25 |
+
}
|
components/Web_agent_related/config.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration for WebAgent
|
| 3 |
+
|
| 4 |
+
This module contains configuration settings for WebAgent.
|
| 5 |
+
Settings can be customized here or overridden at runtime.
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
from typing import Dict, Any, Optional
|
| 9 |
+
|
| 10 |
+
# Try to import API keys, use empty dict if file doesn't exist
|
| 11 |
+
try:
|
| 12 |
+
from .api_keys import API_KEYS
|
| 13 |
+
except ImportError:
|
| 14 |
+
API_KEYS = {}
|
| 15 |
+
|
| 16 |
+
# API Keys Configuration
|
| 17 |
+
# Priority: 1. Environment variables, 2. api_keys.py
|
| 18 |
+
API_CONFIG = {
|
| 19 |
+
# Google Search API configuration
|
| 20 |
+
"GOOGLE_API_KEY": os.environ.get("GOOGLE_API_KEY") or API_KEYS.get("GOOGLE_API_KEY", ""),
|
| 21 |
+
"GOOGLE_CX_ID": os.environ.get("GOOGLE_CX_ID") or API_KEYS.get("GOOGLE_CX_ID", ""),
|
| 22 |
+
|
| 23 |
+
# GitHub API configuration
|
| 24 |
+
"GITHUB_TOKEN": os.environ.get("GITHUB_TOKEN") or API_KEYS.get("GITHUB_TOKEN", ""),
|
| 25 |
+
|
| 26 |
+
# Anthropic API configuration
|
| 27 |
+
"ANTHROPIC_API_KEY": os.environ.get("ANTHROPIC_API_KEY") or API_KEYS.get("ANTHROPIC_API_KEY", ""),
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
# Web Request Configuration
|
| 31 |
+
WEB_REQUEST_CONFIG = {
|
| 32 |
+
# User agent for web requests
|
| 33 |
+
"USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
| 34 |
+
|
| 35 |
+
# Timeout for web requests in seconds
|
| 36 |
+
"TIMEOUT": 10,
|
| 37 |
+
|
| 38 |
+
# Whether to verify SSL certificates
|
| 39 |
+
"VERIFY_SSL": True,
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
# Content Processing Configuration
|
| 43 |
+
CONTENT_CONFIG = {
|
| 44 |
+
# Default number of characters per page chunk
|
| 45 |
+
"CHARS_PER_CHUNK": 10000,
|
| 46 |
+
|
| 47 |
+
# Maximum size of page to process (in bytes)
|
| 48 |
+
"MAX_PAGE_SIZE": 1024 * 1024 * 5, # 5 MB
|
| 49 |
+
|
| 50 |
+
# Whether to preserve formatting in extracted text
|
| 51 |
+
"PRESERVE_FORMATTING": False,
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
# Search Configuration
|
| 55 |
+
SEARCH_CONFIG = {
|
| 56 |
+
# Default number of search results to return
|
| 57 |
+
"DEFAULT_SEARCH_RESULTS": 5,
|
| 58 |
+
|
| 59 |
+
# Maximum number of search results allowed
|
| 60 |
+
"MAX_SEARCH_RESULTS": 10000000,
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
def get_config(section: Optional[str] = None) -> Dict[str, Any]:
|
| 64 |
+
"""
|
| 65 |
+
Get configuration settings, optionally for a specific section.
|
| 66 |
+
|
| 67 |
+
Args:
|
| 68 |
+
section: Optional section name (API_CONFIG, WEB_REQUEST_CONFIG, etc.)
|
| 69 |
+
|
| 70 |
+
Returns:
|
| 71 |
+
Dictionary containing configuration settings
|
| 72 |
+
"""
|
| 73 |
+
all_config = {
|
| 74 |
+
"API_CONFIG": API_CONFIG,
|
| 75 |
+
"WEB_REQUEST_CONFIG": WEB_REQUEST_CONFIG,
|
| 76 |
+
"CONTENT_CONFIG": CONTENT_CONFIG,
|
| 77 |
+
"SEARCH_CONFIG": SEARCH_CONFIG
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
if section:
|
| 81 |
+
return all_config.get(section, {})
|
| 82 |
+
|
| 83 |
+
return all_config
|
components/Web_agent_related/requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
llama-index>=0.8.0
|
| 2 |
+
llama-index-core>=0.10.0
|
| 3 |
+
llama-index-readers-web>=0.1.0
|
| 4 |
+
llama-index-llms-anthropic>=0.1.0
|
| 5 |
+
anthropic>=0.8.0
|
| 6 |
+
requests>=2.28.0
|
| 7 |
+
beautifulsoup4>=4.11.0
|
| 8 |
+
google-api-python-client>=2.70.0
|
| 9 |
+
PyGithub>=1.58.0
|
| 10 |
+
PyPDF2>=3.0.0
|
| 11 |
+
python-docx>=0.8.11
|
| 12 |
+
python-pptx>=0.6.21
|
| 13 |
+
urllib3>=1.26.0
|
| 14 |
+
# NOTE: pathlib and argparse are part of the Python 3 standard library;
# the PyPI backports are obsolete and the third-party "pathlib" package
# can shadow the stdlib module. Do not pin them here.
|
components/Web_agent_related/utils.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utility functions for file handling and text extraction.
|
| 3 |
+
|
| 4 |
+
This module contains standalone functions for extracting text from
|
| 5 |
+
various document formats (PDF, DOCX, PPTX).
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
from typing import Tuple
|
| 9 |
+
|
| 10 |
+
# For PDF processing
|
| 11 |
+
import PyPDF2
|
| 12 |
+
from PyPDF2.errors import PdfReadError
|
| 13 |
+
|
| 14 |
+
# For DOCX processing
|
| 15 |
+
import docx
|
| 16 |
+
|
| 17 |
+
# For PPTX processing
|
| 18 |
+
from pptx import Presentation
|
| 19 |
+
from pptx.exc import PackageNotFoundError
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def read_pdf_text(filepath: str) -> Tuple[bool, str]:
    """
    Extract the text content of a PDF file.

    Args:
        filepath: Path to the PDF file.

    Returns:
        A ``(success, payload)`` tuple: ``payload`` is the extracted text
        when ``success`` is True, and a human-readable error message
        otherwise.
    """
    try:
        if not os.path.exists(filepath):
            return False, f"File not found: {filepath}"

        with open(filepath, 'rb') as handle:
            try:
                reader = PyPDF2.PdfReader(handle)

                if len(reader.pages) == 0:
                    return False, "PDF file contains no pages"

                # Gather text page by page; extract_text() may yield None
                # for pages lacking a text layer, hence the "or ''".
                page_texts = [page.extract_text() or "" for page in reader.pages]
                full_text = "\n\n".join(page_texts)

                if not full_text.strip():
                    return False, "Could not extract text from PDF (the file may be scanned images or empty)"

                return True, full_text

            except PdfReadError as e:
                return False, f"Error reading PDF file: {str(e)}"

    except Exception as e:
        return False, f"Unexpected error when processing PDF file: {str(e)}"
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def read_docx_text(filepath: str) -> Tuple[bool, str]:
    """
    Extract the text content of a DOCX file.

    Args:
        filepath: Path to the DOCX file.

    Returns:
        A ``(success, payload)`` tuple: ``payload`` is the extracted text
        when ``success`` is True, and a human-readable error message
        otherwise.
    """
    try:
        if not os.path.exists(filepath):
            return False, f"File not found: {filepath}"

        try:
            document = docx.Document(filepath)

            # Non-empty paragraphs, in document order (original spacing kept).
            chunks = [para.text for para in document.paragraphs if para.text.strip()]

            # Flatten each table row into a single " | "-separated line.
            for table in document.tables:
                for row in table.rows:
                    cell_texts = [cell.text.strip() for cell in row.cells if cell.text.strip()]
                    if cell_texts:
                        chunks.append(" | ".join(cell_texts))

            combined = "\n\n".join(chunks)

            if not combined.strip():
                return False, "Document appears to be empty"

            return True, combined

        except Exception as e:
            return False, f"Error parsing DOCX file: {str(e)}"

    except Exception as e:
        return False, f"Unexpected error when processing DOCX file: {str(e)}"
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def read_pptx_text(filepath: str) -> Tuple[bool, str]:
    """
    Extract the text content of a PPTX file.

    Args:
        filepath: Path to the PPTX file.

    Returns:
        A ``(success, payload)`` tuple: ``payload`` is the extracted text
        when ``success`` is True, and a human-readable error message
        otherwise.
    """
    try:
        if not os.path.exists(filepath):
            return False, f"File not found: {filepath}"

        try:
            deck = Presentation(filepath)
            slides_text = []

            for number, slide in enumerate(deck.slides, start=1):
                parts = [f"--- Slide {number} ---"]

                # Slide title first, if it exists and is non-empty.
                title = slide.shapes.title
                if title and title.text.strip():
                    parts.append(f"Title: {title.text}")

                # Any other shape carrying non-empty text (skip the title,
                # which was already captured above).
                for shape in slide.shapes:
                    if hasattr(shape, "text") and shape.text.strip() and shape != title:
                        parts.append(shape.text)

                # Keep the slide only when it produced more than the header.
                if len(parts) > 1:
                    slides_text.append("\n".join(parts))

            combined = "\n\n".join(slides_text)

            if not combined.strip():
                return False, "Presentation appears to be empty"

            return True, combined

        except PackageNotFoundError:
            return False, "Invalid PPTX file format"
        except Exception as e:
            return False, f"Error parsing PPTX file: {str(e)}"

    except Exception as e:
        return False, f"Unexpected error when processing PPTX file: {str(e)}"
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def detect_file_type(filepath: str) -> Optional[str]:
    """
    Detect a supported document type from a file's extension.

    Args:
        filepath: Path to the file (it need not exist; only the name is
            inspected).

    Returns:
        'pdf', 'docx', or 'pptx' for supported extensions, or None when
        the extension is not recognized.  (The previous ``-> str``
        annotation was wrong: unknown extensions return None.)
    """
    _, extension = os.path.splitext(filepath.lower())

    # Legacy Office extensions are routed to the modern readers; note that
    # python-docx / python-pptx may still reject true binary .doc/.ppt
    # files at parse time.
    extension_map = {
        '.pdf': 'pdf',
        '.doc': 'docx',
        '.docx': 'docx',
        '.ppt': 'pptx',
        '.pptx': 'pptx',
    }
    return extension_map.get(extension)
|
components/Web_agent_related/web_agent.py
ADDED
|
@@ -0,0 +1,1326 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
WebAgent: An autonomous research agent powered by LLM for web browsing and search operations.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import re
|
| 7 |
+
import sys
|
| 8 |
+
import json
|
| 9 |
+
import uuid
|
| 10 |
+
import shutil
|
| 11 |
+
import io
|
| 12 |
+
from typing import List, Dict, Tuple, Optional, Union, Any, Callable
|
| 13 |
+
from urllib.parse import urlparse, unquote
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
# Import configuration
|
| 17 |
+
from .config import get_config
|
| 18 |
+
|
| 19 |
+
# Import utility functions
|
| 20 |
+
from .utils import read_pdf_text, read_docx_text, read_pptx_text, detect_file_type
|
| 21 |
+
|
| 22 |
+
# LlamaIndex imports
|
| 23 |
+
from llama_index.core.tools import FunctionTool
|
| 24 |
+
from llama_index.core.agent import ReActAgent
|
| 25 |
+
from llama_index.core.llms import LLM
|
| 26 |
+
from llama_index.llms.openai import OpenAI
|
| 27 |
+
from llama_index.readers.web import SimpleWebPageReader
|
| 28 |
+
from llama_index.core.schema import Document
|
| 29 |
+
|
| 30 |
+
# For Google Search
|
| 31 |
+
from googleapiclient.discovery import build
|
| 32 |
+
from googleapiclient.errors import HttpError
|
| 33 |
+
|
| 34 |
+
# For GitHub Search
|
| 35 |
+
from github import Github, GithubException
|
| 36 |
+
|
| 37 |
+
# For file download and handling
|
| 38 |
+
import requests
|
| 39 |
+
from requests.exceptions import RequestException
|
| 40 |
+
|
| 41 |
+
# For Anthropic Claude
|
| 42 |
+
from llama_index.llms.anthropic import Anthropic
|
| 43 |
+
|
| 44 |
+
class WebAgent:
|
| 45 |
+
"""
|
| 46 |
+
An autonomous research agent powered by LLM for web browsing and search operations.
|
| 47 |
+
|
| 48 |
+
This agent can:
|
| 49 |
+
- Autonomously perform web research on a given topic
|
| 50 |
+
- Visit and retrieve textual content from web pages
|
| 51 |
+
- Navigate within the textual content of a currently loaded page
|
| 52 |
+
- Perform searches using Google and GitHub
|
| 53 |
+
- Return information as raw text content or structured search results
|
| 54 |
+
- Download files from the web
|
| 55 |
+
- Extract text from PDF, DOCX, and PPTX files
|
| 56 |
+
- Synthesize findings into a comprehensive report
|
| 57 |
+
"""
|
| 58 |
+
|
| 59 |
+
def __init__(
    self,
    llm: Optional[LLM] = None,
    google_api_key: Optional[str] = None,
    google_cx_id: Optional[str] = None,
    github_token: Optional[str] = None,
    chars_per_chunk: Optional[int] = None,
    request_timeout: Optional[int] = None,
    user_agent: Optional[str] = None,
    temp_download_dir: Optional[str] = None,
    max_research_iterations: int = 100000000,
    verbose: bool = False
):
    """
    Initialize the WebAgent with LLM and optional API keys and configuration.

    Args:
        llm: LlamaIndex LLM instance for decision making (defaults to
            OpenAI "gpt-3.5-turbo" when omitted)
        google_api_key: API key for Google Custom Search
        google_cx_id: Custom Search Engine ID for Google Search
        github_token: GitHub API token for authenticated searches
        chars_per_chunk: Character count per chunk for page navigation
        request_timeout: Timeout for web requests in seconds
        user_agent: User agent string for web requests
        temp_download_dir: Directory for temporary file downloads
            (created eagerly; defaults to "temp_downloads")
        max_research_iterations: Maximum number of research steps
            (the default is effectively unlimited)
        verbose: Whether to print intermediate steps
    """
    # Get configuration sections
    api_config = get_config("API_CONFIG")
    web_config = get_config("WEB_REQUEST_CONFIG")
    content_config = get_config("CONTENT_CONFIG")

    # Load API keys with priority: direct parameters > environment variables > config file
    self.google_api_key = google_api_key or os.environ.get("GOOGLE_API_KEY") or api_config.get("GOOGLE_API_KEY", "")
    self.google_cx_id = google_cx_id or os.environ.get("GOOGLE_CX_ID") or api_config.get("GOOGLE_CX_ID", "")
    self.github_token = github_token or os.environ.get("GITHUB_TOKEN") or api_config.get("GITHUB_TOKEN", "")

    # Configure chunking and web request parameters (config values as fallbacks)
    self.chars_per_chunk = chars_per_chunk or content_config.get("CHARS_PER_CHUNK", 1000)
    self.request_timeout = request_timeout or web_config.get("TIMEOUT", 30)
    self.user_agent = user_agent or web_config.get("USER_AGENT", "Mozilla/5.0")
    # NOTE(review): verify_ssl is stored here but visit_url's
    # SimpleWebPageReader does not consume it — confirm where it is used.
    self.verify_ssl = web_config.get("VERIFY_SSL", True)

    # Initialize temporary download directory
    self.temp_download_dir = temp_download_dir or "temp_downloads"
    os.makedirs(self.temp_download_dir, exist_ok=True)

    # Initialize state variables for the "currently loaded page" model
    self.current_url = None
    self.current_page_documents = []
    self.current_page_text_chunks = []
    self.current_chunk_index = 0

    # Research configuration
    self.llm = llm or OpenAI(model="gpt-3.5-turbo")
    self.max_research_iterations = max_research_iterations
    self.verbose = verbose

    # Initialize API clients if credentials are available; failures are
    # non-fatal and leave the corresponding client as None (searches then
    # return a "not configured" error instead of raising).
    self._google_search_service = None
    self._github_client = None

    if self.google_api_key and self.google_cx_id:
        try:
            self._google_search_service = build(
                "customsearch", "v1", developerKey=self.google_api_key
            )
        except Exception as e:
            print(f"Warning: Failed to initialize Google Search API: {str(e)}")

    if self.github_token:
        try:
            self._github_client = Github(self.github_token)
        except Exception as e:
            print(f"Warning: Failed to initialize GitHub client: {str(e)}")

    # Initialize tools and agent
    self._tools = self._setup_tools()

    # Set up the agent executor with max_iterations parameter
    self.agent_executor = ReActAgent.from_tools(
        tools=self._tools,
        llm=self.llm,
        verbose=self.verbose,
        system_prompt=self._get_system_prompt(),
        max_iterations=self.max_research_iterations
    )
|
| 147 |
+
|
| 148 |
+
def _setup_tools(self) -> List[FunctionTool]:
    """
    Build the LlamaIndex FunctionTool wrappers exposing this agent's capabilities.

    Returns:
        List of FunctionTool objects, one per capability.
    """
    # (callable, tool name, tool description) triples — one per capability.
    tool_specs = [
        (self._internal_visit_url, "visit_web_page",
         "Visits a URL and returns the first chunk of its textual content. Use this to view web pages."),
        (self._internal_page_down, "scroll_down_page",
         "Scrolls down the currently loaded webpage and returns the next chunk of text. Call 'visit_web_page' first."),
        (self._internal_page_up, "scroll_up_page",
         "Scrolls up the currently loaded webpage and returns the previous chunk of text. Call 'visit_web_page' first."),
        (self._internal_get_full_page_text, "get_full_page_text",
         "Returns the entire text content of the currently loaded webpage. Call 'visit_web_page' first."),
        (self._internal_search_google, "google_search",
         "Performs a Google search for the given query and returns a list of results (title, link, snippet)."),
        (self._internal_search_github, "github_search",
         "Performs a GitHub search. Specify query string and optionally num_results and search_type ('repositories', 'code', 'issues')."),
        (self._internal_download_and_extract_text, "download_and_read_file",
         "Downloads a file from a URL and extracts its text content. Works with PDF, DOCX, and PPTX files."),
    ]
    return [
        FunctionTool.from_defaults(fn=fn, name=name, description=description)
        for fn, name, description in tool_specs
    ]
|
| 193 |
+
|
| 194 |
+
def _get_system_prompt(self) -> str:
    """
    Generate the system prompt for the research agent.

    Returns:
        The static instruction text handed to the ReActAgent at
        construction time (tool list, research strategy, synthesis rules).
    """
    # The prompt is a fixed literal; the specific research task is supplied
    # separately in research() via the initial chat message.
    return """You are an autonomous AI Web Research Agent.

Your goal is to conduct thorough web research on topics given to you. You have access to the following tools:

1. visit_web_page: Visit a URL and view its content
2. scroll_down_page: View the next part of a webpage you've visited
3. scroll_up_page: View the previous part of a webpage you've visited
4. get_full_page_text: Get the entire text of a webpage you've visited
5. google_search: Search Google for information
6. github_search: Search GitHub for repositories, code, or issues
7. download_and_read_file: Download and extract text from files (PDF, DOCX, PPTX)

RESEARCH STRATEGY:
1. Start by using google_search to find relevant information sources
2. Visit promising websites with visit_web_page to gather information
3. Navigate through pages with scroll_down_page and scroll_up_page
4. When you find a relevant document, use download_and_read_file
5. Continue until you have gathered sufficient information
6. Synthesize your findings into a comprehensive report

SYNTHESIS INSTRUCTIONS:
- Provide a thorough and direct answer to the original research task
- Include key findings and important details at the beginning of your report
- Present supporting evidence, examples, and detailed explanations
- Always cite your sources with full URLs whenever possible
- Organize information logically with clear sections and points
- Focus on accuracy and comprehensiveness in your final report

IMPORTANT GUIDELINES:
- Be thorough. Your final answer should be comprehensive and directly address the original research task.
- Cite your sources by including the full URLs of the pages or documents where you found key information.
- Take detailed notes on important facts, figures, and code snippets as you find them.
- Your final output should be a complete synthesis of your findings.
"""
|
| 232 |
+
|
| 233 |
+
def research(self, research_task: str, debug: bool = False) -> Dict[str, Any]:
    """
    Performs autonomous web research on the given task and returns a structured report.

    Args:
        research_task: The research question or topic
        debug: Whether to print detailed debug information about the agent
            response and dump intermediate prompts/outputs to local files

    Returns:
        A dictionary containing:
        - "summary": Concise summary answering the research task
        - "detailed_findings": Detailed information and evidence
        - "sources": List of unique primary source URLs
        - "status": "success", "success_unstructured", or "failure"
        - "error_message": Error message if status is not "success", else None
    """
    # Initialize default return structure (pessimistic: failure until proven otherwise)
    result = {
        "summary": "",
        "detailed_findings": "",
        "sources": [],
        "status": "failure",
        "error_message": None
    }

    # Check if agent executor is initialized
    if not self.agent_executor:
        result["error_message"] = "Error: Agent executor not initialized."
        return result

    initial_prompt = f"""
I need to research the following topic thoroughly:
RESEARCH TASK: {research_task}
Please conduct comprehensive research using the available tools. When you have gathered sufficient information, synthesize your findings into a well-structured report that thoroughly addresses the research task.
Begin your research now.
"""

    try:
        # Execute the research (ReAct loop runs until done or max_iterations)
        agent_response = self.agent_executor.chat(initial_prompt)

        if debug:
            # NOTE(review): _debug_print_agent_response is defined elsewhere
            # in this class — confirm it exists before calling with debug=True.
            self._debug_print_agent_response(agent_response)

        raw_agent_output = agent_response.response
        acted_upon_urls = []  # URLs passed to visit/download tools, in call order
        history_parts = []    # human-readable transcript of each tool step

        # Rebuild an execution transcript from the agent's recorded tool calls.
        agent_sources = getattr(agent_response, 'sources', [])
        for i, source in enumerate(agent_sources):
            tool_name = getattr(source, 'tool_name', 'unknown_tool')
            tool_input = getattr(source, 'raw_input', {})
            observation = getattr(source, 'content', '')
            history_parts.append(
                f"Step {i+1}:\n"
                f"Action: {tool_name}\n"
                f"Action Input: {json.dumps(tool_input)}\n"
                f"Observation: {str(observation)[:1500]}...\n"  # truncate long observations
            )
            # URLs that were actually visited/downloaded are the
            # highest-confidence sources for the final report.
            if tool_name in ["visit_web_page", "download_and_read_file"]:
                kwargs = tool_input.get('kwargs', {})
                url = kwargs.get('url')
                if isinstance(url, str) and url.startswith('http'):
                    acted_upon_urls.append(url)

        agent_history = "\n".join(history_parts)
        # De-duplicate while preserving first-seen order.
        unique_acted_upon_urls = list(dict.fromkeys(acted_upon_urls))
        acted_upon_urls_context_str = "\n".join([f"- {url}" for url in unique_acted_upon_urls[:20]]) if unique_acted_upon_urls else "No URLs were directly used in 'visit_web_page' or 'download_and_read_file' actions."
        history_for_prompt = agent_history if agent_history else raw_agent_output

        parsing_prompt = f"""
Given the following research task, the ReAct agent's final answer, and its detailed execution history,
extract the information into a structured JSON report. The report must be as detailed as possible,
comprehensively answering the research task.

RESEARCH TASK:
{research_task}

ReAct AGENT'S FINAL ANSWER (Primary basis for your report):
{raw_agent_output}

ReAct AGENT'S DETAILED EXECUTION HISTORY (Use this to extract specific details, code, and verify sources):
{history_for_prompt}

URLs DIRECTLY ACTED UPON BY THE AGENT (High-confidence sources):
{acted_upon_urls_context_str}

Please provide a JSON object with the following keys:
- "summary": A concise summary that directly answers the research task.
- "detailed_findings": This MUST be a comprehensive and highly detailed section.
- Directly address all parts of the research task.
- If the research task involved finding or generating code, THIS SECTION MUST INCLUDE THE RELEVANT CODE SNIPPETS OR FULL CODE BLOCK(S) as found or synthesized by the agent. Use markdown for code blocks (e.g. ```python ... ```).
- Include specific facts, figures, explanations, and evidence from the agent's research.
- Structure the information logically. This should be a single string, potentially long, containing the full detailed report.
- "sources": A list of unique primary source URLs.
- Prioritize URLs from the 'URLs DIRECTLY ACTED UPON' list.
- Supplement with other critical URLs from the AGENT'S FINAL ANSWER or EXECUTION HISTORY from which key information or code was directly obtained.
- Ensure all listed URLs are complete and valid.

Return ONLY the JSON object inside a markdown code block, like this:
```json
{{
"summary": "...",
"detailed_findings": "...",
"sources": [...]
}}
```
"""
        # Get structured output from LLM
        try:
            if debug:  # Save parsing prompt only if debug is True
                with open("parsing_prompt.txt", "w", encoding='utf-8') as f:
                    f.write(parsing_prompt)

            structured_output_text = self.llm.complete(parsing_prompt).text

            if debug:  # Save structured output only if debug is True
                with open("structured_output.txt", "w", encoding='utf-8') as f:
                    f.write(structured_output_text)

            json_str = None
            # Pattern 1: Look for JSON inside ```json ... ```
            match = re.search(r'```json\s*(\{[\s\S]*\})\s*```', structured_output_text, re.DOTALL)
            if match:
                json_str = match.group(1)
            else:
                # Pattern 2: Be more aggressive and find the first '{' and last '}'
                start_index = structured_output_text.find('{')
                end_index = structured_output_text.rfind('}')
                if start_index != -1 and end_index != -1 and end_index > start_index:
                    json_str = structured_output_text[start_index : end_index + 1]

            if not json_str:
                raise ValueError(f"Could not extract a JSON object from the LLM's response. Raw response: {structured_output_text}")

            # Clean the extracted string
            json_str = json_str.strip()

            # Parse the structured output
            structured_output = json.loads(json_str)

            # Update result with structured data
            result["summary"] = structured_output.get("summary", "")

            # Handle detailed findings - ensure it's properly formatted
            detailed_findings = structured_output.get("detailed_findings", [])
            if isinstance(detailed_findings, list):
                result["detailed_findings"] = "\n\n".join(detailed_findings)
            else:
                result["detailed_findings"] = str(detailed_findings)

            # Handle sources - ensure we have URLs (fall back to visited URLs)
            sources = structured_output.get("sources", [])
            if not sources and unique_acted_upon_urls:
                sources = unique_acted_upon_urls
            result["sources"] = list(dict.fromkeys(sources))
            result["status"] = "success"

        except (json.JSONDecodeError, ValueError) as json_err:
            # Structured parsing failed: fall back to the raw agent output and
            # transcript so the caller still receives usable results.
            result["summary"] = raw_agent_output
            result["detailed_findings"] = agent_history if agent_history else "No detailed history available."
            result["sources"] = unique_acted_upon_urls
            result["status"] = "success_unstructured"
            result["error_message"] = f"Failed to parse agent output into structured JSON: {str(json_err)}"

    except Exception as e:
        # Any failure in the agent run itself leaves status as "failure".
        result["error_message"] = str(e)
        if self.verbose or debug:
            traceback.print_exc(file=sys.stdout)

    return result
|
| 404 |
+
|
| 405 |
+
# Internal tool methods
|
| 406 |
+
def _internal_visit_url(self, url: str) -> str:
|
| 407 |
+
"""
|
| 408 |
+
Internal method for visiting a URL.
|
| 409 |
+
|
| 410 |
+
Args:
|
| 411 |
+
url: The URL to visit
|
| 412 |
+
|
| 413 |
+
Returns:
|
| 414 |
+
String representation of the first chunk of text or error message
|
| 415 |
+
"""
|
| 416 |
+
success, content_or_error = self.visit_url(url)
|
| 417 |
+
if success:
|
| 418 |
+
return content_or_error
|
| 419 |
+
return f"Error visiting URL: {content_or_error}"
|
| 420 |
+
|
| 421 |
+
def _internal_page_down(self) -> str:
|
| 422 |
+
"""
|
| 423 |
+
Internal method for scrolling down the current page.
|
| 424 |
+
|
| 425 |
+
Returns:
|
| 426 |
+
String representation of the next chunk of text or error message
|
| 427 |
+
"""
|
| 428 |
+
content = self.page_down()
|
| 429 |
+
return content if content else "No more content or no page loaded."
|
| 430 |
+
|
| 431 |
+
def _internal_page_up(self) -> str:
|
| 432 |
+
"""
|
| 433 |
+
Internal method for scrolling up the current page.
|
| 434 |
+
|
| 435 |
+
Returns:
|
| 436 |
+
String representation of the previous chunk of text or error message
|
| 437 |
+
"""
|
| 438 |
+
content = self.page_up()
|
| 439 |
+
return content if content else "No previous content or no page loaded."
|
| 440 |
+
|
| 441 |
+
def _internal_get_full_page_text(self) -> str:
|
| 442 |
+
"""
|
| 443 |
+
Internal method for getting the full page text.
|
| 444 |
+
|
| 445 |
+
Returns:
|
| 446 |
+
String representation of the full page text or error message
|
| 447 |
+
"""
|
| 448 |
+
content = self.get_full_page_text()
|
| 449 |
+
return content if content else "No page loaded or no content available."
|
| 450 |
+
|
| 451 |
+
def _internal_search_google(self, query: str, num_results: int = 5) -> str:
|
| 452 |
+
"""
|
| 453 |
+
Internal method for searching Google.
|
| 454 |
+
|
| 455 |
+
Args:
|
| 456 |
+
query: The search query
|
| 457 |
+
num_results: Number of results to return
|
| 458 |
+
|
| 459 |
+
Returns:
|
| 460 |
+
String representation of the search results or error message
|
| 461 |
+
"""
|
| 462 |
+
success, results_or_error = self.search_google(query, num_results)
|
| 463 |
+
if success:
|
| 464 |
+
formatted_results = "\n\n".join([
|
| 465 |
+
f"Title: {r['title']}\nLink: {r['link']}\nSnippet: {r['snippet']}"
|
| 466 |
+
for r in results_or_error
|
| 467 |
+
])
|
| 468 |
+
return f"Google Search Results for '{query}':\n\n{formatted_results}"
|
| 469 |
+
return f"Google Search Error: {results_or_error}"
|
| 470 |
+
|
| 471 |
+
def _internal_search_github(self, query: str, num_results: int = 5, search_type: str = "repositories") -> str:
|
| 472 |
+
"""
|
| 473 |
+
Internal method for searching GitHub.
|
| 474 |
+
|
| 475 |
+
Args:
|
| 476 |
+
query: The search query
|
| 477 |
+
num_results: Number of results to return
|
| 478 |
+
search_type: Type of search ("repositories", "code", "issues")
|
| 479 |
+
|
| 480 |
+
Returns:
|
| 481 |
+
String representation of the search results or error message
|
| 482 |
+
"""
|
| 483 |
+
success, results_or_error = self.search_github(query, num_results, search_type)
|
| 484 |
+
if success:
|
| 485 |
+
if search_type == "repositories":
|
| 486 |
+
formatted_results = "\n\n".join([
|
| 487 |
+
f"Name: {r['name']}\nURL: {r['html_url']}\nDescription: {r.get('description', 'N/A')}\nStars: {r.get('stars', 'N/A')}\nLanguage: {r.get('language', 'N/A')}"
|
| 488 |
+
for r in results_or_error
|
| 489 |
+
])
|
| 490 |
+
elif search_type == "code":
|
| 491 |
+
formatted_results = "\n\n".join([
|
| 492 |
+
f"File: {r.get('name', 'N/A')}\nURL: {r['html_url']}\nRepository: {r.get('repository', 'N/A')}\nPath: {r.get('path', 'N/A')}"
|
| 493 |
+
for r in results_or_error
|
| 494 |
+
])
|
| 495 |
+
elif search_type == "issues":
|
| 496 |
+
formatted_results = "\n\n".join([
|
| 497 |
+
f"Title: {r.get('title', 'N/A')}\nURL: {r['html_url']}\nState: {r.get('state', 'N/A')}\nCreated: {r.get('created_at', 'N/A')}"
|
| 498 |
+
for r in results_or_error
|
| 499 |
+
])
|
| 500 |
+
else:
|
| 501 |
+
formatted_results = str(results_or_error)
|
| 502 |
+
|
| 503 |
+
return f"GitHub Search Results for '{query}' (type: {search_type}):\n\n{formatted_results}"
|
| 504 |
+
return f"GitHub Search Error: {results_or_error}"
|
| 505 |
+
|
| 506 |
+
def _internal_download_and_extract_text(self, url: str, file_type: str = None) -> str:
|
| 507 |
+
"""
|
| 508 |
+
Internal method for downloading and extracting text from a file.
|
| 509 |
+
|
| 510 |
+
Args:
|
| 511 |
+
url: The URL of the file to download
|
| 512 |
+
file_type: Optional file type override ('pdf', 'docx', 'pptx')
|
| 513 |
+
|
| 514 |
+
Returns:
|
| 515 |
+
String representation of the extracted text or error message
|
| 516 |
+
"""
|
| 517 |
+
success, text_or_error = self.download_and_extract_text(url, file_type)
|
| 518 |
+
if success:
|
| 519 |
+
return f"Successfully extracted text from {url}:\n\n{text_or_error}"
|
| 520 |
+
return f"Error extracting text from {url}: {text_or_error}"
|
| 521 |
+
|
| 522 |
+
# Original WebAgent methods
|
| 523 |
+
def visit_url(self, url: str) -> Tuple[bool, str]:
    """
    Fetch a URL and prepare its textual content for chunked browsing.

    Args:
        url: The URL to visit and load content from

    Returns:
        (success, payload): payload is the first text chunk when the page
        loads and yields readable text, otherwise an error description.
    """
    try:
        # Reject anything lacking both a scheme and a host.
        parts = urlparse(url)
        if not parts.scheme or not parts.netloc:
            return False, f"Invalid URL format: {url}"

        # NOTE: SimpleWebPageReader does not support user agent or timeout
        # directly; html_to_text is the only supported argument here.
        documents = SimpleWebPageReader(html_to_text=True).load_data([url])
        if not documents:
            return False, f"No content found at URL: {url}"

        # Record the freshly loaded page.
        self.current_url = url
        self.current_page_documents = documents

        # Concatenate whatever text the reader produced and chunk it.
        combined = " ".join(doc.text for doc in documents if doc.text)
        self._chunk_text(combined)

        # Start viewing from the top of the page.
        self.current_chunk_index = 0

        if not self.current_page_text_chunks:
            return False, "URL loaded but no readable text content found"
        return True, self.current_page_text_chunks[0]

    except Exception as e:
        return False, f"Failed to load URL: {str(e)}"
|
| 568 |
+
|
| 569 |
+
def _chunk_text(self, text: str) -> None:
|
| 570 |
+
"""
|
| 571 |
+
Splits the full text into manageable chunks for navigation.
|
| 572 |
+
|
| 573 |
+
Args:
|
| 574 |
+
text: The full text content to chunk
|
| 575 |
+
"""
|
| 576 |
+
# Clear existing chunks
|
| 577 |
+
self.current_page_text_chunks = []
|
| 578 |
+
|
| 579 |
+
if not text:
|
| 580 |
+
return
|
| 581 |
+
|
| 582 |
+
# Split text into paragraphs first (better for readability)
|
| 583 |
+
paragraphs = re.split(r'\n\s*\n', text)
|
| 584 |
+
|
| 585 |
+
current_chunk = ""
|
| 586 |
+
for paragraph in paragraphs:
|
| 587 |
+
paragraph = paragraph.strip()
|
| 588 |
+
if not paragraph:
|
| 589 |
+
continue
|
| 590 |
+
# If adding this paragraph would exceed the chunk size,
|
| 591 |
+
# store the current chunk and start a new one
|
| 592 |
+
if current_chunk and len(current_chunk) + len(paragraph) + 2 > self.chars_per_chunk:
|
| 593 |
+
self.current_page_text_chunks.append(current_chunk.strip())
|
| 594 |
+
current_chunk = ""
|
| 595 |
+
# If the paragraph itself is longer than the chunk size, split it
|
| 596 |
+
while len(paragraph) > self.chars_per_chunk:
|
| 597 |
+
if current_chunk:
|
| 598 |
+
self.current_page_text_chunks.append(current_chunk.strip())
|
| 599 |
+
current_chunk = ""
|
| 600 |
+
self.current_page_text_chunks.append(paragraph[:self.chars_per_chunk].strip())
|
| 601 |
+
paragraph = paragraph[self.chars_per_chunk:]
|
| 602 |
+
# Add the (possibly shortened) paragraph to the current chunk
|
| 603 |
+
if current_chunk:
|
| 604 |
+
current_chunk += "\n\n" + paragraph
|
| 605 |
+
else:
|
| 606 |
+
current_chunk = paragraph
|
| 607 |
+
# Add the last chunk if it's not empty
|
| 608 |
+
if current_chunk.strip():
|
| 609 |
+
self.current_page_text_chunks.append(current_chunk.strip())
|
| 610 |
+
|
| 611 |
+
def get_current_view(self) -> Optional[str]:
    """
    Return the chunk currently in view, based on current_chunk_index.

    Returns:
        The current text chunk, or None if no page is loaded or the
        index is out of range.
    """
    chunks = self.current_page_text_chunks
    if not chunks:
        return None
    index = self.current_chunk_index
    return chunks[index] if 0 <= index < len(chunks) else None
|
| 625 |
+
|
| 626 |
+
def page_down(self) -> Optional[str]:
    """
    Advance one chunk down the loaded page.

    Returns:
        The next chunk; an "end of page" notice plus the current chunk
        when already at the bottom; or None if no page is loaded.
    """
    chunks = self.current_page_text_chunks
    if not chunks:
        return None

    # At (or past) the last chunk: stay put and say so.
    if self.current_chunk_index >= len(chunks) - 1:
        return "Already at the end of the page.\n\n" + chunks[self.current_chunk_index]

    self.current_chunk_index += 1
    return chunks[self.current_chunk_index]
|
| 643 |
+
|
| 644 |
+
def page_up(self) -> Optional[str]:
    """
    Move one chunk up the loaded page.

    Returns:
        The previous chunk; a "start of page" notice plus the first chunk
        when already at the top; or None if no page is loaded.
    """
    chunks = self.current_page_text_chunks
    if not chunks:
        return None

    # At (or before) the first chunk: stay put and say so.
    if self.current_chunk_index <= 0:
        return "Already at the start of the page.\n\n" + chunks[0]

    self.current_chunk_index -= 1
    return chunks[self.current_chunk_index]
|
| 661 |
+
|
| 662 |
+
def get_full_page_text(self) -> Optional[str]:
    """
    Return the entire concatenated text content of the currently loaded page.

    Returns:
        All chunks joined by blank lines, or None if no page is loaded.
    """
    chunks = self.current_page_text_chunks
    return "\n\n".join(chunks) if chunks else None
|
| 673 |
+
|
| 674 |
+
def search_google(
    self, query: str, num_results: Optional[int] = None
) -> Tuple[bool, Union[List[Dict[str, str]], str]]:
    """
    Run a Google Custom Search query.

    Args:
        query: The search query
        num_results: Number of results to return; falls back to the
            configured default and is clamped to the configured maximum

    Returns:
        (success, payload): payload is a list of dicts with 'title',
        'link' and 'snippet' keys on success, or an error string.
    """
    if not self._google_search_service or not self.google_cx_id:
        return False, "Google Search API not configured. Please provide google_api_key and google_cx_id."

    try:
        # Resolve and clamp the result count from configuration.
        search_config = get_config("SEARCH_CONFIG")
        if num_results is None:
            num_results = search_config.get("DEFAULT_SEARCH_RESULTS", 5)
        ceiling = search_config.get("MAX_SEARCH_RESULTS", 10)
        num_results = min(max(1, num_results), ceiling)

        # Execute the search against the Custom Search API.
        response = self._google_search_service.cse().list(
            q=query,
            cx=self.google_cx_id,
            num=num_results
        ).execute()

        # Normalize the raw items into simple dicts.
        results = [
            {
                'title': item.get('title', 'No title'),
                'link': item.get('link', 'No link'),
                'snippet': item.get('snippet', 'No description'),
            }
            for item in response.get('items', [])
        ]
        return True, results

    except HttpError as e:
        return False, f"Google Search API error: {str(e)}"
    except Exception as e:
        return False, f"Error performing Google search: {str(e)}"
|
| 728 |
+
|
| 729 |
+
def search_github(
|
| 730 |
+
self, query: str, num_results: Optional[int] = None, search_type: str = "repositories"
|
| 731 |
+
) -> Tuple[bool, Union[List[Dict[str, Any]], str]]:
|
| 732 |
+
"""
|
| 733 |
+
Performs a search on GitHub for repositories, code, or issues.
|
| 734 |
+
|
| 735 |
+
Args:
|
| 736 |
+
query: The search query
|
| 737 |
+
num_results: Number of results to return (uses config default if None)
|
| 738 |
+
search_type: Type of search ("repositories", "code", "issues")
|
| 739 |
+
|
| 740 |
+
Returns:
|
| 741 |
+
A tuple (success, results_or_error):
|
| 742 |
+
- success: True if search was successful, False otherwise
|
| 743 |
+
- results_or_error: List of search result dictionaries if successful, error message otherwise
|
| 744 |
+
"""
|
| 745 |
+
if not self._github_client:
|
| 746 |
+
return False, "GitHub API not configured. Please provide github_token."
|
| 747 |
+
|
| 748 |
+
try:
|
| 749 |
+
# Get search configuration
|
| 750 |
+
search_config = get_config("SEARCH_CONFIG")
|
| 751 |
+
|
| 752 |
+
# Use config for default num_results if not specified
|
| 753 |
+
if num_results is None:
|
| 754 |
+
num_results = search_config.get("DEFAULT_SEARCH_RESULTS", 5)
|
| 755 |
+
|
| 756 |
+
# Ensure num_results is within acceptable range
|
| 757 |
+
num_results = min(max(1, num_results), search_config.get("MAX_SEARCH_RESULTS", 10))
|
| 758 |
+
|
| 759 |
+
formatted_results = []
|
| 760 |
+
|
| 761 |
+
# Execute search based on search_type
|
| 762 |
+
if search_type == "repositories":
|
| 763 |
+
search_results = self._github_client.search_repositories(query)
|
| 764 |
+
|
| 765 |
+
for i, repo in enumerate(search_results):
|
| 766 |
+
if i >= num_results:
|
| 767 |
+
break
|
| 768 |
+
|
| 769 |
+
result = {
|
| 770 |
+
'name': repo.full_name,
|
| 771 |
+
'html_url': repo.html_url,
|
| 772 |
+
'description': repo.description or "No description",
|
| 773 |
+
'stars': repo.stargazers_count,
|
| 774 |
+
'language': repo.language or "Not specified"
|
| 775 |
+
}
|
| 776 |
+
formatted_results.append(result)
|
| 777 |
+
|
| 778 |
+
elif search_type == "code":
|
| 779 |
+
search_results = self._github_client.search_code(query)
|
| 780 |
+
|
| 781 |
+
for i, code_result in enumerate(search_results):
|
| 782 |
+
if i >= num_results:
|
| 783 |
+
break
|
| 784 |
+
|
| 785 |
+
result = {
|
| 786 |
+
'name': code_result.name,
|
| 787 |
+
'html_url': code_result.html_url,
|
| 788 |
+
'repository': code_result.repository.full_name,
|
| 789 |
+
'path': code_result.path
|
| 790 |
+
}
|
| 791 |
+
formatted_results.append(result)
|
| 792 |
+
|
| 793 |
+
elif search_type == "issues":
|
| 794 |
+
search_results = self._github_client.search_issues(query)
|
| 795 |
+
|
| 796 |
+
for i, issue in enumerate(search_results):
|
| 797 |
+
if i >= num_results:
|
| 798 |
+
break
|
| 799 |
+
|
| 800 |
+
result = {
|
| 801 |
+
'title': issue.title,
|
| 802 |
+
'html_url': issue.html_url,
|
| 803 |
+
'state': issue.state,
|
| 804 |
+
'created_at': str(issue.created_at),
|
| 805 |
+
'repository': issue.repository.full_name
|
| 806 |
+
}
|
| 807 |
+
formatted_results.append(result)
|
| 808 |
+
|
| 809 |
+
else:
|
| 810 |
+
return False, f"Invalid search_type: {search_type}. Must be 'repositories', 'code', or 'issues'."
|
| 811 |
+
|
| 812 |
+
return True, formatted_results
|
| 813 |
+
|
| 814 |
+
except GithubException as e:
|
| 815 |
+
return False, f"GitHub API error: {str(e)}"
|
| 816 |
+
except Exception as e:
|
| 817 |
+
return False, f"Error performing GitHub search: {str(e)}"
|
| 818 |
+
|
| 819 |
+
def download_file(self, url: str, desired_filename: Optional[str] = None) -> Tuple[bool, str]:
|
| 820 |
+
"""
|
| 821 |
+
Downloads a file from the given URL to the temp_downloads directory.
|
| 822 |
+
|
| 823 |
+
Args:
|
| 824 |
+
url: The URL of the file to download
|
| 825 |
+
desired_filename: Optional custom filename for the downloaded file
|
| 826 |
+
|
| 827 |
+
Returns:
|
| 828 |
+
A tuple (success, filepath_or_error_message):
|
| 829 |
+
- success: True if download was successful, False otherwise
|
| 830 |
+
- filepath_or_error_message: Path to downloaded file if successful, error message otherwise
|
| 831 |
+
"""
|
| 832 |
+
try:
|
| 833 |
+
# Validate URL format
|
| 834 |
+
parsed_url = urlparse(url)
|
| 835 |
+
if not all([parsed_url.scheme, parsed_url.netloc]):
|
| 836 |
+
return False, f"Invalid URL format: {url}"
|
| 837 |
+
|
| 838 |
+
# Determine filename
|
| 839 |
+
if desired_filename:
|
| 840 |
+
filename = desired_filename
|
| 841 |
+
else:
|
| 842 |
+
# Try to extract filename from URL
|
| 843 |
+
url_path = unquote(parsed_url.path)
|
| 844 |
+
filename_from_url = os.path.basename(url_path)
|
| 845 |
+
|
| 846 |
+
if filename_from_url and '.' in filename_from_url:
|
| 847 |
+
# Use filename from URL if it has an extension
|
| 848 |
+
filename = filename_from_url
|
| 849 |
+
else:
|
| 850 |
+
# Generate a unique filename with URL netloc as prefix
|
| 851 |
+
unique_id = str(uuid.uuid4())[:8]
|
| 852 |
+
host = parsed_url.netloc.split('.')[0] if '.' in parsed_url.netloc else 'download'
|
| 853 |
+
filename = f"{host}_{unique_id}"
|
| 854 |
+
|
| 855 |
+
# Try to determine file extension from Content-Type header
|
| 856 |
+
head_response = requests.head(url, timeout=self.request_timeout,
|
| 857 |
+
allow_redirects=True,
|
| 858 |
+
headers={"User-Agent": self.user_agent})
|
| 859 |
+
content_type = head_response.headers.get('Content-Type', '')
|
| 860 |
+
|
| 861 |
+
# Map common MIME types to file extensions
|
| 862 |
+
extension_map = {
|
| 863 |
+
'application/pdf': '.pdf',
|
| 864 |
+
'application/msword': '.doc',
|
| 865 |
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
|
| 866 |
+
'application/vnd.ms-powerpoint': '.ppt',
|
| 867 |
+
'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
|
| 868 |
+
'application/vnd.ms-excel': '.xls',
|
| 869 |
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
|
| 870 |
+
'text/plain': '.txt',
|
| 871 |
+
'text/html': '.html',
|
| 872 |
+
'text/csv': '.csv',
|
| 873 |
+
'image/jpeg': '.jpg',
|
| 874 |
+
'image/png': '.png',
|
| 875 |
+
'image/gif': '.gif',
|
| 876 |
+
'application/json': '.json',
|
| 877 |
+
'application/xml': '.xml',
|
| 878 |
+
}
|
| 879 |
+
|
| 880 |
+
# Extract the main content type
|
| 881 |
+
main_type = content_type.split(';')[0].strip()
|
| 882 |
+
extension = extension_map.get(main_type, '')
|
| 883 |
+
|
| 884 |
+
if extension:
|
| 885 |
+
filename += extension
|
| 886 |
+
elif 'Content-Disposition' in head_response.headers:
|
| 887 |
+
# Try to extract filename from Content-Disposition
|
| 888 |
+
content_disp = head_response.headers['Content-Disposition']
|
| 889 |
+
if 'filename=' in content_disp:
|
| 890 |
+
extracted_name = re.findall('filename=(.+)', content_disp)
|
| 891 |
+
if extracted_name:
|
| 892 |
+
extracted_name = extracted_name[0].strip('"\'')
|
| 893 |
+
if '.' in extracted_name:
|
| 894 |
+
filename = extracted_name
|
| 895 |
+
|
| 896 |
+
# Ensure the filename is safe and doesn't contain invalid characters
|
| 897 |
+
filename = "".join([c for c in filename if c.isalnum() or c in '._- ']).strip()
|
| 898 |
+
if not filename:
|
| 899 |
+
filename = f"download_{str(uuid.uuid4())[:8]}"
|
| 900 |
+
|
| 901 |
+
# Generate full filepath
|
| 902 |
+
filepath = os.path.join(self.temp_download_dir, filename)
|
| 903 |
+
|
| 904 |
+
# Download the file with a streaming approach to handle large files
|
| 905 |
+
with requests.get(url, stream=True, timeout=self.request_timeout,
|
| 906 |
+
headers={"User-Agent": self.user_agent},
|
| 907 |
+
verify=self.verify_ssl) as response:
|
| 908 |
+
response.raise_for_status() # Raise exception for 4XX/5XX responses
|
| 909 |
+
|
| 910 |
+
with open(filepath, 'wb') as file:
|
| 911 |
+
for chunk in response.iter_content(chunk_size=8192):
|
| 912 |
+
if chunk:
|
| 913 |
+
file.write(chunk)
|
| 914 |
+
|
| 915 |
+
return True, filepath
|
| 916 |
+
|
| 917 |
+
except RequestException as e:
|
| 918 |
+
return False, f"Request error when downloading file: {str(e)}"
|
| 919 |
+
except IOError as e:
|
| 920 |
+
return False, f"I/O error when saving file: {str(e)}"
|
| 921 |
+
except Exception as e:
|
| 922 |
+
return False, f"Unexpected error when downloading file: {str(e)}"
|
| 923 |
+
|
| 924 |
+
def read_pdf_text(self, filepath: str) -> Tuple[bool, str]:
|
| 925 |
+
"""
|
| 926 |
+
Extracts text content from a PDF file using the utility function.
|
| 927 |
+
|
| 928 |
+
Args:
|
| 929 |
+
filepath: Path to the PDF file
|
| 930 |
+
|
| 931 |
+
Returns:
|
| 932 |
+
A tuple (success, text_or_error_message)
|
| 933 |
+
"""
|
| 934 |
+
return read_pdf_text(filepath)
|
| 935 |
+
|
| 936 |
+
def read_docx_text(self, filepath: str) -> Tuple[bool, str]:
|
| 937 |
+
"""
|
| 938 |
+
Extracts text content from a DOCX file using the utility function.
|
| 939 |
+
|
| 940 |
+
Args:
|
| 941 |
+
filepath: Path to the DOCX file
|
| 942 |
+
|
| 943 |
+
Returns:
|
| 944 |
+
A tuple (success, text_or_error_message)
|
| 945 |
+
"""
|
| 946 |
+
return read_docx_text(filepath)
|
| 947 |
+
|
| 948 |
+
def read_pptx_text(self, filepath: str) -> Tuple[bool, str]:
|
| 949 |
+
"""
|
| 950 |
+
Extracts text content from a PPTX file using the utility function.
|
| 951 |
+
|
| 952 |
+
Args:
|
| 953 |
+
filepath: Path to the PPTX file
|
| 954 |
+
|
| 955 |
+
Returns:
|
| 956 |
+
A tuple (success, text_or_error_message)
|
| 957 |
+
"""
|
| 958 |
+
return read_pptx_text(filepath)
|
| 959 |
+
|
| 960 |
+
def clean_temp_files(self, filepath: Optional[str] = None) -> bool:
|
| 961 |
+
"""
|
| 962 |
+
Cleans up temporary downloaded files.
|
| 963 |
+
|
| 964 |
+
Args:
|
| 965 |
+
filepath: Optional specific file to delete. If None, all files in temp_download_dir are deleted
|
| 966 |
+
|
| 967 |
+
Returns:
|
| 968 |
+
True if cleaning was successful, False otherwise
|
| 969 |
+
"""
|
| 970 |
+
try:
|
| 971 |
+
if filepath:
|
| 972 |
+
# Delete a specific file
|
| 973 |
+
if os.path.exists(filepath) and os.path.isfile(filepath):
|
| 974 |
+
os.remove(filepath)
|
| 975 |
+
else:
|
| 976 |
+
# Delete all files in the temp_download_dir
|
| 977 |
+
for filename in os.listdir(self.temp_download_dir):
|
| 978 |
+
filepath = os.path.join(self.temp_download_dir, filename)
|
| 979 |
+
if os.path.isfile(filepath):
|
| 980 |
+
os.remove(filepath)
|
| 981 |
+
return True
|
| 982 |
+
except Exception as e:
|
| 983 |
+
print(f"Warning: Failed to clean temporary files: {str(e)}")
|
| 984 |
+
return False
|
| 985 |
+
|
| 986 |
+
def download_and_extract_text(self, url: str, file_type: str = None) -> Tuple[bool, str]:
|
| 987 |
+
"""
|
| 988 |
+
Downloads a file and extracts its text content based on file type.
|
| 989 |
+
|
| 990 |
+
Args:
|
| 991 |
+
url: The URL of the file to download
|
| 992 |
+
file_type: Optional file type override ('pdf', 'docx', 'pptx')
|
| 993 |
+
|
| 994 |
+
Returns:
|
| 995 |
+
A tuple (success, text_or_error_message):
|
| 996 |
+
- success: True if operation was successful, False otherwise
|
| 997 |
+
- text_or_error_message: Extracted text if successful, error message otherwise
|
| 998 |
+
"""
|
| 999 |
+
try:
|
| 1000 |
+
# Download the file
|
| 1001 |
+
download_success, download_result = self.download_file(url)
|
| 1002 |
+
|
| 1003 |
+
if not download_success:
|
| 1004 |
+
return False, download_result
|
| 1005 |
+
|
| 1006 |
+
filepath = download_result
|
| 1007 |
+
|
| 1008 |
+
try:
|
| 1009 |
+
# Determine file type if not specified
|
| 1010 |
+
if not file_type:
|
| 1011 |
+
file_type = detect_file_type(filepath)
|
| 1012 |
+
if not file_type:
|
| 1013 |
+
return False, f"Unsupported file type: {os.path.splitext(filepath)[1]}"
|
| 1014 |
+
|
| 1015 |
+
# Extract text based on file type
|
| 1016 |
+
if file_type == 'pdf':
|
| 1017 |
+
success, result = self.read_pdf_text(filepath)
|
| 1018 |
+
elif file_type == 'docx':
|
| 1019 |
+
success, result = self.read_docx_text(filepath)
|
| 1020 |
+
elif file_type == 'pptx':
|
| 1021 |
+
success, result = self.read_pptx_text(filepath)
|
| 1022 |
+
else:
|
| 1023 |
+
success = False
|
| 1024 |
+
result = f"Unsupported file type: {file_type}"
|
| 1025 |
+
|
| 1026 |
+
# Clean up the temporary file
|
| 1027 |
+
self.clean_temp_files(filepath)
|
| 1028 |
+
|
| 1029 |
+
return success, result
|
| 1030 |
+
|
| 1031 |
+
except Exception as e:
|
| 1032 |
+
# Clean up in case of error
|
| 1033 |
+
self.clean_temp_files(filepath)
|
| 1034 |
+
return False, f"Error processing file: {str(e)}"
|
| 1035 |
+
|
| 1036 |
+
except Exception as e:
|
| 1037 |
+
return False, f"Unexpected error: {str(e)}"
|
| 1038 |
+
|
| 1039 |
+
def _debug_print_agent_response(self, agent_response, max_str_length=1000):
|
| 1040 |
+
"""
|
| 1041 |
+
Debug utility to print all components of the agent response object.
|
| 1042 |
+
|
| 1043 |
+
Args:
|
| 1044 |
+
agent_response: The agent response object to inspect
|
| 1045 |
+
max_str_length: Maximum length for string values to display
|
| 1046 |
+
"""
|
| 1047 |
+
def _format_value(value, level=0):
|
| 1048 |
+
indent = " " * level
|
| 1049 |
+
if value is None:
|
| 1050 |
+
return "None"
|
| 1051 |
+
elif isinstance(value, str):
|
| 1052 |
+
if len(value) > max_str_length:
|
| 1053 |
+
return f"'{value[:max_str_length]}... [truncated, total length: {len(value)}]'"
|
| 1054 |
+
return f"'{value}'"
|
| 1055 |
+
elif isinstance(value, (int, float, bool)):
|
| 1056 |
+
return str(value)
|
| 1057 |
+
elif isinstance(value, list):
|
| 1058 |
+
if not value:
|
| 1059 |
+
return "[]"
|
| 1060 |
+
result = "[\n"
|
| 1061 |
+
for i, item in enumerate(value[:5]): # Limit to first 5 items
|
| 1062 |
+
result += f"{indent} [{i}]: {_format_value(item, level+1)},\n"
|
| 1063 |
+
if len(value) > 5:
|
| 1064 |
+
result += f"{indent} ... {len(value) - 5} more items\n"
|
| 1065 |
+
result += f"{indent}]"
|
| 1066 |
+
return result
|
| 1067 |
+
elif isinstance(value, dict):
|
| 1068 |
+
if not value:
|
| 1069 |
+
return "{}"
|
| 1070 |
+
result = "{\n"
|
| 1071 |
+
for i, (k, v) in enumerate(list(value.items())[:5]): # Limit to first 5 items
|
| 1072 |
+
result += f"{indent} '{k}': {_format_value(v, level+1)},\n"
|
| 1073 |
+
if len(value) > 5:
|
| 1074 |
+
result += f"{indent} ... {len(value) - 5} more items\n"
|
| 1075 |
+
result += f"{indent}}}"
|
| 1076 |
+
return result
|
| 1077 |
+
else:
|
| 1078 |
+
try:
|
| 1079 |
+
return f"{type(value).__name__}: {str(value)}"
|
| 1080 |
+
except:
|
| 1081 |
+
return f"{type(value).__name__}: [Error displaying value]"
|
| 1082 |
+
|
| 1083 |
+
print("\n" + "="*80)
|
| 1084 |
+
print("AGENT RESPONSE OBJECT STRUCTURE")
|
| 1085 |
+
print("="*80)
|
| 1086 |
+
|
| 1087 |
+
# Print type information
|
| 1088 |
+
print(f"Type: {type(agent_response).__name__}")
|
| 1089 |
+
|
| 1090 |
+
# Get all attributes
|
| 1091 |
+
if hasattr(agent_response, "__dict__"):
|
| 1092 |
+
attributes = vars(agent_response)
|
| 1093 |
+
print(f"\nAttributes ({len(attributes)}):")
|
| 1094 |
+
|
| 1095 |
+
for attr_name, attr_value in attributes.items():
|
| 1096 |
+
print(f"\n- {attr_name} ({type(attr_value).__name__}):")
|
| 1097 |
+
print(f" {_format_value(attr_value, 1)}")
|
| 1098 |
+
else:
|
| 1099 |
+
print("\nNo __dict__ attribute available.")
|
| 1100 |
+
|
| 1101 |
+
# Special handling for common ReActAgent components
|
| 1102 |
+
special_attributes = [
|
| 1103 |
+
"response", "step_history", "source_nodes", "intermediate_steps",
|
| 1104 |
+
"raw_response", "message", "chat_history"
|
| 1105 |
+
]
|
| 1106 |
+
|
| 1107 |
+
print("\nCommon ReActAgent Components:")
|
| 1108 |
+
for attr in special_attributes:
|
| 1109 |
+
if hasattr(agent_response, attr):
|
| 1110 |
+
value = getattr(agent_response, attr)
|
| 1111 |
+
print(f"\n- {attr} ({type(value).__name__}):")
|
| 1112 |
+
print(f" {_format_value(value, 1)}")
|
| 1113 |
+
|
| 1114 |
+
# Special handling for step_history if it exists
|
| 1115 |
+
if hasattr(agent_response, "step_history") and agent_response.step_history:
|
| 1116 |
+
print("\nDetailed Step History:")
|
| 1117 |
+
for i, step in enumerate(agent_response.step_history):
|
| 1118 |
+
print(f"\nStep {i+1}:")
|
| 1119 |
+
for step_attr in ["thought", "action", "action_input", "observation"]:
|
| 1120 |
+
if hasattr(step, step_attr):
|
| 1121 |
+
step_value = getattr(step, step_attr)
|
| 1122 |
+
print(f" - {step_attr}: {_format_value(step_value, 2)}")
|
| 1123 |
+
|
| 1124 |
+
print("\n" + "="*80)
|
| 1125 |
+
|
| 1126 |
+
|
| 1127 |
+
if __name__ == "__main__":
|
| 1128 |
+
"""
|
| 1129 |
+
Test mode for the WebAgent when executed directly.
|
| 1130 |
+
This demonstrates the autonomous research capabilities.
|
| 1131 |
+
"""
|
| 1132 |
+
import sys
|
| 1133 |
+
import argparse
|
| 1134 |
+
import json
|
| 1135 |
+
import traceback
|
| 1136 |
+
from datetime import datetime
|
| 1137 |
+
from pprint import pprint
|
| 1138 |
+
from llama_index.llms.anthropic import Anthropic
|
| 1139 |
+
|
| 1140 |
+
# ANSI color codes
|
| 1141 |
+
COLOR_RESET = "\033[0m"
|
| 1142 |
+
COLOR_YELLOW = "\033[93m"
|
| 1143 |
+
COLOR_CYAN = "\033[96m"
|
| 1144 |
+
COLOR_GREEN = "\033[92m"
|
| 1145 |
+
COLOR_RED = "\033[91m"
|
| 1146 |
+
COLOR_BLUE = "\033[94m"
|
| 1147 |
+
COLOR_MAGENTA = "\033[95m"
|
| 1148 |
+
COLOR_GRAY = "\033[90m"
|
| 1149 |
+
|
| 1150 |
+
def color_text(text, color):
|
| 1151 |
+
return f"{color}{text}{COLOR_RESET}"
|
| 1152 |
+
|
| 1153 |
+
# Create a custom log writer that will write to both console and file
|
| 1154 |
+
class TeeWriter:
|
| 1155 |
+
def __init__(self, file_path):
|
| 1156 |
+
self.terminal = sys.stdout
|
| 1157 |
+
self.file = open(file_path, 'w', encoding='utf-8')
|
| 1158 |
+
self.log_buffer = [] # Buffer to store log content if needed elsewhere
|
| 1159 |
+
|
| 1160 |
+
def write(self, message):
|
| 1161 |
+
self.terminal.write(message)
|
| 1162 |
+
# Strip ANSI color codes for file output
|
| 1163 |
+
clean_message = re.sub(r'\033\[\d+m', '', message)
|
| 1164 |
+
self.file.write(clean_message)
|
| 1165 |
+
self.log_buffer.append(clean_message) # Store clean message
|
| 1166 |
+
|
| 1167 |
+
def flush(self):
|
| 1168 |
+
self.terminal.flush()
|
| 1169 |
+
self.file.flush()
|
| 1170 |
+
|
| 1171 |
+
def close(self):
|
| 1172 |
+
self.file.close()
|
| 1173 |
+
|
| 1174 |
+
def get_log_content(self): # Method to retrieve buffered log
|
| 1175 |
+
return "".join(self.log_buffer)
|
| 1176 |
+
|
| 1177 |
+
parser = argparse.ArgumentParser(description="WebAgent Research Tool")
|
| 1178 |
+
parser.add_argument("--query", "-q", type=str, help="Research query to process")
|
| 1179 |
+
parser.add_argument("--model", "-m", type=str, default="claude-3-5-sonnet-latest",
|
| 1180 |
+
help="Anthropic model to use (default: claude-3-haiku-20240307)")
|
| 1181 |
+
parser.add_argument("--verbose", "-v", action="store_true", default=True, help="Enable verbose output")
|
| 1182 |
+
parser.add_argument("--debug", "-d", action="store_true", default=True, help="Enable debug output with full agent response structure")
|
| 1183 |
+
parser.add_argument("--log-file", "-l", type=str, help="Log file name (default: research_log_TIMESTAMP.txt)")
|
| 1184 |
+
args = parser.parse_args()
|
| 1185 |
+
|
| 1186 |
+
# Create log file name with timestamp if not provided
|
| 1187 |
+
if not args.log_file:
|
| 1188 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1189 |
+
log_file = f"research_log_{timestamp}.txt"
|
| 1190 |
+
else:
|
| 1191 |
+
log_file = args.log_file
|
| 1192 |
+
|
| 1193 |
+
# Set up the TeeWriter to capture all output
|
| 1194 |
+
tee = TeeWriter(log_file)
|
| 1195 |
+
original_stdout = sys.stdout # Save the original stdout
|
| 1196 |
+
sys.stdout = tee # Redirect stdout to TeeWriter
|
| 1197 |
+
|
| 1198 |
+
print(color_text("WebAgent Research Tool", COLOR_CYAN))
|
| 1199 |
+
print(color_text("=====================", COLOR_CYAN) + "\n")
|
| 1200 |
+
print(color_text(f"All output will be logged to: {log_file}", COLOR_GREEN))
|
| 1201 |
+
|
| 1202 |
+
# Check if Anthropic API key is available
|
| 1203 |
+
anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
|
| 1204 |
+
if not anthropic_key:
|
| 1205 |
+
api_config = get_config("API_CONFIG")
|
| 1206 |
+
anthropic_key = api_config.get("ANTHROPIC_API_KEY", "")
|
| 1207 |
+
if not anthropic_key:
|
| 1208 |
+
print(color_text("Error: ANTHROPIC_API_KEY not found in environment variables or config.", COLOR_RED))
|
| 1209 |
+
print("Please set your Anthropic API key with:")
|
| 1210 |
+
print(" export ANTHROPIC_API_KEY='your-api-key'")
|
| 1211 |
+
print(" or add it to api_keys.py")
|
| 1212 |
+
sys.stdout = original_stdout # Restore original stdout before exit
|
| 1213 |
+
tee.close()
|
| 1214 |
+
sys.exit(1)
|
| 1215 |
+
|
| 1216 |
+
# Initialize the LLM
|
| 1217 |
+
try:
|
| 1218 |
+
print(color_text(f"Initializing Anthropic Claude (model: {args.model})...", COLOR_CYAN))
|
| 1219 |
+
llm = Anthropic(model=args.model, api_key=anthropic_key, max_tokens=8192) # Increased max_tokens
|
| 1220 |
+
except Exception as e:
|
| 1221 |
+
print(color_text(f"Error initializing Anthropic LLM: {str(e)}", COLOR_RED))
|
| 1222 |
+
sys.stdout = original_stdout # Restore original stdout before exit
|
| 1223 |
+
tee.close()
|
| 1224 |
+
sys.exit(1)
|
| 1225 |
+
|
| 1226 |
+
# Create WebAgent instance
|
| 1227 |
+
try:
|
| 1228 |
+
print(color_text("Creating WebAgent...", COLOR_CYAN))
|
| 1229 |
+
agent = WebAgent(
|
| 1230 |
+
llm=llm,
|
| 1231 |
+
verbose=args.verbose # Pass verbose flag to WebAgent
|
| 1232 |
+
)
|
| 1233 |
+
except Exception as e:
|
| 1234 |
+
print(color_text(f"Error creating WebAgent: {str(e)}", COLOR_RED))
|
| 1235 |
+
sys.stdout = original_stdout # Restore original stdout before exit
|
| 1236 |
+
tee.close()
|
| 1237 |
+
sys.exit(1)
|
| 1238 |
+
|
| 1239 |
+
# Display configuration status
|
| 1240 |
+
print(color_text("\nConfiguration:", COLOR_CYAN))
|
| 1241 |
+
print(f" LLM Model: Anthropic {args.model}")
|
| 1242 |
+
print(f" Verbose Mode: {'Enabled' if args.verbose else 'Disabled'}")
|
| 1243 |
+
print(f" Debug Mode: {'Enabled' if args.debug else 'Disabled'}")
|
| 1244 |
+
print(f" Google Search: {'Configured' if agent.google_api_key and agent.google_cx_id else 'Not configured'}")
|
| 1245 |
+
print(f" GitHub: {'Configured' if agent.github_token else 'Not configured'}")
|
| 1246 |
+
print()
|
| 1247 |
+
|
| 1248 |
+
# If query was provided as argument, use it
|
| 1249 |
+
if args.query:
|
| 1250 |
+
query = args.query
|
| 1251 |
+
else:
|
| 1252 |
+
# Otherwise prompt for research task
|
| 1253 |
+
query = input(color_text("Enter research task: ", COLOR_CYAN)).strip()
|
| 1254 |
+
|
| 1255 |
+
if not query:
|
| 1256 |
+
print(color_text("Error: No research task provided.", COLOR_RED))
|
| 1257 |
+
sys.stdout = original_stdout # Restore original stdout before exit
|
| 1258 |
+
tee.close()
|
| 1259 |
+
sys.exit(1)
|
| 1260 |
+
|
| 1261 |
+
print(color_text(f"\nResearch Task: ", COLOR_CYAN) + color_text(query, COLOR_YELLOW))
|
| 1262 |
+
print(color_text("\nStarting autonomous research. This may take some time...", COLOR_CYAN))
|
| 1263 |
+
|
| 1264 |
+
try:
|
| 1265 |
+
# Perform the research
|
| 1266 |
+
report = agent.research(query, debug=args.debug)
|
| 1267 |
+
|
| 1268 |
+
print(color_text("\n=============== STRUCTURED JSON REPORT ===============", COLOR_MAGENTA))
|
| 1269 |
+
# Pretty print the JSON report
|
| 1270 |
+
print(color_text(json.dumps(report, indent=4, ensure_ascii=False), COLOR_GRAY))
|
| 1271 |
+
|
| 1272 |
+
print(color_text("\n=============== FORMATTED RESEARCH REPORT ===============", COLOR_GREEN))
|
| 1273 |
+
if report["status"] == "success":
|
| 1274 |
+
print(color_text("SUMMARY:", COLOR_YELLOW))
|
| 1275 |
+
print(report["summary"])
|
| 1276 |
+
print("\n")
|
| 1277 |
+
print(color_text("DETAILED FINDINGS:", COLOR_YELLOW))
|
| 1278 |
+
|
| 1279 |
+
# Format detailed findings section more clearly
|
| 1280 |
+
detailed_findings = report["detailed_findings"]
|
| 1281 |
+
if isinstance(detailed_findings, list):
|
| 1282 |
+
for finding in detailed_findings:
|
| 1283 |
+
print(f"• {finding}")
|
| 1284 |
+
else:
|
| 1285 |
+
# If it's a string, try to split by newlines or periods to create bullet points
|
| 1286 |
+
findings_text = str(detailed_findings)
|
| 1287 |
+
if "\n\n" in findings_text:
|
| 1288 |
+
findings_list = [f.strip() for f in findings_text.split("\n\n") if f.strip()]
|
| 1289 |
+
for finding in findings_list:
|
| 1290 |
+
print(f"• {finding}")
|
| 1291 |
+
else:
|
| 1292 |
+
print(findings_text)
|
| 1293 |
+
|
| 1294 |
+
print("\n")
|
| 1295 |
+
print(color_text("SOURCES:", COLOR_YELLOW))
|
| 1296 |
+
if report["sources"] and len(report["sources"]) > 0:
|
| 1297 |
+
for source in report["sources"]:
|
| 1298 |
+
print(f"• {source}")
|
| 1299 |
+
else:
|
| 1300 |
+
print("No sources identified")
|
| 1301 |
+
|
| 1302 |
+
elif report["status"] == "success_unstructured":
|
| 1303 |
+
print(color_text("Research completed, but output could not be structured.", COLOR_YELLOW))
|
| 1304 |
+
print("\n")
|
| 1305 |
+
print(color_text("AGENT'S FULL TRACE:", COLOR_YELLOW))
|
| 1306 |
+
print(report["detailed_findings"])
|
| 1307 |
+
|
| 1308 |
+
else: # failure
|
| 1309 |
+
print(color_text(f"Research completed with status: {report['status']}", COLOR_YELLOW))
|
| 1310 |
+
if report["error_message"]:
|
| 1311 |
+
print(color_text(f"Error: {report['error_message']}", COLOR_RED))
|
| 1312 |
+
print(report["detailed_findings"])
|
| 1313 |
+
|
| 1314 |
+
print(color_text("===============================================", COLOR_GREEN))
|
| 1315 |
+
print(color_text(f"\nResearch log saved to: {log_file}", COLOR_GREEN))
|
| 1316 |
+
|
| 1317 |
+
except Exception as e:
|
| 1318 |
+
print(color_text(f"Error during research: {str(e)}", COLOR_RED))
|
| 1319 |
+
traceback.print_exc()
|
| 1320 |
+
|
| 1321 |
+
finally:
|
| 1322 |
+
# Restore original stdout and close log file
|
| 1323 |
+
sys.stdout = original_stdout # Restore original stdout
|
| 1324 |
+
tee.close()
|
| 1325 |
+
# Print final message to original stdout after tee is closed
|
| 1326 |
+
original_stdout.write(f"Research log saved to: {log_file}\n")
|
components/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .Web_agent_related.web_agent import WebAgent
|
| 2 |
+
from .mcp_registry.registry import Registry
|
| 3 |
+
from .code_generator_loop.mcp_brainstormer import Brainstormer
|
| 4 |
+
from .code_generator_loop.script_generator import ScriptGenerator
|
| 5 |
+
from .code_generator_loop.code_runner import CodeRunner
|
| 6 |
+
from .mcp_registry.use_registry_tool import BasicMCPClient
|
| 7 |
+
|
| 8 |
+
# This makes the components available when importing the package
|
| 9 |
+
__all__ = [
|
| 10 |
+
|
| 11 |
+
"WebAgent",
|
| 12 |
+
"Registry",
|
| 13 |
+
"Brainstormer",
|
| 14 |
+
"ScriptGenerator",
|
| 15 |
+
"CodeRunner",
|
| 16 |
+
"BasicMCPClient"
|
| 17 |
+
]
|
components/code_generator_loop/__pycache__/code_runner.cpython-313.pyc
ADDED
|
Binary file (15 kB). View file
|
|
|
components/code_generator_loop/__pycache__/mcp_brainstormer.cpython-313.pyc
ADDED
|
Binary file (6.85 kB). View file
|
|
|
components/code_generator_loop/__pycache__/script_generator.cpython-313.pyc
ADDED
|
Binary file (5.09 kB). View file
|
|
|
components/code_generator_loop/code_runner.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import subprocess
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
import tempfile
|
| 6 |
+
from typing import Optional, Dict, Any
|
| 7 |
+
from models import MCPExecutionResult
|
| 8 |
+
import sys # Import sys to check platform
|
| 9 |
+
|
| 10 |
+
class CodeRunner:
|
| 11 |
+
"""
|
| 12 |
+
Executes Python scripts in isolated environments (using conda).
|
| 13 |
+
Manages environment setup and captures output/errors.
|
| 14 |
+
"""
|
| 15 |
+
def __init__(self, base_env_dir=".alita_envs"):
|
| 16 |
+
self.base_env_dir = os.path.abspath(base_env_dir) # Use absolute path
|
| 17 |
+
os.makedirs(self.base_env_dir, exist_ok=True)
|
| 18 |
+
print(f"CodeRunner initialized. Environments will be created in {self.base_env_dir}")
|
| 19 |
+
self._check_conda_available()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _check_conda_available(self):
|
| 23 |
+
"""Checks if the 'conda' command is available in the PATH."""
|
| 24 |
+
try:
|
| 25 |
+
subprocess.run(["conda", "--version"], check=True, capture_output=True, text=True)
|
| 26 |
+
print("Conda command found.")
|
| 27 |
+
except (subprocess.CalledProcessError, FileNotFoundError):
|
| 28 |
+
print("Warning: 'conda' command not found or not working. Environment setup/execution may fail.")
|
| 29 |
+
print("Please ensure conda is installed and accessible in your system's PATH.")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _run_command(self, command: list[str], cwd: Optional[str] = None, env: Optional[Dict[str, str]] = None, timeout: int = 300) -> tuple[int, str, str]:
|
| 33 |
+
"""Helper to run a shell command and capture output."""
|
| 34 |
+
print(f"CodeRunner: Running command: {' '.join(command)}")
|
| 35 |
+
try:
|
| 36 |
+
process = subprocess.Popen(
|
| 37 |
+
command,
|
| 38 |
+
stdout=subprocess.PIPE,
|
| 39 |
+
stderr=subprocess.PIPE,
|
| 40 |
+
text=True, # Decode stdout/stderr as text
|
| 41 |
+
cwd=cwd,
|
| 42 |
+
env=env
|
| 43 |
+
)
|
| 44 |
+
stdout, stderr = process.communicate(timeout=timeout)
|
| 45 |
+
print(f"CodeRunner: Command finished with return code {process.returncode}")
|
| 46 |
+
print("--- STDOUT ---")
|
| 47 |
+
print(stdout)
|
| 48 |
+
print("--- STDERR ---")
|
| 49 |
+
print(stderr)
|
| 50 |
+
return process.returncode, stdout, stderr
|
| 51 |
+
except FileNotFoundError:
|
| 52 |
+
print(f"CodeRunner Error: Command not found: {command[0]}")
|
| 53 |
+
return 1, "", f"Command not found: {command[0]}"
|
| 54 |
+
except subprocess.TimeoutExpired:
|
| 55 |
+
process.kill()
|
| 56 |
+
stdout, stderr = process.communicate()
|
| 57 |
+
print(f"CodeRunner Error: Command timed out after {timeout} seconds.")
|
| 58 |
+
print("--- STDOUT (before timeout) ---")
|
| 59 |
+
print(stdout)
|
| 60 |
+
print("--- STDERR (before timeout) ---")
|
| 61 |
+
print(stderr)
|
| 62 |
+
return 1, stdout, stderr
|
| 63 |
+
except Exception as e:
|
| 64 |
+
print(f"CodeRunner Error: An exception occurred running command: {e}")
|
| 65 |
+
return 1, "", str(e)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def setup_environment(self, env_script: str, env_name: str) -> bool:
    """Set up a conda environment by running the provided shell script.

    The script is written into a fresh temporary directory (under
    ``self.base_env_dir``) and executed through bash with conda sourced
    from common profile locations.

    Args:
        env_script: Contents of the shell script that creates the env.
        env_name: Conda environment name (used only for temp naming here;
            the script itself is assumed to embed the name it needs).

    Returns:
        True on success, False on failure.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    print(f"CodeRunner: Attempting to set up environment '{env_name}'...")
    # Run the setup script from its own temporary directory in case the
    # script assumes being executed from a specific location.
    temp_dir = tempfile.mkdtemp(prefix=f"alita_env_setup_{env_name}_", dir=self.base_env_dir)
    temp_script_path = os.path.join(temp_dir, f"setup_{env_name}.sh")

    try:
        with open(temp_script_path, "w") as f:
            f.write(env_script)
        os.chmod(temp_script_path, 0o755)  # make the script executable

        # Source the user's shell profile so `conda` is initialized.
        # This is a Unix pattern; Windows support is best-effort.
        if sys.platform == "win32":
            print("Warning: Conda environment setup on Windows requires specific shell configurations.")
            print("Attempting a generic bash execution which might not work depending on your setup.")
            command = ["bash", "-c", f"source ~/.bashrc; {temp_script_path}"]
        else:  # Linux, macOS etc. — try common conda init locations
            command = ["bash", "-c", f"source ~/.bashrc || source ~/.bash_profile || source /opt/anaconda3/etc/profile.d/conda.sh; {temp_script_path}"]

        returncode, stdout, stderr = self._run_command(command, cwd=temp_dir, timeout=120)  # setup can take time

        if returncode != 0:
            print(f"CodeRunner: Environment setup failed for '{env_name}'. Return code: {returncode}")
            print("STDOUT:\n", stdout)
            print("STDERR:\n", stderr)
            return False

        print(f"CodeRunner: Environment '{env_name}' setup successful.")
        return True

    except Exception as e:
        print(f"CodeRunner: Error during environment setup for '{env_name}': {e}")
        return False
    finally:
        # Fix: remove the whole temp tree. The previous os.remove/os.rmdir
        # pair leaked the directory whenever the setup script left any
        # files behind (rmdir only works on empty directories).
        shutil.rmtree(temp_dir, ignore_errors=True)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def execute(self, python_script: str, env_name: str, input_data: Optional[Dict[str, Any]] = None, timeout: int = 60) -> MCPExecutionResult:
    """Execute a Python script inside a named conda environment.

    ``input_data`` is serialized to JSON and passed to the script as the
    ``--input`` command-line argument. Stdout is parsed as JSON when
    possible; otherwise it is wrapped as ``{"raw_output": ...}``.

    Args:
        python_script: Source code of the script to run.
        env_name: Conda environment to activate before running.
        input_data: Optional payload forwarded to the script as JSON.
        timeout: Seconds before the run is aborted.

    Returns:
        An MCPExecutionResult with success flag, parsed output, combined
        logs, error message (on failure), and the process return code.
    """
    import shlex   # local import: used to safely quote shell arguments
    import shutil  # local import: used for temp-dir cleanup

    print(f"CodeRunner: Executing script in environment '{env_name}'...")
    temp_dir = tempfile.mkdtemp(prefix=f"alita_script_run_{env_name}_", dir=self.base_env_dir)
    temp_script_path = os.path.join(temp_dir, "script.py")  # fixed script name

    try:
        with open(temp_script_path, "w") as f:
            f.write(python_script)
        os.chmod(temp_script_path, 0o755)

        # Input is always a JSON string, even when no data was given.
        input_json_str = json.dumps(input_data) if input_data is not None else json.dumps({})

        if sys.platform == "win32":
            print("Warning: Script execution on Windows requires specific shell configurations.")
            print("Attempting a generic bash execution which might not work depending on your setup.")
        # Fix: quote all interpolated values with shlex.quote. The previous
        # hand-rolled f"--input '{...}'" quoting broke the command (and
        # allowed shell injection) whenever the JSON contained a single
        # quote. The win32/else branches built the identical command, so
        # they were collapsed into one construction.
        shell_line = (
            "source ~/.bashrc || source ~/.bash_profile || source /opt/anaconda3/etc/profile.d/conda.sh; "  # source conda
            f"conda activate {shlex.quote(env_name)} && "                                                   # activate env
            f"python {shlex.quote(temp_script_path)} --input {shlex.quote(input_json_str)}"                 # run with input arg
        )
        command_parts = ["bash", "-c", shell_line]

        returncode, stdout, stderr = self._run_command(command_parts, cwd=temp_dir, timeout=timeout)

        output_data = None
        error_message = None
        success = (returncode == 0)

        if success:
            try:
                # Attempt to parse stdout as JSON.
                output_data = json.loads(stdout.strip())
            except json.JSONDecodeError:
                print("CodeRunner: Script stdout was not valid JSON. Returning raw output.")
                output_data = {"raw_output": stdout.strip()}
            except Exception as e:
                print(f"CodeRunner: Error parsing JSON output: {e}")
                # Return code was 0, so still report success.
                output_data = {"raw_output": stdout.strip(), "parsing_error": str(e)}
        else:
            error_message = f"Script execution failed with return code {returncode}.\nSTDERR:\n{stderr}"
            print(error_message)

        return MCPExecutionResult(
            success=success,
            output=output_data if success else None,
            logs=f"STDOUT:\n{stdout}\nSTDERR:\n{stderr}",
            error_message=error_message,
            return_code=returncode
        )

    except FileNotFoundError:
        error_msg = f"CodeRunner Error: Python interpreter or script not found. Ensure '{env_name}' env is correct and script path exists ({temp_script_path})."
        print(error_msg)
        return MCPExecutionResult(success=False, error_message=error_msg)
    except Exception as e:
        error_msg = f"CodeRunner Error during script execution: {e}"
        print(error_msg)
        return MCPExecutionResult(success=False, error_message=error_msg)
    finally:
        # Fix: remove the whole temp tree (os.rmdir leaked the directory
        # whenever the script created files next to itself).
        shutil.rmtree(temp_dir, ignore_errors=True)
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def cleanup_environment(self, env_name: str):
    """Remove the conda environment ``env_name`` (best effort, logs errors).

    Runs ``conda env remove -n <env_name> -y`` through bash after sourcing
    common conda init locations.
    """
    print(f"CodeRunner: Cleaning up environment '{env_name}'...")
    try:
        # Fix: the win32 and non-win32 branches built the exact same
        # command, so the duplicated construction was collapsed into one.
        command = [
            "bash", "-c",
            "source ~/.bashrc || source ~/.bash_profile || source /opt/anaconda3/etc/profile.d/conda.sh; "  # source conda
            f"conda env remove -n {env_name} -y"
        ]

        returncode, stdout, stderr = self._run_command(command, timeout=60)
        if returncode != 0:
            print(f"CodeRunner: Environment cleanup failed for '{env_name}'. Return code: {returncode}")
            print("STDOUT:\n", stdout)
            print("STDERR:\n", stderr)
        else:
            print(f"CodeRunner: Environment '{env_name}' cleaned up successfully.")

    except Exception as e:
        print(f"CodeRunner: Error during environment cleanup for '{env_name}': {e}")
|
components/code_generator_loop/mcp_brainstormer.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import re
|
| 3 |
+
from typing import Any, Dict, List, Union
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from llama_index.llms.anthropic import Anthropic
|
| 7 |
+
from llama_index.core.prompts import PromptTemplate
|
| 8 |
+
|
| 9 |
+
class Brainstormer:
    """Proposes MCPToolSpec entries for a user task via an Anthropic LLM.

    Builds a constrained prompt (task + already-available tools + a format
    example), asks the model for a JSON array of tool specifications, and
    exposes the flow through a small Gradio interface.
    """

    def __init__(self, model_name: str = "claude-sonnet-4-0"):
        # Anthropic model used for spec generation.
        self.model_name = model_name
        # One-element example array shown to the LLM to pin the exact output format.
        self.example_tool = json.dumps(
            [
                {
                    "name": "tool_a",
                    "input_schema": {"param1": "string"},
                    "output_schema": {"result": "string"},
                    "description": "Does X for Y",
                    "pseudo_code": "def tool_a(param1: str) -> str:\n # …",
                    "source_hint": "use library_x"
                }
            ],
            indent=2
        )

        # Prompt template; placeholders {task}, {tools_list}, {example_tool}
        # are filled in generate_mcp_specs_to_fulfill_user_task.
        self.mcp_tools_spec_prompt = PromptTemplate(
            """
You are the MCPBrainstorm agent for OpenALITA, responsible for proposing MCPToolSpec entries needed to fulfill a new user task.

User Task:
{task}

Already‐available tools (JSON array):
{tools_list}

Example to illustrate format (single JSON object):
{example_tool}

• If one or more of the listed tools can exactly fulfill the task as‐is, copy their JSON entries verbatim.
• Otherwise, add only the minimal new specifications required—no extras.

Note: A web agent is available for Internet or GitHub searches. Do not create a tool for that.

Propose only generic tools, not highly specific ones.

Output exactly a JSON ARRAY of objects, each with these keys in this order:
- "name": string — unique tool name in snake_case (no spaces)
- "input_schema": dict — JSON describing argument names and types
- "output_schema": dict — JSON describing return names and types
- "description": string — one‐sentence summary of what this tool does
- "pseudo_code": string — brief multi‐line pseudo‐code sketch
- "source_hint": string — short hint about which library or resource to use
- "state" : string - indicator if the tool is deployed or not. Put deactivated in case of a new tool

Do not include any explanatory text, comments, or formatting outside valid JSON.
"""
        )

    def _extract_json_array(self, text: str) -> Union[List[Any], None]:
        """Return the first balanced top-level JSON array in *text*, or None.

        Scans from the first '[' tracking bracket depth; parses the span
        once the matching ']' closes it. Returns None if no '[' exists or
        the span is not valid JSON.
        """
        idx = text.find('[')
        if idx < 0:
            return None
        depth = 0
        for i, ch in enumerate(text[idx:], idx):
            if ch == '[':
                depth += 1
            elif ch == ']':
                depth -= 1
                if depth == 0:
                    try:
                        return json.loads(text[idx : i+1])
                    except json.JSONDecodeError:
                        return None
        return None

    def generate_mcp_specs_to_fulfill_user_task(
        self,
        task: str,
        tools_list: str,
        retries: int = 3,
    ) -> Union[List[Dict[str, Any]], Dict[str, str]]:
        """Ask the LLM for MCPToolSpec entries that fulfill *task*.

        Args:
            task: The user's task description (must be non-empty).
            tools_list: Existing tool names, comma- or newline-separated.
            retries: Number of LLM attempts before giving up.

        Returns:
            A list of spec dicts on success, or ``{"error": ...}`` on
            validation/LLM/parse failure.
        """
        # 1) Validate inputs
        if not task.strip():
            return {"error": "User task must not be empty."}
        raw = tools_list.strip()
        if not raw:
            return {"error": "Please provide a list of existing tools (or 'none')."}

        # 2) Normalize the tools_list so that each tool is on its own line
        # (Assumes comma‐ or newline‐separated)
        entries = re.split(r"[\n,]+", raw)
        normalized = "\n".join(f"- {e.strip()}" for e in entries if e.strip())

        # 3) Build the prompt
        prompt_text = self.mcp_tools_spec_prompt.format(
            task=task,
            tools_list=normalized,
            example_tool=self.example_tool
        )

        # 4) Call the LLM with retry logic
        last_error = None
        for _ in range(retries):
            try:
                llm = Anthropic(model=self.model_name, temperature=0.0, max_tokens=512, timeout=30)
                response_text = llm.complete(prompt_text).text.strip()
            except Exception as e:
                last_error = e
                continue

            try:
                specs = json.loads(response_text)
            except json.JSONDecodeError:
                # This is a fallback: find the first top‐level JSON array
                specs = self._extract_json_array(response_text)

            # Any non-list result (None, dict, ...) triggers another attempt.
            if isinstance(specs, list):
                return specs

        if last_error:
            return {"error": f"Failed to call LLM: {last_error}"}
        else:
            return {"error": "Could not extract a valid JSON array from the LLM output."}

    def create_interface(self):
        """Build the Gradio Interface wrapping spec generation."""
        return gr.Interface(
            fn=self.generate_mcp_specs_to_fulfill_user_task,
            inputs=[
                gr.TextArea(
                    label="Enter your task for OpenALITA",
                    placeholder="e.g., Extract subtitles from a video online",
                    lines=2,
                ),
                gr.TextArea(
                    label="Available Tools List (one per line or comma-separated)",
                    placeholder="e.g.\ntool_a, tool_b\nor:\ntool_a\ntool_b\ntool_c",
                    lines=7,
                ),
            ],
            outputs=gr.JSON(label="Generated MCPToolSpecs"),
            flagging_mode="never",
        )

    def launch_interface(self, share=False):
        """Launch the Gradio app, binding to all interfaces (0.0.0.0)."""
        iface = self.create_interface()
        iface.launch(share=share, server_name="0.0.0.0")
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
if __name__ == "__main__":
    # Demo entry point: build a Brainstormer and serve its Gradio UI.
    brainstormer = Brainstormer()
    # Launch the interface (blocking call).
    brainstormer.launch_interface()
|
components/code_generator_loop/script_generator.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import anthropic
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
def clean_script_output(script: str) -> str:
    """Strip a leading/trailing Markdown code fence (``` or ''') from *script*.

    Fix: the original condition ``lines and A or B`` parsed as
    ``(lines and A) or B``, so ``B`` still indexed ``lines[0]`` /
    ``lines[-1]`` and raised IndexError on empty input. The check is now
    properly guarded and uses a tuple with startswith.
    """
    lines = script.strip().splitlines()
    if lines and lines[0].strip().startswith(("```", "'''")):
        lines = lines[1:]  # drop the opening fence
    if lines and lines[-1].strip().startswith(("```", "'''")):
        lines = lines[:-1]  # drop the closing fence
    return "\n".join(lines)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ScriptGenerator:
    """Generates a Modal + Gradio tool script from a task prompt via Claude."""

    def __init__(self, task_prompt: str, claude_api_key: str, model="claude-3-7-sonnet-20250219"):
        # Task description inserted verbatim into the generation prompt.
        self.task_prompt = task_prompt
        # Anthropic API client bound to the given key.
        self.client = anthropic.Anthropic(api_key=claude_api_key)
        # Claude model identifier used for generation.
        self.model = model

    def build_prompt(self) -> str:
        """Return the full instruction prompt embedding the user's task."""
        return f"""
You are a Python expert working in an AI pipeline. Your job is to write a Python script based on the user's task description.

The Python script must follow this exact structure:
1. Modal Image setup to define the execution environment using `modal.Image.debian_slim().pip_install(...)`
2. A tool function wrapped with `@stub.function` that performs the task
3. A Gradio app setup that launches in the cloud using `@stub.local_entrypoint`

🛑 VERY IMPORTANT:
- Use `app = modal.App("tool-name")` (DO NOT use `stub = modal.Stub`)
- All decorators must use `@app.function` and `@app.local_entrypoint`
- La logique doit être dans une fonction locale (local_xyz()).
- @app.function doit juste appeler cette fonction locale.
- Gradio doit utiliser uniquement la fonction locale, pas celle décorée par Modal.

The tool should:
- Be self-contained
- Import necessary libraries
- Run via `modal` and expose an input/output via `gr.Interface`

User's Task:
\"\"\"{self.task_prompt}\"\"\"

Output the full Python script only, no commentary or markdown.
"""

    def call_claude(self) -> str:
        """Send the prompt to Claude and return the raw text response."""
        response = self.client.messages.create(
            model=self.model,
            max_tokens=1000,
            temperature=0.5,
            messages=[
                {"role": "user", "content": self.build_prompt()}
            ]
        )
        # Anthropic responses carry a list of content blocks; the script
        # text is in the first one.
        return response.content[0].text.strip()

    def clean_script_output(self, script: str) -> str:
        """Strip leading/trailing Markdown code fences (``` or ''') from *script*."""
        lines = script.strip().splitlines()
        if lines and lines[0].strip().startswith(("```", "'''")):
            lines = lines[1:]  # Remove first line
        if lines and lines[-1].strip().startswith(("```", "'''")):
            lines = lines[:-1]  # Remove last line
        return "\n".join(lines)

    def generate_script(self) -> str:
        """Generate, clean, and return the Python tool script."""
        script = self.call_claude()
        script = self.clean_script_output(script)
        return script
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
if __name__ == "__main__":
    # Demo entry point: generate a word-count tool and save it to disk.
    import os
    from dotenv import load_dotenv
    load_dotenv()

    claude_key = os.getenv("CLAUDE_API_KEY")  # Or set manually
    task = "Create a tool that takes a text input and returns the number of words in the text."

    generator = ScriptGenerator(task_prompt=task, claude_api_key=claude_key)
    script = generator.generate_script()

    # Print or save to file
    print(script)

    with open("generated_tool.py", "w") as f:
        f.write(script)
|
components/mcp_registry/__pycache__/registry.cpython-313.pyc
ADDED
|
Binary file (11.2 kB). View file
|
|
|
components/mcp_registry/__pycache__/use_registry_tool.cpython-313.pyc
ADDED
|
Binary file (5.32 kB). View file
|
|
|
components/mcp_registry/mcp_registry.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import subprocess
|
| 4 |
+
import threading
|
| 5 |
+
from typing import Dict, Optional, List, Any
|
| 6 |
+
from models import MCPToolSpec
|
| 7 |
+
import modal
|
| 8 |
+
|
| 9 |
+
# ---------- CONFIG ----------
|
| 10 |
+
REGISTRY_FILE = "mcp_registry.json"
|
| 11 |
+
UNDEPLOY_DELAY = 30 * 60 # seconds (30 minutes)
|
| 12 |
+
|
| 13 |
+
# ---------- HELPER FUNCTIONS ----------
|
| 14 |
+
|
| 15 |
+
def _read_registry() -> Dict[str, MCPToolSpec]:
    """Load REGISTRY_FILE into a name -> MCPToolSpec mapping.

    Returns an empty dict when the file is absent or unreadable.
    """
    if not os.path.exists(REGISTRY_FILE):
        return {}
    try:
        with open(REGISTRY_FILE, 'r') as f:
            return {name: MCPToolSpec.from_dict(entry) for name, entry in json.load(f).items()}
    except (json.JSONDecodeError, TypeError) as e:
        print(f"Error reading registry: {e}. Returning empty registry.")
        return {}
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _write_registry(tools: Dict[str, MCPToolSpec]) -> None:
    """Persist *tools* to REGISTRY_FILE as indented JSON."""
    try:
        payload = {name: spec.to_dict() for name, spec in tools.items()}
        with open(REGISTRY_FILE, 'w') as f:
            json.dump(payload, f, indent=4)
    except IOError as e:
        print(f"Error writing registry: {e}")
|
| 34 |
+
|
| 35 |
+
# ---------- REGISTRY OPERATIONS ----------
|
| 36 |
+
|
| 37 |
+
def register_tool(spec: MCPToolSpec) -> None:
    """Insert or replace *spec* in the persistent registry."""
    current = _read_registry()
    current[spec.name] = spec
    _write_registry(current)
    print(f"Registered tool: {spec.name}")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def get_tool(name: str) -> Optional[MCPToolSpec]:
    """Return the registered MCPToolSpec called *name*, or None."""
    return _read_registry().get(name)
|
| 47 |
+
|
| 48 |
+
# To be used by the manager to get the list of tools
|
| 49 |
+
def list_tools() -> List[MCPToolSpec]:
    """Return every registered tool spec (used by the manager)."""
    return [*_read_registry().values()]
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def find_relevant_tools(query: str) -> List[Dict[str, Any]]:
    """Find tools whose name or description contains *query*.

    Matching is a case-insensitive substring test.

    Args:
        query: Search text.

    Returns:
        A list of ``{"name", "description"}`` dicts for matching tools.
    """
    query_lower = query.lower()
    tools = _read_registry()
    relevant = []
    for spec in tools.values():
        # Fix: description may be missing or None. The original crashed on
        # None (.lower() on None) and, when the attribute was missing, on
        # spec.description while appending.
        description = getattr(spec, 'description', '') or ''
        if query_lower in spec.name.lower() or query_lower in description.lower():
            relevant.append({"name": spec.name, "description": description})
    print(f"Found {len(relevant)} relevant tools for query '{query}'")
    return relevant
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _schedule_undeploy(name: str):
    """
    Schedule undeployment after a delay.

    Starts a threading.Timer that calls undeploy_tool(name) after
    UNDEPLOY_DELAY seconds.
    """
    timer = threading.Timer(UNDEPLOY_DELAY, undeploy_tool, args=[name])
    # NOTE(review): non-daemon timer keeps the process alive until it
    # fires — confirm this shutdown behavior is intended.
    timer.daemon = False
    timer.start()
    print(f"Scheduled undeploy of {name} in {UNDEPLOY_DELAY} seconds.")
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# A function to be used by the manager to deploy deactivated tools
|
| 75 |
+
def deploy_tool(name: str) -> Optional[str]:
    """Deploy the registered tool *name* with `modal deploy`.

    On success, records the web URL on the spec, marks it activated,
    persists it, and schedules automatic undeployment.

    Returns:
        The deployed web URL, or None on any failure.
    """
    spec = get_tool(name)
    if not spec:
        print(f"Error: tool '{name}' not found in registry.")
        return None
    script_path = os.path.join("mcp_tool_box", f"{name}.py")
    if not os.path.isfile(script_path):
        print(f"Error: script file '{script_path}' does not exist.")
        return None
    try:
        subprocess.run(["modal", "deploy", script_path], check=True)  # deploy via CLI
        deployed = modal.Function.from_name(name, name)               # retrieve URL via SDK
        url = deployed.get_web_url()
        spec.url = url
        spec.state = "activated"
        register_tool(spec)
        print(f"Deployed {name} at {url}")
        _schedule_undeploy(name)  # schedule automatic undeploy
        return url
    except Exception as e:
        print(f"Deployment failed for {name}: {e}")
        return None
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def undeploy_tool(name: str) -> bool:
    """Delete the Modal app for *name* and mark its spec deactivated.

    Returns:
        True on success, False when the tool is unknown or deletion fails.
    """
    spec = get_tool(name)
    if not spec:
        print(f"Error: tool '{name}' not found in registry.")
        return False
    try:
        modal.App(name).delete()
        spec.state = "deactivated"
        register_tool(spec)
        print(f"Undeployed {name}")
        return True
    except Exception as e:
        print(f"Undeployment failed for {name}: {e}")
        return False
|
| 117 |
+
|
components/mcp_registry/registry.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import subprocess
|
| 4 |
+
import threading
|
| 5 |
+
from typing import Dict, Optional, List, Any
|
| 6 |
+
from models import MCPToolSpec
|
| 7 |
+
import modal
|
| 8 |
+
from llama_index.tools.mcp import BasicMCPClient
|
| 9 |
+
|
| 10 |
+
class Registry:
|
| 11 |
+
"""
|
| 12 |
+
Registry class for managing MCP tools.
|
| 13 |
+
This class encapsulates tool registration, lookup, and deployment functionality.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
def __init__(self, registry_file: str = "mcp_registry.json", undeploy_delay: int = 30 * 60):
    """Initialize the registry.

    Args:
        registry_file: Path of the JSON file that persists registry data.
        undeploy_delay: Seconds before a deployed tool is auto-undeployed
            (defaults to 30 minutes).
    """
    self.registry_file = registry_file
    self.undeploy_delay = undeploy_delay
    # Eagerly load whatever is already on disk.
    self.tools = self._read_registry()
    print(f"MCPRegistry initialized. Using file: {self.registry_file}")
|
| 28 |
+
|
| 29 |
+
def _read_registry(self) -> Dict[str, MCPToolSpec]:
    """Parse the registry file into a name -> MCPToolSpec mapping.

    Returns:
        The parsed mapping, or an empty dict when the file is absent
        or unreadable.
    """
    if not os.path.exists(self.registry_file):
        return {}
    try:
        with open(self.registry_file, 'r') as f:
            return {name: MCPToolSpec.from_dict(entry) for name, entry in json.load(f).items()}
    except (json.JSONDecodeError, TypeError) as e:
        print(f"Error reading registry: {e}. Returning empty registry.")
        return {}
|
| 45 |
+
|
| 46 |
+
def _write_registry(self) -> None:
    """Persist self.tools to the registry file as indented JSON."""
    try:
        payload = {name: spec.to_dict() for name, spec in self.tools.items()}
        with open(self.registry_file, 'w') as f:
            json.dump(payload, f, indent=4)
    except IOError as e:
        print(f"Error writing registry: {e}")
|
| 56 |
+
|
| 57 |
+
def register_tool(self, spec: MCPToolSpec) -> None:
    """Insert or replace *spec* in the in-memory map and persist it.

    Args:
        spec: MCPToolSpec object describing the tool.
    """
    self.tools[spec.name] = spec
    self._write_registry()
    print(f"Registered tool: {spec.name}")
|
| 67 |
+
|
| 68 |
+
def get_tool(self, name: str) -> Optional[MCPToolSpec]:
    """Return the MCPToolSpec registered under *name*, or None.

    Args:
        name: Name of the tool to retrieve.
    """
    return self.tools.get(name)
|
| 79 |
+
|
| 80 |
+
def list_tools(self) -> List[MCPToolSpec]:
    """Return all registered tool specs."""
    return [*self.tools.values()]
|
| 88 |
+
|
| 89 |
+
def find_relevant_tools(self, query: str) -> List[Dict[str, Any]]:
    """Find tools whose name or description contains *query*.

    Matching is a case-insensitive substring test.

    Args:
        query: Search text.

    Returns:
        A list of ``{"name", "description"}`` dicts for matching tools.
    """
    query_lower = query.lower()
    relevant = []
    for spec in self.tools.values():
        # Fix: description may be missing or None. The original crashed on
        # None (.lower() on None) and, when the attribute was missing, on
        # spec.description while appending.
        description = getattr(spec, 'description', '') or ''
        if query_lower in spec.name.lower() or query_lower in description.lower():
            relevant.append({"name": spec.name, "description": description})
    print(f"Found {len(relevant)} relevant tools for query '{query}'")
    return relevant
|
| 106 |
+
|
| 107 |
+
def _schedule_undeploy(self, name: str) -> None:
|
| 108 |
+
"""
|
| 109 |
+
Schedule undeployment of a tool after a delay.
|
| 110 |
+
|
| 111 |
+
Args:
|
| 112 |
+
name: Name of the tool to undeploy
|
| 113 |
+
"""
|
| 114 |
+
timer = threading.Timer(self.undeploy_delay, self.undeploy_tool, args=[name])
|
| 115 |
+
timer.daemon = False
|
| 116 |
+
timer.start()
|
| 117 |
+
print(f"Scheduled undeploy of {name} in {self.undeploy_delay} seconds.")
|
| 118 |
+
|
| 119 |
+
def deploy_tool(self, name: str) -> Optional[str]:
    """
    Deploy a tool using Modal.

    Looks the tool up in the registry, deploys its script from
    ``mcp_tool_box/<name>.py`` via the Modal CLI, records the resulting web
    URL and 'activated' state back into the registry, and schedules an
    automatic undeploy after ``self.undeploy_delay`` seconds.

    Args:
        name: Name of the tool to deploy

    Returns:
        URL of the deployed tool if successful, None otherwise
    """
    spec = self.get_tool(name)
    if not spec:
        print(f"Error: tool '{name}' not found in registry.")
        return None
    # The deployable script is expected at mcp_tool_box/<name>.py.
    script_path = os.path.join("mcp_tool_box", f"{name}.py")
    if not os.path.isfile(script_path):
        print(f"Error: script file '{script_path}' does not exist.")
        return None
    try:
        # Deploy via CLI
        subprocess.run(["modal", "deploy", script_path], check=True)
        # Retrieve URL via SDK
        # NOTE(review): assumes the Modal app name AND function name both
        # equal the tool name -- confirm against the generated tool scripts.
        func = modal.Function.from_name(name, name)
        url = func.get_web_url()
        spec.url = url
        spec.state = "activated"
        # Re-registering the mutated spec persists url/state to disk.
        self.register_tool(spec)
        print(f"Deployed {name} at {url}")
        # Schedule automatic undeploy
        self._schedule_undeploy(name)
        return url
    except Exception as e:
        print(f"Deployment failed for {name}: {e}")
        return None
|
| 153 |
+
|
| 154 |
+
def undeploy_tool(self, name: str) -> bool:
    """Tear down a deployed tool and mark it inactive in the registry.

    Args:
        name: Name of the tool to undeploy.

    Returns:
        True if successful, False otherwise.
    """
    spec = self.get_tool(name)
    if not spec:
        print(f"Error: tool '{name}' not found in registry.")
        return False
    try:
        # Delete the Modal app, then persist the deactivated state.
        modal.App(name).delete()
        spec.state = "deactivated"
        self.register_tool(spec)
        print(f"Undeployed {name}")
        return True
    except Exception as e:
        print(f"Undeployment failed for {name}: {e}")
        return False
|
| 178 |
+
|
| 179 |
+
def use_tool(self, tool_name: str, *args, **kwargs) -> Dict[str, Any]:
    """
    Use a registered tool directly by invoking its Modal endpoint.

    This method checks if the tool exists in the registry, ensures it's deployed,
    and then invokes it using the BasicMCPClient.

    Args:
        tool_name: Name of the tool to use
        *args: Positional arguments to pass to the tool
        **kwargs: Keyword arguments to pass to the tool

    Returns:
        The response from the tool as a Python object

    Raises:
        ValueError: If the tool doesn't exist or isn't deployed
        RuntimeError: If invoking the deployed endpoint fails
    """
    print(f"🔧 Using tool: {tool_name}")

    # Check if tool exists in registry
    spec = self.get_tool(tool_name)
    if not spec:
        error_msg = f"Tool '{tool_name}' not found in registry"
        print(f"❌ {error_msg}")
        raise ValueError(error_msg)

    # Check if tool is deployed
    # getattr defaults guard against spec objects missing these attributes.
    if getattr(spec, 'state', 'deactivated') != 'activated' or not getattr(spec, 'url', None):
        print(f"Tool '{tool_name}' is not deployed. Attempting to deploy...")
        # deploy_tool mutates this same spec object (sets url/state) on success.
        url = self.deploy_tool(tool_name)
        if not url:
            error_msg = f"Failed to deploy tool '{tool_name}'"
            print(f"❌ {error_msg}")
            raise ValueError(error_msg)

    try:
        # Construct the Modal endpoint URL
        if hasattr(spec, 'url') and spec.url:
            base_url = spec.url
        else:
            # NOTE(review): fallback assumes Modal's <app>--<function>.modal.run
            # URL scheme -- confirm this matches the deployed apps.
            base_url = f"https://{tool_name}--{tool_name}.modal.run"

        print(f"🔄 Connecting to tool at: {base_url}")
        client = BasicMCPClient(base_url)

        # Invoke the remote tool
        # NOTE(review): the BasicMCPClient defined in use_registry_tool.py
        # exposes an async `call_tool`, not a synchronous `call` -- verify
        # which client class is actually in scope here.
        print(f"▶️ Invoking tool with args: {args}, kwargs: {kwargs}")
        response = client.call(*args, **kwargs)
        print(f"✅ Tool execution successful")

        return response

    except Exception as e:
        error_msg = f"Error using tool '{tool_name}': {str(e)}"
        print(f"🚨 {error_msg}")
        raise RuntimeError(error_msg) from e
|
components/mcp_registry/use_registry_tool.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import aiohttp
|
| 2 |
+
from typing import Dict, Any
|
| 3 |
+
import modal
|
| 4 |
+
|
| 5 |
+
class BasicMCPClient:
    """
    A basic MCP client implementation since llama_index.tools.mcp might not be available

    Speaks JSON-RPC 2.0 over plain HTTP POST to a single MCP server endpoint.
    """
    def __init__(self, base_url: str):
        # Strip the trailing slash so request URLs never carry a double slash.
        self.base_url = base_url.rstrip('/')

    async def _post(self, method: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """POST one JSON-RPC request and return the decoded response.

        Shared transport for call_tool/list_tools (previously duplicated).
        Network and HTTP failures are reported as ``{"error": ...}`` dicts
        instead of raising, so callers can treat every outcome uniformly.
        """
        mcp_request = {
            "jsonrpc": "2.0",
            "id": 1,
            "method": method,
            "params": params
        }

        async with aiohttp.ClientSession() as session:
            try:
                async with session.post(
                    self.base_url,
                    json=mcp_request,
                    headers={"Content-Type": "application/json"}
                ) as response:
                    if response.status == 200:
                        result = await response.json()
                        return result
                    else:
                        error_text = await response.text()
                        return {
                            "error": f"HTTP {response.status}: {error_text}"
                        }
            except Exception as e:
                return {
                    "error": f"Request failed: {str(e)}"
                }

    async def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Call an MCP tool via HTTP POST request
        """
        return await self._post("tools/call", {"name": tool_name, "arguments": arguments})

    async def list_tools(self) -> Dict[str, Any]:
        """
        List available tools from the MCP server
        """
        return await self._post("tools/list", {})
|
| 76 |
+
|
| 77 |
+
# Function to invoke an external MCP server
|
| 78 |
+
async def use_registry_tool(tool_name: str, **kwargs):
    """
    Call a registered MCP tool by name with the provided arguments.

    Resolves the deployed Modal app named after the tool, obtains its web
    endpoint, and forwards the call through BasicMCPClient.

    Args:
        tool_name: Name of the tool.
        **kwargs: Keyword arguments matching the tool signature.

    Returns:
        The JSON response from the MCP server as a Python object.
    """
    # Bug fix: the app name was the literal string "tool_name" instead of the
    # tool_name argument, so every call looked up the same (nonexistent) app.
    base_url = modal.Function.from_name(tool_name, "mcp_server").get_web_url()
    client = BasicMCPClient(base_url)
    response = await client.call_tool(tool_name, kwargs)
    return response
|
manager_agent.py
ADDED
|
@@ -0,0 +1,663 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uuid
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from typing import Optional, Dict, Any, List, Generator, Callable
|
| 5 |
+
from models import TaskPrompt, MCPToolSpec, MCPExecutionResult
|
| 6 |
+
from components import (
|
| 7 |
+
WebAgent,
|
| 8 |
+
ScriptGenerator,
|
| 9 |
+
CodeRunner,
|
| 10 |
+
Registry,
|
| 11 |
+
Brainstormer,
|
| 12 |
+
)
|
| 13 |
+
from llama_index.core.llms import LLM
|
| 14 |
+
from llama_index.core.agent import ReActAgent
|
| 15 |
+
from llama_index.core.tools import FunctionTool
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# Load environment variables from .env file
|
| 19 |
+
load_dotenv()
|
| 20 |
+
|
| 21 |
+
class ManagerAgent:
|
| 22 |
+
"""
|
| 23 |
+
The central orchestrator of the Alita agent - Revised for Gradio integration.
|
| 24 |
+
|
| 25 |
+
Workflow:
|
| 26 |
+
1. Analyze user prompt to understand the request
|
| 27 |
+
2. Check existing tools in registry first
|
| 28 |
+
3. If research needed, formulate search queries and use WebAgent
|
| 29 |
+
4. If tool needed but not found, brainstorm new tool requirements
|
| 30 |
+
5. Search for open source tools/solutions via WebAgent
|
| 31 |
+
6. Create implementation plan via Brainstormer
|
| 32 |
+
7. Return comprehensive response
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
def __init__(self, llm: LLM, max_iterations: int = 10000000, update_callback: Optional[Callable[[str], None]] = None):
    # llm: shared language model driving the ReAct agent and the WebAgent.
    # max_iterations: cap on ReAct reasoning steps; the huge default makes it
    #   effectively unbounded.
    # update_callback: optional sink for progress messages (e.g. a Gradio UI).

    self.llm = llm
    self.registry = Registry()
    self.web_agent = WebAgent(llm=llm, max_research_iterations=10000000)
    self.code_runner = CodeRunner()
    self.brainstormer = Brainstormer(model_name="claude-sonnet-4-0")
    # NOTE(review): task_prompt is empty at construction -- presumably set per
    # request before generation; confirm against ScriptGenerator's usage.
    self.script_generator = ScriptGenerator(task_prompt="", claude_api_key=os.getenv("CLAUDE_API_KEY", ""))
    self.max_iterations = max_iterations
    self.update_callback = update_callback

    # Define the tools available to the internal LlamaIndex Agent
    self._agent_tools = self._define_agent_tools()

    # Initialize the internal LlamaIndex ReAct Agent with improved system prompt
    self.agent = ReActAgent.from_tools(
        tools=self._agent_tools,
        llm=self.llm,
        verbose=True,
        system_prompt=self._get_system_prompt(),
        max_iterations=self.max_iterations, # Use the configurable max_iterations parameter
        temperature=0.2 # Lower temperature for more focused responses
    )
    print("🤖 ManagerAgent initialized with ReActAgent and enhanced workflow (temperature=0.2).")
|
| 60 |
+
|
| 61 |
+
def send_update(self, message: str) -> str:
    """
    Send an update message to the user about the agent's progress.

    Prefixes *message* with 📢 when it does not already start with a known
    status emoji, forwards it to ``self.update_callback`` when one is
    configured, and returns a confirmation string (ReAct tools expect a
    string result, per the original inline note).

    Args:
        message: Progress text to surface to the user.

    Returns:
        A confirmation string of the form ``"Update sent: <message>"``.
    """
    if not any(emoji in message[:2] for emoji in ["📢", "🔄", "✅", "❌", "⚠️", "💬", "🔍", "🚀", "✨"]):
        message = f"📢 {message}"

    print(f"📣 AGENT: ManagerAgent.send_update CALLED with message: {message}") # DEBUG
    print(f"📣 AGENT: self.update_callback is: {self.update_callback}") # DEBUG

    if self.update_callback:
        try:
            self.update_callback(message) # This should call update_status_callback in app.py
            print(f"📣 AGENT: Callback invoked successfully.") # DEBUG
        except Exception as e:
            print(f"❌ AGENT: Error sending update via callback: {e}")
            import traceback
            traceback.print_exc()
    else:
        print("📣 AGENT: No update_callback configured for ManagerAgent.") # DEBUG
    # Return a string confirmation, as ReAct tools often expect a string output.
    # Fix: the annotation previously claimed `-> None` while a string was
    # deliberately returned.
    return f"Update sent: {message}"
|
| 83 |
+
|
| 84 |
+
def _get_system_prompt(self) -> str:
    """Enhanced system prompt for better workflow orchestration.

    Returns the static instruction text handed to the internal ReAct agent.
    The tool names it references (`perform_web_research`, `send_user_update`,
    `get_available_tools`, `brainstorm_tools`, `deploy_tool`,
    `use_registry_tool`, ...) must stay in sync with _define_agent_tools.
    """
    # The whole prompt is one literal; edits here directly change agent behavior.
    return """You are ALITA, an advanced generalist agent. You are here to help people with their requests. You can do many tasks like research, tool creation, automation, analysis, and much more. What is unique about you is that you can create tools to help people with their requests, even if they are not in your capabilities.

Your primary workflow for ANY user request:

1. **ANALYZE PHASE**:
* Understand the user's request deeply.
* Identify if it's: an information request, a tool request, task automation, research, or creative work.
* Decide whether to answer the request directly, create a new tool, or perform web research.
* If you decide to answer directly, provide your answer right away.
* If you decide to perform web research, use the `perform_web_research` tool with specific queries. Inform the user you are starting research before taking this action.
* If the task requires more than simple text generation or basic web research, proceed to check for existing tools.
* Use `send_user_update` to inform the user about what you're doing and your progress if you don't answer directly.
* Do not apologize for not being able to answer the prompt until you have attempted all subsequent steps (EXISTING TOOLS CHECK, TOOL ANALYSIS PHASE, RESEARCH PHASE, TOOL CREATION PHASE). If all fail, then apologize.

2. **EXISTING TOOLS CHECK**:
* ALWAYS first use `get_available_tools` to list all tools in your registry.
* If suitable tools exist but are not deployed (check their 'state'), use `deploy_tool` to activate them.
* Once tools are active and deployed, use `use_registry_tool` to execute them with the necessary inputs.
* Keep the user informed of your progress with `send_user_update`.

3. **TOOL ANALYSIS PHASE**:
* If you need to determine whether existing tools are sufficient or new tools are needed, use `brainstorm_tools`.
* Provide the `brainstorm_tools` function with the `user_task` and the `available_tools` (a comma-separated string of tool names from `get_available_tools`).
* If there are no tools available, provide "none" as the input for `available_tools` to the `brainstorm_tools` function.
* Follow the recommendations from the brainstorming phase.
* Send an update to the user with `send_user_update` about your findings.

4. **RESEARCH PHASE** (if needed for information or tool creation):
* Use the `perform_web_research` tool for all web-based information gathering.
* For general information or in-depth research on a topic, provide a clear query to `perform_web_research`.
* If you are looking for open-source code, libraries, or technical solutions (including from GitHub), instruct `perform_web_research` in your query to focus on finding code examples or repositories. For instance: "perform_web_research: Find Python code snippets for parsing CSV files from GitHub."
* Send updates to the user with `send_user_update` about your research progress.

5. **TOOL CREATION PHASE** (if no existing tool works or can be adapted):
* First, use `brainstorm_tools` to define the specifications of the new tool needed.
* Next, use `perform_web_research` to find existing open-source solutions, code examples, or libraries that can help build the tool. Be specific in your query to `perform_web_research` about looking for implementation details.
* Then, use `generate_mcp_script` to create the Python code and environment script for the tool, using the specification from `brainstorm_tools` and insights from your research.
* Finally, use `execute_and_register_mcp` to test the new tool in a safe environment and, if successful, register it in your tool registry.
* Keep the user informed of your progress with `send_user_update`.

6. **EXECUTION PHASE** (after a tool is ready, either existing or newly created):
* Ensure the required tool is deployed using `deploy_tool` if it's not already active.
* Use `use_registry_tool` to run the active tool with the appropriate inputs.
* Provide comprehensive results with explanations.
* Send a final update to the user with `send_user_update` about the results.

**Key Principles**:
* Be proactive in tool discovery and creation.
* Always search for existing solutions before creating new ones.
* Provide detailed explanations of your reasoning process.
* Focus on practical, actionable results.
* Leverage open-source resources extensively via `perform_web_research`.
* Keep the user informed of your progress with regular updates using `send_user_update`.

**Tool Management Capabilities**:
* Use `get_available_tools` to see all tools in your registry.
* Use `brainstorm_tools` to analyze if existing tools are sufficient or new ones are needed.
* Check tool 'state' from `get_available_tools` to determine if they are active ('activated' or similar) or inactive.
* Use `deploy_tool` to activate any inactive tools before running them. Tools must be deployed before they can be executed by `use_registry_tool`.

**Response Style**:
* Structure your responses clearly with headers where appropriate.
* Explain what you're doing and why.
* Provide context and next steps.
* Be conversational but informative.
* Use `send_user_update` to keep the user informed throughout the process.
"""
|
| 153 |
+
|
| 154 |
+
def _define_agent_tools(self) -> List[FunctionTool]:
    """Enhanced tool definition with better descriptions.

    Builds the FunctionTool list exposed to the internal ReAct agent. The
    names declared here must match those referenced in the system prompt
    returned by _get_system_prompt.

    Returns:
        List of FunctionTool wrappers around this instance's methods.
    """
    tools = []

    # User update tool
    tools.append(
        FunctionTool.from_defaults(
            self.send_update,
            name="send_user_update",
            description="Send an update message to the user about your current progress or actions. Takes 'message' (string) containing the update information. Use this tool frequently to keep the user informed about what you're doing."
        )
    )

    # Add research tool
    tools.append(
        FunctionTool.from_defaults(
            self.research,
            name="perform_web_research",
            description="Performs comprehensive web research on a given topic. Takes 'query' (string) containing the research question or topic to investigate. Returns a detailed research report with findings and sources."
        )
    )

    # Get all available tools
    tools.append(
        FunctionTool.from_defaults(
            self.get_available_tools,
            name="get_available_tools",
            description="Get a list of all tools currently available in the registry. Returns a list of tool specifications with names, descriptions, and states."
        )
    )

    # Use a registered tool
    tools.append(
        FunctionTool.from_defaults(
            self.use_registry_tool,
            name="use_registry_tool",
            description="Use a registered tool directly by invoking its endpoint. Takes 'tool_name' (string) and any additional arguments required by the tool. Automatically deploys the tool if needed. Returns the response from the tool."
        )
    )

    # Tool brainstorming
    tools.append(
        FunctionTool.from_defaults(
            self.brainstorm_tools,
            name="brainstorm_tools",
            description="Analyze the user request against available tools to determine if existing tools are sufficient or new tools are needed. Takes 'user_task' (string) containing the user's request and optionally 'available_tools' (string) with comma-separated tool names. Returns recommendations on which tools to use or what new tools to create."
        )
    )

    # Deploy a specific tool
    tools.append(
        FunctionTool.from_defaults(
            self.deploy_tool,
            name="deploy_tool",
            description="Deploy and activate a specific tool from the registry. Takes 'tool_name' (string) containing the name of the tool to deploy. Returns the URL of the deployed tool if successful, or an error message if deployment fails."
        )
    )

    # Add analysis tool for better decision making
    tools.append(
        FunctionTool.from_defaults(
            self._analyze_user_request,
            name="analyze_user_request",
            description="Analyze user request to determine the best approach (research, existing tool, new tool creation). Takes 'user_message' (string). Returns analysis with recommended actions."
        )
    )

    return tools
|
| 222 |
+
|
| 223 |
+
def _analyze_user_request(self, user_message: str) -> Dict[str, Any]:
|
| 224 |
+
"""Analyze user request to determine optimal workflow path"""
|
| 225 |
+
analysis = {
|
| 226 |
+
"request_type": "unknown",
|
| 227 |
+
"complexity": "medium",
|
| 228 |
+
"requires_research": False,
|
| 229 |
+
"requires_tools": False,
|
| 230 |
+
"suggested_approach": [],
|
| 231 |
+
"key_concepts": []
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
message_lower = user_message.lower()
|
| 235 |
+
|
| 236 |
+
# Look for comprehensive research indicators
|
| 237 |
+
research_terms = ["comprehensive", "thorough", "in-depth", "detailed", "extensive",
|
| 238 |
+
"research", "investigate", "analyze", "report", "study"]
|
| 239 |
+
|
| 240 |
+
# Determine request type
|
| 241 |
+
if any(word in message_lower for word in research_terms):
|
| 242 |
+
analysis["request_type"] = "deep_research"
|
| 243 |
+
analysis["requires_research"] = True
|
| 244 |
+
analysis["complexity"] = "high"
|
| 245 |
+
analysis["suggested_approach"].append("research")
|
| 246 |
+
|
| 247 |
+
elif any(word in message_lower for word in ["recherche", "search", "find", "lookup", "information", "what is", "explain"]):
|
| 248 |
+
analysis["request_type"] = "information_request"
|
| 249 |
+
analysis["requires_research"] = True
|
| 250 |
+
analysis["suggested_approach"].append("web_search")
|
| 251 |
+
|
| 252 |
+
elif any(word in message_lower for word in ["outil", "tool", "script", "automatise", "automate", "create", "build"]):
|
| 253 |
+
analysis["request_type"] = "tool_request"
|
| 254 |
+
analysis["requires_tools"] = True
|
| 255 |
+
analysis["suggested_approach"].extend(["find_existing_tools", "brainstorm_if_needed"])
|
| 256 |
+
|
| 257 |
+
elif any(word in message_lower for word in ["analyse", "analyze", "process", "calculate", "compute"]):
|
| 258 |
+
analysis["request_type"] = "analysis_task"
|
| 259 |
+
analysis["requires_tools"] = True
|
| 260 |
+
analysis["suggested_approach"].extend(["find_existing_tools", "research_methods"])
|
| 261 |
+
|
| 262 |
+
elif any(word in message_lower for word in ["tendance", "trend", "market", "news", "current"]):
|
| 263 |
+
analysis["request_type"] = "research_task"
|
| 264 |
+
analysis["requires_research"] = True
|
| 265 |
+
analysis["complexity"] = "high"
|
| 266 |
+
analysis["suggested_approach"].extend(["web_search", "github_search"])
|
| 267 |
+
|
| 268 |
+
# Extract key concepts for better tool matching
|
| 269 |
+
concepts = []
|
| 270 |
+
tech_keywords = ["python", "javascript", "api", "database", "csv", "json", "web", "scraping", "ml", "ai"]
|
| 271 |
+
for keyword in tech_keywords:
|
| 272 |
+
if keyword in message_lower:
|
| 273 |
+
concepts.append(keyword)
|
| 274 |
+
analysis["key_concepts"] = concepts
|
| 275 |
+
|
| 276 |
+
return analysis
|
| 277 |
+
|
| 278 |
+
def _run_and_register_mcp(self, spec: Dict[str, Any], python_script: str, env_script: str, input_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Validate an MCP tool in a fresh environment and register it on success.

    Creates a throwaway environment from *env_script*, executes
    *python_script* inside it (optionally feeding *input_data*), registers
    the tool in the registry when execution succeeds, and tears the
    environment down afterwards.

    Args:
        spec: Dictionary form of the MCPToolSpec describing the tool.
        python_script: The tool's Python source to validate.
        env_script: Environment/bootstrap script declaring dependencies.
        input_data: Optional payload passed to the script under test.

    Returns:
        An MCPExecutionResult serialized via ``to_dict()``.
    """
    print(f"🔧 ManagerAgent: Executing and registering MCP: {spec.get('name', 'Unnamed Tool')}")

    try:
        mcp_spec_obj = MCPToolSpec.from_dict(spec)
        # Unique, readable environment name: alita-<tool prefix>-<random hex>.
        env_name_suffix = mcp_spec_obj.name.lower().replace(' ', '-')[:10]
        env_name = f"alita-{env_name_suffix}-{uuid.uuid4().hex[:8]}"

        print(f"🔄 Setting up environment: {env_name}")
        env_success = self.code_runner.setup_environment(env_script, env_name)

        if not env_success:
            result = MCPExecutionResult(
                success=False,
                error_message=f"Environment setup failed for '{env_name}'. Check dependencies in env_script."
            )
            return result.to_dict()

        print(f"▶️ Executing script in environment: {env_name}")
        execution_result = self.code_runner.execute(python_script, env_name, input_data)

        if execution_result.success:
            print(f"✅ Script execution successful. Registering tool: {mcp_spec_obj.name}")
            # Persist the validated artifacts so the tool can be re-run later.
            mcp_spec_obj.validated_script = python_script
            mcp_spec_obj.environment_script = env_script
            self.registry.register_tool(mcp_spec_obj)
            print(f"🎯 Tool '{mcp_spec_obj.name}' successfully registered in registry")

            # Add success message to result
            execution_result.output_data = execution_result.output_data or {}
            execution_result.output_data["registration_status"] = "Successfully registered"

        else:
            print(f"❌ Script execution failed for '{mcp_spec_obj.name}': {execution_result.error_message}")

        # Always cleanup after validation
        self.code_runner.cleanup_environment(env_name)
        return execution_result.to_dict()

    except Exception as e:
        error_msg = f"Unexpected error in MCP execution: {str(e)}"
        print(f"🚨 {error_msg}")

        # Best-effort cleanup on error. Narrowed from a bare `except: pass`
        # so KeyboardInterrupt/SystemExit are not swallowed and cleanup
        # failures are at least logged.
        try:
            if 'env_name' in locals():
                self.code_runner.cleanup_environment(env_name)
        except Exception as cleanup_error:
            print(f"⚠️ Environment cleanup failed: {cleanup_error}")

        return MCPExecutionResult(success=False, error_message=error_msg).to_dict()
|
| 330 |
+
|
| 331 |
+
def _run_registered_mcp(self, tool_name: str, input_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Run a previously registered MCP tool in a fresh, isolated environment.

    Looks the tool up in the registry, rebuilds its environment from the
    stored environment script, executes its validated script, and always
    tears the environment down afterwards.

    Args:
        tool_name: Name of the tool as registered in the registry.
        input_data: Optional payload forwarded to the tool's script.

    Returns:
        An MCPExecutionResult serialized via .to_dict().
    """
    print(f"🎯 ManagerAgent: Running registered tool: {tool_name}")

    spec = self.registry.get_tool(tool_name)
    if not spec:
        error_msg = f"Tool '{tool_name}' not found in registry. Available tools: {list(self.registry.tools.keys())}"
        print(f"❌ {error_msg}")
        return MCPExecutionResult(success=False, error_message=error_msg).to_dict()

    if not spec.validated_script or not spec.environment_script:
        error_msg = f"Tool '{tool_name}' missing validated script or environment configuration"
        print(f"❌ {error_msg}")
        return MCPExecutionResult(success=False, error_message=error_msg).to_dict()

    # Fresh, uniquely named environment per execution so concurrent runs
    # of the same tool cannot collide.
    env_name_suffix = spec.name.lower().replace(' ', '-')[:10]
    env_name = f"alita-run-{env_name_suffix}-{uuid.uuid4().hex[:8]}"

    try:
        print(f"🔄 Setting up execution environment: {env_name}")
        env_success = self.code_runner.setup_environment(spec.environment_script, env_name)

        if not env_success:
            return MCPExecutionResult(
                success=False,
                error_message=f"Failed to setup environment for tool '{tool_name}'"
            ).to_dict()

        print(f"▶️ Executing registered tool: {tool_name}")
        execution_result = self.code_runner.execute(spec.validated_script, env_name, input_data)

        print(f"{'✅' if execution_result.success else '❌'} Tool execution completed. Success: {execution_result.success}")

        return execution_result.to_dict()

    except Exception as e:
        error_msg = f"Error executing registered tool '{tool_name}': {str(e)}"
        print(f"🚨 {error_msg}")
        return MCPExecutionResult(success=False, error_message=error_msg).to_dict()

    finally:
        # Always cleanup, best-effort. Fixed: this was a bare `except:`,
        # which would also swallow SystemExit/KeyboardInterrupt.
        try:
            self.code_runner.cleanup_environment(env_name)
        except Exception:
            pass
|
| 378 |
+
|
| 379 |
+
def run_task(self, prompt: TaskPrompt) -> str:
    """
    Execute a user task end-to-end via the internal ReAct agent.

    The agent orchestrates the full workflow (analysis, tool search, web
    research, brainstorming, tool creation/execution) and the result is
    formatted for display in Gradio. Progress is reported to the user via
    self.send_update before, after, and on failure.

    Args:
        prompt: TaskPrompt whose `text` attribute holds the user's request.

    Returns:
        A markdown-formatted response string; on failure, a formatted
        error message is returned instead (this method does not raise).
    """
    print(f"\n{'='*60}")
    print(f"🚀 ALITA ManagerAgent: Starting task execution")
    print(f"📝 User prompt: {prompt.text[:100]}{'...' if len(prompt.text) > 100 else ''}")
    print(f"{'='*60}")

    # Send initial update to the user
    self.send_update(f"Starting to process your request: '{prompt.text[:50]}{'...' if len(prompt.text) > 50 else ''}'")

    try:
        # Use the internal ReAct agent to handle the complete workflow
        print("🧠 Engaging ReAct Agent for intelligent task orchestration...")

        # The ReAct agent will use its tools to:
        # 1. Analyze the request
        # 2. Search existing tools
        # 3. Perform web research if needed
        # 4. Brainstorm solutions
        # 5. Create/execute tools as necessary
        # 6. Provide comprehensive response

        response = self.agent.chat(prompt.text)

        print("✅ Task execution completed successfully")
        print(f"{'='*60}\n")

        # Send final update to the user
        self.send_update("Task completed successfully! Here's your response.")

        # Format response for better Gradio presentation.
        # NOTE(review): assumes the agent returns an object with a
        # `.response` attribute (llama-index style) — confirm against
        # the agent implementation.
        formatted_response = self._format_response_for_gradio(response.response)
        return formatted_response

    except Exception as e:
        error_msg = f"🚨 ManagerAgent encountered an error during task execution:\n\n**Error Details:**\n{str(e)}\n\n**Next Steps:**\n- Check your API key and network connection\n- Verify all components are properly initialized\n- Try a simpler request to test basic functionality"

        print(f"❌ Task execution failed: {e}")
        print(f"{'='*60}\n")

        # Send error update to the user
        self.send_update(f"An error occurred while processing your request: {str(e)}")

        return error_msg
|
| 426 |
+
|
| 427 |
+
def _format_response_for_gradio(self, response: str) -> str:
    """Ensure the response opens with a markdown heading for Gradio display."""
    # Responses that already begin with a markdown heading are left as-is;
    # anything else gets a level-2 header with the bot emoji prepended.
    if response.startswith("#"):
        return response
    return f"## 🤖 {response}"
|
| 437 |
+
|
| 438 |
+
def get_registry_status(self) -> Dict[str, Any]:
    """Summarize the tool registry: size, tool names, and a readiness flag."""
    registered = self.registry.tools
    return {
        "total_tools": len(registered),
        "tool_names": list(registered.keys()),
        "registry_ready": bool(registered),
    }
|
| 445 |
+
|
| 446 |
+
def reset_registry(self):
    """Reset the tool registry (useful for testing).

    Replaces self.registry with a brand-new Registry instance, discarding
    every previously registered tool.
    """
    self.registry = Registry()
    print("🔄 Tool registry has been reset")
|
| 450 |
+
|
| 451 |
+
def __str__(self):
    """One-line summary: LLM class name and how many tools are registered."""
    llm_cls = type(self.llm).__name__
    tool_count = len(self.registry.tools)
    return f"ManagerAgent(llm={llm_cls}, tools_registered={tool_count})"
|
| 453 |
+
|
| 454 |
+
def research(self, query: str, max_iterations: Optional[int] = None, verbose: Optional[bool] = None) -> str:
    """
    Performs autonomous web research on the given query using the WebAgent's research function.

    NOTE(review): the max_iterations/verbose overrides mutate self.web_agent
    directly and therefore persist for subsequent research calls — confirm
    this is intended rather than a per-call setting.

    Args:
        query: The research question or topic
        max_iterations: Optional override for the maximum number of research iterations
        verbose: Optional override for verbose mode

    Returns:
        A comprehensive textual report based on web research; on failure an
        error message string is returned (this method does not raise).
    """
    print(f"\n{'='*60}")
    print(f"🌐 ALITA ManagerAgent: Starting web research")
    print(f"📝 Research query: {query[:100]}{'...' if len(query) > 100 else ''}")
    print(f"{'='*60}")

    try:
        # Configure WebAgent for this research session
        if max_iterations is not None:
            self.web_agent.max_research_iterations = max_iterations

        if verbose is not None:
            self.web_agent.verbose = verbose

        # Perform the research
        print("🔍 Initiating autonomous web research. This may take some time... here is the query: ", query)
        report = self.web_agent.research(query)
        print("🔍 here is the report: ", report)

        print("✅ Research completed successfully")
        print(f"{'='*60}\n")

        return report

    except Exception as e:
        error_msg = f"🚨 Error during web research: {str(e)}"
        print(f"❌ Research failed: {e}")
        print(f"{'='*60}\n")

        # Full traceback goes to the console for debugging; only the short
        # message is returned to the caller.
        import traceback
        print(traceback.format_exc())

        return error_msg
|
| 498 |
+
|
| 499 |
+
def get_available_tools(self) -> List[Dict[str, Any]]:
    """Return every registry tool as a plain dict for agent consumption.

    Each entry carries name, description, state, and the input/output
    schemas. Missing attributes degrade gracefully: state falls back to
    "unknown" and schemas fall back to empty dicts.

    Returns:
        List of dictionaries containing tool information.
    """
    print("📋 ManagerAgent: Retrieving list of all available tools")
    formatted_tools = [
        {
            "name": spec.name,
            "description": spec.description,
            "state": getattr(spec, "state", "unknown"),
            "input_schema": getattr(spec, "input_schema", {}),
            "output_schema": getattr(spec, "output_schema", {}),
        }
        for spec in self.registry.list_tools()
    ]
    print(f"🔍 Found {len(formatted_tools)} tools in registry")
    return formatted_tools
|
| 522 |
+
|
| 523 |
+
def deploy_tool(self, tool_name: str) -> Dict[str, Any]:
    """Deploy and activate a registered tool, reporting the outcome.

    Args:
        tool_name: Name of the tool to deploy.

    Returns:
        On success: {"success": True, "tool_name", "url", "message"}.
        On failure: {"success": False, "error": <reason>}.
    """
    print(f"🚀 ManagerAgent: Deploying tool '{tool_name}'")

    # A tool must already exist in the registry before it can be deployed.
    if not self.registry.get_tool(tool_name):
        error_msg = f"Tool '{tool_name}' not found in registry"
        print(f"❌ {error_msg}")
        return {"success": False, "error": error_msg}

    try:
        url = self.registry.deploy_tool(tool_name)
    except Exception as e:
        error_msg = f"Error deploying tool '{tool_name}': {str(e)}"
        print(f"🚨 {error_msg}")
        return {"success": False, "error": error_msg}

    # A falsy URL means the registry could not bring the tool up.
    if not url:
        error_msg = f"Failed to deploy tool '{tool_name}'"
        print(f"❌ {error_msg}")
        return {"success": False, "error": error_msg}

    print(f"✅ Successfully deployed tool '{tool_name}' at {url}")
    return {
        "success": True,
        "tool_name": tool_name,
        "url": url,
        "message": f"Tool '{tool_name}' successfully deployed",
    }
|
| 562 |
+
|
| 563 |
+
def brainstorm_tools(self, user_task: str, available_tools: str = "") -> Dict[str, Any]:
    """Ask the Brainstormer whether existing tools cover a task or new ones are needed.

    Args:
        user_task: The user's request or task.
        available_tools: Optional comma-separated tool names; when empty,
            the current registry contents are used.

    Returns:
        Dict with "success" plus either "recommendations"/"summary" on
        success or "error"/"recommendations" on failure.
    """
    print(f"🧠 ManagerAgent: Brainstorming tools for task: {user_task[:100]}{'...' if len(user_task) > 100 else ''}")

    # Fall back to whatever the registry currently holds.
    if not available_tools:
        available_tools = ", ".join(entry["name"] for entry in self.get_available_tools())

    try:
        specs = self.brainstormer.generate_mcp_specs_to_fulfill_user_task(
            task=user_task,
            tools_list=available_tools
        )

        # The brainstormer signals failure via an "error" key in a dict payload.
        if isinstance(specs, dict) and "error" in specs:
            print(f"❌ Brainstorming failed: {specs['error']}")
            return {
                "success": False,
                "error": specs["error"],
                "recommendations": "Unable to analyze tools for this task."
            }

        print(f"✅ Brainstorming complete. Found {len(specs)} tool recommendations.")
        return {
            "success": True,
            "recommendations": specs,
            "summary": f"Analysis complete. Found {len(specs)} tool recommendations."
        }

    except Exception as e:
        error_msg = f"Error during tool brainstorming: {str(e)}"
        print(f"🚨 {error_msg}")
        return {
            "success": False,
            "error": error_msg,
            "recommendations": "Unable to analyze tools due to an error."
        }
|
| 613 |
+
|
| 614 |
+
def use_registry_tool(self, tool_name: str, *args, **kwargs) -> Dict[str, Any]:
    """Invoke a registered tool through the Registry and normalize the result.

    The Registry's use_tool handles deployment if the tool isn't live yet.
    The user is kept informed via send_update at each step, and the return
    value always carries a boolean "success" key.

    Args:
        tool_name: Name of the tool to use.
        *args: Positional arguments forwarded to the tool.
        **kwargs: Keyword arguments forwarded to the tool.

    Returns:
        The tool's response as a dict with a "success" flag, or an error
        payload {"error": ..., "success": False}.
    """
    try:
        self.send_update(f"Using tool: {tool_name}")

        # Bail out early if the registry has never heard of this tool.
        if not self.registry.get_tool(tool_name):
            missing = f"Tool '{tool_name}' not found in registry"
            self.send_update(missing)
            return {"error": missing, "success": False}

        self.send_update(f"Executing tool: {tool_name}")
        outcome = self.registry.use_tool(tool_name, *args, **kwargs)
        self.send_update(f"Tool '{tool_name}' executed successfully")

        # Dict responses get the flag folded in; anything else is wrapped.
        if not isinstance(outcome, dict):
            return {"result": outcome, "success": True}
        outcome["success"] = True
        return outcome

    except ValueError as e:
        # Expected failures (tool not found, deployment failed).
        failure = str(e)
        self.send_update(f"Error: {failure}")
        return {"error": failure, "success": False}

    except Exception as e:
        # Anything unexpected is reported with context.
        failure = f"Unexpected error using tool '{tool_name}': {str(e)}"
        self.send_update(f"Error: {failure}")
        return {"error": failure, "success": False}
|
models/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .mcp_tool_spec import MCPToolSpec
|
| 2 |
+
from .mcp_execution_result import MCPExecutionResult
|
| 3 |
+
from .task_prompt import TaskPrompt
|
models/mcp_execution_result.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
from typing import Optional, Dict, Any

@dataclass
class MCPExecutionResult:
    """
    Represents the result of executing an MCP tool script.
    """
    success: bool  # True when the script ran to completion
    output: Optional[Dict[str, Any]] = None  # Structured output if successful
    logs: Optional[str] = None  # Combined stdout/stderr
    error_message: Optional[str] = None  # Error details if execution failed
    return_code: Optional[int] = None  # Process return code
    # Declared (appended, so positional construction is unchanged) because
    # callers attach auxiliary payloads such as a registration status; as a
    # real field it now serializes consistently via to_dict().
    output_data: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict.

        Returns a shallow copy so callers cannot mutate this object's
        attributes through the returned mapping (the previous version
        returned the live __dict__).
        """
        return dict(self.__dict__)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MCPExecutionResult":
        """Rebuild an instance from a dict produced by to_dict()."""
        return cls(**data)
|
models/mcp_tool_spec.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
from typing import Optional

@dataclass
class MCPToolSpec:
    """Specification for an MCP tool: its contract, docs, and lifecycle state."""
    name: str  # Unique tool identifier used as the registry key
    input_schema: dict  # Schema-like description of accepted input
    output_schema: dict  # Schema-like description of produced output
    description: str  # Human-readable summary of what the tool does
    pseudo_code: str  # High-level sketch of the intended implementation
    source_hint: str  # Pointer to reference material for code generation
    state: str  # Lifecycle state of the tool
    # Populated once the generated script passes validation; declared here
    # (appended with defaults, so positional construction is unchanged)
    # instead of being attached as ad-hoc dynamic attributes.
    validated_script: Optional[str] = None
    environment_script: Optional[str] = None
|
models/task_prompt.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Any, Optional
from dataclasses import dataclass

@dataclass
class TaskPrompt:
    """
    Represents a task prompt with its associated metadata and content.
    """

    text: str  # The prompt text itself
    metadata: Optional[Dict[str, Any]] = None  # Optional contextual metadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert the TaskPrompt to a dictionary."""
        # Bug fix: previously read the nonexistent attribute `self.content`
        # (AttributeError at runtime); the dataclass field is `text`.
        return {
            "text": self.text,
            "metadata": self.metadata or {}
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'TaskPrompt':
        """Create a TaskPrompt from a dictionary."""
        # Bug fix: previously passed `content=...`, which is not a field of
        # this dataclass and raised TypeError.
        return cls(
            text=data["text"],
            metadata=data.get("metadata")
        )
|