import os

import modal

# Create Modal app
app = modal.App("alita-chat-app")

# Define the image with all required dependencies.
# Note: "dataclasses" was dropped from the install list; it is part of the
# standard library on Python 3.11, and the PyPI backport can shadow it.
image = (
    modal.Image.debian_slim(python_version="3.11")
    .pip_install([
        "gradio>=4.0.0",
        "llama-index-core",
        "llama-index-llms-anthropic",
        "python-dotenv",
        "openai",
        "llama-index",
        "anthropic",
        "requests",
        "beautifulsoup4",
        "duckduckgo-search",
        "llama-index-tools-duckduckgo",
    ])
    # Main script. The local file is manager_agent2.py, but it is mounted as
    # /app/manager_agent.py so that `from manager_agent import ManagerAgent`
    # in the functions below resolves.
    .add_local_file("manager_agent2.py", "/app/manager_agent.py")
    # Models
    .add_local_file("models/__init__.py", "/app/models/__init__.py")
    .add_local_file("models/mcp_tool_spec.py", "/app/models/mcp_tool_spec.py")
    .add_local_file("models/mcp_execution_result.py", "/app/models/mcp_execution_result.py")
    .add_local_file("models/task_prompt.py", "/app/models/task_prompt.py")
    # Components
    .add_local_file("components/__init__.py", "/app/components/__init__.py")
    .add_local_file("components/mcp_brainstormer.py", "/app/components/mcp_brainstormer.py")
    .add_local_file("components/web_agent.py", "/app/components/web_agent.py")
    .add_local_file("components/script_generator.py", "/app/components/script_generator.py")
    .add_local_file("components/code_runner.py", "/app/components/code_runner.py")
    .add_local_file("components/mcp_registry.py", "/app/components/mcp_registry.py")
)

# Global variables to store initialized components
llm = None
manager_agent = None


@app.function(
    image=image,
    # All functions reference the same Modal secret; the original mixed
    # "anthropic" and "anthropic-api-key".
    secrets=[modal.Secret.from_name("anthropic-api-key")],
    max_containers=10,
    timeout=300,
    min_containers=1,
    cpu=2,
    memory=2048,
)
def initialize_components():
    """Initialize LLM and Manager Agent (one-off warm-up/diagnostic)."""
    global llm, manager_agent
    import sys
    sys.path.append("/app")

    try:
        # Import required modules
        from llama_index.core import Settings
        from llama_index.llms.anthropic import Anthropic
        from manager_agent import ManagerAgent

        # Get API key from environment (injected by the Modal secret)
        api_key = os.environ.get("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("ANTHROPIC_API_KEY not found in environment variables")

        # Initialize LLM
        llm = Anthropic(model="claude-3-5-sonnet-20241022", api_key=api_key)
        Settings.llm = llm
        print("Successfully initialized LlamaIndex with Anthropic model")

        # Initialize the ManagerAgent
        manager_agent = ManagerAgent(llm)
        print("✅ ManagerAgent initialized successfully")
        return True

    except Exception as e:
        print(f"Error initializing components: {e}")
        import traceback
        traceback.print_exc()
        return False


@app.function(
    image=image,
    secrets=[modal.Secret.from_name("anthropic-api-key")],
    max_containers=10,
    timeout=60,
    min_containers=1,
    cpu=2,
    memory=2048,
)
def process_message(message: str):
    """Process a single message through the ManagerAgent"""
    import sys
    sys.path.append("/app")

    try:
        from models import TaskPrompt
        from manager_agent import ManagerAgent
        from llama_index.core import Settings
        from llama_index.llms.anthropic import Anthropic

        # Initialize components per call; containers do not share the globals
        # set by initialize_components.
        api_key = os.environ.get("ANTHROPIC_API_KEY")
        if not api_key:
            return "❌ ANTHROPIC_API_KEY not found in environment variables"

        llm = Anthropic(model="claude-3-5-sonnet-20241022", api_key=api_key)
        Settings.llm = llm
        manager_agent = ManagerAgent(llm)

        # Process the message
        task_prompt = TaskPrompt(text=message)
        response = manager_agent.run_task(task_prompt)
        return response

    except Exception as e:
        import traceback
        error_msg = f"❌ Error processing message: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return error_msg


# Simple web server approach
@app.function(
    image=image,
    secrets=[modal.Secret.from_name("anthropic-api-key")],
    max_containers=10,
    timeout=300,
    min_containers=1,
    cpu=2,
    memory=2048,
)
@modal.web_server(port=7860, startup_timeout=180)
def gradio_app():
    """Simple Gradio app without complex initialization"""
    import gradio as gr
    import asyncio

    async def chat_function(message, history):
        """Simple chat function that calls the Modal function"""
        try:
            # Call the Modal function to process the message
            response = process_message.remote(message)

            # Stream the response a few words at a time for better UX
            words = response.split()
            partial_response = ""
            for i, word in enumerate(words):
                partial_response += word + " "
                if i % 3 == 0 or i == len(words) - 1:
                    yield partial_response.strip()
                    await asyncio.sleep(0.01)
        except Exception as e:
            yield f"❌ Error: {str(e)}"

    # Create simple Gradio interface
    interface = gr.ChatInterface(
        fn=chat_function,
        type="messages",
        title="ALITA",
        description="ALITA: the self-learning AI",
        examples=[
            "🔍 Search for information about AI",
            "🛠️ Analyse this CSV file",
            "⚡ Generate a script to automate a repetitive task",
            "🌐 Find open-source resources for machine learning",
        ],
        theme="soft",
    )

    # Launch the interface with Modal-compatible settings
    interface.launch(
        server_name="0.0.0.0",     # Must bind to all interfaces for Modal
        server_port=7860,          # Must match the port in @modal.web_server
        share=False,               # Don't create public links
        quiet=True,                # Reduce logging noise
        show_error=True,
        prevent_thread_lock=True,  # Important: prevents blocking Modal's event loop
    )


# Deployment: use the Modal CLI rather than calling deploy() from __main__:
#   modal serve <this file>   # local development with hot reload
#   modal deploy <this file>  # persistent deployment
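if __name__ == "__main__":
    # Minimal client sketch, not part of the app itself: once the app is
    # deployed, the backend can be invoked from a plain Python process. This
    # assumes the app/function names defined above and a Modal client new
    # enough to provide modal.Function.from_name (older clients used
    # modal.Function.lookup).
    process = modal.Function.from_name("alita-chat-app", "process_message")
    print(process.remote("🔍 Search for information about AI"))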