ryanDing26 committed on
Commit f2a52eb · 1 Parent(s): 127ace6

App release

Files changed (36)
  1. LICENSE +21 -0
  2. README.md +33 -7
  3. app.py +602 -0
  4. histopath/__init__.py +0 -0
  5. histopath/__pycache__/__init__.cpython-311.pyc +0 -0
  6. histopath/__pycache__/env_desc.cpython-311.pyc +0 -0
  7. histopath/__pycache__/llm.cpython-311.pyc +0 -0
  8. histopath/__pycache__/utils.cpython-311.pyc +0 -0
  9. histopath/agent/__init__.py +1 -0
  10. histopath/agent/__pycache__/__init__.cpython-311.pyc +0 -0
  11. histopath/agent/__pycache__/agent.cpython-311.pyc +0 -0
  12. histopath/agent/agent.py +705 -0
  13. histopath/config.py +91 -0
  14. histopath/llm.py +235 -0
  15. histopath/model/__init__.py +0 -0
  16. histopath/model/__pycache__/__init__.cpython-311.pyc +0 -0
  17. histopath/model/__pycache__/retriever.cpython-311.pyc +0 -0
  18. histopath/model/retriever.py +127 -0
  19. histopath/retriever_benchmark.py +101 -0
  20. histopath/tool/__init__.py +1 -0
  21. histopath/tool/__pycache__/__init__.cpython-311.pyc +0 -0
  22. histopath/tool/__pycache__/pathology.cpython-311.pyc +0 -0
  23. histopath/tool/__pycache__/support_tools.cpython-311.pyc +0 -0
  24. histopath/tool/__pycache__/tool_registry.cpython-311.pyc +0 -0
  25. histopath/tool/pathology.py +458 -0
  26. histopath/tool/support_tools.py +66 -0
  27. histopath/tool/tool_description/__pycache__/pathology.cpython-311.pyc +0 -0
  28. histopath/tool/tool_description/__pycache__/support_tools.cpython-311.pyc +0 -0
  29. histopath/tool/tool_description/pathology.py +156 -0
  30. histopath/tool/tool_description/support_tools.py +30 -0
  31. histopath/tool/tool_registry.py +84 -0
  32. histopath/utils.py +722 -0
  33. histopath_env/environment.yml +33 -0
  34. histopath_env/histo_env.yml +32 -0
  35. histopath_env/setup.sh +108 -0
  36. requirements.txt +45 -0
LICENSE ADDED
@@ -0,0 +1,21 @@

MIT License

Copyright (c) 2025 Ryan Ding

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md CHANGED
@@ -1,14 +1,40 @@
  ---
- title: HistoPath
- emoji: 📚
- colorFrom: green
- colorTo: red
+ title: HistoPath Agent
+ emoji: 🔬
+ colorFrom: purple
+ colorTo: blue
  sdk: gradio
- sdk_version: 5.49.1
+ sdk_version: 4.44.0
  app_file: app.py
  pinned: false
  license: mit
- short_description: aging-based pathology agent
+ models:
+ - paige-ai/Virchow2
+ - paige-ai/Prism
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # 🔬 HistoPath Agent
+
+ AI-Powered Histopathology Analysis Assistant for whole slide image analysis, segmentation, and captioning.
+
+ ## Features
+ - 📸 Whole Slide Image (WSI) Captioning
+ - 🔍 Cell and Tissue Segmentation
+ - 🏷️ Zero-Shot Classification
+ - 📊 Quantitative Analysis (TILs, Fibrosis)
+
+ ## Usage
+ 1. Enter the passcode to access the application
+ 2. Upload your histopathology image (.svs, .png, .jpg, .tif)
+ 3. Enter your analysis request
+ 4. View results in the Images and Data tabs
+
+ ## Environment Variables
+ Set these in your Hugging Face Spaces secrets:
+ - `GRADIO_PASSWORD`: Access passcode
+ - `HUGGINGFACE_ACCESS_TOKEN`: For accessing gated models
+ - `ANTHROPIC_API_KEY`: For Claude LLM (optional)
+ - `OPENAI_API_KEY`: For OpenAI models (optional)
+
+ ## Credits
+ Built with LazySlide, LangChain, and Gradio
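
For local use outside the Space, the same agent that `app.py` wires into Gradio can be driven directly through the `A1` class added in this commit. A minimal sketch, assuming provider API keys are already configured; the slide path is hypothetical:

```python
from histopath.agent import A1

# Defaults to claude-sonnet-4 with the tool retriever enabled
# (see histopath/agent/agent.py below)
agent = A1(path="./data", use_tool_retriever=True)

# go() runs the plan/execute loop to completion and returns (step_log, final_answer)
log, answer = agent.go("Caption the whole slide image at ./data/sample.svs")
print(answer)
```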
app.py ADDED
@@ -0,0 +1,602 @@

import os
import re
import shutil
import traceback
import gradio as gr
from pathlib import Path
from histopath.agent import A1
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Get passcode from environment
PASSCODE = os.getenv("GRADIO_PASSWORD")

# Initialize agent (will be created after passcode validation)
agent = None


def check_for_output_files():
    """Check for all files in the output directory and return their paths."""
    output_dir = Path("./output")
    if not output_dir.exists():
        return [], []

    image_extensions = {".png", ".jpg", ".jpeg", ".svg", ".tif", ".tiff"}
    data_extensions = {".csv", ".txt", ".json", ".npy"}

    images = []
    data_files = []

    for file in output_dir.iterdir():
        if file.is_file():
            if file.suffix.lower() in image_extensions:
                images.append(str(file))
            elif file.suffix.lower() in data_extensions:
                data_files.append(str(file))

    return images, data_files


def preview_uploaded_file(uploaded_file):
    """Preview the uploaded file - show image or file info."""
    if uploaded_file is None:
        return None, None, "No file uploaded"

    file_path = Path(uploaded_file.name)
    file_ext = file_path.suffix.lower()

    image_extensions = {".png", ".jpg", ".jpeg", ".svg", ".tif", ".tiff", ".svs"}

    if file_ext in image_extensions:
        # Show image preview
        return uploaded_file.name, None, f"📷 Previewing: {file_path.name}"
    else:
        # Show file info
        file_size = Path(uploaded_file.name).stat().st_size / 1024  # KB
        return None, uploaded_file.name, f"📄 File: {file_path.name} ({file_size:.1f} KB)"


def parse_agent_output(output):
    """Parse agent output to extract code blocks, observations, and regular text."""
    # Strip out the message divider bars
    output = re.sub(r'={30,}\s*(Human|Ai)\s+Message\s*={30,}', '', output)
    output = output.strip()

    parsed = {
        "type": "text",
        "content": output,
        "code": None,
        "observation": None,
        "thinking": None
    }

    # Check for code execution block
    execute_match = re.search(r'<execute>(.*?)</execute>', output, re.DOTALL)
    if execute_match:
        parsed["type"] = "code"
        parsed["code"] = execute_match.group(1).strip()
        # Extract text before the code block (thinking/explanation)
        text_before = output[:execute_match.start()].strip()
        # Remove any think tags but keep the content
        text_before = re.sub(r'<think>(.*?)</think>', r'\1', text_before, flags=re.DOTALL)
        text_before = re.sub(r'={30,}.*?={30,}', '', text_before).strip()
        parsed["thinking"] = text_before if text_before else None
        return parsed

    # Check for observation block
    observation_match = re.search(r'<observation>(.*?)</observation>', output, re.DOTALL)
    if observation_match:
        parsed["type"] = "observation"
        parsed["observation"] = observation_match.group(1).strip()
        # Extract text before observation if any
        text_before = output[:observation_match.start()].strip()
        text_before = re.sub(r'<think>(.*?)</think>', r'\1', text_before, flags=re.DOTALL)
        text_before = re.sub(r'={30,}.*?={30,}', '', text_before).strip()
        parsed["thinking"] = text_before if text_before else None
        return parsed

    # Check for solution block
    solution_match = re.search(r'<solution>(.*?)</solution>', output, re.DOTALL)
    if solution_match:
        parsed["type"] = "solution"
        parsed["content"] = solution_match.group(1).strip()
        # Get thinking before solution
        text_before = output[:solution_match.start()].strip()
        text_before = re.sub(r'<think>(.*?)</think>', r'\1', text_before, flags=re.DOTALL)
        text_before = re.sub(r'={30,}.*?={30,}', '', text_before).strip()
        parsed["thinking"] = text_before if text_before else None
        return parsed

    # Clean up any remaining tags for display
    cleaned = re.sub(r'<think>(.*?)</think>', r'\1', output, flags=re.DOTALL)
    cleaned = re.sub(r'={30,}.*?={30,}', '', cleaned).strip()
    parsed["content"] = cleaned

    return parsed


def format_message_for_display(parsed_output):
    """Format parsed output into a readable message for the chatbot."""
    msg_parts = []

    # Add thinking/explanation text first if present
    if parsed_output.get("thinking"):
        msg_parts.append(parsed_output["thinking"])

    if parsed_output["type"] == "code":
        # Add separator if there was thinking text
        if parsed_output.get("thinking"):
            msg_parts.append("\n---\n")

        msg_parts.append("### 💻 Executing Code\n")
        msg_parts.append(f"```python\n{parsed_output['code']}\n```")

    elif parsed_output["type"] == "observation":
        # Add separator if there was thinking text
        if parsed_output.get("thinking"):
            msg_parts.append("\n---\n")

        msg_parts.append("### 📊 Observation\n")
        msg_parts.append(f"```\n{parsed_output['observation']}\n```")

    elif parsed_output["type"] == "solution":
        # Add separator if there was thinking text
        if parsed_output.get("thinking"):
            msg_parts.append("\n---\n")

        msg_parts.append("### ✅ Solution\n")
        msg_parts.append(parsed_output['content'])

    else:
        # For regular text, just add the content if thinking wasn't already set
        if not parsed_output.get("thinking"):
            msg_parts.append(parsed_output["content"])

    return "\n\n".join(msg_parts)


def process_agent_response(prompt, uploaded_file, chatbot_history):
    """Process the agent response and update the chatbot."""
    global agent

    if agent is None:
        chatbot_history.append({
            "role": "assistant",
            "content": "⚠️ Please enter the passcode first to initialize the agent."
        })
        yield chatbot_history, None, None, None, None, "⚠️ Agent not initialized"
        return

    if not prompt.strip() and uploaded_file is None:
        chatbot_history.append({
            "role": "assistant",
            "content": "⚠️ Please provide a prompt or upload a file."
        })
        yield chatbot_history, None, None, None, None, "⚠️ No input provided"
        return

    # Handle file upload
    file_path = None
    file_info = ""
    if uploaded_file is not None:
        try:
            # Create data directory if it doesn't exist
            data_dir = Path("./data")
            data_dir.mkdir(exist_ok=True)

            # Copy uploaded file to data directory
            file_name = Path(uploaded_file.name).name
            file_path = data_dir / file_name
            shutil.copy(uploaded_file.name, file_path)

            file_info = f"\n\n📎 **Uploaded file:** `{file_path}`\n"

            # Augment prompt with file path
            if prompt.strip():
                prompt = f"{prompt}\n\nUploaded file path: {file_path}"
            else:
                prompt = f"I have uploaded a file at: {file_path}. Please analyze it."

        except Exception as e:
            error_msg = f"❌ Error handling file upload: {str(e)}"
            chatbot_history.append({
                "role": "assistant",
                "content": error_msg
            })
            yield chatbot_history, None, None, None, None, error_msg
            return

    # Add user message to chat
    user_message = prompt if not file_info else f"{prompt}{file_info}"
    chatbot_history.append({"role": "user", "content": user_message})
    yield chatbot_history, None, None, None, None, "🔄 Processing..."

    try:
        # Stream agent responses
        step_count = 0
        for step in agent.go_stream(prompt):
            step_count += 1
            output = step.get("output", "")

            if output:
                # Parse the output
                parsed = parse_agent_output(output)

                # Add thinking text as a separate message if present
                if parsed.get("thinking"):
                    chatbot_history.append({
                        "role": "assistant",
                        "content": parsed["thinking"]
                    })

                # Add the block (code/observation/solution) as a separate message if present
                if parsed["type"] == "code" and parsed["code"]:
                    chatbot_history.append({
                        "role": "assistant",
                        "content": f"### 💻 Executing Code\n\n```python\n{parsed['code']}\n```"
                    })
                elif parsed["type"] == "observation" and parsed["observation"]:
                    chatbot_history.append({
                        "role": "assistant",
                        "content": f"### 📊 Observation\n\n```\n{parsed['observation']}\n```"
                    })
                elif parsed["type"] == "solution":
                    chatbot_history.append({
                        "role": "assistant",
                        "content": f"### ✅ Solution\n\n{parsed['content']}"
                    })
                elif parsed["type"] == "text" and parsed["content"]:
                    # Only add if we haven't already added it as thinking
                    if not parsed.get("thinking"):
                        chatbot_history.append({
                            "role": "assistant",
                            "content": parsed["content"]
                        })

                # Check for output files after each step
                images, data_files = check_for_output_files()

                # Create status message
                status = f"🔄 Step {step_count}"
                if parsed["type"] == "code":
                    status += " - Executing code..."
                elif parsed["type"] == "observation":
                    status += " - Processing results..."
                elif parsed["type"] == "solution":
                    status += " - Finalizing solution..."

                yield (
                    chatbot_history,
                    images if images else None,
                    data_files if data_files else None,
                    None,
                    None,
                    status
                )

        # Final check for files
        final_images, final_data = check_for_output_files()

        # Create download links message if files were generated
        if final_images or final_data:
            download_msg = "\n\n---\n\n### 📁 Generated Files Ready for Download\n\n"

            if final_images:
                download_msg += f"**🖼️ Images ({len(final_images)})** - Available in the **Images** tab →\n"
                for img_path in final_images:
                    img_name = Path(img_path).name
                    download_msg += f"- `{img_name}`\n"
                download_msg += "\n"

            if final_data:
                download_msg += f"**📄 Data Files ({len(final_data)})** - Available in the **Data** tab →\n"
                for data_path in final_data:
                    data_name = Path(data_path).name
                    download_msg += f"- `{data_name}`\n"

            download_msg += "\n*Click the download button on each file in the respective tabs above.*"

            # Add download message as a separate bubble
            chatbot_history.append({
                "role": "assistant",
                "content": download_msg
            })

        status = "✅ Complete"
        if final_images:
            status += f" | {len(final_images)} image(s)"
        if final_data:
            status += f" | {len(final_data)} data file(s)"

        yield chatbot_history, final_images if final_images else None, final_data if final_data else None, None, None, status

    except Exception as e:
        error_msg = f"❌ Error: {str(e)}\n\n```\n{traceback.format_exc()}\n```"
        chatbot_history.append({
            "role": "assistant",
            "content": error_msg
        })
        yield chatbot_history, None, None, None, None, "❌ Error occurred"


def validate_passcode(passcode):
    """Validate the passcode and initialize the agent."""
    global agent

    if passcode == PASSCODE:
        # Initialize agent
        try:
            agent = A1()
            return (
                gr.update(visible=False),  # Hide passcode section
                gr.update(visible=True),   # Show main interface
                "✅ Access granted! Agent initialized and ready."
            )
        except Exception as e:
            error_trace = traceback.format_exc()
            return (
                gr.update(visible=True),
                gr.update(visible=False),
                f"❌ Error initializing agent:\n{str(e)}\n\n{error_trace}"
            )
    else:
        return (
            gr.update(visible=True),
            gr.update(visible=False),
            "❌ Invalid passcode. Please try again."
        )


def clear_chat():
    """Clear the chat history and output files."""
    # Clean up the output directory, then recreate it empty
    output_dir = Path("./output")
    if output_dir.exists():
        shutil.rmtree(output_dir)
    output_dir.mkdir(exist_ok=True)

    # Clean up data directory
    data_dir = Path("./data")
    if data_dir.exists():
        for file in data_dir.iterdir():
            if file.is_file():
                file.unlink()

    return [], None, None, None, None, "🗑️ Chat cleared"


# Create Gradio interface with custom theme
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    spacing_size="sm",
    radius_size="md",
).set(
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    block_label_text_weight="600",
    block_title_text_weight="600",
)

with gr.Blocks(title="HistoPath Agent", theme=custom_theme, css="""
.gradio-container {
    max-width: 100% !important;
}
.main-header {
    text-align: center;
    padding: 1.5rem 0;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    border-radius: 8px;
    margin-bottom: 1.5rem;
}
.main-header h1 {
    margin: 0;
    font-size: 2.2rem;
    font-weight: 700;
}
.main-header p {
    margin: 0.5rem 0 0 0;
    opacity: 0.95;
    font-size: 1.1rem;
}
.file-upload-box .wrap {
    min-width: 0 !important;
}
.file-upload-box .file-name {
    word-break: break-word !important;
    white-space: normal !important;
    overflow-wrap: break-word !important;
}
.tab-nav {
    margin-bottom: 0.5rem;
}
/* Better styling for code and observation blocks */
.message.bot pre {
    background-color: #f6f8fa !important;
    border: 1px solid #d0d7de !important;
    border-radius: 6px !important;
    padding: 12px !important;
    margin: 8px 0 !important;
}
.message.bot h3 {
    margin-top: 12px !important;
    margin-bottom: 8px !important;
    font-weight: 600 !important;
}
.message.bot hr {
    border: none !important;
    border-top: 2px solid #e1e4e8 !important;
    margin: 16px 0 !important;
}
""") as demo:

    # Header
    gr.HTML("""
    <div class="main-header">
        <h1>🔬 HistoPath Agent</h1>
        <p>AI-Powered Histopathology Analysis Assistant</p>
    </div>
    """)

    # Passcode section
    with gr.Group(visible=True) as passcode_section:
        gr.Markdown("### 🔐 Authentication Required")

        with gr.Row():
            passcode_input = gr.Textbox(
                label="Passcode",
                type="password",
                placeholder="Enter your passcode...",
                scale=3
            )
            passcode_btn = gr.Button("🔓 Unlock", variant="primary", scale=1, size="lg")

        passcode_status = gr.Textbox(
            label="Status",
            interactive=False,
            lines=2
        )

    # Main interface (hidden initially)
    with gr.Group(visible=False) as main_interface:
        with gr.Row(equal_height=True):
            # Left column - Chat interface
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="💬 Conversation",
                    height=550,
                    type="messages",
                    show_label=True,
                    avatar_images=(None, "🤖"),
                    render_markdown=True,
                )

                # Input area
                with gr.Row():
                    with gr.Column(scale=7):
                        prompt_input = gr.Textbox(
                            label="Your Query",
                            placeholder="E.g., 'Caption the uploaded whole slide image' or 'Segment cells using the instanseg model'",
                            lines=2,
                            max_lines=5,
                            show_label=False,
                        )
                    with gr.Column(scale=3):
                        file_upload = gr.File(
                            label="📎 Upload File",
                            file_types=[".svs", ".png", ".jpg", ".jpeg", ".tif", ".tiff", ".csv", ".txt", ".json", ".npy"],
                            height=75,
                            elem_classes="file-upload-box",
                        )

                with gr.Row():
                    submit_btn = gr.Button("🚀 Submit", variant="primary", scale=3, size="lg")
                    clear_btn = gr.Button("🗑️ Clear", scale=1, size="lg", variant="secondary")

                status_text = gr.Textbox(
                    label="Status",
                    interactive=False,
                    value="Ready",
                    show_label=False,
                    container=False,
                )

            # Right column - Outputs
            with gr.Column(scale=2):
                with gr.Tabs():
                    with gr.Tab("📥 Input"):
                        with gr.Column():
                            input_image_preview = gr.Image(
                                label="Input Image",
                                height=400,
                                show_label=False,
                                container=True,
                            )
                            input_file_preview = gr.File(
                                label="Input File",
                                interactive=False,
                                height=100,
                                show_label=False,
                                container=True,
                            )
                            input_status = gr.Textbox(
                                value="Upload a file to preview",
                                show_label=False,
                                interactive=False,
                                container=False,
                            )

                    with gr.Tab("🖼️ Images"):
                        output_gallery = gr.Gallery(
                            label="Generated Visualizations",
                            columns=1,
                            height=600,
                            object_fit="contain",
                            show_label=False,
                            show_download_button=True,
                        )

                    with gr.Tab("📄 Data"):
                        data_files = gr.File(
                            label="Generated Data Files",
                            file_count="multiple",
                            interactive=False,
                            height=600,
                            show_label=False,
                        )

    # Event handlers
    passcode_btn.click(
        fn=validate_passcode,
        inputs=[passcode_input],
        outputs=[passcode_section, main_interface, passcode_status]
    )

    # File upload preview
    file_upload.change(
        fn=preview_uploaded_file,
        inputs=[file_upload],
        outputs=[input_image_preview, input_file_preview, input_status]
    )

    submit_btn.click(
        fn=process_agent_response,
        inputs=[prompt_input, file_upload, chatbot],
        outputs=[chatbot, output_gallery, data_files, input_image_preview, input_file_preview, status_text]
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, output_gallery, data_files, input_image_preview, input_file_preview, status_text]
    )

    # Allow enter key to submit
    prompt_input.submit(
        fn=process_agent_response,
        inputs=[prompt_input, file_upload, chatbot],
        outputs=[chatbot, output_gallery, data_files, input_image_preview, input_file_preview, status_text]
    )


if __name__ == "__main__":
    # Create necessary directories
    Path("./data").mkdir(exist_ok=True)
    Path("./output").mkdir(exist_ok=True)

    print("=" * 60)
    print("🔬 HistoPath Agent - Gradio Interface")
    print("=" * 60)
    print(f"Passcode: {PASSCODE}")
    print("Starting server...")
    print("=" * 60)

    # Launch the app
    demo.launch(
        server_name="0.0.0.0",
        server_port=None,  # Let Gradio auto-pick an available port
        share=False,
        show_error=True,
    )
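
A quick illustration of how `parse_agent_output` above splits one agent turn into a thinking part and a code part (the sample string is made up, but the assertions follow directly from the regexes in the function):

```python
# Build a sample turn with a divider bar, a <think> block, and an <execute> block
bar = "=" * 40
sample = (
    f"{bar} Ai Message {bar}\n"
    "<think>Plan: count nuclei first.</think>\n"
    "<execute>print('hello')</execute>"
)

parsed = parse_agent_output(sample)
assert parsed["type"] == "code"
assert parsed["code"] == "print('hello')"
assert parsed["thinking"] == "Plan: count nuclei first."
```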
histopath/__init__.py ADDED
File without changes
histopath/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (171 Bytes).

histopath/__pycache__/env_desc.cpython-311.pyc ADDED
Binary file (858 Bytes).

histopath/__pycache__/llm.cpython-311.pyc ADDED
Binary file (8.72 kB).

histopath/__pycache__/utils.cpython-311.pyc ADDED
Binary file (34.5 kB).
histopath/agent/__init__.py ADDED
@@ -0,0 +1 @@

from histopath.agent.agent import A1
histopath/agent/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (246 Bytes).

histopath/agent/__pycache__/agent.cpython-311.pyc ADDED
Binary file (30.3 kB).
histopath/agent/agent.py ADDED
@@ -0,0 +1,705 @@

import os
import re
import glob
import inspect
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv
from collections.abc import Generator
from typing import Any, Literal, TypedDict
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, StateGraph
from histopath.env_desc import library_content_dict
from histopath.llm import SourceType, get_llm
from histopath.model.retriever import ToolRetriever
from histopath.tool.support_tools import run_python_repl
from histopath.tool.tool_registry import ToolRegistry
from histopath.utils import (
    pretty_print,
    read_module2api,
    run_bash_script,
    run_with_timeout,
    textify_api_dict,
)

if os.path.exists(".env"):
    load_dotenv(".env", override=False)
    print("Loaded environment variables from .env")


class AgentState(TypedDict):
    messages: list[BaseMessage]
    next_step: str | None


class A1:
    def __init__(
        self,
        path="./data",
        llm="claude-sonnet-4-20250514",
        source: SourceType | None = None,
        use_tool_retriever=True,
        timeout_seconds=600,
        base_url: str | None = None,
        api_key: str = "EMPTY",
    ):
        """Initialize the HistoPath agent.

        Args:
            path: Path to the data
            llm: LLM to use for the agent
            source (str): Source provider: "OpenAI", "AzureOpenAI", "Anthropic", "Ollama", "Gemini", "Bedrock", "HuggingFace", or "Custom"
            use_tool_retriever: If True, use a tool retriever
            timeout_seconds: Timeout for code execution in seconds
            base_url: Base URL for custom model serving (e.g., "http://localhost:8000/v1")
            api_key: API key for the custom LLM

        """
        self.path = path

        if not os.path.exists(path):
            os.makedirs(path)
            print(f"Created directory: {path}")

        self.path = os.path.join(path, "histopath_data")
        module2api = read_module2api()

        self.llm = get_llm(
            llm, stop_sequences=["</execute>", "</solution>"], source=source, base_url=base_url, api_key=api_key
        )
        self.module2api = module2api
        self.use_tool_retriever = use_tool_retriever

        if self.use_tool_retriever:
            self.tool_registry = ToolRegistry(module2api)
            self.retriever = ToolRetriever()

        # Code-execution timeout (default 600 seconds = 10 minutes)
        self.timeout_seconds = timeout_seconds
        self.configure()

    ###########################
    # Agent Prompting Section #
    ###########################

    def _generate_system_prompt(
        self,
        tool_desc,
        library_content_list,
        self_critic=False,
        is_retrieval=False,
    ):
        """Generate the system prompt based on the provided resources.

        Args:
            tool_desc: Dictionary of tool descriptions
            library_content_list: List of libraries
            self_critic: Whether to include self-critic instructions
            is_retrieval: Whether this is for retrieval (True) or initial configuration (False)

        Returns:
            The generated system prompt

        """

        def format_item_with_description(name, description):
            """Format an item with its description in a readable way."""
            # Handle None or empty descriptions
            if not description:
                description = f"Library or Tooling Item: {name}"

            # Check if the item is already formatted (contains a colon)
            if isinstance(name, str) and ": " in name:
                return name

            # Wrap long descriptions to make them more readable
            max_line_length = 80
            if len(description) > max_line_length:
                # Simple wrapping for long descriptions
                wrapped_desc = []
                words = description.split()
                current_line = ""

                for word in words:
                    if len(current_line) + len(word) + 1 <= max_line_length:
                        if current_line:
                            current_line += " " + word
                        else:
                            current_line = word
                    else:
                        wrapped_desc.append(current_line)
                        current_line = word

                if current_line:
                    wrapped_desc.append(current_line)

                # Join with newlines and proper indentation
                formatted_desc = f"{name}:\n " + "\n ".join(wrapped_desc)
                return formatted_desc
            else:
                return f"{name}: {description}"

        # (A dead normalization pass that reset library_content_list to [] and then
        # iterated over the empty list was removed here; items are handled per-type below.)

        # Format the default library content
        if isinstance(library_content_list, list) and all(
            isinstance(item, str) for item in library_content_list
        ):
            if (
                len(library_content_list) > 0
                and isinstance(library_content_list[0], str)
                and "," not in library_content_list[0]
            ):
                # Simple list of strings
                libraries_formatted = []
                for lib in library_content_list:
                    description = self.library_content_dict.get(lib, f"Software library: {lib}")
                    libraries_formatted.append(format_item_with_description(lib, description))
            else:
                # Already formatted string
                libraries_formatted = library_content_list
        else:
            # List with descriptions
            libraries_formatted = []
            for lib in library_content_list:
                if isinstance(lib, dict):
                    name = lib.get("name", "")
                    description = self.library_content_dict.get(name, f"Software library: {name}")
                    libraries_formatted.append(format_item_with_description(name, description))
                else:
                    description = self.library_content_dict.get(lib, f"Software library: {lib}")
                    libraries_formatted.append(format_item_with_description(lib, description))

        # Base prompt
        prompt_modifier = """
You are a helpful histopathology researcher assigned with the task of problem-solving.
To achieve this, you will be using an interactive coding environment equipped with a variety of tool functions and software to assist you throughout the process.

Given a task, make a plan first. The plan should be a numbered list of steps that you will take to solve the task. Be specific and detailed.
Format your plan as a checklist with empty checkboxes like this:
1. [ ] First step
2. [ ] Second step
3. [ ] Third step

Follow the plan step by step. After completing each step, update the checklist by replacing the empty checkbox with a checkmark:
1. [✓] First step (completed)
2. [ ] Second step
3. [ ] Third step

If a step fails or needs modification, mark it with an X and explain why:
1. [✓] First step (completed)
2. [✗] Second step (failed because...)
3. [ ] Modified second step
4. [ ] Third step

Always show the updated plan after each step so the user can track progress.

At each turn, you should first provide your thinking and reasoning given the conversation history.
After that, you have two options:

1) Interact with a programming environment and receive the corresponding output within <observe></observe>. Your code should be enclosed using "<execute>" tag, for example: <execute> print("Hello World!") </execute>. IMPORTANT: You must end the code block with </execute> tag.
   - For Python code (default): <execute> print("Hello World!") </execute>
   - For Bash scripts and commands: <execute> #!BASH\necho "Hello from Bash"\nls -la </execute>
   - For CLI software, use Bash scripts.

2) When you think it is ready, directly provide a solution that adheres to the required format for the given task to the user. Your solution should be enclosed using "<solution>" tag, for example: The answer is <solution> A </solution>. IMPORTANT: You must end the solution block with </solution> tag.

You have many chances to interact with the environment to receive the observation. So you can decompose your code into multiple steps.
Don't overcomplicate the code. Keep it simple and easy to understand.
When writing the code, please print out the steps and results in a clear and concise manner, like a research log.
When calling the existing python functions in the function dictionary, YOU MUST SAVE THE OUTPUT and PRINT OUT the result.
For example, result = understand_scRNA(XXX) print(result)
Otherwise the system will not be able to know what has been done.

For Bash scripts and commands, use the #!BASH marker at the beginning of your code block. This allows for both simple commands and multi-line scripts with variables, loops, conditionals, and other Bash features.

In each response, you must include EITHER <execute> or <solution> tag. Not both at the same time. Do not respond with messages without any tags. No empty messages.

If a task is not related to histopathology or to any surrounding concepts within pathology, you should not execute your plan at all.

If you have no knowledge of a provided library that is highly useful for a given task (such as the especially useful LazySlide package), do a thorough exploration of the library's capabilities prior to experimentation.
"""

        # Add self-critic instructions if needed
        if self_critic:
            prompt_modifier += """
You may or may not receive feedback from a human. If so, address the feedback by following the same procedure of multiple rounds of thinking, execution, and then coming up with a new solution.
"""

        # Add environment resources
        prompt_modifier += """

Environment Resources:

- Function Dictionary:
{function_intro}
---
{tool_desc}
---

{import_instruction}

- Software Library:
{library_intro}
Each library is listed with its description to help you understand its functionality.
----
{library_content_formatted}
----

- Note on using Bash scripts:
  - Bash scripts and commands: Use the #!BASH marker in your execute block for both simple commands and complex shell scripts with variables, loops, conditionals, etc.
"""

        # Set appropriate text based on whether this is initial configuration or after retrieval
        if is_retrieval:
            function_intro = "Based on your query, I've identified the following most relevant functions that you can use in your code:"
            library_intro = (
                "Based on your query, I've identified the following most relevant libraries that you can use:"
            )
            import_instruction = "IMPORTANT: When using any function, you MUST first import it from its module. For example:\nfrom [module_name] import [function_name]"
        else:
            function_intro = "In your code, you will need to import the function location using the following dictionary of functions:"
            library_intro = "The environment supports a list of libraries that can be directly used. Do not forget the import statement:"
            import_instruction = ""

        # Format the content consistently for both initial and retrieval cases
        library_content_formatted = "\n".join(libraries_formatted)

        # Format the prompt with the appropriate values
        format_dict = {
            "function_intro": function_intro,
            "tool_desc": textify_api_dict(tool_desc) if isinstance(tool_desc, dict) else tool_desc,
            "import_instruction": import_instruction,
            "library_intro": library_intro,
            "library_content_formatted": library_content_formatted,
        }

        formatted_prompt = prompt_modifier.format(**format_dict)

        return formatted_prompt

    def configure(self, self_critic=False, test_time_scale_round=0):
        """Configure the agent with the initial system prompt and workflow.

        Args:
            self_critic: Whether to enable self-critic mode
            test_time_scale_round: Number of rounds for test time scaling

        """
        # Store self_critic for later use
        self.self_critic = self_critic

        # Store library_content_dict directly without library_content
        self.library_content_dict = library_content_dict

        # Prepare tool descriptions
        tool_desc = {i: [x for x in j if x["name"] != "run_python_repl"] for i, j in self.module2api.items()}

        # Prepare library content list
        library_content_list = list(self.library_content_dict.keys())

        self.system_prompt = self._generate_system_prompt(
            tool_desc=tool_desc,
            library_content_list=library_content_list,
            self_critic=self_critic,
            is_retrieval=False
        )

        # Define the nodes
        def generate(state: AgentState) -> AgentState:
            messages = [SystemMessage(content=self.system_prompt)] + state["messages"]
            response = self.llm.invoke(messages)

            # Parse the response
            msg = str(response.content)

            # Check for incomplete tags and fix them
            if "<execute>" in msg and "</execute>" not in msg:
                msg += "</execute>"
            if "<solution>" in msg and "</solution>" not in msg:
                msg += "</solution>"
            if "<think>" in msg and "</think>" not in msg:
                msg += "</think>"

            think_match = re.search(r"<think>(.*?)</think>", msg, re.DOTALL)
            execute_match = re.search(r"<execute>(.*?)</execute>", msg, re.DOTALL)
            answer_match = re.search(r"<solution>(.*?)</solution>", msg, re.DOTALL)

            # Add the message to the state before checking for errors
            state["messages"].append(AIMessage(content=msg.strip()))

            if answer_match:
                state["next_step"] = "end"
            elif execute_match:
                state["next_step"] = "execute"
            elif think_match:
                state["next_step"] = "generate"
            else:
                print("parsing error...")
                # Check if we already added a correction prompt, to avoid infinite loops.
                # The correction below is appended as a HumanMessage, so count those.
                error_count = sum(
                    1
                    for m in state["messages"]
                    if isinstance(m, HumanMessage) and "there are no tags" in m.content
                )

                if error_count >= 2:
                    # If we've already tried to correct the model twice, just end the conversation
                    print("Detected repeated parsing errors, ending conversation")
                    state["next_step"] = "end"
                    # Add a final message explaining the termination
                    state["messages"].append(
                        AIMessage(
                            content="Execution terminated due to repeated parsing errors. Please check your input and try again."
                        )
                    )
                else:
                    # Try to correct it
                    state["messages"].append(
                        HumanMessage(
                            content="Each response must include a thinking process followed by either an <execute> or a <solution> tag. But there are no tags in the current response. Please follow the instructions, fix, and regenerate the response."
                        )
                    )
                    state["next_step"] = "generate"
            return state

        def execute(state: AgentState) -> AgentState:
            last_message = state["messages"][-1].content
            # Only add the closing tag if it's not already there
            if "<execute>" in last_message and "</execute>" not in last_message:
                last_message += "</execute>"

            execute_match = re.search(r"<execute>(.*?)</execute>", last_message, re.DOTALL)
            if execute_match:
                code = execute_match.group(1)

                # Use the configured code-execution timeout (default 600 seconds)
                timeout = self.timeout_seconds

                # Check if the code is a Bash script or CLI command
                if (
                    code.strip().startswith("#!BASH")
                    or code.strip().startswith("# Bash script")
                    or code.strip().startswith("#!CLI")
                ):
                    # Handle both Bash scripts and CLI commands with the same function
                    if code.strip().startswith("#!CLI"):
                        # For CLI commands, extract the command and run it as a simple bash script
                        cli_command = re.sub(r"^#!CLI", "", code, count=1).strip()
                        # Remove any newlines to ensure it's a single command
                        cli_command = cli_command.replace("\n", " ")
                        result = run_with_timeout(run_bash_script, [cli_command], timeout=timeout)
                    else:
                        # For Bash scripts, remove the marker and run as a bash script
                        bash_script = re.sub(r"^#!BASH|^# Bash script", "", code, count=1).strip()
                        result = run_with_timeout(run_bash_script, [bash_script], timeout=timeout)
                # Otherwise, run as Python code
                else:
                    result = run_with_timeout(run_python_repl, [code], timeout=timeout)

                if len(result) > 10000:
                    result = (
                        "The output is too long to be added to context. Here are the first 10K characters...\n"
                        + result[:10000]
                    )
                observation = f"\n<observation>{result}</observation>"
                state["messages"].append(AIMessage(content=observation.strip()))

            return state

        def routing_function(
            state: AgentState,
        ) -> Literal["execute", "generate", "end"]:
            next_step = state.get("next_step")
            if next_step == "execute":
                return "execute"
            elif next_step == "generate":
                return "generate"
            elif next_step == "end":
                return "end"
            else:
                raise ValueError(f"Unexpected next_step: {next_step}")

        def routing_function_self_critic(
            state: AgentState,
        ) -> Literal["generate", "end"]:
            next_step = state.get("next_step")
            if next_step == "generate":
                return "generate"
            elif next_step == "end":
                return "end"
            else:
                raise ValueError(f"Unexpected next_step: {next_step}")

        def execute_self_critic(state: AgentState) -> AgentState:
            if self.critic_count < test_time_scale_round:
                # Generate feedback based on message history
                messages = state["messages"]
                feedback_prompt = f"""
Here is a reminder of what the user requested: {self.user_task}
Examine the previous executions, reasoning, and solutions.
Criticize harshly: what could be improved?
Be specific and constructive.
Think hard about what is missing to solve the task.
No questions asked, just feedback.
"""
                feedback = self.llm.invoke(messages + [HumanMessage(content=feedback_prompt)])

                # Add feedback as a new message
                state["messages"].append(
                    HumanMessage(
                        content=f"Wait... this is not enough to solve the task. Here is some feedback for improvement:\n{feedback.content}"
                    )
                )
                self.critic_count += 1
                state["next_step"] = "generate"
            else:
                state["next_step"] = "end"

            return state

        # Create the workflow
        workflow = StateGraph(AgentState)

        # Add nodes
        workflow.add_node("generate", generate)
        workflow.add_node("execute", execute)

        if self_critic:
            workflow.add_node("self_critic", execute_self_critic)
            # Add conditional edges
            workflow.add_conditional_edges(
                "generate",
                routing_function,
                path_map={
                    "execute": "execute",
                    "generate": "generate",
                    "end": "self_critic",
                },
            )
            workflow.add_conditional_edges(
                "self_critic",
                routing_function_self_critic,
                path_map={"generate": "generate", "end": END},
            )
        else:
            # Add conditional edges
            workflow.add_conditional_edges(
                "generate",
                routing_function,
                path_map={"execute": "execute", "generate": "generate", "end": END},
            )
        workflow.add_edge("execute", "generate")
        workflow.add_edge(START, "generate")

        # Compile the workflow
        self.app = workflow.compile()
        self.checkpointer = MemorySaver()
        self.app.checkpointer = self.checkpointer
        # display(Image(self.app.get_graph().draw_mermaid_png()))

    def _prepare_resources_for_retrieval(self, prompt):
        """Prepare resources for retrieval and return selected resource names.

        Args:
            prompt: The user's query

        Returns:
            dict: Dictionary containing selected resource names for tools and libraries
        """
        if not self.use_tool_retriever:
            return None

        # Gather all available resources
        # 1. Tools from the registry
        all_tools = self.tool_registry.tools if hasattr(self, "tool_registry") else []

        # 2. Libraries with descriptions - use library_content_dict directly
        library_descriptions = []
        for lib_name, lib_desc in self.library_content_dict.items():
            library_descriptions.append({"name": lib_name, "description": lib_desc})

        # Use retrieval to get relevant resources
        resources = {
            "tools": all_tools,
            "libraries": library_descriptions,
        }

        # Use prompt-based retrieval with the agent's LLM
        selected_resources = self.retriever.prompt_based_retrieval(prompt, resources, llm=self.llm)
        print("Using prompt-based retrieval with the agent's LLM")

        # Extract the names from the selected resources for the system prompt
        selected_resources_names = {
            "tools": selected_resources["tools"],
            "libraries": [lib["name"] if isinstance(lib, dict) else lib for lib in selected_resources["libraries"]],
        }

        return selected_resources_names

    def go(self, prompt):
        """Execute the agent with the given prompt.

        Args:
            prompt: The user's query

        """
        self.critic_count = 0
        self.user_task = prompt

        if self.use_tool_retriever:
            selected_resources_names = self._prepare_resources_for_retrieval(prompt)
            self.update_system_prompt_with_selected_resources(selected_resources_names)

        inputs = {"messages": [HumanMessage(content=prompt)], "next_step": None}
        config = {"recursion_limit": 500, "configurable": {"thread_id": 42}}
        self.log = []

        for s in self.app.stream(inputs, stream_mode="values", config=config):
            message = s["messages"][-1]
            out = pretty_print(message)
            self.log.append(out)

        return self.log, message.content

    def go_stream(self, prompt, image_path=None) -> Generator[dict, None, None]:
        """Execute the agent with the given prompt and return a generator that yields each step.

        This function returns a generator that yields each step of the agent's execution,
        allowing for real-time monitoring of the agent's progress.

        Args:
            prompt: The user's query
            image_path: Optional path to an uploaded file, appended to the task description

        Yields:
            dict: Each step of the agent's execution containing the current message and state
        """
        self.critic_count = 0
        self.user_task = prompt
        if image_path:
            # f-string so the actual path is interpolated into the task description
            self.user_task += f"""
\nUser uploaded this file:\n
{image_path}
Please use it if needed.
"""

        if self.use_tool_retriever:
            selected_resources_names = self._prepare_resources_for_retrieval(prompt)
            self.update_system_prompt_with_selected_resources(selected_resources_names)

        inputs = {"messages": [HumanMessage(content=prompt)], "next_step": None}
        config = {"recursion_limit": 500, "configurable": {"thread_id": 42}}
        self.log = []

        for s in self.app.stream(inputs, stream_mode="values", config=config):
            message = s["messages"][-1]
            out = pretty_print(message)
            self.log.append(out)

            # Yield the current step
            yield {"output": out}

    def update_system_prompt_with_selected_resources(self, selected_resources):
        """Update the system prompt with the selected resources."""
        # Extract tool descriptions for the selected tools
        tool_desc = {}
        for tool in selected_resources["tools"]:
            # Get the module name from the tool
            if isinstance(tool, dict):
                module_name = tool.get("module", None)

                # If module is not specified, try to find it in the module2api
                if not module_name and hasattr(self, "module2api"):
                    for mod, apis in self.module2api.items():
                        for api in apis:
                            if api.get("name") == tool.get("name"):
                                module_name = mod
                                # Update the tool with the module information
                                tool["module"] = module_name
                                break
                        if module_name:
                            break

                # If still not found, use a default
                if not module_name:
                    module_name = "histopath.tool.scRNA_tools"  # Default to scRNA_tools as a fallback
                    tool["module"] = module_name
            else:
                module_name = getattr(tool, "module_name", None)

                # If module is not specified, try to find it in the module2api
                if not module_name and hasattr(self, "module2api"):
                    tool_name = getattr(tool, "name", str(tool))
                    for mod, apis in self.module2api.items():
                        for api in apis:
                            if api.get("name") == tool_name:
                                module_name = mod
                                # Set the module_name attribute
                                tool.module_name = module_name
                                break
                        if module_name:
                            break

                # If still not found, use a default
                if not module_name:
                    module_name = "histopath.tool.scRNA_tools"  # Default to scRNA_tools as a fallback
                    tool.module_name = module_name

            if module_name not in tool_desc:
                tool_desc[module_name] = []

            # Add the tool to the appropriate module
            if isinstance(tool, dict):
                # Ensure the module is included in the tool description
                if "module" not in tool:
                    tool["module"] = module_name
                tool_desc[module_name].append(tool)
            else:
                # Convert tool object to dictionary
                tool_dict = {
                    "name": getattr(tool, "name", str(tool)),
                    "description": getattr(tool, "description", ""),
                    "parameters": getattr(tool, "parameters", {}),
                    "module": module_name,  # Explicitly include the module
                }
                tool_desc[module_name].append(tool_dict)

        self.system_prompt = self._generate_system_prompt(
            tool_desc=tool_desc,
            library_content_list=selected_resources["libraries"],
            self_critic=getattr(self, "self_critic", False),
            is_retrieval=True,
        )

        # Print the raw system prompt for debugging
        # print("\n" + "="*20 + " RAW SYSTEM PROMPT FROM AGENT " + "="*20)
        # print(self.system_prompt)
        # print("="*70 + "\n")

    def result_formatting(self, output_class, task_intention):
        self.format_check_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    (
                        "You are evaluateGPT, tasked with extracting and parsing the task output based on the history of an agent. "
                        "Review the entire history of messages provided. "
                        "Here is the task output requirement: \n"
                        f"'{task_intention.replace('{', '{{').replace('}', '}}')}'.\n"
                    ),
                ),
                ("placeholder", "{messages}"),
            ]
        )

        checker_llm = self.format_check_prompt | self.llm.with_structured_output(output_class)
        result = checker_llm.invoke({"messages": [("user", str(self.log))]}).dict()
        return result
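
Two conventions from `agent.py` worth noting: an `<execute>` block starting with `#!BASH` is routed to `run_bash_script` instead of the Python REPL, and `go_stream()` yields one dict per agent step, which is exactly what `app.py` renders. A minimal sketch of the streaming loop, assuming the histopath environment and API keys are set up; the query and slide path are illustrative:

```python
from histopath.agent import A1

agent = A1(use_tool_retriever=False, timeout_seconds=300)

# Each yielded dict carries the pretty-printed message for that step:
# planning text, <execute> code, an <observation>, or the final <solution>.
for step in agent.go_stream("Estimate TIL density for ./data/slide.svs"):
    print(step["output"])
```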
histopath/config.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HistoPath Configuration Management
3
+
4
+ Simple configuration class for centralizing common settings.
5
+ Maintains full backward compatibility with existing code.
6
+ """
7
+
8
+ import os
9
+ from dataclasses import dataclass
10
+
11
+
12
+ @dataclass
13
+ class HistoPathConfig:
14
+ """Central configuration for the HistoPath agent.
15
+
16
+ All settings are optional and have sensible defaults.
17
+ API keys are still read from environment variables to maintain
18
+ compatibility with existing .env file structure.
19
+
20
+ Usage:
21
+ # Create config with defaults
22
+ config = HistoPathConfig()
23
+
24
+ # Override specific settings
25
+ config = HistoPathConfig(llm="gpt-4", timeout_seconds=1200)
26
+
27
+ # Modify after creation
28
+ config.path = "./custom_data"
29
+ """
30
+
31
+ # Data and execution settings
32
+ path: str = "./data"
33
+ timeout_seconds: int = 600
34
+
35
+ # LLM settings (API keys still from environment)
36
+ llm: str = "claude-sonnet-4-20250514"
37
+ temperature: float = 0.7
38
+
39
+ # Tool settings
40
+ use_tool_retriever: bool = True
41
+
42
+ # Data licensing settings
43
+ commercial_mode: bool = False # If True, excludes non-commercial datasets
44
+
45
+ # Custom model settings (for custom LLM serving)
46
+ base_url: str | None = None
47
+ api_key: str | None = None # Only for custom models, not provider API keys
48
+
49
+ # LLM source (auto-detected if None)
50
+ source: str | None = None
51
+
52
+ def __post_init__(self):
53
+ """Load any environment variable overrides if they exist."""
54
+ # Check for environment variable overrides (optional)
55
+ # Support both old and new names for backwards compatibility
56
+ if os.getenv("HISTOPATH_PATH") or os.getenv("HISTOPATH_DATA_PATH"):
57
+ self.path = os.getenv("HISTOPATH_PATH") or os.getenv("HISTOPATH_DATA_PATH")
58
+ if os.getenv("HISTOPATH_TIMEOUT_SECONDS"):
59
+ self.timeout_seconds = int(os.getenv("HISTOPATH_TIMEOUT_SECONDS"))
60
+ if os.getenv("HISTOPATH_LLM") or os.getenv("HISTOPATH_LLM_MODEL"):
61
+ self.llm = os.getenv("HISTOPATH_LLM") or os.getenv("HISTOPATH_LLM_MODEL")
62
+ if os.getenv("HISTOPATH_USE_TOOL_RETRIEVER"):
63
+ self.use_tool_retriever = os.getenv("HISTOPATH_USE_TOOL_RETRIEVER").lower() == "true"
64
+ if os.getenv("HISTOPATH_COMMERCIAL_MODE"):
65
+ self.commercial_mode = os.getenv("HISTOPATH_COMMERCIAL_MODE").lower() == "true"
66
+ if os.getenv("HISTOPATH_TEMPERATURE"):
67
+ self.temperature = float(os.getenv("HISTOPATH_TEMPERATURE"))
68
+ if os.getenv("HISTOPATH_CUSTOM_BASE_URL"):
69
+ self.base_url = os.getenv("HISTOPATH_CUSTOM_BASE_URL")
70
+ if os.getenv("HISTOPATH_CUSTOM_API_KEY"):
71
+ self.api_key = os.getenv("HISTOPATH_CUSTOM_API_KEY")
72
+ if os.getenv("HISTOPATH_SOURCE"):
73
+ self.source = os.getenv("HISTOPATH_SOURCE")
74
+
75
+ def to_dict(self) -> dict:
76
+ """Convert config to dictionary for easy access."""
77
+ return {
78
+ "path": self.path,
79
+ "timeout_seconds": self.timeout_seconds,
80
+ "llm": self.llm,
81
+ "temperature": self.temperature,
82
+ "use_tool_retriever": self.use_tool_retriever,
83
+ "commercial_mode": self.commercial_mode,
84
+ "base_url": self.base_url,
85
+ "api_key": self.api_key,
86
+ "source": self.source,
87
+ }
88
+
89
+
90
+ # Global default config instance (optional, for convenience)
91
+ default_config = HistoPathConfig()
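Not part of the commit — a quick sketch of the override precedence (environment variables are applied in `__post_init__`, after constructor arguments); values are illustrative:

```python
import os
from histopath.config import HistoPathConfig

os.environ["HISTOPATH_TIMEOUT_SECONDS"] = "1200"

config = HistoPathConfig(llm="gpt-4o")       # constructor override for the model
print(config.llm)                            # "gpt-4o"
print(config.timeout_seconds)                # 1200, read from the environment
print(config.to_dict()["commercial_mode"])   # False (default)
```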
histopath/llm.py ADDED
@@ -0,0 +1,235 @@
1
+ import os
2
+ import openai
3
+ from typing import TYPE_CHECKING, Literal, Optional
4
+ from langchain_core.language_models.chat_models import BaseChatModel
5
+
6
+ if TYPE_CHECKING: from histopath.config import HistoPathConfig
7
+
8
+ SourceType = Literal["OpenAI", "AzureOpenAI", "Anthropic", "Ollama", "Gemini", "Bedrock", "Groq", "HuggingFace", "Custom"]
9
+ ALLOWED_SOURCES: set[str] = set(SourceType.__args__)
10
+
11
+
12
+ def get_llm(
13
+ model: str | None = None,
14
+ temperature: float | None = None,
15
+ stop_sequences: list[str] | None = None,
16
+ source: SourceType | None = None,
17
+ base_url: str | None = None,
18
+ api_key: str | None = None,
19
+ config: Optional["HistoPathConfig"] = None,
20
+ ) -> BaseChatModel:
21
+ """
22
+ Get a language model instance based on the specified model name and source.
23
+ This function supports models from OpenAI, Azure OpenAI, Anthropic, Ollama, Gemini, Bedrock, Groq, HuggingFace, and custom model serving.
24
+ Args:
25
+ model (str): The model name to use
26
+ temperature (float): Temperature setting for generation
27
+ stop_sequences (list): Sequences that will stop generation
28
+ source (str): Source provider: "OpenAI", "AzureOpenAI", "Anthropic", "Ollama", "Gemini", "Bedrock", or "Custom"
29
+ If None, will attempt to auto-detect from model name
30
+ base_url (str): The base URL for custom model serving (e.g., "http://localhost:8000/v1"), default is None
31
+ api_key (str): The API key for the custom llm
32
+ config (HistoPathConfig): Optional configuration object. If provided, unspecified parameters will use config values
33
+ """
34
+ # Use config values for any unspecified parameters
35
+ if config is not None:
36
+ if model is None:
37
+ model = config.llm  # HistoPathConfig stores the model name in .llm
38
+ if temperature is None:
39
+ temperature = config.temperature
40
+ if source is None:
41
+ source = config.source
42
+ if base_url is None:
43
+ base_url = config.base_url
44
+ if api_key is None:
45
+ api_key = config.api_key or "EMPTY"
46
+
47
+ # Use defaults if still not specified
48
+ if model is None:
49
+ model = "claude-3-5-sonnet-20241022"
50
+ if temperature is None:
51
+ temperature = 0.7
52
+ if api_key is None:
53
+ api_key = "EMPTY"
54
+ # Auto-detect source from model name if not specified
55
+ if source is None:
56
+ env_source = os.getenv("LLM_SOURCE")
57
+ if env_source in ALLOWED_SOURCES:
58
+ source = env_source
59
+ else:
60
+ if model[:7] == "claude-":
61
+ source = "Anthropic"
62
+ elif model[:7] == "gpt-oss":
63
+ source = "Ollama"
64
+ elif model[:4] == "gpt-":
65
+ source = "OpenAI"
66
+ elif model.startswith("azure-"):
67
+ source = "AzureOpenAI"
68
+ elif model[:7] == "gemini-":
69
+ source = "Gemini"
70
+ elif "groq" in model.lower():
71
+ source = "Groq"
72
+ elif base_url is not None:
73
+ source = "Custom"
74
+ elif "/" in model or any(
75
+ name in model.lower()
76
+ for name in [
77
+ "llama",
78
+ "mistral",
79
+ "qwen",
80
+ "gemma",
81
+ "phi",
82
+ "dolphin",
83
+ "orca",
84
+ "vicuna",
85
+ "deepseek",
86
+ ]
87
+ ):
88
+ source = "Ollama"
89
+ elif model.startswith(
90
+ ("anthropic.claude-", "amazon.titan-", "meta.llama-", "mistral.", "cohere.", "ai21.", "us.")
91
+ ):
92
+ source = "Bedrock"
93
+ else:
94
+ raise ValueError("Unable to determine model source. Please specify 'source' parameter.")
95
+
96
+ # Create appropriate model based on source
97
+ if source == "OpenAI":
98
+ try:
99
+ from langchain_openai import ChatOpenAI
100
+ except ImportError:
101
+ raise ImportError( # noqa: B904
102
+ "langchain-openai package is required for OpenAI models. Install with: pip install langchain-openai"
103
+ )
104
+ return ChatOpenAI(model=model, temperature=temperature, stop_sequences=stop_sequences)
105
+
106
+ elif source == "AzureOpenAI":
107
+ try:
108
+ from langchain_openai import AzureChatOpenAI
109
+ except ImportError:
110
+ raise ImportError( # noqa: B904
111
+ "langchain-openai package is required for Azure OpenAI models. Install with: pip install langchain-openai"
112
+ )
113
+ API_VERSION = "2024-12-01-preview"
114
+ model = model.replace("azure-", "")
115
+ return AzureChatOpenAI(
116
+ openai_api_key=os.getenv("OPENAI_API_KEY"),
117
+ azure_endpoint=os.getenv("OPENAI_ENDPOINT"),
118
+ azure_deployment=model,
119
+ openai_api_version=API_VERSION,
120
+ temperature=temperature,
121
+ )
122
+
123
+ elif source == "Anthropic":
124
+ try:
125
+ from langchain_anthropic import ChatAnthropic
126
+ except ImportError:
127
+ raise ImportError( # noqa: B904
128
+ "langchain-anthropic package is required for Anthropic models. Install with: pip install langchain-anthropic"
129
+ )
130
+ return ChatAnthropic(
131
+ model=model,
132
+ temperature=temperature,
133
+ max_tokens=8192,
134
+ stop_sequences=stop_sequences,
135
+ )
136
+
137
+ elif source == "Gemini":
138
+ # If you want to use ChatGoogleGenerativeAI, you need to pass the stop sequences upon invoking the model.
139
+ # return ChatGoogleGenerativeAI(
140
+ # model=model,
141
+ # temperature=temperature,
142
+ # google_api_key=api_key,
143
+ # )
144
+ try:
145
+ from langchain_openai import ChatOpenAI
146
+ except ImportError:
147
+ raise ImportError( # noqa: B904
148
+ "langchain-openai package is required for Gemini models. Install with: pip install langchain-openai"
149
+ )
150
+ return ChatOpenAI(
151
+ model=model,
152
+ temperature=temperature,
153
+ api_key=os.getenv("GEMINI_API_KEY"),
154
+ base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
155
+ stop_sequences=stop_sequences,
156
+ )
157
+
158
+ elif source == "Groq":
159
+ try:
160
+ from langchain_openai import ChatOpenAI
161
+ except ImportError:
162
+ raise ImportError( # noqa: B904
163
+ "langchain-openai package is required for Groq models. Install with: pip install langchain-openai"
164
+ )
165
+ return ChatOpenAI(
166
+ model=model,
167
+ temperature=temperature,
168
+ api_key=os.getenv("GROQ_API_KEY"),
169
+ base_url="https://api.groq.com/openai/v1",
170
+ stop_sequences=stop_sequences,
171
+ )
172
+
173
+ elif source == "Ollama":
174
+ try:
175
+ from langchain_ollama import ChatOllama
176
+ except ImportError:
177
+ raise ImportError( # noqa: B904
178
+ "langchain-ollama package is required for Ollama models. Install with: pip install langchain-ollama"
179
+ )
180
+ return ChatOllama(
181
+ model=model,
182
+ temperature=temperature,
183
+ )
184
+
185
+ elif source == "Bedrock":
186
+ try:
187
+ from langchain_aws import ChatBedrock
188
+ except ImportError:
189
+ raise ImportError( # noqa: B904
190
+ "langchain-aws package is required for Bedrock models. Install with: pip install langchain-aws"
191
+ )
192
+ return ChatBedrock(
193
+ model=model,
194
+ temperature=temperature,
195
+ stop_sequences=stop_sequences,
196
+ region_name=os.getenv("AWS_REGION", "us-east-1"),
197
+ )
198
+ elif source == "HuggingFace":
199
+ try:
200
+ from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
201
+ except ImportError:
202
+ raise ImportError(
203
+ "langchain-huggingface package is required for HuggingFace models. Install with: pip install langchain-huggingface"
204
+ )
205
+ llm = HuggingFaceEndpoint(
206
+ repo_id="openai/gpt-oss-120b",
207
+ temperature=temperature,
208
+ stop_sequences=stop_sequences,
209
+ huggingfacehub_api_token=os.getenv("HUGGINGFACE_API_KEY")
210
+ )
211
+ return ChatHuggingFace(llm=llm)
212
+
213
+ elif source == "Custom":
214
+ try:
215
+ from langchain_openai import ChatOpenAI
216
+ except ImportError:
217
+ raise ImportError( # noqa: B904
218
+ "langchain-openai package is required for custom models. Install with: pip install langchain-openai"
219
+ )
220
+ # Custom LLM serving such as SGLang. Must expose an openai compatible API.
221
+ assert base_url is not None, "base_url must be provided for custom-served LLMs"
222
+ llm = ChatOpenAI(
223
+ model=model,
224
+ temperature=temperature,
225
+ max_tokens=8192,
226
+ stop_sequences=stop_sequences,
227
+ base_url=base_url,
228
+ api_key=api_key,
229
+ )
230
+ return llm
231
+
232
+ else:
233
+ raise ValueError(
234
+ f"Invalid source: {source}. Valid options are 'OpenAI', 'AzureOpenAI', 'Anthropic', 'Gemini', 'Groq', 'Bedrock', or 'Ollama'"
235
+ )
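Not part of the commit — a short sketch of `get_llm` in use (model names are examples; provider API keys are expected in the environment as described above):

```python
from histopath.llm import get_llm

# Prefix-based auto-detection: "claude-" -> Anthropic, "gpt-" -> OpenAI
llm = get_llm(model="claude-sonnet-4-20250514", temperature=0.2)
print(llm.invoke("One-line definition of fibrosis?").content)

# A self-hosted OpenAI-compatible endpoint needs source/base_url set explicitly
local_llm = get_llm(
    model="my-finetune",                  # hypothetical model name
    source="Custom",
    base_url="http://localhost:8000/v1",
    api_key="EMPTY",
)
```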
histopath/model/__init__.py ADDED
File without changes
histopath/model/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (177 Bytes). View file
 
histopath/model/__pycache__/retriever.cpython-311.pyc ADDED
Binary file (8.28 kB). View file
 
histopath/model/retriever.py ADDED
@@ -0,0 +1,127 @@
1
+ import re
2
+ import contextlib
3
+ from langchain_core.messages import HumanMessage
4
+ from langchain_openai import ChatOpenAI
5
+
6
+ class ToolRetriever:
7
+ """Retrieve tools from the tool registry."""
8
+
9
+ def __init__(self):
10
+ pass
11
+
12
+ def prompt_based_retrieval(self, query: str, resources: dict, llm=None) -> dict:
13
+ """Use a prompt-based approach to retrieve the most relevant resources for a query.
14
+
15
+ Args:
16
+ query: The user's query
17
+ resources: A dictionary with keys 'tools', 'data_lake', and 'libraries',
18
+ each containing a list of available resources
19
+ llm: Optional LLM instance to use for retrieval (if None, will create a new one)
20
+
21
+ Returns:
22
+ A dictionary with the same keys, but containing only the most relevant resources
23
+
24
+ """
25
+ # Create a prompt for the LLM to select relevant resources
26
+ prompt = f"""
27
+ You are an expert histopathology research assistant. Your task is to select the relevant resources to help answer a user's query.
28
+
29
+ USER QUERY: {query}
30
+
31
+ Below are the available resources. For each category, select items that are directly or indirectly relevant to answering the query.
32
+ Be generous in your selection - include resources that might be useful for the task, even if they're not explicitly mentioned in the query.
33
+ It's better to include slightly more resources than to miss potentially useful ones.
34
+
35
+ AVAILABLE SOFTWARE LIBRARIES:
36
+ {self._format_resources_for_prompt(resources.get("libraries", []))}
37
+
38
+ AVAILABLE TOOLS:
39
+ {self._format_resources_for_prompt(resources.get("tools", []))}
40
+
41
+ For each category, respond with ONLY the indices of the relevant items in the following format:
42
+ TOOLS: [list of indices]
43
+
44
+ For example:
45
+ TOOLS: [0, 3, 5, 7, 9]
46
+
47
+ If a category has no relevant items, use an empty list, e.g., TOOLS: []
48
+
49
+ IMPORTANT GUIDELINES:
50
+ 1. Be generous but not excessive - aim to include all potentially relevant resources
51
+ 2. ALWAYS prioritize database tools for general queries - include as many database tools as possible
52
+ 3. Include all literature search tools
53
+ 4. For libraries, include those that provide functions needed for analysis
54
+ 5. Don't exclude resources just because they're not explicitly mentioned in the query
55
+ 6. When in doubt about a tool, include it rather than exclude it
56
+ """
57
+
58
+ # Use the provided LLM or create a new one
59
+ if llm is None:
60
+ llm = ChatOpenAI(model="gpt-4o")
61
+
62
+ # Invoke the LLM
63
+ if hasattr(llm, "invoke"):
64
+ # For LangChain-style LLMs
65
+ response = llm.invoke([HumanMessage(content=prompt)])
66
+ response_content = response.content
67
+ else:
68
+ # For other LLM interfaces
69
+ response_content = str(llm(prompt))
70
+
71
+ # Parse the response to extract the selected indices
72
+ selected_indices = self._parse_llm_response(response_content)
73
+
74
+ # Get the selected resources
75
+ selected_resources = {
76
+ "tools": [
77
+ resources["tools"][i]
78
+ for i in selected_indices.get("tools", [])
79
+ if i < len(resources.get("tools", []))
80
+ ],
81
+ "libraries": [
82
+ resources["libraries"][i]
83
+ for i in selected_indices.get("libraries", [])
84
+ if i < len(resources.get("libraries", []))
85
+ ]
86
+ }
87
+
88
+ return selected_resources
89
+
90
+ def _format_resources_for_prompt(self, resources: list) -> str:
91
+ """Format resources for inclusion in the prompt."""
92
+ formatted = []
93
+ for i, resource in enumerate(resources):
94
+ if isinstance(resource, dict):
95
+ # Handle dictionary format (from tool registry or data lake/libraries with descriptions)
96
+ name = resource.get("name", f"Resource {i}")
97
+ description = resource.get("description", "")
98
+ formatted.append(f"{i}. {name}: {description}")
99
+ elif isinstance(resource, str):
100
+ # Handle string format (simple strings)
101
+ formatted.append(f"{i}. {resource}")
102
+ else:
103
+ # Try to extract name and description from tool objects
104
+ name = getattr(resource, "name", str(resource))
105
+ desc = getattr(resource, "description", "")
106
+ formatted.append(f"{i}. {name}: {desc}")
107
+
108
+ return "\n".join(formatted) if formatted else "None available"
109
+
110
+ def _parse_llm_response(self, response: str) -> dict:
111
+ """Parse the LLM response to extract the selected indices."""
112
+ selected_indices = {"tools": [], "libraries": []}
113
+
114
+ # Extract indices for each category
115
+ tools_match = re.search(r"TOOLS:\s*\[(.*?)\]", response, re.IGNORECASE)
116
+ if tools_match and tools_match.group(1).strip():
117
+ with contextlib.suppress(ValueError):
118
+ selected_indices["tools"] = [int(idx.strip()) for idx in tools_match.group(1).split(",") if idx.strip()]
119
+
120
+ libraries_match = re.search(r"LIBRARIES:\s*\[(.*?)\]", response, re.IGNORECASE)
121
+ if libraries_match and libraries_match.group(1).strip():
122
+ with contextlib.suppress(ValueError):
123
+ selected_indices["libraries"] = [
124
+ int(idx.strip()) for idx in libraries_match.group(1).split(",") if idx.strip()
125
+ ]
126
+
127
+ return selected_indices
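Not part of the commit — a minimal sketch of driving the retriever directly (the tool and library entries are illustrative; with `llm=None` it falls back to `ChatOpenAI(model="gpt-4o")`, which requires OPENAI_API_KEY):

```python
from histopath.model.retriever import ToolRetriever

retriever = ToolRetriever()
resources = {
    "tools": [
        {"name": "segment_slide", "description": "Segment a whole slide image"},
        {"name": "caption_slide", "description": "Caption a whole slide image"},
    ],
    "libraries": [
        {"name": "lazyslide", "description": "Whole slide image processing toolkit"},
    ],
}
selected = retriever.prompt_based_retrieval("Segment the tissue in slide.svs", resources)
print([t["name"] for t in selected["tools"]])
```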
histopath/retriever_benchmark.py ADDED
@@ -0,0 +1,101 @@
1
+ ###########################################################################################
2
+ # Basic ToolRetriever benchmarking for measuring retrieval rate for a certain custom tool #
3
+ # Author: Ryan Ding #
4
+ ###########################################################################################
5
+ import random
6
+ from nltk.corpus import wordnet
7
+ from histopath.model.retriever import ToolRetriever
8
+ from histopath.tool.tool_registry import ToolRegistry
9
+ from histopath.utils import read_module2api
10
+ from langchain_ollama import ChatOllama
11
+
12
+ LLM = ChatOllama(model='gpt-oss:120b', temperature=0.7)
13
+ PROMPT_v1 = 'Caption the whole slide into patches in directory ./test/directory/'
14
+ PROMPT_v2 = 'Caption the whole slide images already segmented into patches in directory ./test/directory'
15
+ RUNS = 100
16
+
17
+ def synonym_replace(text, p_replace=0.2, protected_words=None):
18
+ """Prompt pertubation via replacement of words with their synoynms.
19
+
20
+ Parameters
21
+ ----------
22
+ text: str
23
+ prompt to perturb
24
+ p_replace: float
25
+ probability of replacing any given word (default: 0.2)
26
+ protected_words: set
27
+ words protected from perturbation (default: None)
28
+
29
+ Returns
30
+ -------
31
+ str
32
+ perturbed prompt
33
+ """
34
+ words = text.split()
35
+ new_words = []
36
+ for w in words:
37
+ if protected_words and w in protected_words:
38
+ new_words.append(w)
39
+ continue
40
+ if random.random() < p_replace:
41
+ syns = wordnet.synsets(w)
42
+ if syns:
43
+ lemma_names = syns[0].lemma_names()
44
+ if lemma_names:
45
+ w = random.choice(lemma_names).replace('_', ' ')
46
+ new_words.append(w)
47
+ return ' '.join(new_words)
48
+
49
+
50
+ def add_typo(text, p_typo=0.02):
51
+ """Prompt perturbation via integration of character-level typos.
52
+
53
+ Parameters
54
+ ----------
55
+ text: str
56
+ prompt to perturb
57
+ p_typo: float
58
+ probability of introducing a typo at any given character (default: 0.02)
59
+
60
+ Returns
61
+ -------
62
+ str
63
+ perturbed prompt
64
+
65
+ """
66
+ new_text = list(text)
67
+ for i in range(len(new_text)):
68
+ if random.random() < p_typo:
69
+ new_text[i] = random.choice('abcdefghijklmnopqrstuvwxyz')
70
+ return ''.join(new_text)
71
+
72
+ class ToolBenchmark:
73
+ def __init__(self, llm, prompts, runs, targets):
74
+ self.llm = llm
75
+ self.targets = targets
76
+ self.prompts = prompts
77
+ self.runs = runs
78
+ self.module2api = read_module2api()
79
+ self.registry = ToolRegistry(self.module2api)
80
+ self.retriever = ToolRetriever()
81
+ self.all_tools = self.registry.tools
82
+ self.resources = { "tools": self.all_tools }
83
+
84
+ def retrieve_tools(self, prompt):
+     selected_resources = self.retriever.prompt_based_retrieval(query=prompt, resources=self.resources, llm=self.llm)
+     # Return tool names; the tool entries themselves are dicts, which are unhashable
+     return {tool["name"] for tool in selected_resources["tools"]}
87
+
88
+ def evaluate(self):
+     hits = dict()
+     for _ in range(self.runs):
+         tools = self.retrieve_tools(random.choice(self.prompts))
+         for target in self.targets:
+             # number of runs in which the proper tool was retrieved
+             if target in tools:
+                 hits[target] = hits.get(target, 0) + 1
+     return hits
95
+
96
+
97
+ def main():
98
+ pass
99
+
100
+ if __name__ == '__main__':
101
+ main()
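Not part of the commit — a sketch of how the perturbation helpers could feed the benchmark once `main()` is filled in (targets and counts are illustrative; WordNet must be downloaded once via `nltk.download("wordnet")`):

```python
# inside main(), reusing this module's imports and globals
random.seed(0)  # reproducible perturbations
base = 'Caption the whole slide into patches in directory ./test/directory/'
perturbed = [
    add_typo(synonym_replace(base, protected_words={'./test/directory/'}))
    for _ in range(10)
]
bench = ToolBenchmark(LLM, prompts=perturbed, runs=10, targets={'caption_slide'})
print(bench.evaluate())  # e.g., {'caption_slide': 9}
```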
histopath/tool/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from histopath.utils import get_tool_decorated_functions
histopath/tool/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (258 Bytes). View file
 
histopath/tool/__pycache__/pathology.cpython-311.pyc ADDED
Binary file (19.2 kB). View file
 
histopath/tool/__pycache__/support_tools.cpython-311.pyc ADDED
Binary file (3.02 kB). View file
 
histopath/tool/__pycache__/tool_registry.cpython-311.pyc ADDED
Binary file (5.94 kB). View file
 
histopath/tool/pathology.py ADDED
@@ -0,0 +1,458 @@
1
+ def caption_slide(image_path, slide_name, prompt="Diagnosis:", output_dir="./output"):
2
+ """Captions a Whole Slide Image(WSI).
3
+
4
+ Parameters
5
+ ----------
6
+ image_path: str
7
+ Path to the whole slide image file.
8
+ slide_name: str
9
+ Name of whole slide image file
10
+ prompt: str
11
+ Starting prompt of the generated caption (default: "Diagnosis:")
12
+ output_dir: str, optional
13
+ Directory to save output files (default: "./output")
14
+ Returns
15
+ -------
16
+ str
17
+ Research log summarizing analysis and results
18
+ """
19
+ import os
20
+ import glob
21
+ import timm
22
+ import torch
23
+ from PIL import Image
24
+ import lazyslide as zs
25
+ from pathlib import Path
26
+ from datetime import datetime
27
+ from transformers import AutoModel
28
+ from timm.layers import SwiGLUPacked
29
+ from timm.data import resolve_data_config
30
+ from huggingface_hub import login, whoami
31
+ from timm.data.transforms_factory import create_transform
32
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
33
+ # Step 1: Login to HuggingFace
34
+ login(token=os.getenv("HUGGINGFACE_ACCESS_TOKEN"))
35
+ hf_user = whoami()
36
+ username = hf_user['name']
37
+
38
+ # Step 2: Setup models and transforms
39
+ virchow2 = timm.create_model("hf-hub:paige-ai/Virchow2", pretrained=True, mlp_layer=SwiGLUPacked, act_layer=torch.nn.SiLU)
40
+ virchow2 = virchow2.eval()
41
+ prism = AutoModel.from_pretrained('paige-ai/Prism', trust_remote_code=True)
42
+ prism = prism.to(device)
43
+ transforms = create_transform(**resolve_data_config(virchow2.pretrained_cfg, model=virchow2))
44
+ tile_embeddings = []
45
+ # Step 3: Initialize, process, tile, and encode slide file(s)
46
+ files = [f for f in glob.glob(f"{image_path}/*") if slide_name in os.path.basename(f)]
47
+ if len(files) == 1 and files[0].endswith(".svs"):
48
+ # dealing with the whole slide in itself
49
+ wsi = zs.open_wsi(f"{image_path}/{slide_name}.svs")
50
+ tiles, tile_spec = zs.pp.tile_tissues(wsi, 224, mpp=0.5, return_tiles=True)
51
+
52
+ tile_dir = Path("tiles")
53
+ tile_dir.mkdir(exist_ok=True)
54
+ for _, row in tiles.iterrows():
55
+ tile_id = row["tile_id"]
56
+ geometry = row["geometry"] # shapely Polygon of the tile
57
+ # Get top-left corner of the tile
58
+ minx, miny, maxx, maxy = geometry.bounds
59
+ width = int(maxx - minx)
60
+ height = int(maxy - miny)
61
+
62
+ # Read the tile from WSI
63
+ tile_img = wsi.read_region(int(minx), int(miny), width, height, tile_spec.ops_level)
64
+ tile_img = Image.fromarray(tile_img, 'RGB')
65
+ tile_tensor = transforms(tile_img).unsqueeze(0)
66
+ output = virchow2(tile_tensor)
67
+ class_token = output[:, 0]
68
+ patch_tokens = output[:, 5:]  # tokens 1-4 are Virchow2 register tokens, excluded from pooling
69
+
70
+ embedding = torch.cat([class_token, patch_tokens.mean(1)], dim=-1)
71
+ tile_embeddings.append(embedding)
72
+
73
+ # Save as PNG
74
+ tile_path = tile_dir / f"tile_{tile_id:05d}.png"
75
+ tile_img.save(tile_path)
76
+ else:
77
+ # dealing with patches (not svs); need to encode tiles with Virchow directly
78
+ for file in files:
79
+ tile_img = Image.open(file).convert('RGB')
80
+ tile_tensor = transforms(tile_img).unsqueeze(0)
81
+ output = virchow2(tile_tensor)
82
+ class_token = output[:, 0]
83
+ patch_tokens = output[:, 5:]  # tokens 1-4 are Virchow2 register tokens, excluded from pooling
84
+ embedding = torch.cat([class_token, patch_tokens.mean(1)], dim=-1)
85
+ tile_embeddings.append(embedding)
86
+
87
+ tile_embeddings = torch.cat(tile_embeddings, dim=0).unsqueeze(0).to(device)
88
+ with torch.autocast(device, torch.float16), torch.inference_mode():
89
+ reprs = prism.slide_representations(tile_embeddings)
90
+ genned_ids = prism.generate(
91
+ key_value_states=reprs['image_latents'],
92
+ do_sample=False,
93
+ num_beams=5,
94
+ num_beam_groups=1,
95
+ )
96
+ generated_caption = prism.untokenize(genned_ids)
97
+
98
+ # Step 4: Generate caption using latent representation and initial prompt
99
+
100
+ log = f"""
101
+ Research Log: Whole Slide Image Captioning
102
+ Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
103
+ Image Path: {os.path.basename(image_path)}
104
+ Slide Name: {slide_name}
105
+
106
+ Analysis Steps:
107
+ 1. Logged into HuggingFace as {username}
108
+ 2. Loaded PRISM and Virchow2 models for encoding and captioning
109
+ 3. Initialized, processed, tiled, and encoded slide file(s)
110
+ 4. Generated the caption with "{prompt}" as initial prompt
111
+
112
+ Results:
113
+
114
+ Caption
115
+ -------
116
+ {generated_caption}
117
+ """
118
+
119
+ return log
120
+
121
+
122
+ def segment_slide(image_path, seg_type, model, output_dir="./output"):
123
+ """Segment a Whole Slide Image (WSI).
124
+
125
+ Parameters
126
+ ----------
127
+ image_path: str
128
+ Path to the whole slide image file.
129
+ seg_type: str
130
+ Type of segmentation to perform
131
+ model: str
132
+ Segmentation model to use
133
+ output_dir: str, optional
134
+ Directory to save output files (default: "./output")
135
+ Returns
136
+ -------
137
+ str
138
+ Research log summarizing analysis and results
139
+ """
140
+ import os
141
+ import lazyslide as zs
142
+ from datetime import datetime
143
+ from huggingface_hub import login, whoami
144
+
145
+ # Step 1: Perform validity checking
146
+ usable_models = set(zs.models.list_models("segmentation"))
147
+ if seg_type not in {"cells", "cell_type", "semantic", "tissue", "artifact"}: return None
148
+ if model not in usable_models: return None
149
+ if seg_type == "tissue" and model not in {"grandqc", "pathprofiler"}: return None
150
+ if seg_type == "artifact" and model != "grandqc": return None
151
+ if seg_type == "cells" and model not in {"instanseg", "cellpose"}: return None
152
+ if seg_type == "cell_type" and model != "nulite": return None
153
+
154
+ # Step 2: Login to HuggingFace if gated model
155
+ login(token=os.getenv("HUGGINGFACE_ACCESS_TOKEN"))
156
+ hf_user = whoami()
157
+ username = hf_user['name']
158
+
159
+ # Step 3: Open, process, and tile WSI image
160
+ wsi = zs.open_wsi(image_path)
161
+ zs.pp.find_tissues(wsi)
162
+ zs.pp.tile_graph(wsi)
163
+ #TODO Change values
164
+ zs.pp.tile_tissues(wsi, 512, background_fraction=0.95, mpp=0.5)
165
+
166
+ # Step 4: Appropriately Segment the slide
167
+ if seg_type == "cells":
168
+ zs.seg.cells(wsi, model=model)
169
+ elif seg_type == "cell_type":
170
+ zs.seg.cell_type(wsi, model=model)
171
+ elif seg_type == "semantic":
172
+ zs.seg.semantic(wsi, model=model)
173
+ elif seg_type == "tissue":
174
+ zs.seg.tissue(wsi, model=model)
175
+ else:
176
+ zs.seg.artifact(wsi, model=model)
177
+
178
+ # Step 5: Generate WSI with annotations
179
+
180
+ log = f"""
181
+ Research Log: Whole Slide Image Segmentation
182
+ Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
183
+ Image: {os.path.basename(image_path)}
184
+
185
+ Analysis Steps:
186
+ 1. Performed validity checking
187
+ 2. Logged into HuggingFace as {username}
188
+ 3. Open WSI, find, tile and graph tissues
189
+ 4. Performed {seg_type} segmentation using {model}
190
+ 5. Generated and displayed segmentation results in {output_dir}
191
+
192
+ Results:
193
+
194
+ Output Files
195
+ """
196
+ return log
197
+
198
+ def zero_shot_classification(image_path, labels, output_dir="./output"):
199
+ """Performs Zero-Shot Classification from Whole Slide Images (WSIs).
200
+
201
+ Parameters
202
+ ----------
203
+ image_path: str
204
+ Path to the whole slide image file.
205
+ labels: list
206
+ Labels of the classes to perform zero-shot classification
207
+ output_dir: str, optional
208
+ Directory to save output files (default: "./output")
209
+
210
+ Returns
211
+ -------
212
+ str
213
+ Research log summarizing analysis and results
214
+ """
215
+ import os
216
+ import lazyslide as zs
217
+ from datetime import datetime
218
+ from huggingface_hub import login, whoami
219
+
220
+ # login to huggingface; zero shot via LazySlide only possible with gated models
221
+ login(token=os.getenv("HUGGINGFACE_ACCESS_TOKEN"))
222
+ hf_user = whoami()
223
+ username = hf_user['name']
224
+ wsi = zs.open_wsi(image_path)
225
+ zs.pp.find_tissues(wsi)
226
+ zs.pp.tile_tissues(wsi, 512, background_fraction=0.95, mpp=0.5)
227
+ # might want to make tile graph
228
+ # zs.pp.tile_graph(wsi)
229
+
230
+ zs.tl.feature_extraction(wsi, "virchow")
231
+ zs.tl.feature_aggregation(wsi, feature_key="virchow", encoder="prism")
232
+ results = zs.tl.zero_shot_score(wsi, labels, feature_key="virchow_tiles")
233
+ log = f"""
234
+ Research Log: Zero-Shot Classification
235
+ Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
236
+ Image: {os.path.basename(image_path)}
237
+
238
+ Analysis Steps:
239
+ 1. Logged in as user {username} to HuggingFace
240
+ 2. Loaded WSI: {wsi}
241
+ 3. Found tissues
242
+ 4. Tiled tissues
243
+ 5. Extracted features
244
+ 6. Aggregated features
+ 7. Computed zero-shot scores over labels: {labels}
245
+
246
+
247
+ Results:
248
+ {results}
249
+
250
+ Output Files:
251
+
252
+ """
253
+ print(log)
254
+ return log
255
+
256
+ def quantify_tumor_infiltrating_lymphocytes(image_path, tile_size=256, tile_step=128, batch_size=4, output_dir="./output"):
257
+ """Quantifies Tumor-Infiltrating Lymphocytes (TILs) from Whole-Slide Images (WSIs).
258
+
259
+ Parameters
260
+ ----------
261
+ image_path: str
262
+ Path to the whole slide image file.
263
+ tile_size: int, optional
264
+ Size of inference tiles (default: 256)
265
+ tile_step: int, optional
266
+ Step size between inference tiles (default: 128)
267
+ batch_size: int, optional
268
+ Number of tiles inferred simultaneously (default: 4)
269
+ output_dir: str, optional
270
+ Directory to save output files (default: "./output")
271
+ Returns
272
+ -------
273
+ str
274
+ Research log summarizing analysis and results
275
+
276
+ """
277
+ import os
278
+ import numpy as np
279
+ import pandas as pd
280
+ import lazyslide as zs
281
+ from datetime import datetime
282
+ import matplotlib.pyplot as plt
283
+
284
+ # Step 1: Load WSI via LazySlide
285
+ try:
286
+ wsi = zs.open_wsi(image_path)
287
+ except Exception as e:
288
+ return f"Error loading WSI: {str(e)}"
289
+
290
+ # Step 2: Build a tissue mask + upscale it for higher resolutions
291
+ try:
292
+ tissue_mask = zs.pp.find_tissues(wsi, refine_level=0, to_hsv=True)
293
+ except Exception as e:
294
+ return f"Error building tissue mask: {str(e)}"
295
+
296
+
297
+ # Step 3: Cell type segmentation using LazySlide's seg.cell_types
298
+ try:
299
+ zs.seg.cell_types(wsi, batch_size=batch_size)
300
+ except Exception as e:
301
+ return f"Error during cell type segmentation: {str(e)}"
302
+
303
+ # Step 4: Load results
304
+ instance_map = zs.io.load_annotations(wsi, "instance_map")
305
+ type_map = zs.io.load_annotations(wsi, "cell_types") # may include TIL labels
306
+
307
+ instance_map_path = os.path.join(output_dir, "instance_map.npy")
308
+ type_map_path = os.path.join(output_dir, "cell_type_map.npy")
309
+ np.save(instance_map_path, instance_map)
310
+ np.save(type_map_path, type_map)
311
+
312
+ # Step 5: Define the TIL cell type ID (e.g., 1 for TILs)
313
+ til_type_id = 1
314
+
315
+ # Step 6: Compute TIL counts
316
+ nuclei_in_tissue = tissue_mask & (instance_map > 0)
317
+ total_cells = np.count_nonzero(nuclei_in_tissue)
318
+ til_cells = np.count_nonzero(nuclei_in_tissue & (type_map == til_type_id))
319
+
320
+ # Step 7: Compute densities
321
+ pixel_area_mm2 = (wsi.mpp ** 2) / 1e6 # convert μm² to mm²
322
+ roi_area_mm2 = np.count_nonzero(tissue_mask) * pixel_area_mm2
323
+ til_density = til_cells / roi_area_mm2 if roi_area_mm2 > 0 else float("nan")
324
+ total_density = total_cells / roi_area_mm2 if roi_area_mm2 > 0 else float("nan")
325
+ til_fraction = til_cells / total_cells if total_cells > 0 else float("nan")
326
+
327
+ # Step 8: Save metrics CSV
328
+ metrics = {
329
+ "total_nuclei": total_cells,
330
+ "til_nuclei": til_cells,
331
+ "til_fraction": til_fraction,
332
+ "til_density_per_mm2": til_density,
333
+ "total_density_per_mm2": total_density,
334
+ "roi_area_mm2": roi_area_mm2
335
+ }
336
+ metrics_df = pd.DataFrame([metrics])
337
+ metrics_path = os.path.join(output_dir, "metrics.csv")
338
+ metrics_df.to_csv(metrics_path, index=False)
339
+
340
+ # Step 9: Create and save overlay visualization
341
+ overlay = np.zeros((*type_map.shape, 3), dtype=np.uint8)
342
+ overlay[type_map == til_type_id] = [255, 0, 0] # red for TILs
343
+ overlay[(type_map != til_type_id) & (instance_map > 0)] = [0, 255, 0] # green for other nuclei
344
+ overlay_path = os.path.join(output_dir, "overlay.png")
345
+ plt.imsave(overlay_path, overlay)
346
+
347
+ # Step 10: Create and return research log
348
+ log = f"""
349
+ Research Log: Quantification of Tumor-Infiltrating Lymphocytes
350
+ Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
351
+ Image: {os.path.basename(image_path)}
352
+
353
+ Analysis Steps:
354
+ 1. Loaded and preprocessed the whole slide image into upscaled tiles
355
+ 2. Applied NuLite Nucleus Instance Segmentation and Classification on tiles
356
+ 3. Computed and quantified TIL (based on the inflamed cell class) and total nuclear density
357
+
358
+ Results:
359
+ - Total Nuclei: {int(total_cells)}
360
+ - Total Inflamed (TIL) Nuclei: {int(til_cells)}
361
+ - TIL Density (per mm2): {til_density:.2f}
362
+
363
+ Output Files:
364
+ - Segmented Image: {os.path.basename(overlay_path)}
365
+ - Measurements: {os.path.basename(metrics_path)}
366
+ """
367
+
368
+ return log
369
+
370
+ def quantify_fibrosis(image_path, model="grandqc", output_dir="./output"):
371
+ """Quantifies Fibrosis from Whole Slide Images (WSIs).
372
+
373
+ Parameters
374
+ ----------
375
+ image_path: str
376
+ Path to the image file.
377
+ output_dir: str, optional
378
+ Directory to save output files (default: "./output")
379
+ model: str, optional
380
+ Tissue segmentation model to use (default: grandqc)
381
+
382
+ Returns
383
+ -------
384
+ str
385
+ Research log summarizing analysis and results
386
+ """
387
+ import os
388
+ import lazyslide as zs
389
+ from datetime import datetime
390
+ # Step 1: Load WSI via LazySlide
391
+ try:
392
+ wsi = zs.open_wsi(image_path)
393
+ except Exception as e:
394
+ return f"Error loading WSI: {str(e)}"
395
+
396
+ zs.seg.tissue(wsi, model=model)
397
+ log = f"""
398
+ Research Log: Fibrosis Quantification
+ Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
+ Image: {os.path.basename(image_path)}
+
+ Analysis Steps:
+ 1. Loaded the whole slide image via LazySlide
+ 2. Segmented tissue using the {model} model
+
+ Results:
+ - Tissue segmentation annotations stored on the WSI object (fibrosis scoring not yet implemented)
+
+ Output Files:
+ - None
415
+
416
+
417
+ """
418
+ return log
419
+
420
+ # def template(image_path, output_dir="./output"):
421
+ # """Template.
422
+
423
+ # Parameters
424
+ # ----------
425
+ # image_path: str
426
+ # Path to the image file.
427
+ # output_dir: str, optional
428
+ # Directory to save output files (default: "./output")
429
+
430
+ # Returns
431
+ # -------
432
+ # str
433
+ # Research log summarizing analysis and results
434
+ # """
435
+ # # Step X
436
+
437
+ # log = f"""
438
+ # Research Log: Template
439
+ # Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
440
+ # Image: {os.path.basename(image_path)}
441
+
442
+ # Analysis Steps:
443
+ # 1.
444
+ # 2.
445
+ # 3.
446
+
447
+ # Results:
448
+ # -
449
+ # -
450
+ # -
451
+
452
+ # Output Files:
453
+ # -
454
+ # -
455
+
456
+
457
+ # """
458
+ # return log
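Not part of the commit — a usage sketch for the tools above (paths and labels are placeholders; both calls expect a valid HUGGINGFACE_ACCESS_TOKEN in the environment and the heavyweight model downloads to succeed):

```python
from histopath.tool.pathology import segment_slide, zero_shot_classification

# Tissue segmentation with a model the validity checks accept
print(segment_slide("./slides/case_001.svs", seg_type="tissue", model="grandqc"))

# Zero-shot subtype scoring over candidate labels
print(zero_shot_classification(
    "./slides/case_001.svs",
    labels=["invasive ductal carcinoma", "invasive lobular carcinoma"],
))
```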
histopath/tool/support_tools.py ADDED
@@ -0,0 +1,66 @@
1
+ import sys
2
+ from io import StringIO
3
+
4
+ # Create a persistent namespace that will be shared across all executions
5
+ _persistent_namespace = {}
6
+
7
+ def run_python_repl(command: str) -> str:
8
+ """Executes the provided Python command in a persistent environment and returns the output.
9
+ Variables defined in one execution will be available in subsequent executions.
10
+ """
11
+
12
+ def execute_in_repl(command: str) -> str:
13
+ """Helper function to execute the command in the persistent environment."""
14
+ old_stdout = sys.stdout
15
+ sys.stdout = mystdout = StringIO()
16
+
17
+ # Use the persistent namespace
18
+ global _persistent_namespace
19
+
20
+ try:
21
+ # Execute the command in the persistent namespace
22
+ exec(command, _persistent_namespace)
23
+ output = mystdout.getvalue()
24
+ except Exception as e:
25
+ output = f"Error: {str(e)}"
26
+ finally:
27
+ sys.stdout = old_stdout
28
+ return output
29
+
30
+ command = command.strip("```").strip()
31
+ return execute_in_repl(command)
32
+
33
+
34
+ def read_function_source_code(function_name: str) -> str:
35
+ """Read the source code of a function from any module path.
36
+
37
+ Parameters
38
+ ----------
39
+ function_name (str): Fully qualified function name (e.g., 'bioagentos.tool.support_tools.write_python_code')
40
+
41
+ Returns
42
+ -------
43
+ str: The source code of the function
44
+
45
+ """
46
+ import importlib
47
+ import inspect
48
+
49
+ # Split the function name into module path and function name
50
+ parts = function_name.split(".")
51
+ module_path = ".".join(parts[:-1])
52
+ func_name = parts[-1]
53
+
54
+ try:
55
+ # Import the module
56
+ module = importlib.import_module(module_path)
57
+
58
+ # Get the function object from the module
59
+ function = getattr(module, func_name)
60
+
61
+ # Get the source code of the function
62
+ source_code = inspect.getsource(function)
63
+
64
+ return source_code
65
+ except (ImportError, AttributeError) as e:
66
+ return f"Error: Could not find function '{function_name}'. Details: {str(e)}"
histopath/tool/tool_description/__pycache__/pathology.cpython-311.pyc ADDED
Binary file (1.77 kB). View file
 
histopath/tool/tool_description/__pycache__/support_tools.cpython-311.pyc ADDED
Binary file (822 Bytes). View file
 
histopath/tool/tool_description/pathology.py ADDED
@@ -0,0 +1,156 @@
1
+ description = [
2
+ {
3
+ "description": "Perform zero-shot classification on a whole slide image",
4
+ "name": "zero_shot_classification",
5
+ "optional_parameters": [
6
+ {
7
+ "default": "./output",
8
+ "description": "Directory to save output files",
9
+ "name": "output_dir",
10
+ "type": "str"
11
+ }
12
+ ],
13
+ "required_parameters": [
14
+ {
15
+ "default": None,
16
+ "description": "File path of the whole slide image",
17
+ "name": "image_path",
18
+ "type": "str"
19
+ },
20
+ {
21
+ "default": None,
22
+ "description": "Labels of the classes to perform zero-shot classification",
23
+ "name": "labels",
24
+ "type": "list"
25
+ }
26
+ ]
27
+ },
28
+ {
29
+ "description": "Segment a Whole Slide Image (WSI)",
30
+ "name": "segment_slide",
31
+ "optional_parameters": [
32
+ {
33
+ "default": "./output",
34
+ "description": "Directory to save output files",
35
+ "name": "output_dir",
36
+ "type": "str"
37
+ }
38
+ ],
39
+ "required_parameters": [
40
+ {
41
+ "default": None,
42
+ "description": "Path of the whole slide image",
43
+ "name": "image_path",
44
+ "type": "str"
45
+ },
46
+ {
47
+ "default": None,
48
+ "description": "Type of segmentation to perform",
49
+ "name": "seg_type",
50
+ "type": "str"
51
+ },
52
+ {
53
+ "default": None,
54
+ "description": "Segmentation model to use",
55
+ "name": "model",
56
+ "type": "str"
57
+ }
58
+ ]
59
+ },
60
+ {
61
+ "description": "Quantify Fibrosis from a Whole Slide Image",
62
+ "name": "quantify_fibrosis",
63
+ "optional_parameters": [
64
+ {
65
+ "default": "./output",
66
+ "description": "Directory to save output files",
67
+ "name": "output_dir",
68
+ "type": "str"
69
+ },
70
+ {
71
+ "default": "grandqc",
72
+ "description": "Tissue segmentation model to use (default: grandqc)",
73
+ "name": "model",
74
+ "type": "str"
75
+ }
76
+ ],
77
+ "required_parameters": [
78
+ {
79
+ "default": None,
80
+ "description": "Path to the whole slide image",
81
+ "name": "image_path",
82
+ "type": "str"
83
+ }
84
+ ]
85
+ },
86
+ {
87
+ "description": "Caption a whole slide image directly from the slide file or via tiled tissue patches from the slide file",
88
+ "name": "caption_slide",
89
+ "optional_parameters": [
90
+ {
91
+ "default": "./output",
92
+ "description": "Directory to save output files",
93
+ "name": "output_dir",
94
+ "type": "str"
95
+ },
96
+ {
97
+ "default": "Diagnosis:",
98
+ "description": "Starting prompt of the generated caption ",
99
+ "name": "prompt",
100
+ "type": "str"
101
+ }
102
+ ],
103
+ "required_parameters": [
104
+ {
105
+ "default": None,
106
+ "description": "Path to the whole slide image",
107
+ "name": "image_path",
108
+ "type": "str"
109
+ },
110
+ {
111
+ "default": None,
112
+ "description": "Name of the whole slide image file",
113
+ "name": "slide_name",
114
+ "type": "str"
115
+ }
116
+ ]
117
+ }
118
+ ]
119
+ # {
+ #     "description": "Quantify Tumor-Infiltrating Lymphocytes from "
+ #                    "whole slide image data via identification of inflamed nuclei "
+ #                    "region fractions and density",
+ #     "name": "quantify_tumor_infiltrating_lymphocytes",
+ #     "optional_parameters": [
+ #         {
+ #             "default": "./output",
+ #             "description": "Directory to save output files",
+ #             "name": "output_dir",
+ #             "type": "str"
+ #         },
+ #         {
+ #             "default": 256,
+ #             "description": "Size of inference tiles",
+ #             "name": "tile_size",
+ #             "type": "int"
+ #         },
+ #         {
+ #             "default": 128,
+ #             "description": "Step size between inference tiles",
+ #             "name": "tile_step",
+ #             "type": "int"
+ #         },
+ #         {
+ #             "default": 4,
+ #             "description": "Number of tiles inferred simultaneously",
+ #             "name": "batch_size",
+ #             "type": "int"
+ #         }
+ #     ],
+ #     "required_parameters": [
+ #         {
+ #             "default": None,
+ #             "description": "Path to the whole slide image",
+ #             "name": "image_path",
+ #             "type": "str"
+ #         }
+ #     ]
+ # }
histopath/tool/tool_description/support_tools.py ADDED
@@ -0,0 +1,30 @@
1
+ description = [
2
+ {
3
+ "description": "Executes the provided Python command in the notebook environment and returns the output.",
4
+ "name": "run_python_repl",
5
+ "optional_parameters": [],
6
+ "required_parameters": [
7
+ {
8
+ "default": None,
9
+ "description": "Python command to execute in the notebook environment",
10
+ "name": "command",
11
+ "type": "str",
12
+ }
13
+ ],
14
+ },
15
+ {
16
+ "description": "Read the source code of a function from any module path.",
17
+ "name": "read_function_source_code",
18
+ "optional_parameters": [],
19
+ "required_parameters": [
20
+ {
21
+ "default": None,
22
+ "description": "Fully qualified function name "
23
+ "(e.g., "
24
+ "'bioagentos.tool.support_tools.write_python_code')",
25
+ "name": "function_name",
26
+ "type": "str",
27
+ }
28
+ ],
29
+ },
30
+ ]
histopath/tool/tool_registry.py ADDED
@@ -0,0 +1,84 @@
1
+ import pickle
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+ class ToolRegistry:
6
+ def __init__(self, tools):
7
+ self.tools = []
8
+ self.next_id = 0
9
+
10
+ for j in tools.values():
11
+ for tool in j:
12
+ self.register_tool(tool)
13
+
14
+ docs = []
15
+ for tool_id in range(len(self.tools)):
16
+ docs.append([int(tool_id), self.get_tool_by_id(int(tool_id))])
17
+ self.document_df = pd.DataFrame(docs, columns=["docid", "document_content"])
18
+
19
+ def register_tool(self, tool):
20
+ if self.validate_tool(tool):
21
+ tool["id"] = self.next_id
22
+ self.tools.append(tool)
23
+ self.next_id += 1
24
+ else:
25
+ raise ValueError("Invalid tool format")
26
+
27
+ def validate_tool(self, tool):
28
+ required_keys = ["name", "description", "required_parameters"]
29
+ return all(key in tool for key in required_keys)
30
+
31
+ def get_tool_by_name(self, name):
32
+ for tool in self.tools:
33
+ if tool["name"] == name:
34
+ return tool
35
+ return None
36
+
37
+ def get_tool_by_id(self, tool_id):
38
+ for tool in self.tools:
39
+ if tool["id"] == tool_id:
40
+ return tool
41
+ return None
42
+
43
+ def get_id_by_name(self, name):
44
+ for tool in self.tools:
45
+ if tool["name"] == name:
46
+ return tool["id"]
47
+ return None
48
+
49
+ def get_name_by_id(self, tool_id):
50
+ for tool in self.tools:
51
+ if tool["id"] == tool_id:
52
+ return tool["name"]
53
+ return None
54
+
55
+ def list_tools(self):
56
+ return [{"name": tool["name"], "id": tool["id"]} for tool in self.tools]
57
+
58
+ def remove_tool_by_id(self, tool_id):
59
+ # Remove the tool with the given id
60
+ tool = self.get_tool_by_id(tool_id)
61
+ if tool:
62
+ self.tools = [t for t in self.tools if t["id"] != tool_id]
63
+ return True
64
+ return False
65
+
66
+ def remove_tool_by_name(self, name):
67
+ # Remove the tool with the given name
68
+ tool = self.get_tool_by_name(name)
69
+ if tool:
70
+ self.tools = [t for t in self.tools if t["name"] != name]
71
+ return True
72
+ return False
73
+
74
+ def save_registry(self, filename):
75
+ with open(filename, "wb") as file:
76
+ pickle.dump(self, file)
77
+
78
+ # def get_langchain_tool_by_id(self, id):
79
+ # return self.langchain_tools[id]
80
+
81
+ @staticmethod
82
+ def load_registry(filename):
83
+ with open(filename, "rb") as file:
84
+ return pickle.load(file)
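Not part of the commit — a round-trip sketch of the registry (in practice the module2api mapping comes from `read_module2api` in histopath.utils; the single entry here is illustrative):

```python
from histopath.tool.tool_registry import ToolRegistry

module2api = {
    "histopath.tool.pathology": [
        {
            "name": "segment_slide",
            "description": "Segment a whole slide image",
            "required_parameters": [{"name": "image_path", "type": "str"}],
        },
    ]
}
registry = ToolRegistry(module2api)
print(registry.list_tools())            # [{'name': 'segment_slide', 'id': 0}]
registry.save_registry("registry.pkl")
restored = ToolRegistry.load_registry("registry.pkl")
print(restored.get_id_by_name("segment_slide"))  # 0
```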
histopath/utils.py ADDED
@@ -0,0 +1,722 @@
1
+ import ast
2
+ import enum
3
+ import importlib
4
+ import json
5
+ import os
6
+ import pickle
7
+ import subprocess
8
+ import tempfile
9
+ import traceback
10
+ import zipfile
11
+ from typing import Any, ClassVar
12
+ from urllib.parse import urljoin
13
+
14
+ import pandas as pd
15
+ import requests
16
+ import tqdm # Add tqdm for progress bar
17
+ from langchain_core.callbacks import BaseCallbackHandler
18
+ from langchain_core.messages.base import get_msg_title_repr
19
+ from langchain_core.tools import StructuredTool
20
+ from langchain_core.utils.interactive_env import is_interactive_env
21
+ from pydantic import BaseModel, Field, ValidationError
22
+
23
+
24
+ def run_bash_script(script: str) -> str:
25
+ """Run a Bash script using subprocess.
26
+
27
+ Args:
28
+ script: Bash script to run
29
+
30
+ Returns:
31
+ Output of the Bash script
32
+
33
+ Example:
34
+ ```
35
+ # Example of a complex Bash script
36
+ script = '''
37
+ #!/bin/bash
38
+
39
+ # Define variables
40
+ DATA_DIR="/path/to/data"
41
+ OUTPUT_FILE="results.txt"
42
+
43
+ # Create output directory if it doesn't exist
44
+ mkdir -p $(dirname $OUTPUT_FILE)
45
+
46
+ # Loop through files
47
+ for file in $DATA_DIR/*.txt; do
48
+ echo "Processing $file..."
49
+ # Count lines in each file
50
+ line_count=$(wc -l < $file)
51
+ echo "$file: $line_count lines" >> $OUTPUT_FILE
52
+ done
53
+
54
+ echo "Processing complete. Results saved to $OUTPUT_FILE"
55
+ '''
56
+ result = run_bash_script(script)
57
+ print(result)
58
+ ```
59
+
60
+ """
61
+ try:
62
+ # Trim any leading/trailing whitespace
63
+ script = script.strip()
64
+
65
+ # If the script is empty, return an error
66
+ if not script:
67
+ return "Error: Empty script"
68
+
69
+ # Create a temporary file to store the Bash script
70
+ with tempfile.NamedTemporaryFile(suffix=".sh", mode="w", delete=False) as f:
71
+ # Add shebang if not present
72
+ if not script.startswith("#!/"):
73
+ f.write("#!/bin/bash\n")
74
+ # Add set -e to exit on error
75
+ if "set -e" not in script:
76
+ f.write("set -e\n")
77
+ f.write(script)
78
+ temp_file = f.name
79
+
80
+ # Make the script executable
81
+ os.chmod(temp_file, 0o755)
82
+
83
+ # Get current environment variables and working directory
84
+ env = os.environ.copy()
85
+ cwd = os.getcwd()
86
+
87
+ # Run the Bash script with the current environment and working directory
88
+ result = subprocess.run(
89
+ [temp_file],
90
+ shell=True,
91
+ capture_output=True,
92
+ text=True,
93
+ check=False,
94
+ env=env,
95
+ cwd=cwd,
96
+ )
97
+
98
+ # Clean up the temporary file
99
+ os.unlink(temp_file)
100
+
101
+ # Return the output
102
+ if result.returncode != 0:
103
+ traceback.print_stack()
104
+ print(result)
105
+ return f"Error running Bash script (exit code {result.returncode}):\n{result.stderr}"
106
+ else:
107
+ return result.stdout
108
+ except Exception as e:
109
+ traceback.print_exc()
110
+ return f"Error running Bash script: {str(e)}"
111
+
112
+
113
+ # Keep the run_cli_command for backward compatibility
114
+ def run_cli_command(command: str) -> str:
115
+ """Run a CLI command using subprocess.
116
+
117
+ Args:
118
+ command: CLI command to run
119
+
120
+ Returns:
121
+ Output of the CLI command
122
+
123
+ """
124
+ try:
125
+ # Trim any leading/trailing whitespace
126
+ command = command.strip()
127
+
128
+ # If the command is empty, return an error
129
+ if not command:
130
+ return "Error: Empty command"
131
+
132
+ # Split the command into a list of arguments, handling quoted arguments correctly
133
+ import shlex
134
+
135
+ args = shlex.split(command)
136
+
137
+ # Run the command
138
+ result = subprocess.run(args, capture_output=True, text=True, check=False)
139
+
140
+ # Return the output
141
+ if result.returncode != 0:
142
+ return f"Error running command '{command}':\n{result.stderr}"
143
+ else:
144
+ return result.stdout
145
+ except Exception as e:
146
+ return f"Error running command '{command}': {str(e)}"
147
+
148
+
149
+ def run_with_timeout(func, args=None, kwargs=None, timeout=600):
150
+ """Run a function with a timeout using threading instead of multiprocessing.
151
+ This allows variables to persist in the global namespace between function calls.
152
+ Returns the function result or a timeout error message.
153
+ """
154
+ if args is None:
155
+ args = []
156
+ if kwargs is None:
157
+ kwargs = {}
158
+
159
+ import ctypes
160
+ import queue
161
+ import threading
162
+
163
+ result_queue = queue.Queue()
164
+
165
+ def thread_func(func, args, kwargs, result_queue):
166
+ """Function to run in a separate thread."""
167
+ try:
168
+ result = func(*args, **kwargs)
169
+ result_queue.put(("success", result))
170
+ except Exception as e:
171
+ result_queue.put(("error", str(e)))
172
+
173
+ # Start a separate thread
174
+ thread = threading.Thread(target=thread_func, args=(func, args, kwargs, result_queue))
175
+ thread.daemon = True # Set as daemon so it will be killed when main thread exits
176
+ thread.start()
177
+
178
+ # Wait for the specified timeout
179
+ thread.join(timeout)
180
+
181
+ # Check if the thread is still running after timeout
182
+ if thread.is_alive():
183
+ print(f"TIMEOUT: Code execution timed out after {timeout} seconds")
184
+
185
+ # Unfortunately, there's no clean way to force terminate a thread in Python
186
+ # The recommended approach is to use daemon threads and let them be killed when main thread exits
187
+ # Here, we'll try to raise an exception in the thread to make it stop
188
+ try:
189
+ # Get thread ID and try to terminate it
190
+ thread_id = thread.ident
191
+ if thread_id:
192
+ # This is a bit dangerous and not 100% reliable
193
+ # It attempts to raise a SystemExit exception in the thread
194
+ res = ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(thread_id), ctypes.py_object(SystemExit))
195
+ if res > 1:
196
+ # Oops, we raised too many exceptions
197
+ ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(thread_id), None)
198
+ except Exception as e:
199
+ print(f"Error trying to terminate thread: {e}")
200
+
201
+ return f"ERROR: Code execution timed out after {timeout} seconds. Please try with simpler inputs or break your task into smaller steps."
202
+
203
+ # Get the result from the queue if available
204
+ try:
205
+ status, result = result_queue.get(block=False)
206
+ return result if status == "success" else f"Error in execution: {result}"
207
+ except queue.Empty:
208
+ return "Error: Execution completed but no result was returned"
209
+
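Not part of the commit — a quick sketch of the timeout wrapper (durations are illustrative; a timed-out daemon thread may linger until the process exits):

```python
import time

def slow_op(n):
    time.sleep(n)
    return f"done after {n}s"

print(run_with_timeout(slow_op, args=[1], timeout=5))   # "done after 1s"
print(run_with_timeout(slow_op, args=[10], timeout=2))  # timeout error message
```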
210
+
211
+ class api_schema(BaseModel):
212
+ """api schema specification."""
213
+
214
+ api_schema: str | None = Field(description="The api schema as a dictionary")
215
+
216
+
217
+ def function_to_api_schema(function_string, llm):
218
+ prompt = """
219
+ Based on a code snippet and help me write an API docstring in the format like this:
220
+
221
+ {{'name': 'get_gene_set_enrichment',
222
+ 'description': 'Given a list of genes, identify a pathway that is enriched for this gene set. Return a list of pathway name, p-value, z-scores.',
223
+ 'required_parameters': [{{'name': 'genes',
224
+ 'type': 'List[str]',
225
+ 'description': 'List of gene symbols to analyze',
226
+ 'default': None}}],
227
+ 'optional_parameters': [{{'name': 'top_k',
228
+ 'type': 'int',
229
+ 'description': 'Top K pathways to return',
230
+ 'default': 10}}, {{'name': 'database',
231
+ 'type': 'str',
232
+ 'description': 'Name of the database to use for enrichment analysis',
233
+ 'default': "gene_ontology"}}]}}
234
+
235
+ Strictly follow the input from the function - don't create fake optional parameters.
236
+ For variable without default values, set them as None, not null.
237
+ For variable with boolean values, use capitalized True or False, not true or false.
238
+ Do not add any return type in the docstring.
239
+ Be as clear and succinct as possible for the descriptions. Please do not make it overly verbose.
240
+ Here is the code snippet:
241
+ {code}
242
+ """
243
+ llm = llm.with_structured_output(api_schema)
244
+
245
+ for _ in range(7):
246
+ try:
247
+ api = llm.invoke(prompt.format(code=function_string)).dict()["api_schema"]
248
+ return ast.literal_eval(api) # -> prefer "default": None
249
+ # return json.loads(api) # -> prefer "default": null
250
+ except Exception as e:
251
+ print("API string:", api)
252
+ print("Error parsing the API string:", e)
253
+ continue
254
+
255
+ return "Error: Could not parse the API schema"
256
+ # return
257
+
258
+
259
+ def get_all_functions_from_file(file_path):
260
+ with open(file_path) as file:
261
+ file_content = file.read()
262
+
263
+ # Parse the file content into an AST (Abstract Syntax Tree)
264
+ tree = ast.parse(file_content)
265
+
266
+ # List to hold the top-level functions as strings
267
+ functions = []
268
+
269
+ # Walk through the AST nodes
270
+ for node in tree.body: # Only consider top-level nodes in the body
271
+ if isinstance(node, ast.FunctionDef): # Check if the node is a function definition
272
+ # Skip if function name starts with underscore
273
+ if node.name.startswith("_"):
274
+ continue
275
+
276
+ start_line = node.lineno - 1 # Get the starting line of the function
277
+ end_line = node.end_lineno # Get the ending line of the function (only available in Python 3.8+)
278
+ func_code = file_content.splitlines()[start_line:end_line]
279
+ functions.append("\n".join(func_code)) # Join lines of the function and add to the list
280
+
281
+ return functions
282
+
283
+
284
+ def write_python_code(request: str):
285
+ from langchain_anthropic import ChatAnthropic
286
+ from langchain_core.output_parsers import StrOutputParser
287
+ from langchain_core.prompts import ChatPromptTemplate
288
+
289
+ model = ChatAnthropic(model="claude-3-5-sonnet-20240620")
290
+ template = """Write some python code to solve the user's problem.
291
+
292
+ Return only python code in Markdown format, e.g.:
293
+
294
+ ```python
295
+ ....
296
+ ```"""
297
+ prompt = ChatPromptTemplate.from_messages([("system", template), ("human", "{input}")])
298
+
299
+ def _sanitize_output(text: str):
300
+ after = text.split("```python", 1)[1]
301
+ return after.split("```", 1)[0]
302
+
303
+ chain = prompt | model | StrOutputParser() | _sanitize_output
304
+ return chain.invoke({"input": "write a code that " + request})
305
+
306
+
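Usage is a one-liner (requires ANTHROPIC_API_KEY in the environment; the request string is illustrative):

code = write_python_code("loads a CSV with pandas and prints the column names")
print(code)  # the extracted body of the ```python ...``` block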
307
+ def execute_graphql_query(
308
+ query: str,
309
+ variables: dict,
310
+ api_address: str = "https://api.genetics.opentargets.org/graphql",
311
+ ) -> dict:
312
+ """Executes a GraphQL query with variables and returns the data as a dictionary."""
313
+ headers = {"Content-Type": "application/json"}
314
+ response = requests.post(api_address, json={"query": query, "variables": variables}, headers=headers)
315
+ if response.status_code == 200:
316
+ return response.json()
317
+ else:
318
+ print(response.text)
319
+ response.raise_for_status()
320
+
321
+
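An illustrative call; the query and field names are assumptions and should be checked against the live Open Targets schema:

query = """
query GeneInfo($geneId: String!) {
  geneInfo(geneId: $geneId) {
    id
    symbol
  }
}
"""
data = execute_graphql_query(query, {"geneId": "ENSG00000123374"})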
322
+ def get_tool_decorated_functions(relative_path):
323
+ import ast
324
+ import importlib.util
325
+ import os
326
+
327
+ # Get the directory of the current file (__init__.py)
328
+ current_dir = os.path.dirname(os.path.abspath(__file__))
329
+
330
+ # Construct the absolute path from the relative path
331
+ file_path = os.path.join(current_dir, relative_path)
332
+
333
+ with open(file_path) as file:
334
+ tree = ast.parse(file.read(), filename=file_path)
335
+
336
+ tool_function_names = []
337
+
338
+ for node in ast.walk(tree):
339
+ if isinstance(node, ast.FunctionDef):
340
+ for decorator in node.decorator_list:
341
+ if (
342
+ (isinstance(decorator, ast.Name) and decorator.id == "tool")
343
+ or (
345
+ isinstance(decorator, ast.Call)
346
+ and isinstance(decorator.func, ast.Name)
347
+ and decorator.func.id == "tool"
348
+ )
349
+ ):
350
+ tool_function_names.append(node.name)
351
+
352
+ # Calculate the module name from the relative path
353
+ package_path = os.path.relpath(file_path, start=current_dir)
354
+ module_name = package_path.replace(os.path.sep, ".").rsplit(".", 1)[0]
355
+
356
+ # Import the module and get the function objects
357
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
358
+ module = importlib.util.module_from_spec(spec)
359
+ spec.loader.exec_module(module)
360
+
361
+ tool_functions = [getattr(module, name) for name in tool_function_names]
362
+
363
+ return tool_functions
364
+
365
+
366
+ def load_pickle(file):
367
+ import pickle
368
+
369
+ with open(file, "rb") as f:
370
+ return pickle.load(f)
371
+
372
+ def pretty_print(message, printout=True):
373
+ if isinstance(message, tuple):
374
+ title = message
375
+ elif isinstance(message.content, list):
376
+ title = get_msg_title_repr(message.type.upper() + " Message", bold=is_interactive_env())
377
+ if message.name is not None:
378
+ title += f"\nName: {message.name}"
379
+
380
+ for i in message.content:
381
+ if i["type"] == "text":
382
+ title += f"\n{i['text']}\n"
383
+ elif i["type"] == "tool_use":
384
+ title += f"\nTool: {i['name']}"
385
+ title += f"\nInput: {i['input']}"
386
+ if printout:
387
+ print(f"{title}")
388
+ else:
389
+ title = get_msg_title_repr(message.type.title() + " Message", bold=is_interactive_env())
390
+ if message.name is not None:
391
+ title += f"\nName: {message.name}"
392
+ title += f"\n\n{message.content}"
393
+ if printout:
394
+ print(f"{title}")
395
+ return title
396
+
397
+
398
+ class CustomBaseModel(BaseModel):
399
+ api_schema: ClassVar[dict] = None # Class variable to store api_schema
400
+
401
+ # Add model_config with arbitrary_types_allowed=True
402
+ model_config = {"arbitrary_types_allowed": True}
403
+
404
+ @classmethod
405
+ def set_api_schema(cls, schema: dict):
406
+ cls.api_schema = schema
407
+
408
+ @classmethod
409
+ def model_validate(cls, obj):
410
+ try:
411
+ return super().model_validate(obj)
412
+ except (ValidationError, AttributeError) as e:
413
+ if not cls.api_schema:
414
+ raise e # If no api_schema is set, raise original error
415
+
416
+ error_msg = "Required Parameters:\n"
417
+ for param in cls.api_schema["required_parameters"]:
418
+ error_msg += f"- {param['name']} ({param['type']}): {param['description']}\n"
419
+
420
+ error_msg += "\nErrors:\n"
421
+ for err in e.errors():
422
+ field = err["loc"][0] if err["loc"] else "input"
423
+ error_msg += f"- {field}: {err['msg']}\n"
424
+
425
+ if not obj:
426
+ error_msg += "\nNo input provided"
427
+ else:
428
+ error_msg += "\nProvided Input:\n"
429
+ for key, value in obj.items():
430
+ error_msg += f"- {key}: {value}\n"
431
+
432
+ missing_params = {param["name"] for param in cls.api_schema["required_parameters"]} - set(obj.keys())
433
+ if missing_params:
434
+ error_msg += "\nMissing Parameters:\n"
435
+ for param in missing_params:
436
+ error_msg += f"- {param}\n"
437
+
438
+ # Create a proper validation error structure
439
+ raise ValidationError.from_exception_data(
440
+ title="Validation Error",
441
+ line_errors=[
442
+ {
443
+ "type": "value_error",
444
+ "loc": ("input",),
445
+ "input": obj,
446
+ "ctx": {
447
+ "error": error_msg,
448
+ },
449
+ }
450
+ ],
451
+ ) from None
452
+
453
+
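A sketch of how a tool input model opts into the enriched error message; the model and schema below are illustrative:

class EnrichInput(CustomBaseModel):
    genes: list[str] = Field(description="Gene symbols to analyze")

EnrichInput.set_api_schema({
    "required_parameters": [
        {"name": "genes", "type": "List[str]", "description": "Gene symbols to analyze"}
    ]
})
EnrichInput.model_validate({})  # raises ValidationError whose message lists the required parameters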
454
+ def safe_execute_decorator(func):
455
+ def wrapper(*args, **kwargs):
456
+ try:
457
+ return func(*args, **kwargs)
458
+ except Exception as e:
459
+ return str(e)
460
+
461
+ return wrapper
462
+
463
+
464
+ def api_schema_to_langchain_tool(api_schema, mode="generated_tool", module_name=None):
465
+ if mode == "generated_tool":
466
+ module = importlib.import_module("histopath.tool.generated_tool." + api_schema["tool_name"] + ".api")
467
+ elif mode == "custom_tool":
468
+ module = importlib.import_module(module_name)
469
+
470
+ api_function = getattr(module, api_schema["name"])
471
+ api_function = safe_execute_decorator(api_function)
472
+
473
+ # Define a mapping from string type names to actual Python type objects
474
+ type_mapping = {
475
+ "string": str,
476
+ "integer": int,
477
+ "boolean": bool,
478
+ "pandas": pd.DataFrame, # Use the imported pandas.DataFrame directly
479
+ "str": str,
480
+ "int": int,
481
+ "bool": bool,
482
+ "List[str]": list[str],
483
+ "List[int]": list[int],
484
+ "Dict": dict,
485
+ "Any": Any,
486
+ }
487
+
488
+ # Create the fields and annotations
489
+ annotations = {}
490
+ for param in api_schema["required_parameters"]:
491
+ param_type = param["type"]
492
+ if param_type in type_mapping:
493
+ annotations[param["name"]] = type_mapping[param_type]
494
+ else:
495
+ # Fall back to eval for types not in the mapping, guarded so unknown names default to Any
496
+ try:
497
+ annotations[param["name"]] = eval(param_type)
498
+ except (NameError, SyntaxError):
499
+ # Default to Any for unknown types
500
+ annotations[param["name"]] = Any
501
+
502
+ fields = {param["name"]: Field(description=param["description"]) for param in api_schema["required_parameters"]}
503
+
504
+ # Create the ApiInput class dynamically
505
+ ApiInput = type("Input", (CustomBaseModel,), {"__annotations__": annotations, **fields})
506
+ # Set the api_schema
507
+ ApiInput.set_api_schema(api_schema)
508
+
509
+ # Create the StructuredTool
510
+ api_tool = StructuredTool.from_function(
511
+ func=api_function,
512
+ name=api_schema["name"],
513
+ description=api_schema["description"],
514
+ args_schema=ApiInput,
515
+ return_direct=True,
516
+ )
517
+
518
+ return api_tool
519
+
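A hedged usage sketch in custom_tool mode (the schema and module path are illustrative; the named function must actually exist in that module):

schema = {
    "name": "get_gene_set_enrichment",
    "description": "Identify pathways enriched for a gene set.",
    "required_parameters": [
        {"name": "genes", "type": "List[str]", "description": "Gene symbols to analyze"}
    ],
}
tool = api_schema_to_langchain_tool(schema, mode="custom_tool", module_name="my_tools.enrichment")
result = tool.invoke({"genes": ["TP53", "CDK2"]})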
520
+ class ID(enum.Enum):
521
+ ENTREZ = "Entrez"
522
+ ENSEMBL = "Ensembl without version" # e.g. ENSG00000123374
523
+ ENSEMBL_W_VERSION = "Ensembl with version" # e.g. ENSG00000123374.10 (needed for GTEx)
524
+
525
+ def save_pkl(f, filename):
526
+ with open(filename, "wb") as file:
527
+ pickle.dump(f, file)
528
+
529
+ def load_pkl(filename):
530
+ with open(filename, "rb") as file:
531
+ return pickle.load(file)
532
+
533
+ _TEXT_COLOR_MAPPING = {
534
+ "blue": "36;1",
535
+ "yellow": "33;1",
536
+ "pink": "38;5;200",
537
+ "green": "32;1",
538
+ "red": "31;1",
539
+ }
540
+
541
+ def color_print(text, color="blue"):
542
+ color_str = _TEXT_COLOR_MAPPING[color]
543
+ print(f"\u001b[{color_str}m\033[1;3m{text}\u001b[0m")
544
+
545
+ class PromptLogger(BaseCallbackHandler):
546
+ def on_chat_model_start(self, serialized, messages, **kwargs):
547
+ for message in messages[0]:
548
+ color_print(message.pretty_repr(), color="green")
549
+
550
+
551
+ class NodeLogger(BaseCallbackHandler):
552
+ def on_llm_end(self, response, **kwargs): # response of type LLMResult
553
+ for generations in response.generations: # response.generations is List[List[Generation]] because "each input could have multiple candidate generations"
554
+ for generation in generations:
555
+ generated_text = generation.message.content
556
+ # token_usage = generation.message.response_metadata["token_usage"]
557
+ color_print(generated_text, color="yellow")
558
+
559
+ def on_agent_action(self, action, **kwargs):
560
+ color_print(action.log, color="pink")
561
+
562
+ def on_agent_finish(self, finish, **kwargs):
563
+ color_print(finish, color="red")
564
+
565
+ def on_tool_start(self, serialized, input_str, **kwargs):
566
+ tool_name = serialized.get("name")
567
+ color_print(f"Calling {tool_name} with inputs: {input_str}", color="pink")
568
+
569
+ def on_tool_end(self, output, **kwargs):
570
+ output = str(output)
571
+ color_print(output, color="blue")
572
+
573
+
574
+ def check_or_create_path(path=None):
575
+ # Set a default path if none is provided
576
+ if path is None:
577
+ path = os.path.join(os.getcwd(), "tmp_directory")
578
+
579
+ # Check if the path exists
580
+ if not os.path.exists(path):
581
+ # If it doesn't exist, create the directory
582
+ os.makedirs(path)
583
+ print(f"Directory created at: {path}")
584
+ else:
585
+ print(f"Directory already exists at: {path}")
586
+
587
+ return path
588
+
589
+
590
+ def langchain_to_gradio_message(message):
591
+ # Build the title and content based on the message type
592
+ if isinstance(message.content, list):
593
+ # For a message with multiple content items (like text and tool use)
594
+ gradio_messages = []
595
+ for item in message.content:
596
+ gradio_message = {
597
+ "role": "user" if message.type == "human" else "assistant",
598
+ "content": "",
599
+ "metadata": {},
600
+ }
601
+
602
+ if item["type"] == "text":
603
+ item["text"] = item["text"].replace("<think>", "\n")
604
+ item["text"] = item["text"].replace("</think>", "\n")
605
+ gradio_message["content"] += f"{item['text']}\n"
606
+ gradio_messages.append(gradio_message)
607
+ elif item["type"] == "tool_use":
608
+ if item["name"] == "run_python_repl":
609
+ gradio_message["metadata"]["title"] = "🛠️ Writing code..."
610
+ # input = "```python {code_block}```\n".format(code_block=item['input']["command"])
611
+ gradio_message["metadata"]["log"] = "Executing Code block..."
612
+ gradio_message["content"] = f"##### Code: \n ```python \n {item['input']['command']} \n``` \n"
613
+ else:
614
+ gradio_message["metadata"]["title"] = f"🛠️ Used tool ```{item['name']}```"
615
+ to_print = ";".join([i + ": " + str(j) for i, j in item["input"].items()])
616
+ gradio_message["metadata"]["log"] = f"🔍 Input -- {to_print}\n"
617
+ gradio_message["metadata"]["status"] = "pending"
618
+ gradio_messages.append(gradio_message)
619
+
620
+ else:
621
+ gradio_message = {
622
+ "role": "user" if message.type == "human" else "assistant",
623
+ "content": "",
624
+ "metadata": {},
625
+ }
626
+ print(message)
627
+ content = message.content
628
+ content = content.replace("<think>", "\n")
629
+ content = content.replace("</think>", "\n")
630
+ content = content.replace("<solution>", "\n")
631
+ content = content.replace("</solution>", "\n")
632
+
633
+ gradio_message["content"] = content
634
+ gradio_messages = [gradio_message]
635
+ return gradio_messages
636
+
637
+
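For reference, each returned dict follows Gradio's "messages" chatbot format; a run_python_repl tool-use item comes out roughly as:

# {"role": "assistant",
#  "content": "##### Code: \n ```python \n <command> \n``` \n",
#  "metadata": {"title": "🛠️ Writing code...",
#               "log": "Executing Code block...",
#               "status": "pending"}}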
638
+ def parse_hpo_obo(file_path):
639
+ """Parse the HPO OBO file and create a dictionary mapping HP IDs to phenotype descriptions.
640
+
641
+ Args:
642
+ file_path (str): Path to the HPO OBO file.
643
+
644
+ Returns:
645
+ dict: A dictionary where keys are HP IDs and values are phenotype descriptions.
646
+
647
+ """
648
+ hp_dict = {}
649
+ current_id = None
650
+ current_name = None
651
+
652
+ with open(file_path) as file:
653
+ for line in file:
654
+ line = line.strip()
655
+ if line.startswith("[Term]"):
656
+ # If a new term block starts, save the previous term
657
+ if current_id and current_name:
658
+ hp_dict[current_id] = current_name
659
+ current_id = None
660
+ current_name = None
661
+ elif line.startswith("id: HP:"):
662
+ current_id = line.split(": ")[1]
663
+ elif line.startswith("name:"):
664
+ current_name = line.split(": ", 1)[1]
665
+
666
+ # Add the last term to the dictionary
667
+ if current_id and current_name:
668
+ hp_dict[current_id] = current_name
669
+
670
+ return hp_dict
671
+
672
+
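A minimal OBO stanza of the kind this parser consumes, with the resulting mapping:

# [Term]
# id: HP:0001250
# name: Seizure
#
# parse_hpo_obo("hp.obo")  ->  {"HP:0001250": "Seizure", ...}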
673
+ def textify_api_dict(api_dict):
674
+ """Convert a nested API dictionary to a nicely formatted string."""
675
+ lines = []
676
+ for category, methods in api_dict.items():
677
+ lines.append(f"Import file: {category}")
678
+ lines.append("=" * (len("Import file: ") + len(category)))
679
+ for method in methods:
680
+ lines.append(f"Method: {method.get('name', 'N/A')}")
681
+ lines.append(f" Description: {method.get('description', 'No description provided.')}")
682
+
683
+ # Process required parameters
684
+ req_params = method.get("required_parameters", [])
685
+ if req_params:
686
+ lines.append(" Required Parameters:")
687
+ for param in req_params:
688
+ param_name = param.get("name", "N/A")
689
+ param_type = param.get("type", "N/A")
690
+ param_desc = param.get("description", "No description")
691
+ param_default = param.get("default", "None")
692
+ lines.append(f" - {param_name} ({param_type}): {param_desc} [Default: {param_default}]")
693
+
694
+ # Process optional parameters
695
+ opt_params = method.get("optional_parameters", [])
696
+ if opt_params:
697
+ lines.append(" Optional Parameters:")
698
+ for param in opt_params:
699
+ param_name = param.get("name", "N/A")
700
+ param_type = param.get("type", "N/A")
701
+ param_desc = param.get("description", "No description")
702
+ param_default = param.get("default", "None")
703
+ lines.append(f" - {param_name} ({param_type}): {param_desc} [Default: {param_default}]")
704
+
705
+ lines.append("") # Empty line between methods
706
+ lines.append("") # Extra empty line after each category
707
+
708
+ return "\n".join(lines)
709
+
710
+
711
+ def read_module2api():
712
+ fields = [
713
+ "support_tools",
714
+ "pathology"
715
+ ]
716
+
717
+ module2api = {}
718
+ for field in fields:
719
+ module_name = f"histopath.tool.tool_description.{field}"
720
+ module = importlib.import_module(module_name)
721
+ module2api[f"histopath.tool.{field}"] = module.description
722
+ return module2api
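Together with textify_api_dict above, this yields the tool catalog the agent is prompted with; a quick check:

module2api = read_module2api()
print(sorted(module2api))  # ['histopath.tool.pathology', 'histopath.tool.support_tools']
print(textify_api_dict(module2api)[:500])  # human-readable catalog of tool descriptions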
histopath_env/environment.yml ADDED
@@ -0,0 +1,33 @@
1
+ name: histopath
2
+ channels:
3
+ - conda-forge
4
+ - defaults
5
+ dependencies:
6
+ - python=3.11
7
+ - pip
8
+ - pip:
9
+ # Core dependencies
10
+ - requests
11
+ - pandas
12
+ - numpy
13
+ - pydantic>=2.0
14
+
15
+ # LangChain ecosystem
16
+ - langchain
17
+ - langchain-core
19
+ - langchain-openai
20
+ - langchain-anthropic
21
+ - langchain-ollama
22
+ - langchain-huggingface
23
+ - langgraph
24
+ # OpenAI + Anthropic API SDKs
25
+ - openai
26
+ - anthropic
27
+ - python-dotenv
28
+ # LazySlide (histopathology toolkit)
29
+ - lazyslide
30
+ # Packages for PRISM
31
+ - python-environ
32
+ - protobuf
33
+ - sacremoses
histopath_env/histo_env.yml ADDED
@@ -0,0 +1,32 @@
1
+ name: histopath
2
+ channels:
3
+ - conda-forge
4
+ - defaults
5
+ dependencies:
6
+ - python=3.11
7
+ - pip
8
+ - pip:
9
+ # Core dependencies
10
+ - requests
11
+ - pandas
12
+ - numpy
13
+ - pydantic>=2.0
14
+
15
+ # LangChain ecosystem
16
+ - langchain
17
+ - langchain_core
18
+ - langchain-core
19
+ - langchain-openai
20
+ - langchain-anthropic
21
+ - langchain-ollama
22
+ - langchain-huggingface
23
+ - langgraph
24
+ # OpenAI + Anthropic API SDKs
25
+ - openai
26
+ - anthropic
27
+ - dotenv
28
+ # LazySlide (histopathology toolkit)
29
+ - lazyslide
30
+ - histolab
31
+ - biolearn
32
+ - pyaging
histopath_env/setup.sh ADDED
@@ -0,0 +1,108 @@
1
+ #!/bin/bash
2
+
3
+ # This script sets up a comprehensive histopathology environment
4
+
5
+ # Set up colors for output
6
+ GREEN='\033[0;32m'
7
+ RED='\033[0;31m'
8
+ YELLOW='\033[1;33m'
9
+ BLUE='\033[0;34m'
10
+ NC='\033[0m' # No Color
11
+
12
+ # Default tools directory is the current directory
13
+ DEFAULT_TOOLS_DIR="$(pwd)/histopath_tools"
14
+ TOOLS_DIR=""
15
+
16
+ echo -e "${YELLOW}=== HistoPath Environment Setup ===${NC}"
17
+ echo -e "${BLUE}This script will set up a comprehensive histopathology environment with various tools and packages.${NC}"
18
+
19
+ # Check if conda is installed
20
+ if ! command -v conda &> /dev/null && ! command -v micromamba &> /dev/null; then
21
+ echo -e "${RED}Error: Conda is not installed or not in PATH.${NC}"
22
+ echo "Please install Miniconda or Anaconda first."
23
+ echo "Visit: https://docs.conda.io/en/latest/miniconda.html"
24
+ exit 1
25
+ fi
26
+
27
+ # Function to handle errors
28
+ handle_error() {
29
+ local exit_code=$1
30
+ local error_message=$2
31
+ local optional=${3:-false}
32
+
33
+ if [ $exit_code -ne 0 ]; then
34
+ echo -e "${RED}Error: $error_message${NC}"
35
+ if [ "$optional" = true ]; then
36
+ echo -e "${YELLOW}Continuing with setup as this component is optional.${NC}"
37
+ return 0
38
+ else
39
+ if [ -z "$NON_INTERACTIVE" ]; then
40
+ read -p "Continue with setup? (y/n) " -n 1 -r
41
+ echo
42
+ if [[ ! $REPLY =~ ^[Yy]$ ]]; then
43
+ echo -e "${RED}Setup aborted.${NC}"
44
+ exit 1
45
+ fi
46
+ else
47
+ echo -e "${YELLOW}Non-interactive mode: continuing despite error.${NC}"
48
+ fi
49
+ fi
50
+ fi
51
+ return $exit_code
52
+ }
53
+
54
+ # Function to install a specific environment file
55
+ install_env_file() {
56
+ local env_file=$1
57
+ local description=$2
58
+ local optional=${3:-false}
59
+
60
+ echo -e "\n${BLUE}=== Installing $description ===${NC}"
61
+
62
+ if [ "$optional" = true ]; then
63
+ if [ -z "$NON_INTERACTIVE" ]; then
64
+ read -p "Do you want to install $description? (y/n) " -n 1 -r
65
+ echo
66
+ if [[ ! $REPLY =~ ^[Yy]$ ]]; then
67
+ echo -e "${YELLOW}Skipping $description installation.${NC}"
68
+ return 0
69
+ fi
70
+ else
71
+ echo -e "${YELLOW}Non-interactive mode: automatically installing $description.${NC}"
72
+ fi
73
+ fi
74
+
75
+ echo -e "${YELLOW}Installing $description from $env_file...${NC}"
76
+ conda env update -f "$env_file"
77
+ local status=$?
+ handle_error $status "Failed to install $description." $optional
78
+
79
+ if [ $status -eq 0 ]; then
80
+ echo -e "${GREEN}Successfully installed $description!${NC}"
81
+ fi
82
+ }
83
+
84
+ # Main installation process
85
+ main() {
86
+ # Step 1: Create base conda environment
87
+ echo -e "\n${YELLOW}Step 1: Creating base environment from environment.yml...${NC}"
88
+ conda env create -n histopath -f environment.yml
89
+ handle_error $? "Failed to create base conda environment."
90
+
91
+ # Step 2: Activate the environment
92
+ echo -e "\n${YELLOW}Step 2: Activating conda environment...${NC}"
93
+ if command -v micromamba &> /dev/null; then
94
+ eval "$("$MAMBA_EXE" shell hook --shell bash)"
95
+ micromamba activate histopath
96
+ else
97
+ eval "$(conda shell.bash hook)"
98
+ conda activate histopath
99
+ fi
100
+ handle_error $? "Failed to activate histopath environment."
101
+
102
+ # Step 3: Install core histopathology tools
103
+ echo -e "\n${YELLOW}Step 3: Installing core histopathology tools...${NC}"
104
+ install_env_file "histo_env.yml" "core bioinformatics tools"
105
+ }
106
+
107
+ # Run the main installation process
108
+ main
requirements.txt ADDED
@@ -0,0 +1,45 @@
1
+ # Core dependencies
2
+ requests
3
+ pandas
4
+ numpy
5
+ pydantic
6
+ gradio
7
+
8
+ # LangChain ecosystem
9
+ langchain
10
+ langchain-core
11
+ langchain-openai
12
+ langchain-anthropic
13
+ langchain-ollama
14
+ langchain-huggingface
15
+ langgraph
16
+
17
+ # LLM API SDKs
18
+ openai
19
+ anthropic
20
+ python-dotenv
21
+
22
+ # Histopathology packages
23
+ lazyslide
24
+ histolab
25
+ biolearn
26
+ pyaging
27
+
28
+ # ML/Vision dependencies (for LazySlide and models)
29
+ torch
30
+ torchvision
31
+ timm
32
+ transformers
33
+ huggingface-hub
34
+ Pillow
35
+ scikit-image
36
+ opencv-python-headless
37
+ matplotlib
38
+
39
+ # Additional PRISM dependencies
40
+ python-environ
41
+ protobuf
42
+ sacremoses
43
+
44
+ # For whole slide image support (also requires the system-level OpenSlide C library)
45
+ openslide-python