ryanDing26 committed
Commit f2a52eb
Parent(s): 127ace6
App release
Browse files

- LICENSE +21 -0
- README.md +33 -7
- app.py +602 -0
- histopath/__init__.py +0 -0
- histopath/__pycache__/__init__.cpython-311.pyc +0 -0
- histopath/__pycache__/env_desc.cpython-311.pyc +0 -0
- histopath/__pycache__/llm.cpython-311.pyc +0 -0
- histopath/__pycache__/utils.cpython-311.pyc +0 -0
- histopath/agent/__init__.py +1 -0
- histopath/agent/__pycache__/__init__.cpython-311.pyc +0 -0
- histopath/agent/__pycache__/agent.cpython-311.pyc +0 -0
- histopath/agent/agent.py +705 -0
- histopath/config.py +91 -0
- histopath/llm.py +235 -0
- histopath/model/__init__.py +0 -0
- histopath/model/__pycache__/__init__.cpython-311.pyc +0 -0
- histopath/model/__pycache__/retriever.cpython-311.pyc +0 -0
- histopath/model/retriever.py +127 -0
- histopath/retriever_benchmark.py +101 -0
- histopath/tool/__init__.py +1 -0
- histopath/tool/__pycache__/__init__.cpython-311.pyc +0 -0
- histopath/tool/__pycache__/pathology.cpython-311.pyc +0 -0
- histopath/tool/__pycache__/support_tools.cpython-311.pyc +0 -0
- histopath/tool/__pycache__/tool_registry.cpython-311.pyc +0 -0
- histopath/tool/pathology.py +458 -0
- histopath/tool/support_tools.py +66 -0
- histopath/tool/tool_description/__pycache__/pathology.cpython-311.pyc +0 -0
- histopath/tool/tool_description/__pycache__/support_tools.cpython-311.pyc +0 -0
- histopath/tool/tool_description/pathology.py +156 -0
- histopath/tool/tool_description/support_tools.py +30 -0
- histopath/tool/tool_registry.py +84 -0
- histopath/utils.py +722 -0
- histopath_env/environment.yml +33 -0
- histopath_env/histo_env.yml +32 -0
- histopath_env/setup.sh +108 -0
- requirements.txt +45 -0
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Ryan Ding

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,14 +1,40 @@
 ---
-title: HistoPath
-emoji:
-colorFrom:
-colorTo:
+title: HistoPath Agent
+emoji: 🔬
+colorFrom: purple
+colorTo: blue
 sdk: gradio
-sdk_version:
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: mit
-
+models:
+- paige-ai/Virchow2
+- paige-ai/Prism
 ---
 
-
+# 🔬 HistoPath Agent
+
+AI-Powered Histopathology Analysis Assistant for whole slide image analysis, segmentation, and captioning.
+
+## Features
+- 📸 Whole Slide Image (WSI) Captioning
+- 🔍 Cell and Tissue Segmentation
+- 🏷️ Zero-Shot Classification
+- 📊 Quantitative Analysis (TILs, Fibrosis)
+
+## Usage
+1. Enter the passcode to access the application
+2. Upload your histopathology image (.svs, .png, .jpg, .tif)
+3. Enter your analysis request
+4. View results in the Images and Data tabs
+
+## Environment Variables
+Set these in your Hugging Face Spaces secrets:
+- `GRADIO_PASSWORD`: Access passcode
+- `HUGGINGFACE_ACCESS_TOKEN`: For accessing gated models
+- `ANTHROPIC_API_KEY`: For Claude LLM (optional)
+- `OPENAI_API_KEY`: For OpenAI models (optional)
+
+## Credits
+Built with LazySlide, LangChain, and Gradio
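For local runs outside Spaces, the same variables can live in a `.env` file, which app.py (added below) loads via python-dotenv. A minimal sanity-check sketch, not part of the commit, using only the variable names from the README:

```python
# Minimal local check (a sketch, not part of the commit): app.py calls
# load_dotenv(), so a local .env file with these names works like Spaces secrets.
import os

from dotenv import load_dotenv

load_dotenv()  # reads GRADIO_PASSWORD etc. from a local .env file

for name in ("GRADIO_PASSWORD", "HUGGINGFACE_ACCESS_TOKEN",
             "ANTHROPIC_API_KEY", "OPENAI_API_KEY"):
    # The last two are optional per the README, so "not set" is only a warning.
    print(f"{name}: {'set' if os.getenv(name) else 'not set'}")
```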
app.py
ADDED
@@ -0,0 +1,602 @@
import os
import re
import shutil
import traceback
import gradio as gr
from pathlib import Path
from histopath.agent import A1
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Get passcode from environment
PASSCODE = os.getenv("GRADIO_PASSWORD")

# Initialize agent (will be created after passcode validation)
agent = None


def check_for_output_files():
    """Check for all files in the output directory and return their paths."""
    output_dir = Path("./output")
    if not output_dir.exists():
        return [], []

    image_extensions = {".png", ".jpg", ".jpeg", ".svg", ".tif", ".tiff"}
    data_extensions = {".csv", ".txt", ".json", ".npy"}

    images = []
    data_files = []

    for file in output_dir.iterdir():
        if file.is_file():
            if file.suffix.lower() in image_extensions:
                images.append(str(file))
            elif file.suffix.lower() in data_extensions:
                data_files.append(str(file))

    return images, data_files


def preview_uploaded_file(uploaded_file):
    """Preview the uploaded file - show image or file info."""
    if uploaded_file is None:
        return None, None, "No file uploaded"

    file_path = Path(uploaded_file.name)
    file_ext = file_path.suffix.lower()

    image_extensions = {".png", ".jpg", ".jpeg", ".svg", ".tif", ".tiff", ".svs"}

    if file_ext in image_extensions:
        # Show image preview
        return uploaded_file.name, None, f"📷 Previewing: {file_path.name}"
    else:
        # Show file info
        file_size = Path(uploaded_file.name).stat().st_size / 1024  # KB
        return None, uploaded_file.name, f"📄 File: {file_path.name} ({file_size:.1f} KB)"


def parse_agent_output(output):
    """Parse agent output to extract code blocks, observations, and regular text."""
    # Strip out the message divider bars
    output = re.sub(r'={30,}\s*(Human|Ai)\s+Message\s*={30,}', '', output)
    output = output.strip()

    parsed = {
        "type": "text",
        "content": output,
        "code": None,
        "observation": None,
        "thinking": None
    }

    # Check for code execution block
    execute_match = re.search(r'<execute>(.*?)</execute>', output, re.DOTALL)
    if execute_match:
        parsed["type"] = "code"
        parsed["code"] = execute_match.group(1).strip()
        # Extract text before the code block (thinking/explanation)
        text_before = output[:execute_match.start()].strip()
        # Remove any think tags but keep the content
        text_before = re.sub(r'<think>(.*?)</think>', r'\1', text_before, flags=re.DOTALL)
        text_before = re.sub(r'={30,}.*?={30,}', '', text_before).strip()
        parsed["thinking"] = text_before if text_before else None
        return parsed

    # Check for observation block
    observation_match = re.search(r'<observation>(.*?)</observation>', output, re.DOTALL)
    if observation_match:
        parsed["type"] = "observation"
        parsed["observation"] = observation_match.group(1).strip()
        # Extract text before observation if any
        text_before = output[:observation_match.start()].strip()
        text_before = re.sub(r'<think>(.*?)</think>', r'\1', text_before, flags=re.DOTALL)
        text_before = re.sub(r'={30,}.*?={30,}', '', text_before).strip()
        parsed["thinking"] = text_before if text_before else None
        return parsed

    # Check for solution block
    solution_match = re.search(r'<solution>(.*?)</solution>', output, re.DOTALL)
    if solution_match:
        parsed["type"] = "solution"
        parsed["content"] = solution_match.group(1).strip()
        # Get thinking before solution
        text_before = output[:solution_match.start()].strip()
        text_before = re.sub(r'<think>(.*?)</think>', r'\1', text_before, flags=re.DOTALL)
        text_before = re.sub(r'={30,}.*?={30,}', '', text_before).strip()
        parsed["thinking"] = text_before if text_before else None
        return parsed

    # Clean up any remaining tags for display
    cleaned = re.sub(r'<think>(.*?)</think>', r'\1', output, flags=re.DOTALL)
    cleaned = re.sub(r'={30,}.*?={30,}', '', cleaned).strip()
    parsed["content"] = cleaned

    return parsed


def format_message_for_display(parsed_output):
    """Format parsed output into a readable message for the chatbot."""
    msg_parts = []

    # Add thinking/explanation text first if present
    if parsed_output.get("thinking"):
        msg_parts.append(parsed_output["thinking"])

    if parsed_output["type"] == "code":
        # Add separator if there was thinking text
        if parsed_output.get("thinking"):
            msg_parts.append("\n---\n")

        msg_parts.append("### 💻 Executing Code\n")
        msg_parts.append(f"```python\n{parsed_output['code']}\n```")

    elif parsed_output["type"] == "observation":
        # Add separator if there was thinking text
        if parsed_output.get("thinking"):
            msg_parts.append("\n---\n")

        msg_parts.append("### 📊 Observation\n")
        msg_parts.append(f"```\n{parsed_output['observation']}\n```")

    elif parsed_output["type"] == "solution":
        # Add separator if there was thinking text
        if parsed_output.get("thinking"):
            msg_parts.append("\n---\n")

        msg_parts.append("### ✅ Solution\n")
        msg_parts.append(parsed_output['content'])

    else:
        # For regular text, just add the content if thinking wasn't already set
        if not parsed_output.get("thinking"):
            msg_parts.append(parsed_output["content"])

    return "\n\n".join(msg_parts)


def process_agent_response(prompt, uploaded_file, chatbot_history):
    """Process the agent response and update chatbot."""
    global agent

    if agent is None:
        chatbot_history.append({
            "role": "assistant",
            "content": "⚠️ Please enter the passcode first to initialize the agent."
        })
        yield chatbot_history, None, None, None, None, "⚠️ Agent not initialized"
        return

    if not prompt.strip() and uploaded_file is None:
        chatbot_history.append({
            "role": "assistant",
            "content": "⚠️ Please provide a prompt or upload a file."
        })
        yield chatbot_history, None, None, None, None, "⚠️ No input provided"
        return

    # Handle file upload
    file_path = None
    file_info = ""
    if uploaded_file is not None:
        try:
            # Create data directory if it doesn't exist
            data_dir = Path("./data")
            data_dir.mkdir(exist_ok=True)

            # Copy uploaded file to data directory
            file_name = Path(uploaded_file.name).name
            file_path = data_dir / file_name
            shutil.copy(uploaded_file.name, file_path)

            file_info = f"\n\n📎 **Uploaded file:** `{file_path}`\n"

            # Augment prompt with file path
            if prompt.strip():
                prompt = f"{prompt}\n\nUploaded file path: {file_path}"
            else:
                prompt = f"I have uploaded a file at: {file_path}. Please analyze it."

        except Exception as e:
            error_msg = f"❌ Error handling file upload: {str(e)}"
            chatbot_history.append({
                "role": "assistant",
                "content": error_msg
            })
            yield chatbot_history, None, None, None, None, error_msg
            return

    # Add user message to chat
    user_message = prompt if not file_info else f"{prompt}{file_info}"
    chatbot_history.append({"role": "user", "content": user_message})
    yield chatbot_history, None, None, None, None, "🔄 Processing..."

    try:
        # Stream agent responses
        step_count = 0
        for step in agent.go_stream(prompt):
            step_count += 1
            output = step.get("output", "")

            if output:
                # Parse the output
                parsed = parse_agent_output(output)

                # Add thinking text as separate message if present
                if parsed.get("thinking"):
                    chatbot_history.append({
                        "role": "assistant",
                        "content": parsed["thinking"]
                    })

                # Add the block (code/observation/solution) as separate message if present
                if parsed["type"] == "code" and parsed["code"]:
                    chatbot_history.append({
                        "role": "assistant",
                        "content": f"### 💻 Executing Code\n\n```python\n{parsed['code']}\n```"
                    })
                elif parsed["type"] == "observation" and parsed["observation"]:
                    chatbot_history.append({
                        "role": "assistant",
                        "content": f"### 📊 Observation\n\n```\n{parsed['observation']}\n```"
                    })
                elif parsed["type"] == "solution":
                    chatbot_history.append({
                        "role": "assistant",
                        "content": f"### ✅ Solution\n\n{parsed['content']}"
                    })
                elif parsed["type"] == "text" and parsed["content"]:
                    # Only add if we haven't already added it as thinking
                    if not parsed.get("thinking"):
                        chatbot_history.append({
                            "role": "assistant",
                            "content": parsed["content"]
                        })

                # Check for output files after each step
                images, data_files = check_for_output_files()

                # Create status message
                status = f"🔄 Step {step_count}"
                if parsed["type"] == "code":
                    status += " - Executing code..."
                elif parsed["type"] == "observation":
                    status += " - Processing results..."
                elif parsed["type"] == "solution":
                    status += " - Finalizing solution..."

                yield (
                    chatbot_history,
                    images if images else None,
                    data_files if data_files else None,
                    None,
                    None,
                    status
                )

        # Final check for files
        final_images, final_data = check_for_output_files()

        # Create download links message if files were generated
        if final_images or final_data:
            download_msg = "\n\n---\n\n### 📁 Generated Files Ready for Download\n\n"

            if final_images:
                download_msg += f"**🖼️ Images ({len(final_images)})** - Available in the **Images** tab →\n"
                for img_path in final_images:
                    img_name = Path(img_path).name
                    download_msg += f"- `{img_name}`\n"
                download_msg += "\n"

            if final_data:
                download_msg += f"**📄 Data Files ({len(final_data)})** - Available in the **Data** tab →\n"
                for data_path in final_data:
                    data_name = Path(data_path).name
                    download_msg += f"- `{data_name}`\n"

            download_msg += "\n*Click the download button on each file in the respective tabs above.*"

            # Add download message as separate bubble
            chatbot_history.append({
                "role": "assistant",
                "content": download_msg
            })

        status = "✅ Complete"
        if final_images:
            status += f" | {len(final_images)} image(s)"
        if final_data:
            status += f" | {len(final_data)} data file(s)"

        yield chatbot_history, final_images if final_images else None, final_data if final_data else None, None, None, status

    except Exception as e:
        error_msg = f"❌ Error: {str(e)}\n\n```\n{traceback.format_exc()}\n```"
        chatbot_history.append({
            "role": "assistant",
            "content": error_msg
        })
        yield chatbot_history, None, None, None, None, "❌ Error occurred"


def validate_passcode(passcode):
    """Validate the passcode and initialize the agent."""
    global agent

    if passcode == PASSCODE:
        # Initialize agent
        try:
            agent = A1()
            return (
                gr.update(visible=False),  # Hide passcode section
                gr.update(visible=True),   # Show main interface
                "✅ Access granted! Agent initialized and ready."
            )
        except Exception as e:
            error_trace = traceback.format_exc()
            return (
                gr.update(visible=True),
                gr.update(visible=False),
                f"❌ Error initializing agent:\n{str(e)}\n\n{error_trace}"
            )
    else:
        return (
            gr.update(visible=True),
            gr.update(visible=False),
            "❌ Invalid passcode. Please try again."
        )


def clear_chat():
    """Clear the chat history and output files."""
    # Clean up output directory
    output_dir = Path("./output")
    if output_dir.exists():
        shutil.rmtree(output_dir)
    output_dir.mkdir(exist_ok=True)

    # Clean up data directory
    data_dir = Path("./data")
    if data_dir.exists():
        for file in data_dir.iterdir():
            if file.is_file():
                file.unlink()

    return [], None, None, None, None, "🗑️ Chat cleared"


# Create Gradio interface with custom theme
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    spacing_size="sm",
    radius_size="md",
).set(
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    block_label_text_weight="600",
    block_title_text_weight="600",
)

with gr.Blocks(title="HistoPath Agent", theme=custom_theme, css="""
.gradio-container {
    max-width: 100% !important;
}
.main-header {
    text-align: center;
    padding: 1.5rem 0;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    border-radius: 8px;
    margin-bottom: 1.5rem;
}
.main-header h1 {
    margin: 0;
    font-size: 2.2rem;
    font-weight: 700;
}
.main-header p {
    margin: 0.5rem 0 0 0;
    opacity: 0.95;
    font-size: 1.1rem;
}
.file-upload-box .wrap {
    min-width: 0 !important;
}
.file-upload-box .file-name {
    word-break: break-word !important;
    white-space: normal !important;
    overflow-wrap: break-word !important;
}
.tab-nav {
    margin-bottom: 0.5rem;
}
/* Better styling for code and observation blocks */
.message.bot pre {
    background-color: #f6f8fa !important;
    border: 1px solid #d0d7de !important;
    border-radius: 6px !important;
    padding: 12px !important;
    margin: 8px 0 !important;
}
.message.bot h3 {
    margin-top: 12px !important;
    margin-bottom: 8px !important;
    font-weight: 600 !important;
}
.message.bot hr {
    border: none !important;
    border-top: 2px solid #e1e4e8 !important;
    margin: 16px 0 !important;
}
""") as demo:

    # Header
    gr.HTML("""
    <div class="main-header">
        <h1>🔬 HistoPath Agent</h1>
        <p>AI-Powered Histopathology Analysis Assistant</p>
    </div>
    """)

    # Passcode section
    with gr.Group(visible=True) as passcode_section:
        gr.Markdown("### 🔐 Authentication Required")

        with gr.Row():
            passcode_input = gr.Textbox(
                label="Passcode",
                type="password",
                placeholder="Enter your passcode...",
                scale=3
            )
            passcode_btn = gr.Button("🔓 Unlock", variant="primary", scale=1, size="lg")

        passcode_status = gr.Textbox(
            label="Status",
            interactive=False,
            lines=2
        )

    # Main interface (hidden initially)
    with gr.Group(visible=False) as main_interface:
        with gr.Row(equal_height=True):
            # Left column - Chat interface
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="💬 Conversation",
                    height=550,
                    type="messages",
                    show_label=True,
                    avatar_images=(None, "🤖"),
                    render_markdown=True,
                )

                # Input area
                with gr.Row():
                    with gr.Column(scale=7):
                        prompt_input = gr.Textbox(
                            label="Your Query",
                            placeholder="E.g., 'Caption the uploaded whole slide image' or 'Segment cells using instanseg model'",
                            lines=2,
                            max_lines=5,
                            show_label=False,
                        )
                    with gr.Column(scale=3):
                        file_upload = gr.File(
                            label="📎 Upload File",
                            file_types=[".svs", ".png", ".jpg", ".jpeg", ".tif", ".tiff", ".csv", ".txt", ".json", ".npy"],
                            height=75,
                            elem_classes="file-upload-box",
                        )

                with gr.Row():
                    submit_btn = gr.Button("🚀 Submit", variant="primary", scale=3, size="lg")
                    clear_btn = gr.Button("🗑️ Clear", scale=1, size="lg", variant="secondary")

                status_text = gr.Textbox(
                    label="Status",
                    interactive=False,
                    value="Ready",
                    show_label=False,
                    container=False,
                )

            # Right column - Outputs
            with gr.Column(scale=2):
                with gr.Tabs():
                    with gr.Tab("📥 Input"):
                        with gr.Column():
                            input_image_preview = gr.Image(
                                label="Input Image",
                                height=400,
                                show_label=False,
                                container=True,
                            )
                            input_file_preview = gr.File(
                                label="Input File",
                                interactive=False,
                                height=100,
                                show_label=False,
                                container=True,
                            )
                            input_status = gr.Textbox(
                                value="Upload a file to preview",
                                show_label=False,
                                interactive=False,
                                container=False,
                            )

                    with gr.Tab("🖼️ Images"):
                        output_gallery = gr.Gallery(
                            label="Generated Visualizations",
                            columns=1,
                            height=600,
                            object_fit="contain",
                            show_label=False,
                            show_download_button=True,
                        )

                    with gr.Tab("📄 Data"):
                        data_files = gr.File(
                            label="Generated Data Files",
                            file_count="multiple",
                            interactive=False,
                            height=600,
                            show_label=False,
                        )

    # Event handlers
    passcode_btn.click(
        fn=validate_passcode,
        inputs=[passcode_input],
        outputs=[passcode_section, main_interface, passcode_status]
    )

    # File upload preview
    file_upload.change(
        fn=preview_uploaded_file,
        inputs=[file_upload],
        outputs=[input_image_preview, input_file_preview, input_status]
    )

    submit_btn.click(
        fn=process_agent_response,
        inputs=[prompt_input, file_upload, chatbot],
        outputs=[chatbot, output_gallery, data_files, input_image_preview, input_file_preview, status_text]
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, output_gallery, data_files, input_image_preview, input_file_preview, status_text]
    )

    # Allow enter key to submit
    prompt_input.submit(
        fn=process_agent_response,
        inputs=[prompt_input, file_upload, chatbot],
        outputs=[chatbot, output_gallery, data_files, input_image_preview, input_file_preview, status_text]
    )


if __name__ == "__main__":
    # Create necessary directories
    Path("./data").mkdir(exist_ok=True)
    Path("./output").mkdir(exist_ok=True)

    print("=" * 60)
    print("🔬 HistoPath Agent - Gradio Interface")
    print("=" * 60)
    print(f"Passcode: {PASSCODE}")
    print("Starting server...")
    print("=" * 60)

    # Launch the app
    demo.launch(
        server_name="0.0.0.0",
        server_port=None,  # Let Gradio auto-pick an available port
        share=False,
        show_error=True,
    )
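As a quick illustration of the parsing logic in app.py above, here is a minimal sketch (not part of the commit) of what `parse_agent_output` returns for a typical `<execute>` turn; the sample message is hypothetical:

```python
# Hypothetical agent turn, shaped like the <execute> messages app.py receives.
sample = (
    "Let me count the nuclei first.\n"
    "<execute>\nprint('counting nuclei')\n</execute>"
)

parsed = parse_agent_output(sample)  # function defined in app.py above
assert parsed["type"] == "code"
assert parsed["code"] == "print('counting nuclei')"
assert parsed["thinking"] == "Let me count the nuclei first."
```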
histopath/__init__.py
ADDED
File without changes

histopath/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (171 Bytes)

histopath/__pycache__/env_desc.cpython-311.pyc
ADDED
Binary file (858 Bytes)

histopath/__pycache__/llm.cpython-311.pyc
ADDED
Binary file (8.72 kB)

histopath/__pycache__/utils.cpython-311.pyc
ADDED
Binary file (34.5 kB)

histopath/agent/__init__.py
ADDED
@@ -0,0 +1 @@
from histopath.agent.agent import A1

histopath/agent/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (246 Bytes)

histopath/agent/__pycache__/agent.cpython-311.pyc
ADDED
Binary file (30.3 kB)
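Since `A1` is the package's public entry point, a minimal standalone sketch of driving it may help; it is based on the `A1` constructor in agent.py below and the `go_stream()` loop in app.py above, and the prompt and file path are hypothetical:

```python
# Hypothetical standalone use of the agent, mirroring how app.py drives it.
from histopath.agent import A1

agent = A1(path="./data", use_tool_retriever=True, timeout_seconds=600)

# go_stream() is consumed in app.py as an iterable of dicts with an "output" key.
for step in agent.go_stream("Segment the cells in ./data/sample.svs"):
    print(step.get("output", ""))
```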
histopath/agent/agent.py
ADDED
@@ -0,0 +1,705 @@
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import glob
|
| 4 |
+
import inspect
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
from collections.abc import Generator
|
| 9 |
+
from typing import Any, Literal, TypedDict
|
| 10 |
+
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
|
| 11 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 12 |
+
from langgraph.checkpoint.memory import MemorySaver
|
| 13 |
+
from langgraph.graph import END, START, StateGraph
|
| 14 |
+
from histopath.env_desc import library_content_dict
|
| 15 |
+
from histopath.llm import SourceType, get_llm
|
| 16 |
+
from histopath.model.retriever import ToolRetriever
|
| 17 |
+
from histopath.tool.support_tools import run_python_repl
|
| 18 |
+
from histopath.tool.tool_registry import ToolRegistry
|
| 19 |
+
from histopath.utils import (
|
| 20 |
+
pretty_print,
|
| 21 |
+
read_module2api,
|
| 22 |
+
run_bash_script,
|
| 23 |
+
run_with_timeout,
|
| 24 |
+
textify_api_dict,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
if os.path.exists(".env"):
|
| 28 |
+
load_dotenv(".env", override=False)
|
| 29 |
+
print("Loaded environment variables from .env")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class AgentState(TypedDict):
|
| 33 |
+
messages: list[BaseMessage]
|
| 34 |
+
next_step: str | None
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class A1:
|
| 38 |
+
def __init__(
|
| 39 |
+
self,
|
| 40 |
+
path="./data",
|
| 41 |
+
llm="claude-sonnet-4-20250514",
|
| 42 |
+
source: SourceType | None = None,
|
| 43 |
+
use_tool_retriever=True,
|
| 44 |
+
timeout_seconds=600,
|
| 45 |
+
base_url: str | None = None,
|
| 46 |
+
api_key: str = "EMPTY",
|
| 47 |
+
):
|
| 48 |
+
"""Initialize the HistoPath agent.
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
path: Path to the data
|
| 52 |
+
llm: LLM to use for the agent
|
| 53 |
+
source (str): Source provider: "OpenAI", "AzureOpenAI", "Anthropic", "Ollama", "Gemini", "Bedrock", "HuggingFace", or "Custom"
|
| 54 |
+
use_tool_retriever: If True, use a tool retriever
|
| 55 |
+
timeout_seconds: Timeout for code execution in seconds
|
| 56 |
+
base_url: Base URL for custom model serving (e.g., "http://localhost:8000/v1")
|
| 57 |
+
api_key: API key for the custom LLM
|
| 58 |
+
|
| 59 |
+
"""
|
| 60 |
+
self.path = path
|
| 61 |
+
|
| 62 |
+
if not os.path.exists(path):
|
| 63 |
+
os.makedirs(path)
|
| 64 |
+
print(f"Created directory: {path}")
|
| 65 |
+
|
| 66 |
+
self.path = os.path.join(path, "histopath_data")
|
| 67 |
+
module2api = read_module2api()
|
| 68 |
+
|
| 69 |
+
self.llm = get_llm(
|
| 70 |
+
llm, stop_sequences=["</execute>", "</solution>"], source=source, base_url=base_url, api_key=api_key
|
| 71 |
+
)
|
| 72 |
+
self.module2api = module2api
|
| 73 |
+
self.use_tool_retriever = use_tool_retriever
|
| 74 |
+
|
| 75 |
+
if self.use_tool_retriever:
|
| 76 |
+
self.tool_registry = ToolRegistry(module2api)
|
| 77 |
+
self.retriever = ToolRetriever()
|
| 78 |
+
|
| 79 |
+
# Add timeout parameter
|
| 80 |
+
self.timeout_seconds = timeout_seconds # 10 minutes default timeout
|
| 81 |
+
self.configure()
|
| 82 |
+
|
| 83 |
+
###########################
|
| 84 |
+
# Agent Prompting Section #
|
| 85 |
+
###########################
|
| 86 |
+
|
| 87 |
+
def _generate_system_prompt(
|
| 88 |
+
self,
|
| 89 |
+
tool_desc,
|
| 90 |
+
library_content_list,
|
| 91 |
+
self_critic=False,
|
| 92 |
+
is_retrieval=False,
|
| 93 |
+
):
|
| 94 |
+
"""Generate the system prompt based on the provided resources.
|
| 95 |
+
|
| 96 |
+
Args:
|
| 97 |
+
tool_desc: Dictionary of tool descriptions
|
| 98 |
+
library_content_list: List of libraries
|
| 99 |
+
self_critic: Whether to include self-critic instructions
|
| 100 |
+
is_retrieval: Whether this is for retrieval (True) or initial configuration (False)
|
| 101 |
+
|
| 102 |
+
Returns:
|
| 103 |
+
The generated system prompt
|
| 104 |
+
|
| 105 |
+
"""
|
| 106 |
+
|
| 107 |
+
def format_item_with_description(name, description):
|
| 108 |
+
"""Format an item with its description in a readable way."""
|
| 109 |
+
# Handle None or empty descriptions
|
| 110 |
+
if not description:
|
| 111 |
+
description = f"Library or Tooling Item: {name}"
|
| 112 |
+
|
| 113 |
+
# Check if the item is already formatted (contains a colon)
|
| 114 |
+
if isinstance(name, str) and ": " in name:
|
| 115 |
+
return name
|
| 116 |
+
|
| 117 |
+
# Wrap long descriptions to make them more readable
|
| 118 |
+
max_line_length = 80
|
| 119 |
+
if len(description) > max_line_length:
|
| 120 |
+
# Simple wrapping for long descriptions
|
| 121 |
+
wrapped_desc = []
|
| 122 |
+
words = description.split()
|
| 123 |
+
current_line = ""
|
| 124 |
+
|
| 125 |
+
for word in words:
|
| 126 |
+
if len(current_line) + len(word) + 1 <= max_line_length:
|
| 127 |
+
if current_line:
|
| 128 |
+
current_line += " " + word
|
| 129 |
+
else:
|
| 130 |
+
current_line = word
|
| 131 |
+
else:
|
| 132 |
+
wrapped_desc.append(current_line)
|
| 133 |
+
current_line = word
|
| 134 |
+
|
| 135 |
+
if current_line:
|
| 136 |
+
wrapped_desc.append(current_line)
|
| 137 |
+
|
| 138 |
+
# Join with newlines and proper indentation
|
| 139 |
+
formatted_desc = f"{name}:\n " + "\n ".join(wrapped_desc)
|
| 140 |
+
return formatted_desc
|
| 141 |
+
else:
|
| 142 |
+
return f"{name}: {description}"
|
| 143 |
+
|
| 144 |
+
library_content_list = []
|
| 145 |
+
|
| 146 |
+
for lib in library_content_list:
|
| 147 |
+
if isinstance(lib, dict):
|
| 148 |
+
name = lib.get("name", "")
|
| 149 |
+
library_content_list.append(lib)
|
| 150 |
+
else:
|
| 151 |
+
library_content_list.append(lib)
|
| 152 |
+
|
| 153 |
+
# Format the default library content
|
| 154 |
+
if isinstance(library_content_list, list) and all(
|
| 155 |
+
isinstance(item, str) for item in library_content_list
|
| 156 |
+
):
|
| 157 |
+
if (
|
| 158 |
+
len(library_content_list) > 0
|
| 159 |
+
and isinstance(library_content_list[0], str)
|
| 160 |
+
and "," not in library_content_list[0]
|
| 161 |
+
):
|
| 162 |
+
# Simple list of strings
|
| 163 |
+
libraries_formatted = []
|
| 164 |
+
for lib in library_content_list:
|
| 165 |
+
description = self.library_content_dict.get(lib, f"Software library: {lib}")
|
| 166 |
+
libraries_formatted.append(format_item_with_description(lib, description))
|
| 167 |
+
else:
|
| 168 |
+
# Already formatted string
|
| 169 |
+
libraries_formatted = library_content_list
|
| 170 |
+
else:
|
| 171 |
+
# List with descriptions
|
| 172 |
+
libraries_formatted = []
|
| 173 |
+
for lib in library_content_list:
|
| 174 |
+
if isinstance(lib, dict):
|
| 175 |
+
name = lib.get("name", "")
|
| 176 |
+
description = self.library_content_dict.get(name, f"Software library: {name}")
|
| 177 |
+
libraries_formatted.append(format_item_with_description(name, description))
|
| 178 |
+
else:
|
| 179 |
+
description = self.library_content_dict.get(lib, f"Software library: {lib}")
|
| 180 |
+
libraries_formatted.append(format_item_with_description(lib, description))
|
| 181 |
+
|
| 182 |
+
# Base prompt
|
| 183 |
+
prompt_modifier = """
|
| 184 |
+
You are a helpful histopathology researcher assigned with the task of problem-solving.
|
| 185 |
+
To achieve this, you will be using an interactive coding environment equipped with a variety of tool functions and softwares to assist you throughout the process.
|
| 186 |
+
|
| 187 |
+
Given a task, make a plan first. The plan should be a numbered list of steps that you will take to solve the task. Be specific and detailed.
|
| 188 |
+
Format your plan as a checklist with empty checkboxes like this:
|
| 189 |
+
1. [ ] First step
|
| 190 |
+
2. [ ] Second step
|
| 191 |
+
3. [ ] Third step
|
| 192 |
+
|
| 193 |
+
Follow the plan step by step. After completing each step, update the checklist by replacing the empty checkbox with a checkmark:
|
| 194 |
+
1. [✓] First step (completed)
|
| 195 |
+
2. [ ] Second step
|
| 196 |
+
3. [ ] Third step
|
| 197 |
+
|
| 198 |
+
If a step fails or needs modification, mark it with an X and explain why:
|
| 199 |
+
1. [✓] First step (completed)
|
| 200 |
+
2. [✗] Second step (failed because...)
|
| 201 |
+
3. [ ] Modified second step
|
| 202 |
+
4. [ ] Third step
|
| 203 |
+
|
| 204 |
+
Always show the updated plan after each step so the user can track progress.
|
| 205 |
+
|
| 206 |
+
At each turn, you should first provide your thinking and reasoning given the conversation history.
|
| 207 |
+
After that, you have two options:
|
| 208 |
+
|
| 209 |
+
1) Interact with a programming environment and receive the corresponding output within <observe></observe>. Your code should be enclosed using "<execute>" tag, for example: <execute> print("Hello World!") </execute>. IMPORTANT: You must end the code block with </execute> tag.
|
| 210 |
+
- For Python code (default): <execute> print("Hello World!") </execute>
|
| 211 |
+
- For Bash scripts and commands: <execute> #!BASH\necho "Hello from Bash"\nls -la </execute>
|
| 212 |
+
- For CLI softwares, use Bash scripts.
|
| 213 |
+
|
| 214 |
+
2) When you think it is ready, directly provide a solution that adheres to the required format for the given task to the user. Your solution should be enclosed using "<solution>" tag, for example: The answer is <solution> A </solution>. IMPORTANT: You must end the solution block with </solution> tag.
|
| 215 |
+
|
| 216 |
+
You have many chances to interact with the environment to receive the observation. So you can decompose your code into multiple steps.
|
| 217 |
+
Don't overcomplicate the code. Keep it simple and easy to understand.
|
| 218 |
+
When writing the code, please print out the steps and results in a clear and concise manner, like a research log.
|
| 219 |
+
When calling the existing python functions in the function dictionary, YOU MUST SAVE THE OUTPUT and PRINT OUT the result.
|
| 220 |
+
For example, result = understand_scRNA(XXX) print(result)
|
| 221 |
+
Otherwise the system will not be able to know what has been done.
|
| 222 |
+
|
| 223 |
+
For Bash scripts and commands, use the #!BASH marker at the beginning of your code block. This allows for both simple commands and multi-line scripts with variables, loops, conditionals, loops, and other Bash features.
|
| 224 |
+
|
| 225 |
+
In each response, you must include EITHER <execute> or <solution> tag. Not both at the same time. Do not respond with messages without any tags. No empty messages.
|
| 226 |
+
|
| 227 |
+
If you feel that a task is not at all histopathology-related or related at all to any surrounding concepts within pathology, you should not execute your plan at all.
|
| 228 |
+
|
| 229 |
+
If you have no knowledge of a provided library that you feel is highly useful to a given task (such as the imperatively useful LazySlide package), please do a thorough exploration of the library's capabilities prior to experimentation
|
| 230 |
+
"""
|
| 231 |
+
|
| 232 |
+
# Add self-critic instructions if needed
|
| 233 |
+
if self_critic:
|
| 234 |
+
prompt_modifier += """
|
| 235 |
+
You may or may not receive feedbacks from human. If so, address the feedbacks by following the same procedure of multiple rounds of thinking, execution, and then coming up with a new solution.
|
| 236 |
+
"""
|
| 237 |
+
|
| 238 |
+
# Add environment resources
|
| 239 |
+
prompt_modifier += """
|
| 240 |
+
|
| 241 |
+
Environment Resources:
|
| 242 |
+
|
| 243 |
+
- Function Dictionary:
|
| 244 |
+
{function_intro}
|
| 245 |
+
---
|
| 246 |
+
{tool_desc}
|
| 247 |
+
---
|
| 248 |
+
|
| 249 |
+
{import_instruction}
|
| 250 |
+
|
| 251 |
+
- Software Library:
|
| 252 |
+
{library_intro}
|
| 253 |
+
Each library is listed with its description to help you understand its functionality.
|
| 254 |
+
----
|
| 255 |
+
{library_content_formatted}
|
| 256 |
+
----
|
| 257 |
+
|
| 258 |
+
- Note on using Bash scripts:
|
| 259 |
+
- Bash scripts and commands: Use the #!BASH marker in your execute block for both simple commands and complex shell scripts with variables, loops, conditionals, etc.
|
| 260 |
+
"""
|
| 261 |
+
|
| 262 |
+
# Set appropriate text based on whether this is initial configuration or after retrieval
|
| 263 |
+
if is_retrieval:
|
| 264 |
+
function_intro = "Based on your query, I've identified the following most relevant functions that you can use in your code:"
|
| 265 |
+
library_intro = (
|
| 266 |
+
"Based on your query, I've identified the following most relevant libraries that you can use:"
|
| 267 |
+
)
|
| 268 |
+
import_instruction = "IMPORTANT: When using any function, you MUST first import it from its module. For example:\nfrom [module_name] import [function_name]"
|
| 269 |
+
else:
|
| 270 |
+
function_intro = "In your code, you will need to import the function location using the following dictionary of functions:"
|
| 271 |
+
library_intro = "The environment supports a list of libraries that can be directly used. Do not forget the import statement:"
|
| 272 |
+
import_instruction = ""
|
| 273 |
+
|
| 274 |
+
# Format the content consistently for both initial and retrieval cases
|
| 275 |
+
library_content_formatted = "\n".join(libraries_formatted)
|
| 276 |
+
|
| 277 |
+
# Format the prompt with the appropriate values
|
| 278 |
+
format_dict = {
|
| 279 |
+
"function_intro": function_intro,
|
| 280 |
+
"tool_desc": textify_api_dict(tool_desc) if isinstance(tool_desc, dict) else tool_desc,
|
| 281 |
+
"import_instruction": import_instruction,
|
| 282 |
+
"library_intro": library_intro,
|
| 283 |
+
"library_content_formatted": library_content_formatted,
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
formatted_prompt = prompt_modifier.format(**format_dict)
|
| 287 |
+
|
| 288 |
+
return formatted_prompt
|
| 289 |
+
|
| 290 |
+
def configure(self, self_critic=False, test_time_scale_round=0):
|
| 291 |
+
"""Configure the agent with the initial system prompt and workflow.
|
| 292 |
+
|
| 293 |
+
Args:
|
| 294 |
+
self_critic: Whether to enable self-critic mode
|
| 295 |
+
test_time_scale_round: Number of rounds for test time scaling
|
| 296 |
+
|
| 297 |
+
"""
|
| 298 |
+
# Store self_critic for later use
|
| 299 |
+
self.self_critic = self_critic
|
| 300 |
+
|
| 301 |
+
# Store library_content_dict directly without library_content
|
| 302 |
+
self.library_content_dict = library_content_dict
|
| 303 |
+
|
| 304 |
+
# Prepare tool descriptions
|
| 305 |
+
tool_desc = {i: [x for x in j if x["name"] != "run_python_repl"] for i, j in self.module2api.items()}
|
| 306 |
+
|
| 307 |
+
# Prepare library content list
|
| 308 |
+
library_content_list = list(self.library_content_dict.keys())
|
| 309 |
+
|
| 310 |
+
self.system_prompt = self._generate_system_prompt(
|
| 311 |
+
tool_desc=tool_desc,
|
| 312 |
+
library_content_list=library_content_list,
|
| 313 |
+
self_critic=self_critic,
|
| 314 |
+
is_retrieval=False
|
| 315 |
+
)
|
| 316 |
+
|
| 317 |
+
# Define the nodes
|
| 318 |
+
def generate(state: AgentState) -> AgentState:
|
| 319 |
+
messages = [SystemMessage(content=self.system_prompt)] + state["messages"]
|
| 320 |
+
response = self.llm.invoke(messages)
|
| 321 |
+
|
| 322 |
+
# Parse the response
|
| 323 |
+
msg = str(response.content)
|
| 324 |
+
|
| 325 |
+
# Check for incomplete tags and fix them
|
| 326 |
+
if "<execute>" in msg and "</execute>" not in msg:
|
| 327 |
+
msg += "</execute>"
|
| 328 |
+
if "<solution>" in msg and "</solution>" not in msg:
|
| 329 |
+
msg += "</solution>"
|
| 330 |
+
if "<think>" in msg and "</think>" not in msg:
|
| 331 |
+
msg += "</think>"
|
| 332 |
+
|
| 333 |
+
think_match = re.search(r"<think>(.*?)</think>", msg, re.DOTALL)
|
| 334 |
+
execute_match = re.search(r"<execute>(.*?)</execute>", msg, re.DOTALL)
|
| 335 |
+
answer_match = re.search(r"<solution>(.*?)</solution>", msg, re.DOTALL)
|
| 336 |
+
|
| 337 |
+
# Add the message to the state before checking for errors
|
| 338 |
+
state["messages"].append(AIMessage(content=msg.strip()))
|
| 339 |
+
|
| 340 |
+
if answer_match:
|
| 341 |
+
state["next_step"] = "end"
|
| 342 |
+
elif execute_match:
|
| 343 |
+
state["next_step"] = "execute"
|
| 344 |
+
elif think_match:
|
| 345 |
+
state["next_step"] = "generate"
|
| 346 |
+
else:
|
| 347 |
+
print("parsing error...")
|
| 348 |
+
# Check if we already added an error message to avoid infinite loops
|
| 349 |
+
error_count = sum(
|
| 350 |
+
1 for m in state["messages"] if isinstance(m, AIMessage) and "There are no tags" in m.content
|
| 351 |
+
)
|
| 352 |
+
|
| 353 |
+
if error_count >= 2:
|
| 354 |
+
# If we've already tried to correct the model twice, just end the conversation
|
| 355 |
+
print("Detected repeated parsing errors, ending conversation")
|
| 356 |
+
state["next_step"] = "end"
|
| 357 |
+
# Add a final message explaining the termination
|
| 358 |
+
state["messages"].append(
|
| 359 |
+
AIMessage(
|
| 360 |
+
content="Execution terminated due to repeated parsing errors. Please check your input and try again."
|
| 361 |
+
)
|
| 362 |
+
)
|
| 363 |
+
else:
|
| 364 |
+
# Try to correct it
|
| 365 |
+
state["messages"].append(
|
| 366 |
+
HumanMessage(
|
| 367 |
+
content="Each response must include thinking process followed by either <execute> or <solution> tag. But there are no tags in the current response. Please follow the instruction, fix and regenerate the response again."
|
| 368 |
+
)
|
| 369 |
+
)
|
| 370 |
+
state["next_step"] = "generate"
|
| 371 |
+
return state
|
| 372 |
+
|
| 373 |
+
def execute(state: AgentState) -> AgentState:
|
| 374 |
+
last_message = state["messages"][-1].content
|
| 375 |
+
# Only add the closing tag if it's not already there
|
| 376 |
+
if "<execute>" in last_message and "</execute>" not in last_message:
|
| 377 |
+
last_message += "</execute>"
|
| 378 |
+
|
| 379 |
+
execute_match = re.search(r"<execute>(.*?)</execute>", last_message, re.DOTALL)
|
| 380 |
+
if execute_match:
|
| 381 |
+
code = execute_match.group(1)
|
| 382 |
+
|
| 383 |
+
# Set timeout duration (10 minutes = 600 seconds)
|
| 384 |
+
timeout = self.timeout_seconds
|
| 385 |
+
|
| 386 |
+
# Check if the code is a Bash script or CLI command
|
| 387 |
+
if (
|
| 388 |
+
code.strip().startswith("#!BASH")
|
| 389 |
+
or code.strip().startswith("# Bash script")
|
| 390 |
+
or code.strip().startswith("#!CLI")
|
| 391 |
+
):
|
| 392 |
+
# Handle both Bash scripts and CLI commands with the same function
|
| 393 |
+
if code.strip().startswith("#!CLI"):
|
| 394 |
+
# For CLI commands, extract the command and run it as a simple bash script
|
| 395 |
+
cli_command = re.sub(r"^#!CLI", "", code, 1).strip() # noqa: B034
|
| 396 |
+
# Remove any newlines to ensure it's a single command
|
| 397 |
+
cli_command = cli_command.replace("\n", " ")
|
| 398 |
+
result = run_with_timeout(run_bash_script, [cli_command], timeout=timeout)
|
| 399 |
+
else:
|
| 400 |
+
# For Bash scripts, remove the marker and run as a bash script
|
| 401 |
+
bash_script = re.sub(r"^#!BASH|^# Bash script", "", code, 1).strip() # noqa: B034
|
| 402 |
+
result = run_with_timeout(run_bash_script, [bash_script], timeout=timeout)
|
| 403 |
+
# Otherwise, run as Python code
|
| 404 |
+
else:
|
| 405 |
+
result = run_with_timeout(run_python_repl, [code], timeout=timeout)
|
| 406 |
+
|
| 407 |
+
if len(result) > 10000:
|
| 408 |
+
result = (
|
| 409 |
+
"The output is too long to be added to context. Here are the first 10K characters...\n"
|
| 410 |
+
+ result[:10000]
|
| 411 |
+
)
|
| 412 |
+
observation = f"\n<observation>{result}</observation>"
|
| 413 |
+
state["messages"].append(AIMessage(content=observation.strip()))
|
| 414 |
+
|
| 415 |
+
return state

        def routing_function(
            state: AgentState,
        ) -> Literal["execute", "generate", "end"]:
            next_step = state.get("next_step")
            if next_step == "execute":
                return "execute"
            elif next_step == "generate":
                return "generate"
            elif next_step == "end":
                return "end"
            else:
                raise ValueError(f"Unexpected next_step: {next_step}")

        def routing_function_self_critic(
            state: AgentState,
        ) -> Literal["generate", "end"]:
            next_step = state.get("next_step")
            if next_step == "generate":
                return "generate"
            elif next_step == "end":
                return "end"
            else:
                raise ValueError(f"Unexpected next_step: {next_step}")

        def execute_self_critic(state: AgentState) -> AgentState:
            if self.critic_count < test_time_scale_round:
                # Generate feedback based on the message history
                messages = state["messages"]
                feedback_prompt = f"""
                Here is a reminder of what the user requested: {self.user_task}
                Examine the previous executions, reasoning, and solutions.
                Critique harshly: what could be improved?
                Be specific and constructive.
                Think hard about what is missing to solve the task.
                No questions asked, just feedback.
                """
                feedback = self.llm.invoke(messages + [HumanMessage(content=feedback_prompt)])

                # Add the feedback as a new message
                state["messages"].append(
                    HumanMessage(
                        content=f"Wait... this is not enough to solve the task. Here is some feedback for improvement:\n{feedback.content}"
                    )
                )
                self.critic_count += 1
                state["next_step"] = "generate"
            else:
                state["next_step"] = "end"

            return state

        # Create the workflow
        workflow = StateGraph(AgentState)

        # Add nodes
        workflow.add_node("generate", generate)
        workflow.add_node("execute", execute)

        if self_critic:
            workflow.add_node("self_critic", execute_self_critic)
            # Add conditional edges
            workflow.add_conditional_edges(
                "generate",
                routing_function,
                path_map={
                    "execute": "execute",
                    "generate": "generate",
                    "end": "self_critic",
                },
            )
            workflow.add_conditional_edges(
                "self_critic",
                routing_function_self_critic,
                path_map={"generate": "generate", "end": END},
            )
        else:
            # Add conditional edges
            workflow.add_conditional_edges(
                "generate",
                routing_function,
                path_map={"execute": "execute", "generate": "generate", "end": END},
            )
        workflow.add_edge("execute", "generate")
        workflow.add_edge(START, "generate")

        # Compile the workflow
        self.app = workflow.compile()
        self.checkpointer = MemorySaver()
        self.app.checkpointer = self.checkpointer
        # display(Image(self.app.get_graph().draw_mermaid_png()))

    def _prepare_resources_for_retrieval(self, prompt):
        """Prepare resources for retrieval and return selected resource names.

        Args:
            prompt: The user's query

        Returns:
            dict: Dictionary containing the selected resource names for tools and libraries
        """
        if not self.use_tool_retriever:
            return None

        # Gather all available resources
        # 1. Tools from the registry
        all_tools = self.tool_registry.tools if hasattr(self, "tool_registry") else []

        # 2. Libraries with descriptions - use library_content_dict directly
        library_descriptions = []
        for lib_name, lib_desc in self.library_content_dict.items():
            library_descriptions.append({"name": lib_name, "description": lib_desc})

        # Use retrieval to get relevant resources
        resources = {
            "tools": all_tools,
            "libraries": library_descriptions,
        }

        # Use prompt-based retrieval with the agent's LLM
        selected_resources = self.retriever.prompt_based_retrieval(prompt, resources, llm=self.llm)
        print("Using prompt-based retrieval with the agent's LLM")

        # Extract the names from the selected resources for the system prompt
        selected_resources_names = {
            "tools": selected_resources["tools"],
            "libraries": [lib["name"] if isinstance(lib, dict) else lib for lib in selected_resources["libraries"]],
        }

        return selected_resources_names

    def go(self, prompt):
        """Execute the agent with the given prompt.

        Args:
            prompt: The user's query
        """
        self.critic_count = 0
        self.user_task = prompt

        if self.use_tool_retriever:
            selected_resources_names = self._prepare_resources_for_retrieval(prompt)
            self.update_system_prompt_with_selected_resources(selected_resources_names)

        inputs = {"messages": [HumanMessage(content=prompt)], "next_step": None}
        config = {"recursion_limit": 500, "configurable": {"thread_id": 42}}
        self.log = []

        for s in self.app.stream(inputs, stream_mode="values", config=config):
            message = s["messages"][-1]
            out = pretty_print(message)
            self.log.append(out)

        return self.log, message.content

    def go_stream(self, prompt, image_path=None) -> Generator[dict, None, None]:
        """Execute the agent with the given prompt and return a generator that yields each step.

        This function returns a generator that yields each step of the agent's execution,
        allowing for real-time monitoring of the agent's progress.

        Args:
            prompt: The user's query
            image_path: Optional path to a user-uploaded file to reference in the task

        Yields:
            dict: Each step of the agent's execution containing the current message and state
        """
        self.critic_count = 0
        self.user_task = prompt
        if image_path:
            # f-string so the uploaded file path is actually interpolated
            self.user_task += f"""
            \nUser uploaded this file:\n
            {image_path}
            Please use it if needed.
            """

        if self.use_tool_retriever:
            selected_resources_names = self._prepare_resources_for_retrieval(prompt)
            self.update_system_prompt_with_selected_resources(selected_resources_names)

        inputs = {"messages": [HumanMessage(content=prompt)], "next_step": None}
        config = {"recursion_limit": 500, "configurable": {"thread_id": 42}}
        self.log = []

        for s in self.app.stream(inputs, stream_mode="values", config=config):
            message = s["messages"][-1]
            out = pretty_print(message)
            self.log.append(out)

            # Yield the current step
            yield {"output": out}

    def update_system_prompt_with_selected_resources(self, selected_resources):
        """Update the system prompt with the selected resources."""
        # Extract tool descriptions for the selected tools
        tool_desc = {}
        for tool in selected_resources["tools"]:
            # Get the module name from the tool
            if isinstance(tool, dict):
                module_name = tool.get("module", None)

                # If the module is not specified, try to find it in module2api
                if not module_name and hasattr(self, "module2api"):
                    for mod, apis in self.module2api.items():
                        for api in apis:
                            if api.get("name") == tool.get("name"):
                                module_name = mod
                                # Update the tool with the module information
                                tool["module"] = module_name
                                break
                        if module_name:
                            break

                # If still not found, use a default
                if not module_name:
                    module_name = "histopath.tool.scRNA_tools"  # Default to scRNA_tools as a fallback
                    tool["module"] = module_name
            else:
                module_name = getattr(tool, "module_name", None)

                # If the module is not specified, try to find it in module2api
                if not module_name and hasattr(self, "module2api"):
                    tool_name = getattr(tool, "name", str(tool))
                    for mod, apis in self.module2api.items():
                        for api in apis:
                            if api.get("name") == tool_name:
                                module_name = mod
                                # Set the module_name attribute
                                tool.module_name = module_name
                                break
                        if module_name:
                            break

                # If still not found, use a default
                if not module_name:
                    module_name = "histopath.tool.scRNA_tools"  # Default to scRNA_tools as a fallback
                    tool.module_name = module_name

            if module_name not in tool_desc:
                tool_desc[module_name] = []

            # Add the tool to the appropriate module
            if isinstance(tool, dict):
                # Ensure the module is included in the tool description
                if "module" not in tool:
                    tool["module"] = module_name
                tool_desc[module_name].append(tool)
            else:
                # Convert the tool object to a dictionary
                tool_dict = {
                    "name": getattr(tool, "name", str(tool)),
                    "description": getattr(tool, "description", ""),
                    "parameters": getattr(tool, "parameters", {}),
                    "module": module_name,  # Explicitly include the module
                }
                tool_desc[module_name].append(tool_dict)

        self.system_prompt = self._generate_system_prompt(
            tool_desc=tool_desc,
            library_content_list=selected_resources["libraries"],
            self_critic=getattr(self, "self_critic", False),
            is_retrieval=True,
        )

        # Print the raw system prompt for debugging
        # print("\n" + "="*20 + " RAW SYSTEM PROMPT FROM AGENT " + "="*20)
        # print(self.system_prompt)
        # print("="*70 + "\n")

    def result_formatting(self, output_class, task_intention):
        self.format_check_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    (
                        "You are evaluateGPT, tasked with extracting and parsing the task output based on the history of an agent. "
                        "Review the entire history of messages provided. "
                        "Here is the task output requirement: \n"
                        f"'{task_intention.replace('{', '{{').replace('}', '}}')}'.\n"
                    ),
                ),
                ("placeholder", "{messages}"),
            ]
        )

        checker_llm = self.format_check_prompt | self.llm.with_structured_output(output_class)
        result = checker_llm.invoke({"messages": [("user", str(self.log))]}).dict()
        return result
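A minimal usage sketch of the agent interface defined above. The import path and class name `A1` are assumptions for illustration (see histopath/agent/__init__.py for the actual export), and the prompts are placeholders:

from histopath.agent import A1  # assumed class name; adjust to the actual export

agent = A1()  # constructor arguments omitted for brevity
log, final_answer = agent.go("Segment the tissue regions in ./data/slide_001.svs")
print(final_answer)

# Streaming variant: each yielded dict carries one pretty-printed step
for step in agent.go_stream("Caption the slide ./data/slide_001.svs"):
    print(step["output"])
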
histopath/config.py
ADDED
@@ -0,0 +1,91 @@
"""
HistoPath Configuration Management

Simple configuration class for centralizing common settings.
Maintains full backward compatibility with existing code.
"""

import os
from dataclasses import dataclass


@dataclass
class HistoPathConfig:
    """Central configuration for the HistoPath agent.

    All settings are optional and have sensible defaults.
    API keys are still read from environment variables to maintain
    compatibility with the existing .env file structure.

    Usage:
        # Create config with defaults
        config = HistoPathConfig()

        # Override specific settings
        config = HistoPathConfig(llm="gpt-4", timeout_seconds=1200)

        # Modify after creation
        config.path = "./custom_data"
    """

    # Data and execution settings
    path: str = "./data"
    timeout_seconds: int = 600

    # LLM settings (API keys still come from the environment)
    llm: str = "claude-sonnet-4-20250514"
    temperature: float = 0.7

    # Tool settings
    use_tool_retriever: bool = True

    # Data licensing settings
    commercial_mode: bool = False  # If True, excludes non-commercial datasets

    # Custom model settings (for custom LLM serving)
    base_url: str | None = None
    api_key: str | None = None  # Only for custom models, not provider API keys

    # LLM source (auto-detected if None)
    source: str | None = None

    def __post_init__(self):
        """Load any environment variable overrides if they exist."""
        # Check for environment variable overrides (optional)
        # Support both old and new names for backwards compatibility
        if os.getenv("HISTOPATH_PATH") or os.getenv("HISTOPATH_DATA_PATH"):
            self.path = os.getenv("HISTOPATH_PATH") or os.getenv("HISTOPATH_DATA_PATH")
        if os.getenv("HISTOPATH_TIMEOUT_SECONDS"):
            self.timeout_seconds = int(os.getenv("HISTOPATH_TIMEOUT_SECONDS"))
        if os.getenv("HISTOPATH_LLM") or os.getenv("HISTOPATH_LLM_MODEL"):
            self.llm = os.getenv("HISTOPATH_LLM") or os.getenv("HISTOPATH_LLM_MODEL")
        if os.getenv("HISTOPATH_USE_TOOL_RETRIEVER"):
            self.use_tool_retriever = os.getenv("HISTOPATH_USE_TOOL_RETRIEVER").lower() == "true"
        if os.getenv("HISTOPATH_COMMERCIAL_MODE"):
            self.commercial_mode = os.getenv("HISTOPATH_COMMERCIAL_MODE").lower() == "true"
        if os.getenv("HISTOPATH_TEMPERATURE"):
            self.temperature = float(os.getenv("HISTOPATH_TEMPERATURE"))
        if os.getenv("HISTOPATH_CUSTOM_BASE_URL"):
            self.base_url = os.getenv("HISTOPATH_CUSTOM_BASE_URL")
        if os.getenv("HISTOPATH_CUSTOM_API_KEY"):
            self.api_key = os.getenv("HISTOPATH_CUSTOM_API_KEY")
        if os.getenv("HISTOPATH_SOURCE"):
            self.source = os.getenv("HISTOPATH_SOURCE")

    def to_dict(self) -> dict:
        """Convert the config to a dictionary for easy access."""
        return {
            "path": self.path,
            "timeout_seconds": self.timeout_seconds,
            "llm": self.llm,
            "temperature": self.temperature,
            "use_tool_retriever": self.use_tool_retriever,
            "commercial_mode": self.commercial_mode,
            "base_url": self.base_url,
            "api_key": self.api_key,
            "source": self.source,
        }


# Global default config instance (optional, for convenience)
default_config = HistoPathConfig()
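A short sketch of the two configuration paths HistoPathConfig supports, constructor arguments and HISTOPATH_* environment variables; the values shown are examples:

import os
from histopath.config import HistoPathConfig

# Explicit overrides at construction time
config = HistoPathConfig(llm="gpt-4o", timeout_seconds=1200)

# Environment overrides are picked up in __post_init__
os.environ["HISTOPATH_LLM"] = "claude-sonnet-4-20250514"
os.environ["HISTOPATH_COMMERCIAL_MODE"] = "true"
config_from_env = HistoPathConfig()
print(config_from_env.to_dict())
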
histopath/llm.py
ADDED
@@ -0,0 +1,235 @@
import os
import openai
from typing import TYPE_CHECKING, Literal, Optional
from langchain_core.language_models.chat_models import BaseChatModel

if TYPE_CHECKING:
    from histopath.config import HistoPathConfig

SourceType = Literal["OpenAI", "AzureOpenAI", "Anthropic", "Ollama", "Gemini", "Bedrock", "Groq", "HuggingFace", "Custom"]
ALLOWED_SOURCES: set[str] = set(SourceType.__args__)


def get_llm(
    model: str | None = None,
    temperature: float | None = None,
    stop_sequences: list[str] | None = None,
    source: SourceType | None = None,
    base_url: str | None = None,
    api_key: str | None = None,
    config: Optional["HistoPathConfig"] = None,
) -> BaseChatModel:
    """
    Get a language model instance based on the specified model name and source.

    This function supports models from OpenAI, Azure OpenAI, Anthropic, Ollama, Gemini,
    Bedrock, Groq, HuggingFace, and custom model serving.

    Args:
        model (str): The model name to use
        temperature (float): Temperature setting for generation
        stop_sequences (list): Sequences that will stop generation
        source (str): Source provider: "OpenAI", "AzureOpenAI", "Anthropic", "Ollama", "Gemini",
            "Bedrock", "Groq", "HuggingFace", or "Custom".
            If None, the source is auto-detected from the model name.
        base_url (str): The base URL for custom model serving (e.g., "http://localhost:8000/v1"), default is None
        api_key (str): The API key for the custom LLM
        config (HistoPathConfig): Optional configuration object. If provided, unspecified parameters fall back to config values.
    """
    # Use config values for any unspecified parameters
    if config is not None:
        if model is None:
            model = config.llm  # the config field is `llm` (see histopath/config.py)
        if temperature is None:
            temperature = config.temperature
        if source is None:
            source = config.source
        if base_url is None:
            base_url = config.base_url
        if api_key is None:
            api_key = config.api_key or "EMPTY"

    # Use defaults if still not specified
    if model is None:
        model = "claude-3-5-sonnet-20241022"
    if temperature is None:
        temperature = 0.7
    if api_key is None:
        api_key = "EMPTY"
    # Auto-detect source from the model name if not specified
    if source is None:
        env_source = os.getenv("LLM_SOURCE")
        if env_source in ALLOWED_SOURCES:
            source = env_source
        else:
            if model[:7] == "claude-":
                source = "Anthropic"
            elif model[:7] == "gpt-oss":
                source = "Ollama"
            elif model[:4] == "gpt-":
                source = "OpenAI"
            elif model.startswith("azure-"):
                source = "AzureOpenAI"
            elif model[:7] == "gemini-":
                source = "Gemini"
            elif "groq" in model.lower():
                source = "Groq"
            elif base_url is not None:
                source = "Custom"
            elif "/" in model or any(
                name in model.lower()
                for name in [
                    "llama",
                    "mistral",
                    "qwen",
                    "gemma",
                    "phi",
                    "dolphin",
                    "orca",
                    "vicuna",
                    "deepseek",
                ]
            ):
                source = "Ollama"
            elif model.startswith(
                ("anthropic.claude-", "amazon.titan-", "meta.llama-", "mistral.", "cohere.", "ai21.", "us.")
            ):
                source = "Bedrock"
            else:
                raise ValueError("Unable to determine model source. Please specify 'source' parameter.")

    # Create the appropriate model based on source
    if source == "OpenAI":
        try:
            from langchain_openai import ChatOpenAI
        except ImportError:
            raise ImportError(  # noqa: B904
                "langchain-openai package is required for OpenAI models. Install with: pip install langchain-openai"
            )
        return ChatOpenAI(model=model, temperature=temperature, stop_sequences=stop_sequences)

    elif source == "AzureOpenAI":
        try:
            from langchain_openai import AzureChatOpenAI
        except ImportError:
            raise ImportError(  # noqa: B904
                "langchain-openai package is required for Azure OpenAI models. Install with: pip install langchain-openai"
            )
        API_VERSION = "2024-12-01-preview"
        model = model.replace("azure-", "")
        return AzureChatOpenAI(
            openai_api_key=os.getenv("OPENAI_API_KEY"),
            azure_endpoint=os.getenv("OPENAI_ENDPOINT"),
            azure_deployment=model,
            openai_api_version=API_VERSION,
            temperature=temperature,
        )

    elif source == "Anthropic":
        try:
            from langchain_anthropic import ChatAnthropic
        except ImportError:
            raise ImportError(  # noqa: B904
                "langchain-anthropic package is required for Anthropic models. Install with: pip install langchain-anthropic"
            )
        return ChatAnthropic(
            model=model,
            temperature=temperature,
            max_tokens=8192,
            stop_sequences=stop_sequences,
        )

    elif source == "Gemini":
        # If you want to use ChatGoogleGenerativeAI, you need to pass the stop sequences upon invoking the model.
        # return ChatGoogleGenerativeAI(
        #     model=model,
        #     temperature=temperature,
        #     google_api_key=api_key,
        # )
        try:
            from langchain_openai import ChatOpenAI
        except ImportError:
            raise ImportError(  # noqa: B904
                "langchain-openai package is required for Gemini models. Install with: pip install langchain-openai"
            )
        return ChatOpenAI(
            model=model,
            temperature=temperature,
            api_key=os.getenv("GEMINI_API_KEY"),
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
            stop_sequences=stop_sequences,
        )

    elif source == "Groq":
        try:
            from langchain_openai import ChatOpenAI
        except ImportError:
            raise ImportError(  # noqa: B904
                "langchain-openai package is required for Groq models. Install with: pip install langchain-openai"
            )
        return ChatOpenAI(
            model=model,
            temperature=temperature,
            api_key=os.getenv("GROQ_API_KEY"),
            base_url="https://api.groq.com/openai/v1",
            stop_sequences=stop_sequences,
        )

    elif source == "Ollama":
        try:
            from langchain_ollama import ChatOllama
        except ImportError:
            raise ImportError(  # noqa: B904
                "langchain-ollama package is required for Ollama models. Install with: pip install langchain-ollama"
            )
        return ChatOllama(
            model=model,
            temperature=temperature,
        )

    elif source == "Bedrock":
        try:
            from langchain_aws import ChatBedrock
        except ImportError:
            raise ImportError(  # noqa: B904
                "langchain-aws package is required for Bedrock models. Install with: pip install langchain-aws"
            )
        return ChatBedrock(
            model=model,
            temperature=temperature,
            stop_sequences=stop_sequences,
            region_name=os.getenv("AWS_REGION", "us-east-1"),
        )

    elif source == "HuggingFace":
        try:
            from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
        except ImportError:
            raise ImportError(  # noqa: B904
                "langchain-huggingface package is required for HuggingFace models. Install with: pip install langchain-huggingface"
            )
        llm = HuggingFaceEndpoint(
            repo_id="openai/gpt-oss-120b",
            temperature=temperature,
            stop_sequences=stop_sequences,
            huggingfacehub_api_token=os.getenv("HUGGINGFACE_API_KEY"),
        )
        return ChatHuggingFace(llm=llm)

    elif source == "Custom":
        try:
            from langchain_openai import ChatOpenAI
        except ImportError:
            raise ImportError(  # noqa: B904
                "langchain-openai package is required for custom models. Install with: pip install langchain-openai"
            )
        # Custom LLM serving such as SGLang. Must expose an OpenAI-compatible API.
        assert base_url is not None, "base_url must be provided for custom-served LLMs"
        llm = ChatOpenAI(
            model=model,
            temperature=temperature,
            max_tokens=8192,
            stop_sequences=stop_sequences,
            base_url=base_url,
            api_key=api_key,
        )
        return llm

    else:
        raise ValueError(
            f"Invalid source: {source}. Valid options are 'OpenAI', 'AzureOpenAI', 'Anthropic', 'Gemini', 'Groq', 'Bedrock', 'Ollama', 'HuggingFace', or 'Custom'"
        )
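A sketch of get_llm's source auto-detection; the model names below are examples, and each branch requires the corresponding provider API key in the environment:

from histopath.llm import get_llm

claude = get_llm("claude-sonnet-4-20250514")           # "claude-" prefix -> Anthropic
gemini = get_llm("gemini-2.0-flash", temperature=0.2)  # "gemini-" prefix -> Gemini
local = get_llm(
    "my-served-model",                                 # placeholder name
    source="Custom",
    base_url="http://localhost:8000/v1",               # any OpenAI-compatible server
    api_key="EMPTY",
)
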
histopath/model/__init__.py
ADDED
File without changes
histopath/model/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (177 Bytes)
histopath/model/__pycache__/retriever.cpython-311.pyc
ADDED
Binary file (8.28 kB)
histopath/model/retriever.py
ADDED
@@ -0,0 +1,127 @@
import re
import contextlib
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI


class ToolRetriever:
    """Retrieve tools from the tool registry."""

    def __init__(self):
        pass

    def prompt_based_retrieval(self, query: str, resources: dict, llm=None) -> dict:
        """Use a prompt-based approach to retrieve the most relevant resources for a query.

        Args:
            query: The user's query
            resources: A dictionary with keys 'tools' and 'libraries', each containing
                a list of available resources
            llm: Optional LLM instance to use for retrieval (if None, a new one is created)

        Returns:
            A dictionary with the same keys, but containing only the most relevant resources
        """
        # Create a prompt for the LLM to select relevant resources
        # (the response format requests both TOOLS and LIBRARIES so that
        # _parse_llm_response can extract indices for each category)
        prompt = f"""
You are an expert histopathology research assistant. Your task is to select the relevant resources to help answer a user's query.

USER QUERY: {query}

Below are the available resources. For each category, select items that are directly or indirectly relevant to answering the query.
Be generous in your selection - include resources that might be useful for the task, even if they're not explicitly mentioned in the query.
It's better to include slightly more resources than to miss potentially useful ones.

AVAILABLE SOFTWARE LIBRARIES:
{self._format_resources_for_prompt(resources.get("libraries", []))}

AVAILABLE TOOLS:
{self._format_resources_for_prompt(resources.get("tools", []))}

For each category, respond with ONLY the indices of the relevant items in the following format:
TOOLS: [list of indices]
LIBRARIES: [list of indices]

For example:
TOOLS: [0, 3, 5, 7, 9]
LIBRARIES: [0, 2]

If a category has no relevant items, use an empty list, e.g., TOOLS: []

IMPORTANT GUIDELINES:
1. Be generous but not excessive - aim to include all potentially relevant resources
2. ALWAYS prioritize database tools for general queries - include as many database tools as possible
3. Include all literature search tools
4. For libraries, include those that provide functions needed for analysis
5. Don't exclude resources just because they're not explicitly mentioned in the query
6. When in doubt about a tool, include it rather than exclude it
"""

        # Use the provided LLM or create a new one
        if llm is None:
            llm = ChatOpenAI(model="gpt-4o")

        # Invoke the LLM
        if hasattr(llm, "invoke"):
            # For LangChain-style LLMs
            response = llm.invoke([HumanMessage(content=prompt)])
            response_content = response.content
        else:
            # For other LLM interfaces
            response_content = str(llm(prompt))

        # Parse the response to extract the selected indices
        selected_indices = self._parse_llm_response(response_content)

        # Get the selected resources
        selected_resources = {
            "tools": [
                resources["tools"][i]
                for i in selected_indices.get("tools", [])
                if i < len(resources.get("tools", []))
            ],
            "libraries": [
                resources["libraries"][i]
                for i in selected_indices.get("libraries", [])
                if i < len(resources.get("libraries", []))
            ],
        }

        return selected_resources

    def _format_resources_for_prompt(self, resources: list) -> str:
        """Format resources for inclusion in the prompt."""
        formatted = []
        for i, resource in enumerate(resources):
            if isinstance(resource, dict):
                # Handle dictionary format (from the tool registry or libraries with descriptions)
                name = resource.get("name", f"Resource {i}")
                description = resource.get("description", "")
                formatted.append(f"{i}. {name}: {description}")
            elif isinstance(resource, str):
                # Handle string format (simple strings)
                formatted.append(f"{i}. {resource}")
            else:
                # Try to extract name and description from tool objects
                name = getattr(resource, "name", str(resource))
                desc = getattr(resource, "description", "")
                formatted.append(f"{i}. {name}: {desc}")

        return "\n".join(formatted) if formatted else "None available"

    def _parse_llm_response(self, response: str) -> dict:
        """Parse the LLM response to extract the selected indices."""
        selected_indices = {"tools": [], "libraries": []}

        # Extract indices for each category
        tools_match = re.search(r"TOOLS:\s*\[(.*?)\]", response, re.IGNORECASE)
        if tools_match and tools_match.group(1).strip():
            with contextlib.suppress(ValueError):
                selected_indices["tools"] = [int(idx.strip()) for idx in tools_match.group(1).split(",") if idx.strip()]

        libraries_match = re.search(r"LIBRARIES:\s*\[(.*?)\]", response, re.IGNORECASE)
        if libraries_match and libraries_match.group(1).strip():
            with contextlib.suppress(ValueError):
                selected_indices["libraries"] = [
                    int(idx.strip()) for idx in libraries_match.group(1).split(",") if idx.strip()
                ]

        return selected_indices
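A sketch of calling the retriever directly. The tool and library entries are illustrative, and with llm=None the method falls back to a ChatOpenAI instance, which needs OPENAI_API_KEY set:

from histopath.model.retriever import ToolRetriever

resources = {
    "tools": [
        {"name": "caption_slide", "description": "Caption a whole slide image."},
        {"name": "segment_slide", "description": "Segment a whole slide image."},
    ],
    "libraries": [
        {"name": "lazyslide", "description": "Whole slide image processing library."},
    ],
}
retriever = ToolRetriever()
selected = retriever.prompt_based_retrieval("Caption ./slides/case_01.svs", resources)
print([t["name"] for t in selected["tools"]])
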
histopath/retriever_benchmark.py
ADDED
@@ -0,0 +1,101 @@
###########################################################################################
# Basic ToolRetriever benchmarking for measuring retrieval rate for a certain custom tool #
# Author: Ryan Ding                                                                       #
###########################################################################################
import random
from nltk.corpus import wordnet
from histopath.model.retriever import ToolRetriever
from histopath.tool.tool_registry import ToolRegistry
from histopath.utils import read_module2api
from langchain_ollama import ChatOllama

LLM = ChatOllama(model='gpt-oss:120b', temperature=0.7)
PROMPT_v1 = 'Caption the whole slide into patches into directory ./test/directory/'
PROMPT_v2 = 'Caption the whole slide images already segmented into patches in directory ./test/directory'
RUNS = 100

def synonym_replace(text, p_replace=0.2, protected_words=None):
    """Prompt perturbation via replacement of words with their synonyms.

    Parameters
    ----------
    text: str
        prompt to perturb
    p_replace: float
        probability of replacing any given word (default: 0.2)
    protected_words: set
        words protected from perturbation (default: None)

    Returns
    -------
    str
        perturbed prompt
    """
    words = text.split()
    new_words = []
    for w in words:
        if protected_words and w in protected_words:
            new_words.append(w)
            continue
        if random.random() < p_replace:
            syns = wordnet.synsets(w)
            if syns:
                lemma_names = syns[0].lemma_names()
                if lemma_names:
                    w = random.choice(lemma_names).replace('_', ' ')
        new_words.append(w)
    return ' '.join(new_words)


def add_typo(text, p_typo=0.02):
    """Prompt perturbation via introduction of character-level typos.

    Parameters
    ----------
    text: str
        prompt to perturb
    p_typo: float
        probability of introducing a typo at any given character (default: 0.02)

    Returns
    -------
    str
        perturbed prompt
    """
    new_text = list(text)
    for i in range(len(new_text)):
        if random.random() < p_typo:
            new_text[i] = random.choice('abcdefghijklmnopqrstuvwxyz')
    return ''.join(new_text)

class ToolBenchmark:
    def __init__(self, llm, prompts, runs, targets):
        self.llm = llm
        self.targets = targets
        self.prompts = prompts
        self.runs = runs
        self.module2api = read_module2api()
        self.registry = ToolRegistry(self.module2api)
        self.retriever = ToolRetriever()
        self.all_tools = self.registry.tools
        self.resources = {"tools": self.all_tools}

    def retrieve_tools(self):
        selected_resources = self.retriever.prompt_based_retrieval(query=PROMPT_v2, resources=self.resources, llm=self.llm)
        return set(selected_resources["tools"])

    def evaluate(self):
        hits = dict()
        for _ in range(self.runs):
            tools = self.retrieve_tools()
            for target in self.targets:
                # number of times the proper tool was retrieved
                if target in tools:
                    hits[target] = hits.get(target, 0) + 1
        return hits


def main():
    pass

if __name__ == '__main__':
    main()
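An illustrative driver for the benchmark class above; the target entries must match whatever ToolRegistry stores for each tool, which is an assumption here:

benchmark = ToolBenchmark(
    llm=LLM,
    prompts=[PROMPT_v1, PROMPT_v2],
    runs=10,
    targets=["caption_slide"],  # assumed registry entry for the captioning tool
)
hits = benchmark.evaluate()
print(hits)  # e.g. {"caption_slide": 9} -> retrieved in 9 of 10 runs
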
histopath/tool/__init__.py
ADDED
@@ -0,0 +1 @@
from histopath.utils import get_tool_decorated_functions
histopath/tool/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (258 Bytes)
histopath/tool/__pycache__/pathology.cpython-311.pyc
ADDED
Binary file (19.2 kB)
histopath/tool/__pycache__/support_tools.cpython-311.pyc
ADDED
Binary file (3.02 kB)
histopath/tool/__pycache__/tool_registry.cpython-311.pyc
ADDED
Binary file (5.94 kB)
histopath/tool/pathology.py
ADDED
@@ -0,0 +1,458 @@
| 1 |
+
def caption_slide(image_path, slide_name, prompt="Diagnosis:", output_dir="./output"):
|
| 2 |
+
"""Captions a Whole Slide Image(WSI).
|
| 3 |
+
|
| 4 |
+
Parameters
|
| 5 |
+
----------
|
| 6 |
+
image_path: str
|
| 7 |
+
Path to the whole slide image file.
|
| 8 |
+
slide_name: str
|
| 9 |
+
Name of whole slide image file
|
| 10 |
+
prompt: str
|
| 11 |
+
Starting prompt of the generated caption (default: "Diagnosis:")
|
| 12 |
+
output_dir: str, optional
|
| 13 |
+
Directory to save output files (default: "./output")
|
| 14 |
+
Returns
|
| 15 |
+
-------
|
| 16 |
+
str
|
| 17 |
+
Research log summarizing analysis and results
|
| 18 |
+
"""
|
| 19 |
+
import os
|
| 20 |
+
import glob
|
| 21 |
+
import timm
|
| 22 |
+
import torch
|
| 23 |
+
from PIL import Image
|
| 24 |
+
import lazyslide as zs
|
| 25 |
+
from pathlib import Path
|
| 26 |
+
from datetime import datetime
|
| 27 |
+
from transformers import AutoModel
|
| 28 |
+
from timm.layers import SwiGLUPacked
|
| 29 |
+
from timm.data import resolve_data_config
|
| 30 |
+
from huggingface_hub import login, whoami
|
| 31 |
+
from timm.data.transforms_factory import create_transform
|
| 32 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 33 |
+
# Step 1: Login to HuggingFace
|
| 34 |
+
login(token=os.getenv("HUGGINGFACE_ACCESS_TOKEN"))
|
| 35 |
+
hf_user = whoami()
|
| 36 |
+
username = hf_user['name']
|
| 37 |
+
|
| 38 |
+
# Step 2: Setup models and transforms
|
| 39 |
+
virchow2 = timm.create_model("hf-hub:paige-ai/Virchow2", pretrained=True, mlp_layer=SwiGLUPacked, act_layer=torch.nn.SiLU)
|
| 40 |
+
virchow2 = virchow2.eval()
|
| 41 |
+
prism = AutoModel.from_pretrained('paige-ai/Prism', trust_remote_code=True)
|
| 42 |
+
prism = prism.to(device)
|
| 43 |
+
transforms = create_transform(**resolve_data_config(virchow2.pretrained_cfg, model=virchow2))
|
| 44 |
+
tile_embeddings = []
|
| 45 |
+
# Step 3: Initialize, process, tile, and encode slide file(s)
|
| 46 |
+
files = [f for f in glob.glob(f"{image_path}/*") if slide_name in os.path.basename(f)]
|
| 47 |
+
if len(files) == 1 and files[0].endswith(".svs"):
|
| 48 |
+
# dealing with the whole slide in itself
|
| 49 |
+
wsi = zs.open_wsi(f"{image_path}/{slide_name}.svs")
|
| 50 |
+
tiles, tile_spec = zs.pp.tile_tissues(wsi, 224, mpp=0.5, return_tiles=True)
|
| 51 |
+
|
| 52 |
+
tile_dir = Path("tiles")
|
| 53 |
+
tile_dir.mkdir(exist_ok=True)
|
| 54 |
+
for _, row in tiles.iterrows():
|
| 55 |
+
tile_id = row["tile_id"]
|
| 56 |
+
geometry = row["geometry"] # shapely Polygon of the tile
|
| 57 |
+
# Get top-left corner of the tile
|
| 58 |
+
minx, miny, maxx, maxy = geometry.bounds
|
| 59 |
+
width = int(maxx - minx)
|
| 60 |
+
height = int(maxy - miny)
|
| 61 |
+
|
| 62 |
+
# Read the tile from WSI
|
| 63 |
+
tile_img = wsi.read_region(int(minx), int(miny), width, height, tile_spec.ops_level)
|
| 64 |
+
tile_img = Image.fromarray(tile_img, 'RGB')
|
| 65 |
+
tile_tensor = transforms(tile_img).unsqueeze(0)
|
| 66 |
+
output = virchow2(tile_tensor)
|
| 67 |
+
class_token = output[:, 0]
|
| 68 |
+
patch_tokens = output[:, 1:]
|
| 69 |
+
|
| 70 |
+
embedding = torch.cat([class_token, patch_tokens.mean(1)], dim=-1)
|
| 71 |
+
tile_embeddings.append(embedding)
|
| 72 |
+
|
| 73 |
+
# Save as PNG
|
| 74 |
+
tile_path = tile_dir / f"tile_{tile_id:05d}.png"
|
| 75 |
+
tile_img.save(tile_path)
|
| 76 |
+
else:
|
| 77 |
+
# dealing with patches (not svs); need to encode tiles with Virchow directly
|
| 78 |
+
for file in files:
|
| 79 |
+
tile_img = Image.open(file).convert('RGB')
|
| 80 |
+
tile_tensor = transforms(tile_img).unsqueeze(0)
|
| 81 |
+
output = virchow2(tile_tensor)
|
| 82 |
+
class_token = output[:, 0]
|
| 83 |
+
patch_tokens = output[:, 1:]
|
| 84 |
+
embedding = torch.cat([class_token, patch_tokens.mean(1)], dim=-1)
|
| 85 |
+
tile_embeddings.append(embedding)
|
| 86 |
+
|
| 87 |
+
tile_embeddings = torch.cat(tile_embeddings, dim=0).unsqueeze(0).to(device)
|
| 88 |
+
with torch.autocast(device, torch.float16), torch.inference_mode():
|
| 89 |
+
reprs = prism.slide_representations(tile_embeddings)
|
| 90 |
+
genned_ids = prism.generate(
|
| 91 |
+
key_value_states=reprs['image_latents'],
|
| 92 |
+
do_sample=False,
|
| 93 |
+
num_beams=5,
|
| 94 |
+
num_beam_groups=1,
|
| 95 |
+
)
|
| 96 |
+
generated_caption = prism.untokenize(genned_ids)
|
| 97 |
+
|
| 98 |
+
# Step 4: Generate caption using latent representation and initial prompt
|
| 99 |
+
|
| 100 |
+
log = f"""
|
| 101 |
+
Research Log: Whole Slide Image Captioning
|
| 102 |
+
Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
|
| 103 |
+
Image Path: {os.path.basename(image_path)}
|
| 104 |
+
Slide Name: {slide_name}
|
| 105 |
+
|
| 106 |
+
Analysis Steps:
|
| 107 |
+
1. Logged into HuggingFace as {username}
|
| 108 |
+
2. Load in PRISM and Virchow2 models for encoding and captioning
|
| 109 |
+
3. Initialized, processed, tiled, and encode slide file(s)
|
| 110 |
+
4. Generated the caption with "{prompt}" as initial prompt
|
| 111 |
+
|
| 112 |
+
Results:
|
| 113 |
+
|
| 114 |
+
Caption
|
| 115 |
+
-------
|
| 116 |
+
{generated_caption}
|
| 117 |
+
"""
|
| 118 |
+
|
| 119 |
+
return log
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def segment_slide(image_path, seg_type, model, output_dir="./output"):
|
| 123 |
+
"""Segment a Whole Slide Image (WSI).
|
| 124 |
+
|
| 125 |
+
Parameters
|
| 126 |
+
----------
|
| 127 |
+
image_path: str
|
| 128 |
+
Path to the whole slide image file.
|
| 129 |
+
seg_type: str
|
| 130 |
+
Type of segmentation to perform
|
| 131 |
+
model: str
|
| 132 |
+
Segmentation model to use
|
| 133 |
+
output_dir: str, optional
|
| 134 |
+
Directory to save output files (default: "./output")
|
| 135 |
+
Returns
|
| 136 |
+
-------
|
| 137 |
+
str
|
| 138 |
+
Research log summarizing analysis and results
|
| 139 |
+
"""
|
| 140 |
+
import os
|
| 141 |
+
import lazyslide as zs
|
| 142 |
+
from datetime import datetime
|
| 143 |
+
from huggingface_hub import login, whoami
|
| 144 |
+
|
| 145 |
+
# Step 1: Perform validity checking
|
| 146 |
+
usable_models = set(zs.models.list_models("segmentation"))
|
| 147 |
+
if seg_type not in {"cells", "cell_type", "semantic", "tissue", "artifact"}: return None
|
| 148 |
+
if model not in usable_models: return None
|
| 149 |
+
if seg_type == "tissue" and model not in {"grandqc", "pathprofiler"}: return None
|
| 150 |
+
if seg_type == "artifact" and model != "grandqc": return None
|
| 151 |
+
if seg_type == "cells" and model not in {"instanseg", "cellpose"}: return None
|
| 152 |
+
if seg_type == "cell_type" and model != "nulite": return None
|
| 153 |
+
|
| 154 |
+
# Step 2: Login to HuggingFace if gated model
|
| 155 |
+
login(token=os.getenv("HUGGINGFACE_ACCESS_TOKEN"))
|
| 156 |
+
hf_user = whoami()
|
| 157 |
+
username = hf_user['name']
|
| 158 |
+
|
| 159 |
+
# Step 3: Open, process, and tile WSI image
|
| 160 |
+
wsi = zs.open_wsi(image_path)
|
| 161 |
+
zs.pp.find_tissues(wsi)
|
| 162 |
+
zs.pp.tile_graph(wsi)
|
| 163 |
+
#TODO Change values
|
| 164 |
+
zs.pp.tile_tissues(wsi, 512, background_fraction=0.95, mpp=0.5)
|
| 165 |
+
|
| 166 |
+
# Step 4: Appropriately Segment the slide
|
| 167 |
+
if seg_type == "cells":
|
| 168 |
+
zs.seg.cells(wsi, model=model)
|
| 169 |
+
elif seg_type == "cell_type":
|
| 170 |
+
zs.seg.cell_type(wsi, model=model)
|
| 171 |
+
elif seg_type == "semantic":
|
| 172 |
+
zs.seg.semantic(wsi, model=model)
|
| 173 |
+
elif seg_type == "tissue":
|
| 174 |
+
zs.seg.tissue(wsi, model=model)
|
| 175 |
+
else:
|
| 176 |
+
zs.seg.artifact(wsi, model=model)
|
| 177 |
+
|
| 178 |
+
# Step 5: Generate WSI with annotations
|
| 179 |
+
|
| 180 |
+
log = f"""
|
| 181 |
+
Research Log: Whole Slide Image Segmentation
|
| 182 |
+
Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
|
| 183 |
+
Image: {os.path.basename(image_path)}
|
| 184 |
+
|
| 185 |
+
Analysis Steps:
|
| 186 |
+
1. Performed validity checking
|
| 187 |
+
2. Logged into HuggingFace as {username}
|
| 188 |
+
3. Open WSI, find, tile and graph tissues
|
| 189 |
+
4. Segmented tissues using {model}
|
| 190 |
+
5. Generated and displayed segmentation results in {output_dir}
|
| 191 |
+
|
| 192 |
+
Results:
|
| 193 |
+
|
| 194 |
+
Output Files
|
| 195 |
+
"""
|
| 196 |
+
return log
|
| 197 |
+
|
| 198 |
+
def zero_shot_classification(image_path, labels, output_dir="./output"):
|
| 199 |
+
"""Performs Zero-Shot Classification from Whole Slide Images (WSIs).
|
| 200 |
+
|
| 201 |
+
Parameters
|
| 202 |
+
----------
|
| 203 |
+
image_path: str
|
| 204 |
+
Path to the whole slide image file.
|
| 205 |
+
labels: list
|
| 206 |
+
Labels of the classes to perform zero-shot classification
|
| 207 |
+
output_dir: str, optional
|
| 208 |
+
Directory to save output files (default: "./output")
|
| 209 |
+
|
| 210 |
+
Returns
|
| 211 |
+
-------
|
| 212 |
+
str
|
| 213 |
+
Research log summarizing analysis and results
|
| 214 |
+
"""
|
| 215 |
+
import os
|
| 216 |
+
import lazyslide as zs
|
| 217 |
+
from datetime import datetime
|
| 218 |
+
from huggingface_hub import login, whoami
|
| 219 |
+
|
| 220 |
+
# login to huggingface; zero shot via LazySlide only possible with gated models
|
| 221 |
+
login(token=os.getenv("HUGGINGFACE_ACCESS_TOKEN"))
|
| 222 |
+
hf_user = whoami()
|
| 223 |
+
username = hf_user['name']
|
| 224 |
+
wsi = zs.open_wsi(image_path)
|
| 225 |
+
zs.pp.find_tissues(wsi)
|
| 226 |
+
zs.pp.tile_tissues(wsi, 512, background_fraction=0.95, mpp=0.5)
|
| 227 |
+
# might want to make tile graph
|
| 228 |
+
# zs.pp.tile_graph(wsi)
|
| 229 |
+
|
| 230 |
+
zs.tl.feature_extraction(wsi, "virchow")
|
| 231 |
+
zs.tl.feature_aggregation(wsi, feature_key="virchow", encoder="prism")
|
| 232 |
+
results = zs.tl.zero_shot_score(wsi, labels, feature_key="virchow_tiles")
|
| 233 |
+
log = f"""
|
| 234 |
+
Research Log: Zero-Shot Classification
|
| 235 |
+
Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
|
| 236 |
+
Image: {os.path.basename(image_path)}
|
| 237 |
+
|
| 238 |
+
Analysis Steps:
|
| 239 |
+
1. Logged in as user {username} to HuggingFace
|
| 240 |
+
2. Loaded WSI: {wsi}
|
| 241 |
+
3. Found tissues
|
| 242 |
+
4. Tiled tissues
|
| 243 |
+
5. Extracted features
|
| 244 |
+
6. Aggregated features
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
Results:
|
| 248 |
+
{results}
|
| 249 |
+
|
| 250 |
+
Output Files:
|
| 251 |
+
|
| 252 |
+
"""
|
| 253 |
+
print(log)
|
| 254 |
+
return log
|
| 255 |
+
|
| 256 |
+
def quantify_tumor_infiltrating_lymphocites(image_path, tile_size=256, tile_step=128, batch_size=4, output_dir="./output"):
|
| 257 |
+
"""Quantifies Tumor-Infiltrating Lymphocytes (TILs) from Whole-Slide Images (WSIs).
|
| 258 |
+
|
| 259 |
+
Parameters
|
| 260 |
+
----------
|
| 261 |
+
image_path: str
|
| 262 |
+
Path to the whole slide image file.
|
| 263 |
+
tile_size: int, optional
|
| 264 |
+
Size of inference tiles (default: 256)
|
| 265 |
+
tile_step: int, optional
|
| 266 |
+
Step size between inference tiles (default: 128)
|
| 267 |
+
batch_size: int, optional
|
| 268 |
+
Simulatenous inference tiles (default: 4)
|
| 269 |
+
output_dir: str, optional
|
| 270 |
+
Directory to save output files (default: "./output")
|
| 271 |
+
Returns
|
| 272 |
+
-------
|
| 273 |
+
str
|
| 274 |
+
Research log summarizing analysis and results
|
| 275 |
+
|
| 276 |
+
"""
|
| 277 |
+
import os
|
| 278 |
+
import numpy as np
|
| 279 |
+
import pandas as pd
|
| 280 |
+
import lazyslide as zs
|
| 281 |
+
from datetime import datetime
|
| 282 |
+
import matplotlib.pyplot as plt
|
| 283 |
+
|
| 284 |
+
# Step 1: Load WSI via LazySlide
|
| 285 |
+
try:
|
| 286 |
+
wsi = zs.open_wsi(image_path)
|
| 287 |
+
except Exception as e:
|
| 288 |
+
return f"Error loading WSI: {str(e)}"
|
| 289 |
+
|
| 290 |
+
# Step 2: Build a tissue mask + upscale it for higher resolutions
|
| 291 |
+
try:
|
| 292 |
+
tissue_mask = zs.pp.find_tissues(wsi, refine_level=0, to_hsv=True)
|
| 293 |
+
except:
|
| 294 |
+
return f"Error building tissue mask: {str(e)}"
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
    # Step 3: Cell type segmentation using LazySlide's seg.cell_types
    try:
        zs.seg.cell_types(wsi, batch_size=batch_size)
    except Exception as e:
        return f"Error during cell type segmentation: {str(e)}"

    # Step 4: Load results
    instance_map = zs.io.load_annotations(wsi, "instance_map")
    type_map = zs.io.load_annotations(wsi, "cell_types")  # may include TIL labels

    instance_map_path = os.path.join(output_dir, "instance_map.npy")
    type_map_path = os.path.join(output_dir, "cell_type_map.npy")
    np.save(instance_map_path, instance_map)
    np.save(type_map_path, type_map)

    # Step 5: Define the TIL cell type ID (e.g., 1 for TILs)
    til_type_id = 1

    # Step 6: Compute TIL counts. Count every nucleus pixel inside the tissue
    # mask, then the subset typed as TILs (pixel counts are used as a proxy for
    # nucleus counts). The original code intersected the tissue mask with the
    # TIL type twice, which made the total and TIL counts identical.
    til_mask = tissue_mask & (type_map == til_type_id)
    nuclei_mask = tissue_mask & (instance_map > 0)
    total_cells = np.count_nonzero(nuclei_mask)
    til_cells = np.count_nonzero(til_mask)

    # Step 7: Compute densities
    pixel_area_mm2 = (wsi.mpp ** 2) / 1e6  # convert µm² per pixel to mm²
    roi_area_mm2 = np.count_nonzero(tissue_mask) * pixel_area_mm2
    til_density = til_cells / roi_area_mm2 if roi_area_mm2 > 0 else float("nan")
    total_density = total_cells / roi_area_mm2 if roi_area_mm2 > 0 else float("nan")
    til_fraction = til_cells / total_cells if total_cells > 0 else float("nan")

    # Step 8: Save metrics CSV
    metrics = {
        "total_nuclei": total_cells,
        "til_nuclei": til_cells,
        "til_fraction": til_fraction,
        "til_density_per_mm2": til_density,
        "total_density_per_mm2": total_density,
        "roi_area_mm2": roi_area_mm2,
    }
    metrics_df = pd.DataFrame([metrics])
    metrics_path = os.path.join(output_dir, "metrics.csv")
    metrics_df.to_csv(metrics_path, index=False)

    # Step 9: Create and save overlay visualization
    overlay = np.zeros((*type_map.shape, 3), dtype=np.uint8)
    overlay[type_map == til_type_id] = [255, 0, 0]  # red for TILs
    overlay[(type_map != til_type_id) & (instance_map > 0)] = [0, 255, 0]  # green for other nuclei
    overlay_path = os.path.join(output_dir, "overlay.png")
    plt.imsave(overlay_path, overlay)

    # Step 10: Create and return research log
    log = f"""
Research Log: Quantification of Tumor-Infiltrating Lymphocytes
Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
Image: {os.path.basename(image_path)}

Analysis Steps:
1. Loaded and preprocessed the whole slide image into upscaled tiles
2. Applied NuLite nucleus instance segmentation and classification on tiles
3. Computed and quantified TIL (based on the inflamed cell class) and total nuclear density

Results:
- Total Nuclei: {int(total_cells)}
- Total Inflamed Nuclei: {int(til_cells)}
- TIL Density (per mm²): {til_density:.2f}
- TIL Fraction: {til_fraction:.3f}

Output Files:
- Segmented Image: {os.path.basename(overlay_path)}
- Measurements: {os.path.basename(metrics_path)}
"""

    return log

def quantify_fibrosis(image_path, model="grandqc", output_dir="./output"):
    """Quantifies fibrosis from whole slide images (WSIs).

    Parameters
    ----------
    image_path: str
        Path to the image file.
    output_dir: str, optional
        Directory to save output files (default: "./output")
    model: str, optional
        Tissue segmentation model to use (default: grandqc)

    Returns
    -------
    str
        Research log summarizing analysis and results
    """
    import os
    import lazyslide as zs
    from datetime import datetime

    # Step 1: Load WSI via LazySlide
    try:
        wsi = zs.open_wsi(image_path)
    except Exception as e:
        return f"Error loading WSI: {str(e)}"

    zs.seg.tissue(wsi, model=model)

    # The log below is still a template; fibrosis-specific steps and results
    # remain to be filled in.
    log = f"""
Research Log: Quantification of Fibrosis
Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
Image: {os.path.basename(image_path)}

Analysis Steps:
1.
2.
3.

Results:
-
-
-

Output Files:
-
-
"""
    return log

# def template(image_path, output_dir="./output"):
#     """Template.
#
#     Parameters
#     ----------
#     image_path: str
#         Path to the image file.
#     output_dir: str, optional
#         Directory to save output files (default: "./output")
#
#     Returns
#     -------
#     str
#         Research log summarizing analysis and results
#     """
#     # Step X
#
#     log = f"""
# Research Log: Template
# Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
# Image: {os.path.basename(image_path)}
#
# Analysis Steps:
# 1.
# 2.
# 3.
#
# Results:
# -
# -
# -
#
# Output Files:
# -
# -
# """
#     return log
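
For reference, the density arithmetic above reduces to a few NumPy mask operations. The sketch below recomputes the same metrics on toy arrays; the shapes, label values, and `mpp` are invented for illustration (in the tool they come from the WSI and the NuLite instance/type maps):

```python
import numpy as np

mpp = 0.5  # microns per pixel (assumed; the tool reads wsi.mpp)
rng = np.random.default_rng(0)

tissue_mask = np.zeros((512, 512), dtype=bool)
tissue_mask[64:448, 64:448] = True                # toy tissue region
instance_map = rng.integers(0, 3, (512, 512))     # 0 = background, >0 = nucleus pixel
type_map = rng.integers(0, 4, (512, 512))         # toy cell-type labels
til_type_id = 1                                   # inflamed / TIL class

total_pixels = np.count_nonzero(tissue_mask & (instance_map > 0))
til_pixels = np.count_nonzero(tissue_mask & (type_map == til_type_id))
roi_area_mm2 = np.count_nonzero(tissue_mask) * (mpp ** 2) / 1e6  # µm² -> mm²

print(f"TIL density: {til_pixels / roi_area_mm2:.1f} per mm²")
print(f"TIL fraction: {til_pixels / max(total_pixels, 1):.3f}")
```
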
histopath/tool/support_tools.py
ADDED
@@ -0,0 +1,66 @@
import sys
from io import StringIO

# Create a persistent namespace that will be shared across all executions
_persistent_namespace = {}

def run_python_repl(command: str) -> str:
    """Executes the provided Python command in a persistent environment and returns the output.
    Variables defined in one execution will be available in subsequent executions.
    """

    def execute_in_repl(command: str) -> str:
        """Helper function to execute the command in the persistent environment."""
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()

        # Use the persistent namespace
        global _persistent_namespace

        try:
            # Execute the command in the persistent namespace
            exec(command, _persistent_namespace)
            output = mystdout.getvalue()
        except Exception as e:
            output = f"Error: {str(e)}"
        finally:
            sys.stdout = old_stdout
        return output

    command = command.strip("```").strip()
    return execute_in_repl(command)


def read_function_source_code(function_name: str) -> str:
    """Read the source code of a function from any module path.

    Parameters
    ----------
    function_name (str): Fully qualified function name (e.g., 'bioagentos.tool.support_tools.write_python_code')

    Returns
    -------
    str: The source code of the function

    """
    import importlib
    import inspect

    # Split the function name into module path and function name
    parts = function_name.split(".")
    module_path = ".".join(parts[:-1])
    func_name = parts[-1]

    try:
        # Import the module
        module = importlib.import_module(module_path)

        # Get the function object from the module
        function = getattr(module, func_name)

        # Get the source code of the function
        source_code = inspect.getsource(function)

        return source_code
    except (ImportError, AttributeError) as e:
        return f"Error: Could not find function '{function_name}'. Details: {str(e)}"
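
A quick demonstration of the persistent namespace (hypothetical REPL session; `x` survives between calls because every execution shares `_persistent_namespace`):

```python
from histopath.tool.support_tools import run_python_repl

run_python_repl("x = 21")               # defines x; produces no output
print(run_python_repl("print(x * 2)"))  # prints "42": x persisted across calls
```
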
histopath/tool/tool_description/__pycache__/pathology.cpython-311.pyc
ADDED
Binary file (1.77 kB).
histopath/tool/tool_description/__pycache__/support_tools.cpython-311.pyc
ADDED
Binary file (822 Bytes).
histopath/tool/tool_description/pathology.py
ADDED
@@ -0,0 +1,156 @@
description = [
    {
        "description": "Perform zero-shot classification on a whole slide image",
        "name": "zero_shot_classification",
        "optional_parameters": [
            {
                "default": "./output",
                "description": "Directory to save output files",
                "name": "output_dir",
                "type": "str"
            }
        ],
        "required_parameters": [
            {
                "default": None,
                "description": "File path of the whole slide image",
                "name": "image_path",
                "type": "str"
            },
            {
                "default": None,
                "description": "Labels of the classes to perform zero-shot classification",
                "name": "labels",
                "type": "list"
            }
        ]
    },
    {
        "description": "Segment a Whole Slide Image (WSI)",
        "name": "segment_slide",
        "optional_parameters": [
            {
                "default": "./output",
                "description": "Directory to save output files",
                "name": "output_dir",
                "type": "str"
            }
        ],
        "required_parameters": [
            {
                "default": None,
                "description": "Path of the whole slide image",
                "name": "image_path",
                "type": "str"
            },
            {
                "default": None,
                "description": "Type of segmentation to perform",
                "name": "seg_type",
                "type": "str"
            },
            {
                "default": None,
                "description": "Segmentation model to use",
                "name": "model",
                "type": "str"
            }
        ]
    },
    {
        "description": "Quantify Fibrosis from a Whole Slide Image",
        "name": "quantify_fibrosis",
        "optional_parameters": [
            {
                "default": "./output",
                "description": "Directory to save output files",
                "name": "output_dir",
                "type": "str"
            },
            {
                "default": "grandqc",
                "description": "Tissue segmentation model to use (default: grandqc)",
                "name": "model",
                "type": "str"
            }
        ],
        "required_parameters": [
            {
                "default": None,
                "description": "Path to the whole slide image",
                "name": "image_path",
                "type": "str"
            }
        ]
    },
    {
        "description": "Caption a whole slide image directly from the slide file or via tiled tissue patches from the slide file",
        "name": "caption_slide",
        "optional_parameters": [
            {
                "default": "./output",
                "description": "Directory to save output files",
                "name": "output_dir",
                "type": "str"
            },
            {
                "default": "Diagnosis:",
                "description": "Starting prompt of the generated caption",
                "name": "prompt",
                "type": "str"
            }
        ],
        "required_parameters": [
            {
                "default": None,
                "description": "Path to the whole slide image",
                "name": "image_path",
                "type": "str"
            },
            {
                "default": None,
                "description": "Name of the whole slide image file",
                "name": "slide_name",
                "type": "str"
            }
        ]
    }
]
# Commented-out draft entry for the TIL quantification tool (draft syntax and
# spelling corrected so it can be uncommented once the placeholders are filled):
# {
#     "description": "Quantify Tumor-Infiltrating Lymphocytes from "
#                    "whole slide image data via identification of inflamed nuclei "
#                    "region fractions and density",
#     "name": "quantify_tumor_infiltrating_lymphocytes",
#     "optional_parameters": [
#         {
#             "default": "./output",
#             "description": "Directory to save output files",
#             "name": "output_dir",
#             "type": "str"
#         },
#         {
#             "default": "",
#             "description": "",
#             "name": "",
#             "type": ""
#         },
#         {
#             "default": "",
#             "description": "",
#             "name": "",
#             "type": ""
#         },
#         {
#             "default": "",
#             "description": "",
#             "name": "",
#             "type": ""
#         },
#     ],
#     "required_parameters": [
#         {
#             "default": None,
#             "description": "Path to the whole slide image",
#             "name": "image_path",
#             "type": "str"
#         }
#     ],
# },
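
Each entry follows the schema that `ToolRegistry` validates (see `histopath/tool/tool_registry.py` below). A minimal sanity check over these descriptions might look like this (illustrative only, not part of the commit):

```python
from histopath.tool.tool_description.pathology import description

required_keys = {"name", "description", "required_parameters"}
for entry in description:
    missing = required_keys - entry.keys()
    assert not missing, f"tool {entry.get('name', '?')} is missing keys: {missing}"
print(f"{len(description)} pathology tool descriptions OK")
```
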
histopath/tool/tool_description/support_tools.py
ADDED
@@ -0,0 +1,30 @@
description = [
    {
        "description": "Executes the provided Python command in the notebook environment and returns the output.",
        "name": "run_python_repl",
        "optional_parameters": [],
        "required_parameters": [
            {
                "default": None,
                "description": "Python command to execute in the notebook environment",
                "name": "command",
                "type": "str",
            }
        ],
    },
    {
        "description": "Read the source code of a function from any module path.",
        "name": "read_function_source_code",
        "optional_parameters": [],
        "required_parameters": [
            {
                "default": None,
                "description": "Fully qualified function name "
                               "(e.g., 'bioagentos.tool.support_tools.write_python_code')",
                "name": "function_name",
                "type": "str",
            }
        ],
    },
]
histopath/tool/tool_registry.py
ADDED
@@ -0,0 +1,84 @@
import pickle
import pandas as pd
import numpy as np

class ToolRegistry:
    def __init__(self, tools):
        self.tools = []
        self.next_id = 0

        for j in tools.values():
            for tool in j:
                self.register_tool(tool)

        docs = []
        for tool_id in range(len(self.tools)):
            docs.append([int(tool_id), self.get_tool_by_id(int(tool_id))])
        self.document_df = pd.DataFrame(docs, columns=["docid", "document_content"])

    def register_tool(self, tool):
        if self.validate_tool(tool):
            tool["id"] = self.next_id
            self.tools.append(tool)
            self.next_id += 1
        else:
            raise ValueError("Invalid tool format")

    def validate_tool(self, tool):
        required_keys = ["name", "description", "required_parameters"]
        return all(key in tool for key in required_keys)

    def get_tool_by_name(self, name):
        for tool in self.tools:
            if tool["name"] == name:
                return tool
        return None

    def get_tool_by_id(self, tool_id):
        for tool in self.tools:
            if tool["id"] == tool_id:
                return tool
        return None

    def get_id_by_name(self, name):
        for tool in self.tools:
            if tool["name"] == name:
                return tool["id"]
        return None

    def get_name_by_id(self, tool_id):
        for tool in self.tools:
            if tool["id"] == tool_id:
                return tool["name"]
        return None

    def list_tools(self):
        return [{"name": tool["name"], "id": tool["id"]} for tool in self.tools]

    def remove_tool_by_id(self, tool_id):
        # Remove the tool with the given id
        tool = self.get_tool_by_id(tool_id)
        if tool:
            self.tools = [t for t in self.tools if t["id"] != tool_id]
            return True
        return False

    def remove_tool_by_name(self, name):
        # Remove the tool with the given name
        tool = self.get_tool_by_name(name)
        if tool:
            self.tools = [t for t in self.tools if t["name"] != name]
            return True
        return False

    def save_registry(self, filename):
        with open(filename, "wb") as file:
            pickle.dump(self, file)

    # def get_langchain_tool_by_id(self, id):
    #     return self.langchain_tools[id]

    @staticmethod
    def load_registry(filename):
        with open(filename, "rb") as file:
            return pickle.load(file)
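
Wiring the registry to the tool descriptions above (a usage sketch; `read_module2api` is defined in `histopath/utils.py`, shown next in this commit):

```python
from histopath.tool.tool_registry import ToolRegistry
from histopath.utils import read_module2api

registry = ToolRegistry(read_module2api())  # registers every described tool
print(registry.list_tools())                # e.g. [{'name': 'run_python_repl', 'id': 0}, ...]
print(registry.get_id_by_name("quantify_fibrosis"))

registry.save_registry("registry.pkl")      # round-trips via pickle
restored = ToolRegistry.load_registry("registry.pkl")
```
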
histopath/utils.py
ADDED
@@ -0,0 +1,722 @@
import ast
import enum
import importlib
import json
import os
import pickle
import subprocess
import tempfile
import traceback
import zipfile
from typing import Any, ClassVar
from urllib.parse import urljoin

import pandas as pd
import requests
import tqdm  # Add tqdm for progress bar
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.messages.base import get_msg_title_repr
from langchain_core.tools import StructuredTool
from langchain_core.utils.interactive_env import is_interactive_env
from pydantic import BaseModel, Field, ValidationError


def run_bash_script(script: str) -> str:
    """Run a Bash script using subprocess.

    Args:
        script: Bash script to run

    Returns:
        Output of the Bash script

    Example:
        ```
        # Example of a complex Bash script
        script = '''
        #!/bin/bash

        # Define variables
        DATA_DIR="/path/to/data"
        OUTPUT_FILE="results.txt"

        # Create output directory if it doesn't exist
        mkdir -p $(dirname $OUTPUT_FILE)

        # Loop through files
        for file in $DATA_DIR/*.txt; do
            echo "Processing $file..."
            # Count lines in each file
            line_count=$(wc -l < $file)
            echo "$file: $line_count lines" >> $OUTPUT_FILE
        done

        echo "Processing complete. Results saved to $OUTPUT_FILE"
        '''
        result = run_bash_script(script)
        print(result)
        ```

    """
    try:
        # Trim any leading/trailing whitespace
        script = script.strip()

        # If the script is empty, return an error
        if not script:
            return "Error: Empty script"

        # Create a temporary file to store the Bash script
        with tempfile.NamedTemporaryFile(suffix=".sh", mode="w", delete=False) as f:
            # Add shebang if not present
            if not script.startswith("#!/"):
                f.write("#!/bin/bash\n")
            # Add set -e to exit on error
            if "set -e" not in script:
                f.write("set -e\n")
            f.write(script)
            temp_file = f.name

        # Make the script executable
        os.chmod(temp_file, 0o755)

        # Get current environment variables and working directory
        env = os.environ.copy()
        cwd = os.getcwd()

        # Run the Bash script with the current environment and working directory.
        # Pass the path as a string: a list combined with shell=True would
        # silently drop every element after the first.
        result = subprocess.run(
            temp_file,
            shell=True,
            capture_output=True,
            text=True,
            check=False,
            env=env,
            cwd=cwd,
        )

        # Clean up the temporary file
        os.unlink(temp_file)

        # Return the output
        if result.returncode != 0:
            traceback.print_stack()
            print(result)
            return f"Error running Bash script (exit code {result.returncode}):\n{result.stderr}"
        else:
            return result.stdout
    except Exception as e:
        traceback.print_exc()
        return f"Error running Bash script: {str(e)}"


# Keep the run_cli_command for backward compatibility
def run_cli_command(command: str) -> str:
    """Run a CLI command using subprocess.

    Args:
        command: CLI command to run

    Returns:
        Output of the CLI command

    """
    try:
        # Trim any leading/trailing whitespace
        command = command.strip()

        # If the command is empty, return an error
        if not command:
            return "Error: Empty command"

        # Split the command into a list of arguments, handling quoted arguments correctly
        import shlex

        args = shlex.split(command)

        # Run the command
        result = subprocess.run(args, capture_output=True, text=True, check=False)

        # Return the output
        if result.returncode != 0:
            return f"Error running command '{command}':\n{result.stderr}"
        else:
            return result.stdout
    except Exception as e:
        return f"Error running command '{command}': {str(e)}"


def run_with_timeout(func, args=None, kwargs=None, timeout=600):
    """Run a function with a timeout using threading instead of multiprocessing.
    This allows variables to persist in the global namespace between function calls.
    Returns the function result or a timeout error message.
    """
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}

    import ctypes
    import queue
    import threading

    result_queue = queue.Queue()

    def thread_func(func, args, kwargs, result_queue):
        """Function to run in a separate thread."""
        try:
            result = func(*args, **kwargs)
            result_queue.put(("success", result))
        except Exception as e:
            result_queue.put(("error", str(e)))

    # Start a separate thread
    thread = threading.Thread(target=thread_func, args=(func, args, kwargs, result_queue))
    thread.daemon = True  # Set as daemon so it will be killed when main thread exits
    thread.start()

    # Wait for the specified timeout
    thread.join(timeout)

    # Check if the thread is still running after timeout
    if thread.is_alive():
        print(f"TIMEOUT: Code execution timed out after {timeout} seconds")

        # Unfortunately, there's no clean way to force terminate a thread in Python.
        # The recommended approach is to use daemon threads and let them be killed
        # when the main thread exits. Here, we'll try to raise an exception in the
        # thread to make it stop.
        try:
            # Get thread ID and try to terminate it
            thread_id = thread.ident
            if thread_id:
                # This is a bit dangerous and not 100% reliable:
                # it attempts to raise a SystemExit exception in the thread
                res = ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(thread_id), ctypes.py_object(SystemExit))
                if res > 1:
                    # Oops, we raised too many exceptions
                    ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(thread_id), None)
        except Exception as e:
            print(f"Error trying to terminate thread: {e}")

        return f"ERROR: Code execution timed out after {timeout} seconds. Please try with simpler inputs or break your task into smaller steps."

    # Get the result from the queue if available
    try:
        status, result = result_queue.get(block=False)
        return result if status == "success" else f"Error in execution: {result}"
    except queue.Empty:
        return "Error: Execution completed but no result was returned"


class api_schema(BaseModel):
    """api schema specification."""

    api_schema: str | None = Field(description="The api schema as a dictionary")


def function_to_api_schema(function_string, llm):
    prompt = """
    Based on a code snippet, help me write an API docstring in the format like this:

    {{'name': 'get_gene_set_enrichment',
    'description': 'Given a list of genes, identify a pathway that is enriched for this gene set. Return a list of pathway name, p-value, z-scores.',
    'required_parameters': [{{'name': 'genes',
    'type': 'List[str]',
    'description': 'List of gene symbols to analyze',
    'default': None}}],
    'optional_parameters': [{{'name': 'top_k',
    'type': 'int',
    'description': 'Top K pathways to return',
    'default': 10}}, {{'name': 'database',
    'type': 'str',
    'description': 'Name of the database to use for enrichment analysis',
    'default': "gene_ontology"}}]}}

    Strictly follow the input from the function - don't create fake optional parameters.
    For variables without default values, set them as None, not null.
    For variables with boolean values, use capitalized True or False, not true or false.
    Do not add any return type in the docstring.
    Be as clear and succinct as possible for the descriptions. Please do not make it overly verbose.
    Here is the code snippet:
    {code}
    """
    llm = llm.with_structured_output(api_schema)

    api = None  # keep a reference so the error handler below never hits an unbound name
    for _ in range(7):
        try:
            api = llm.invoke(prompt.format(code=function_string)).dict()["api_schema"]
            return ast.literal_eval(api)  # -> prefer "default": None
            # return json.loads(api)  # -> prefer "default": null
        except Exception as e:
            print("API string:", api)
            print("Error parsing the API string:", e)
            continue

    return "Error: Could not parse the API schema"


def get_all_functions_from_file(file_path):
    with open(file_path) as file:
        file_content = file.read()

    # Parse the file content into an AST (Abstract Syntax Tree)
    tree = ast.parse(file_content)

    # List to hold the top-level functions as strings
    functions = []

    # Walk through the AST nodes
    for node in tree.body:  # Only consider top-level nodes in the body
        if isinstance(node, ast.FunctionDef):  # Check if the node is a function definition
            # Skip if function name starts with underscore
            if node.name.startswith("_"):
                continue

            start_line = node.lineno - 1  # Get the starting line of the function
            end_line = node.end_lineno  # Get the ending line of the function (only available in Python 3.8+)
            func_code = file_content.splitlines()[start_line:end_line]
            functions.append("\n".join(func_code))  # Join lines of the function and add to the list

    return functions


def write_python_code(request: str):
    from langchain_anthropic import ChatAnthropic
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate

    model = ChatAnthropic(model="claude-3-5-sonnet-20240620")
    template = """Write some python code to solve the user's problem.

    Return only python code in Markdown format, e.g.:

    ```python
    ....
    ```"""
    prompt = ChatPromptTemplate.from_messages([("system", template), ("human", "{input}")])

    def _sanitize_output(text: str):
        _, after = text.split("```python")
        return after.split("```")[0]

    chain = prompt | model | StrOutputParser() | _sanitize_output
    return chain.invoke({"input": "write a code that " + request})


def execute_graphql_query(
    query: str,
    variables: dict,
    api_address: str = "https://api.genetics.opentargets.org/graphql",
) -> dict:
    """Executes a GraphQL query with variables and returns the data as a dictionary."""
    headers = {"Content-Type": "application/json"}
    response = requests.post(api_address, json={"query": query, "variables": variables}, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        print(response.text)
        response.raise_for_status()


def get_tool_decorated_functions(relative_path):
    import ast
    import importlib.util
    import os

    # Get the directory of the current file (__init__.py)
    current_dir = os.path.dirname(os.path.abspath(__file__))

    # Construct the absolute path from the relative path
    file_path = os.path.join(current_dir, relative_path)

    with open(file_path) as file:
        tree = ast.parse(file.read(), filename=file_path)

    tool_function_names = []

    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            for decorator in node.decorator_list:
                if (
                    isinstance(decorator, ast.Name)
                    and decorator.id == "tool"
                    or (
                        isinstance(decorator, ast.Call)
                        and isinstance(decorator.func, ast.Name)
                        and decorator.func.id == "tool"
                    )
                ):
                    tool_function_names.append(node.name)

    # Calculate the module name from the relative path
    package_path = os.path.relpath(file_path, start=current_dir)
    module_name = package_path.replace(os.path.sep, ".").rsplit(".", 1)[0]

    # Import the module and get the function objects
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    tool_functions = [getattr(module, name) for name in tool_function_names]

    return tool_functions


def load_pickle(file):
    import pickle

    with open(file, "rb") as f:
        return pickle.load(f)


def pretty_print(message, printout=True):
    if isinstance(message, tuple):
        title = message
    elif isinstance(message.content, list):
        title = get_msg_title_repr(message.type.title().upper() + " Message", bold=is_interactive_env())
        if message.name is not None:
            title += f"\nName: {message.name}"

        for i in message.content:
            if i["type"] == "text":
                title += f"\n{i['text']}\n"
            elif i["type"] == "tool_use":
                title += f"\nTool: {i['name']}"
                title += f"\nInput: {i['input']}"
        if printout:
            print(f"{title}")
    else:
        title = get_msg_title_repr(message.type.title() + " Message", bold=is_interactive_env())
        if message.name is not None:
            title += f"\nName: {message.name}"
        title += f"\n\n{message.content}"
        if printout:
            print(f"{title}")
    return title


class CustomBaseModel(BaseModel):
    api_schema: ClassVar[dict] = None  # Class variable to store api_schema

    # Add model_config with arbitrary_types_allowed=True
    model_config = {"arbitrary_types_allowed": True}

    @classmethod
    def set_api_schema(cls, schema: dict):
        cls.api_schema = schema

    @classmethod
    def model_validate(cls, obj):
        try:
            return super().model_validate(obj)
        except (ValidationError, AttributeError) as e:
            if not cls.api_schema:
                raise e  # If no api_schema is set, raise original error

            error_msg = "Required Parameters:\n"
            for param in cls.api_schema["required_parameters"]:
                error_msg += f"- {param['name']} ({param['type']}): {param['description']}\n"

            error_msg += "\nErrors:\n"
            for err in e.errors():
                field = err["loc"][0] if err["loc"] else "input"
                error_msg += f"- {field}: {err['msg']}\n"

            if not obj:
                error_msg += "\nNo input provided"
            else:
                error_msg += "\nProvided Input:\n"
                for key, value in obj.items():
                    error_msg += f"- {key}: {value}\n"

            missing_params = {param["name"] for param in cls.api_schema["required_parameters"]} - set(obj.keys())
            if missing_params:
                error_msg += "\nMissing Parameters:\n"
                for param in missing_params:
                    error_msg += f"- {param}\n"

            # Create proper validation error structure
            raise ValidationError.from_exception_data(
                title="Validation Error",
                line_errors=[
                    {
                        "type": "value_error",
                        "loc": ("input",),
                        "input": obj,
                        "ctx": {
                            "error": error_msg,
                        },
                    }
                ],
            ) from None


def safe_execute_decorator(func):
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            return str(e)

    return wrapper


def api_schema_to_langchain_tool(api_schema, mode="generated_tool", module_name=None):
    if mode == "generated_tool":
        module = importlib.import_module("histopath.tool.generated_tool." + api_schema["tool_name"] + ".api")
    elif mode == "custom_tool":
        module = importlib.import_module(module_name)

    api_function = getattr(module, api_schema["name"])
    api_function = safe_execute_decorator(api_function)

    # Define a mapping from string type names to actual Python type objects
    type_mapping = {
        "string": str,
        "integer": int,
        "boolean": bool,
        "pandas": pd.DataFrame,  # Use the imported pandas.DataFrame directly
        "str": str,
        "int": int,
        "bool": bool,
        "List[str]": list[str],
        "List[int]": list[int],
        "Dict": dict,
        "Any": Any,
    }

    # Create the fields and annotations
    annotations = {}
    for param in api_schema["required_parameters"]:
        param_type = param["type"]
        if param_type in type_mapping:
            annotations[param["name"]] = type_mapping[param_type]
        else:
            # For types not in the mapping, try a safer approach than direct eval
            try:
                annotations[param["name"]] = eval(param_type)
            except (NameError, SyntaxError):
                # Default to Any for unknown types
                annotations[param["name"]] = Any

    fields = {param["name"]: Field(description=param["description"]) for param in api_schema["required_parameters"]}

    # Create the ApiInput class dynamically
    ApiInput = type("Input", (CustomBaseModel,), {"__annotations__": annotations, **fields})
    # Set the api_schema
    ApiInput.set_api_schema(api_schema)

    # Create the StructuredTool
    api_tool = StructuredTool.from_function(
        func=api_function,
        name=api_schema["name"],
        description=api_schema["description"],
        args_schema=ApiInput,
        return_direct=True,
    )

    return api_tool


class ID(enum.Enum):
    ENTREZ = "Entrez"
    ENSEMBL = "Ensembl without version"  # e.g. ENSG00000123374
    ENSEMBL_W_VERSION = "Ensembl with version"  # e.g. ENSG00000123374.10 (needed for GTEx)


def save_pkl(f, filename):
    with open(filename, "wb") as file:
        pickle.dump(f, file)


def load_pkl(filename):
    with open(filename, "rb") as file:
        return pickle.load(file)


_TEXT_COLOR_MAPPING = {
    "blue": "36;1",
    "yellow": "33;1",
    "pink": "38;5;200",
    "green": "32;1",
    "red": "31;1",
}


def color_print(text, color="blue"):
    color_str = _TEXT_COLOR_MAPPING[color]
    print(f"\u001b[{color_str}m\033[1;3m{text}\u001b[0m")


class PromptLogger(BaseCallbackHandler):
    def on_chat_model_start(self, serialized, messages, **kwargs):
        for message in messages[0]:
            color_print(message.pretty_repr(), color="green")


class NodeLogger(BaseCallbackHandler):
    def on_llm_end(self, response, **kwargs):  # response of type LLMResult
        # response.generations is of type List[List[Generation]] because
        # "each input could have multiple candidate generations"
        for generations in response.generations:
            for generation in generations:
                generated_text = generation.message.content
                # token_usage = generation.message.response_metadata["token_usage"]
                color_print(generated_text, color="yellow")

    def on_agent_action(self, action, **kwargs):
        color_print(action.log, color="pink")

    def on_agent_finish(self, finish, **kwargs):
        color_print(finish, color="red")

    def on_tool_start(self, serialized, input_str, **kwargs):
        tool_name = serialized.get("name")
        color_print(f"Calling {tool_name} with inputs: {input_str}", color="pink")

    def on_tool_end(self, output, **kwargs):
        output = str(output)
        color_print(output, color="blue")


def check_or_create_path(path=None):
    # Set a default path if none is provided
    if path is None:
        path = os.path.join(os.getcwd(), "tmp_directory")

    # Check if the path exists
    if not os.path.exists(path):
        # If it doesn't exist, create the directory
        os.makedirs(path)
        print(f"Directory created at: {path}")
    else:
        print(f"Directory already exists at: {path}")

    return path


def langchain_to_gradio_message(message):
    # Build the title and content based on the message type
    if isinstance(message.content, list):
        # For a message with multiple content items (like text and tool use)
        gradio_messages = []
        for item in message.content:
            gradio_message = {
                "role": "user" if message.type == "human" else "assistant",
                "content": "",
                "metadata": {},
            }

            if item["type"] == "text":
                item["text"] = item["text"].replace("<think>", "\n")
                item["text"] = item["text"].replace("</think>", "\n")
                gradio_message["content"] += f"{item['text']}\n"
                gradio_messages.append(gradio_message)
            elif item["type"] == "tool_use":
                if item["name"] == "run_python_repl":
                    gradio_message["metadata"]["title"] = "🛠️ Writing code..."
                    # input = "```python {code_block}```\n".format(code_block=item['input']["command"])
                    gradio_message["metadata"]["log"] = "Executing Code block..."
                    gradio_message["content"] = f"##### Code: \n ```python \n {item['input']['command']} \n``` \n"
                else:
                    gradio_message["metadata"]["title"] = f"🛠️ Used tool ```{item['name']}```"
                    to_print = ";".join([i + ": " + str(j) for i, j in item["input"].items()])
                    gradio_message["metadata"]["log"] = f"🔍 Input -- {to_print}\n"
                gradio_message["metadata"]["status"] = "pending"
                gradio_messages.append(gradio_message)

    else:
        gradio_message = {
            "role": "user" if message.type == "human" else "assistant",
            "content": "",
            "metadata": {},
        }
        print(message)
        content = message.content
        content = content.replace("<think>", "\n")
        content = content.replace("</think>", "\n")
        content = content.replace("<solution>", "\n")
        content = content.replace("</solution>", "\n")

        gradio_message["content"] = content
        gradio_messages = [gradio_message]
    return gradio_messages


def parse_hpo_obo(file_path):
    """Parse the HPO OBO file and create a dictionary mapping HP IDs to phenotype descriptions.

    Args:
        file_path (str): Path to the HPO OBO file.

    Returns:
        dict: A dictionary where keys are HP IDs and values are phenotype descriptions.

    """
    hp_dict = {}
    current_id = None
    current_name = None

    with open(file_path) as file:
        for line in file:
            line = line.strip()
            if line.startswith("[Term]"):
                # If a new term block starts, save the previous term
                if current_id and current_name:
                    hp_dict[current_id] = current_name
                current_id = None
                current_name = None
            elif line.startswith("id: HP:"):
                current_id = line.split(": ")[1]
            elif line.startswith("name:"):
                current_name = line.split(": ", 1)[1]

    # Add the last term to the dictionary
    if current_id and current_name:
        hp_dict[current_id] = current_name

    return hp_dict


def textify_api_dict(api_dict):
    """Convert a nested API dictionary to a nicely formatted string."""
    lines = []
    for category, methods in api_dict.items():
        lines.append(f"Import file: {category}")
        lines.append("=" * (len("Import file: ") + len(category)))
        for method in methods:
            lines.append(f"Method: {method.get('name', 'N/A')}")
            lines.append(f"  Description: {method.get('description', 'No description provided.')}")

            # Process required parameters
            req_params = method.get("required_parameters", [])
            if req_params:
                lines.append("  Required Parameters:")
                for param in req_params:
                    param_name = param.get("name", "N/A")
                    param_type = param.get("type", "N/A")
                    param_desc = param.get("description", "No description")
                    param_default = param.get("default", "None")
                    lines.append(f"    - {param_name} ({param_type}): {param_desc} [Default: {param_default}]")

            # Process optional parameters
            opt_params = method.get("optional_parameters", [])
            if opt_params:
                lines.append("  Optional Parameters:")
                for param in opt_params:
                    param_name = param.get("name", "N/A")
                    param_type = param.get("type", "N/A")
                    param_desc = param.get("description", "No description")
                    param_default = param.get("default", "None")
                    lines.append(f"    - {param_name} ({param_type}): {param_desc} [Default: {param_default}]")

            lines.append("")  # Empty line between methods
        lines.append("")  # Extra empty line after each category

    return "\n".join(lines)


def read_module2api():
    fields = [
        "support_tools",
        "pathology",
    ]

    module2api = {}
    for field in fields:
        module_name = f"histopath.tool.tool_description.{field}"
        module = importlib.import_module(module_name)
        module2api[f"histopath.tool.{field}"] = module.description
    return module2api
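
As a usage sketch for `run_with_timeout` (the function, delays, and values here are arbitrary examples):

```python
import time
from histopath.utils import run_with_timeout

def slow_add(a, b, delay):
    time.sleep(delay)
    return a + b

print(run_with_timeout(slow_add, args=[2, 3], kwargs={"delay": 1}, timeout=5))   # -> 5
print(run_with_timeout(slow_add, args=[2, 3], kwargs={"delay": 60}, timeout=2))  # -> timeout error string
```
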
histopath_env/environment.yml
ADDED
@@ -0,0 +1,33 @@
name: histopath
channels:
  - conda-forge
  - defaults
dependencies:
  - python=3.11
  - pip
  - pip:
      # Core dependencies
      - requests
      - pandas
      - numpy
      - pydantic>=2.0

      # LangChain ecosystem
      - langchain
      - langchain-core
      - langchain-openai
      - langchain-anthropic
      - langchain-ollama
      - langchain-huggingface
      - langgraph
      # OpenAI + Anthropic API SDKs
      - openai
      - anthropic
      - python-dotenv
      # LazySlide (histopathology toolkit)
      - lazyslide
      # Packages for PRISM
      - python-environ
      - protobuf
      - sacremoses
histopath_env/histo_env.yml
ADDED
@@ -0,0 +1,32 @@
name: histopath
channels:
  - conda-forge
  - defaults
dependencies:
  - python=3.11
  - pip
  - pip:
      # Core dependencies
      - requests
      - pandas
      - numpy
      - pydantic>=2.0

      # LangChain ecosystem
      - langchain
      - langchain-core
      - langchain-openai
      - langchain-anthropic
      - langchain-ollama
      - langchain-huggingface
      - langgraph
      # OpenAI + Anthropic API SDKs
      - openai
      - anthropic
      - python-dotenv
      # LazySlide (histopathology toolkit)
      - lazyslide
      - histolab
      - biolearn
      - pyaging
histopath_env/setup.sh
ADDED
@@ -0,0 +1,108 @@
#!/bin/bash

# This script sets up a comprehensive histopathology environment

# Set up colors for output
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Default tools directory is the current directory
DEFAULT_TOOLS_DIR="$(pwd)/histopath_tools"
TOOLS_DIR=""

echo -e "${YELLOW}=== HistoPath Environment Setup ===${NC}"
echo -e "${BLUE}This script will set up a comprehensive histopathology environment with various tools and packages.${NC}"

# Check if conda is installed
if ! command -v conda &> /dev/null && ! command -v micromamba &> /dev/null; then
    echo -e "${RED}Error: Conda is not installed or not in PATH.${NC}"
    echo "Please install Miniconda or Anaconda first."
    echo "Visit: https://docs.conda.io/en/latest/miniconda.html"
    exit 1
fi

# Function to handle errors
handle_error() {
    local exit_code=$1
    local error_message=$2
    local optional=${3:-false}

    if [ $exit_code -ne 0 ]; then
        echo -e "${RED}Error: $error_message${NC}"
        if [ "$optional" = true ]; then
            echo -e "${YELLOW}Continuing with setup as this component is optional.${NC}"
            return 0
        else
            if [ -z "$NON_INTERACTIVE" ]; then
                read -p "Continue with setup? (y/n) " -n 1 -r
                echo
                if [[ ! $REPLY =~ ^[Yy]$ ]]; then
                    echo -e "${RED}Setup aborted.${NC}"
                    exit 1
                fi
            else
                echo -e "${YELLOW}Non-interactive mode: continuing despite error.${NC}"
            fi
        fi
    fi
    return $exit_code
}

# Function to install a specific environment file
install_env_file() {
    local env_file=$1
    local description=$2
    local optional=${3:-false}

    echo -e "\n${BLUE}=== Installing $description ===${NC}"

    if [ "$optional" = true ]; then
        if [ -z "$NON_INTERACTIVE" ]; then
            read -p "Do you want to install $description? (y/n) " -n 1 -r
            echo
            if [[ ! $REPLY =~ ^[Yy]$ ]]; then
                echo -e "${YELLOW}Skipping $description installation.${NC}"
                return 0
            fi
        else
            echo -e "${YELLOW}Non-interactive mode: automatically installing $description.${NC}"
        fi
    fi

    echo -e "${YELLOW}Installing $description from $env_file...${NC}"
    conda env update -f "$env_file"
    handle_error $? "Failed to install $description." $optional

    if [ $? -eq 0 ]; then
        echo -e "${GREEN}Successfully installed $description!${NC}"
    fi
}

# Main installation process
main() {
    # Step 1: Create base conda environment
    echo -e "\n${YELLOW}Step 1: Creating base environment from environment.yml...${NC}"
    conda env create -n histopath -f environment.yml
    handle_error $? "Failed to create base conda environment."

    # Step 2: Activate the environment
    echo -e "\n${YELLOW}Step 2: Activating conda environment...${NC}"
    if command -v micromamba &> /dev/null; then
        eval "$("$MAMBA_EXE" shell hook --shell bash)"
        micromamba activate histopath
    else
        eval "$(conda shell.bash hook)"
        conda activate histopath
    fi
    handle_error $? "Failed to activate histopath environment."

    # Step 3: Install core histopathology tools
    echo -e "\n${YELLOW}Step 3: Installing core histopathology tools...${NC}"
    install_env_file "histo_env.yml" "core histopathology tools"
}

# Run the main installation process
main
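
Since `setup.sh` honors a `NON_INTERACTIVE` environment variable, it can also be driven from Python via the `run_bash_script` helper in `histopath/utils.py` (an illustrative sketch; it assumes you run from the repository root):

```python
from histopath.utils import run_bash_script

# NON_INTERACTIVE=1 suppresses the y/n prompts in handle_error/install_env_file
print(run_bash_script("cd histopath_env && NON_INTERACTIVE=1 bash setup.sh"))
```
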
requirements.txt
ADDED
@@ -0,0 +1,45 @@
# Core dependencies
requests
pandas
numpy
pydantic
gradio

# LangChain ecosystem
langchain
langchain-core
langchain-openai
langchain-anthropic
langchain-ollama
langchain-huggingface
langgraph

# LLM API SDKs
openai
anthropic
python-dotenv

# Histopathology packages
lazyslide
histolab
biolearn
pyaging

# ML/Vision dependencies (for LazySlide and models)
torch
torchvision
timm
transformers
huggingface-hub
Pillow
scikit-image
opencv-python-headless
matplotlib

# Additional PRISM dependencies
python-environ
protobuf
sacremoses

# For whole slide image support
openslide-python
|