Commit
·
97c8e77
1
Parent(s):
3402b69
rushed demo
Browse files- .gitingore +216 -0
- agents/__init__.py +1 -0
- agents/search_agent.py +116 -0
- app.py +112 -0
- gui/__init__.py +1 -0
- gui/html_decorate.py +157 -0
- gui/web_ui.py +389 -0
- llm/__init__.py +1 -0
- llm/__pycache__/__init__.cpython-310.pyc +0 -0
- llm/__pycache__/oai.cpython-310.pyc +0 -0
- llm/__pycache__/qwen_dashscope.cpython-310.pyc +0 -0
- llm/oai.py +251 -0
- llm/qwen_dashscope.py +140 -0
- messages.json +34 -0
- prompt.py +15 -0
- requirements.txt +2 -0
- scholar.py +111 -0
- search.py +136 -0
- tool_python.py +481 -0
- utils/__pycache__/date.cpython-310.pyc +0 -0
- utils/date.py +71 -0
- utils/logs.py +51 -0
- visit.py +260 -0
.gitingore
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[codz]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py.cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
# Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# UV
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
# uv.lock
|
| 102 |
+
|
| 103 |
+
# poetry
|
| 104 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 105 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 106 |
+
# commonly ignored for libraries.
|
| 107 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 108 |
+
# poetry.lock
|
| 109 |
+
# poetry.toml
|
| 110 |
+
|
| 111 |
+
# pdm
|
| 112 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 113 |
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
| 114 |
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
| 115 |
+
# pdm.lock
|
| 116 |
+
# pdm.toml
|
| 117 |
+
.pdm-python
|
| 118 |
+
.pdm-build/
|
| 119 |
+
|
| 120 |
+
# pixi
|
| 121 |
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
| 122 |
+
# pixi.lock
|
| 123 |
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
| 124 |
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
| 125 |
+
.pixi
|
| 126 |
+
|
| 127 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 128 |
+
__pypackages__/
|
| 129 |
+
|
| 130 |
+
# Celery stuff
|
| 131 |
+
celerybeat-schedule
|
| 132 |
+
celerybeat.pid
|
| 133 |
+
|
| 134 |
+
# Redis
|
| 135 |
+
*.rdb
|
| 136 |
+
*.aof
|
| 137 |
+
*.pid
|
| 138 |
+
|
| 139 |
+
# RabbitMQ
|
| 140 |
+
mnesia/
|
| 141 |
+
rabbitmq/
|
| 142 |
+
rabbitmq-data/
|
| 143 |
+
|
| 144 |
+
# ActiveMQ
|
| 145 |
+
activemq-data/
|
| 146 |
+
|
| 147 |
+
# SageMath parsed files
|
| 148 |
+
*.sage.py
|
| 149 |
+
|
| 150 |
+
# Environments
|
| 151 |
+
.env
|
| 152 |
+
.envrc
|
| 153 |
+
.venv
|
| 154 |
+
env/
|
| 155 |
+
venv/
|
| 156 |
+
ENV/
|
| 157 |
+
env.bak/
|
| 158 |
+
venv.bak/
|
| 159 |
+
|
| 160 |
+
# Spyder project settings
|
| 161 |
+
.spyderproject
|
| 162 |
+
.spyproject
|
| 163 |
+
|
| 164 |
+
# Rope project settings
|
| 165 |
+
.ropeproject
|
| 166 |
+
|
| 167 |
+
# mkdocs documentation
|
| 168 |
+
/site
|
| 169 |
+
|
| 170 |
+
# mypy
|
| 171 |
+
.mypy_cache/
|
| 172 |
+
.dmypy.json
|
| 173 |
+
dmypy.json
|
| 174 |
+
|
| 175 |
+
# Pyre type checker
|
| 176 |
+
.pyre/
|
| 177 |
+
|
| 178 |
+
# pytype static type analyzer
|
| 179 |
+
.pytype/
|
| 180 |
+
|
| 181 |
+
# Cython debug symbols
|
| 182 |
+
cython_debug/
|
| 183 |
+
|
| 184 |
+
# PyCharm
|
| 185 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 186 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 187 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 188 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 189 |
+
# .idea/
|
| 190 |
+
|
| 191 |
+
# Abstra
|
| 192 |
+
# Abstra is an AI-powered process automation framework.
|
| 193 |
+
# Ignore directories containing user credentials, local state, and settings.
|
| 194 |
+
# Learn more at https://abstra.io/docs
|
| 195 |
+
.abstra/
|
| 196 |
+
|
| 197 |
+
# Visual Studio Code
|
| 198 |
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
| 199 |
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
| 200 |
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
| 201 |
+
# you could uncomment the following to ignore the entire vscode folder
|
| 202 |
+
# .vscode/
|
| 203 |
+
|
| 204 |
+
# Ruff stuff:
|
| 205 |
+
.ruff_cache/
|
| 206 |
+
|
| 207 |
+
# PyPI configuration file
|
| 208 |
+
.pypirc
|
| 209 |
+
|
| 210 |
+
# Marimo
|
| 211 |
+
marimo/_static/
|
| 212 |
+
marimo/_lsp/
|
| 213 |
+
__marimo__/
|
| 214 |
+
|
| 215 |
+
# Streamlit
|
| 216 |
+
.streamlit/secrets.toml
|
agents/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# coding=utf-8
|
agents/search_agent.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import copy
|
| 2 |
+
from typing import Dict, Iterator, List, Literal, Union, Optional
|
| 3 |
+
|
| 4 |
+
from qwen_agent.agents import Assistant
|
| 5 |
+
from qwen_agent.llm import BaseChatModel
|
| 6 |
+
from qwen_agent.llm.schema import USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE,SYSTEM,ROLE
|
| 7 |
+
from qwen_agent.tools import BaseTool
|
| 8 |
+
from qwen_agent.log import logger
|
| 9 |
+
|
| 10 |
+
class SearchAgent(Assistant):
    """An Assistant that runs an iterative LLM + tool-call loop for deep research.

    Each round streams an LLM response, detects tool calls in it, executes the
    tools, appends the results as FUNCTION messages, and repeats until the LLM
    produces a message without any tool call or the call budget is exhausted.
    Optionally a follow-up agent is run on the accumulated conversation.
    """

    def __init__(self,
                 function_list: Optional[List[Union[str, Dict, BaseTool]]] = None,
                 llm: Optional[Union[Dict, BaseChatModel]] = None,
                 system_message: Optional[str] = DEFAULT_SYSTEM_MESSAGE,
                 name: Optional[str] = None,
                 description: Optional[str] = None,
                 files: Optional[List[str]] = None,
                 rag_cfg: Optional[Dict] = None,
                 extra: Optional[Dict] = None,
                 custom_user_prompt: Optional[str] = '',
                 make_system_prompt=None,
                 addtional_agent=None):
        """Initialize the agent.

        Args:
            function_list: Tools available to the agent (names, configs or instances).
            llm: LLM config dict or model instance, passed through to Assistant.
            system_message: Base system message, passed through to Assistant.
            name: Agent name.
            description: Agent description.
            files: Files for RAG, passed through to Assistant.
            rag_cfg: RAG configuration, passed through to Assistant.
            extra: Optional settings; recognized keys are 'reasoning' (bool,
                default True) and 'max_llm_calls' (int, default 20).
                Defaults to an empty dict (a None sentinel is used to avoid
                the shared-mutable-default pitfall of the original `extra={}`).
            custom_user_prompt: Text prepended to the first USER message.
            make_system_prompt: Zero-arg callable returning a system prompt;
                when set, a SYSTEM message is inserted if none is present.
            addtional_agent: Optional follow-up agent run after the main loop.
                (Spelling kept as-is: it is part of the public interface.)
        """
        super().__init__(function_list=function_list,
                         llm=llm,
                         system_message=system_message,
                         name=name,
                         description=description,
                         files=files,
                         rag_cfg=rag_cfg)
        self.extra = {} if extra is None else extra
        self.custom_user_prompt = custom_user_prompt
        self.make_system_prompt = make_system_prompt
        self.addtional_agent = addtional_agent

    def insert_in_custom_user_prompt(self, messages: List[Message]) -> List[Message]:
        """Prepend ``self.custom_user_prompt`` to the first USER message, in place.

        Only the first USER message is modified; the (mutated) list is returned.
        """
        for message in messages:
            if message.role == USER:
                message.content[0].text = self.custom_user_prompt + message.content[0].text
                break
        return messages

    def _run(self,
             messages: List[Message],
             lang: Literal['en', 'zh'] = 'zh',
             knowledge: str = '',
             **kwargs) -> Iterator[List[Message]]:
        """Drive the LLM/tool loop, yielding the growing response list.

        Yields:
            Snapshots of the accumulated response messages after each LLM
            stream chunk and after each tool result.
        """
        messages = self._prepend_knowledge_prompt(messages=messages, lang=lang, knowledge=knowledge, **kwargs)
        messages = copy.deepcopy(messages)
        self.insert_in_custom_user_prompt(messages=messages)
        if self.make_system_prompt:
            # Insert a system message only if the conversation lacks one.
            if not messages or messages[0][ROLE] != SYSTEM:
                messages.insert(0, Message(role=SYSTEM, content=self.make_system_prompt()))
        for msg in messages:
            if isinstance(msg.content, list):
                # Flatten single-item rich content to a plain string.
                assert len(msg.content) == 1
                msg.content = msg.content[0].text
            if msg.role == USER:
                msg.content = msg.content.strip()

        reasoning = self.extra.get('reasoning', True)
        num_llm_calls_available = self.extra.get('max_llm_calls', 20)
        response = []
        # Original was `while True and num_llm_calls_available > 0` — the
        # `True and` was redundant.
        while num_llm_calls_available > 0:
            num_llm_calls_available -= 1

            extra_generate_cfg = {'lang': lang}
            if kwargs.get('seed') is not None:
                extra_generate_cfg['seed'] = kwargs['seed']
            output_stream = self._call_llm(messages=messages,
                                           functions=[func.function for func in self.function_map.values()],
                                           extra_generate_cfg=extra_generate_cfg)
            output: List[Message] = []
            for output in output_stream:
                if output:
                    first_msg = output[0]
                    if reasoning and isinstance(first_msg.content, str):
                        # Mark the streamed content as reasoning for the UI.
                        first_msg.content = "<think>\n" + first_msg.content.strip()
                    yield response + output
            if output:
                response.extend(output)
                messages.extend(output)
                used_any_tool = False

                # Debug print statements (and a commented-out exit()) from the
                # rushed demo were removed here; tool usage is still logged below.
                for out in output:
                    use_tool, tool_name, tool_args, _ = self._detect_tool(out)
                    logger.info(f"{self.name} use_tool: {use_tool}, tool_name: {tool_name}, tool_args: {tool_args}")
                    if use_tool:
                        tool_result = self._call_tool(tool_name, tool_args, messages=messages, **kwargs)
                        fn_msg = Message(
                            role=FUNCTION,
                            name=tool_name,
                            content=tool_result,
                        )
                        messages.append(fn_msg)
                        response.append(fn_msg)
                        yield response
                        used_any_tool = True
                if not used_any_tool:
                    logger.info(f'{self.name} not used any tool, skip out')
                    break
        yield response

        if self.addtional_agent:
            # Re-run the conversation (minus the final message) through the
            # follow-up agent with its own system prompt.
            new_messages = copy.deepcopy(messages)
            new_messages.pop()
            new_response = copy.deepcopy(response)
            new_response.pop()
            if new_messages[0][ROLE] == SYSTEM:
                # Add the system instruction to the agent
                new_messages[0].content = self.addtional_agent.make_system_prompt()
            for rsp in self.addtional_agent._run(messages=new_messages, **kwargs):
                yield new_response + rsp
| 116 |
+
|
app.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""An image generation agent implemented by assistant with qwq"""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
from agents.search_agent import SearchAgent
|
| 6 |
+
from llm.oai import TextChatAtOAI
|
| 7 |
+
from gui.web_ui import WebUI
|
| 8 |
+
import datetime
|
| 9 |
+
|
| 10 |
+
DR_MODEL_NAME = os.getenv("DR_MODEL_NAME", "")
|
| 11 |
+
DR_MODEL_SERVER = os.getenv("DR_MODEL_SERVER", "")
|
| 12 |
+
DR_MODEL_API_KEY = os.getenv("DR_MODEL_API_KEY", "")
|
| 13 |
+
|
| 14 |
+
ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource')
|
| 15 |
+
|
| 16 |
+
def today_date():
    """Return the current local date as an ISO 'YYYY-MM-DD' string."""
    current = datetime.date.today()
    return current.isoformat()
|
| 18 |
+
|
| 19 |
+
def init_dev_search_agent_service(name: str = 'SEARCH', port: int = 8002, desc: str = '初版', reasoning: bool = True, max_llm_calls: int = 20, tools=None, addtional_agent=None):
    """Build a SearchAgent wired to the OpenAI-compatible LLM endpoint.

    Args:
        name: Service name. NOTE(review): currently unused inside this function.
        port: Service port. NOTE(review): currently unused inside this function.
        desc: Service description. NOTE(review): currently unused inside this function.
        reasoning: Whether the agent wraps streamed output in "<think>".
        max_llm_calls: Budget of LLM calls per query.
        tools: Tool names for the agent; defaults to ['search', 'visit'].
            (A None sentinel replaces the original shared mutable default list.)
        addtional_agent: Optional follow-up agent (spelling kept: public API).

    Returns:
        SearchAgent: the configured agent.
    """
    if tools is None:
        tools = ['search', 'visit']

    # LLM endpoint/config comes entirely from the DR_MODEL_* env vars.
    llm_cfg = TextChatAtOAI({
        'model': DR_MODEL_NAME,
        'model_type': 'oai',
        'model_server': DR_MODEL_SERVER,
        'api_key': DR_MODEL_API_KEY,
        'generate_cfg': {
            'fncall_prompt_type': 'nous',
            'temperature': 0.85,
            'top_p': 0.95,
            'top_k': -1,
            'presence_penalty': 1.1,
            'max_tokens': 32768,
            'stream_options': {
                'include_usage': True,
            },
            'timeout': 3000
        },
    })

    def make_system_prompt():
        """Return the deep-research system prompt (final answers go in <answer> tags)."""
        system_message = "You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response. When you have gathered sufficient information and are ready to provide the definitive response, you must enclose the entire final answer within <answer></answer> tags.\n\n"
        return system_message

    bot = SearchAgent(
        llm=llm_cfg,
        function_list=tools,
        system_message="",
        # Needless f-prefixes removed: the literals contain no placeholders.
        name='Tongyi DeepResearch',
        description="I am Tongyi DeepResearch, a leading open-source Deep Research Agent, welcome to try!",
        extra={
            'reasoning': reasoning,
            'max_llm_calls': max_llm_calls,
        },
        addtional_agent=addtional_agent,
        make_system_prompt=make_system_prompt,
        custom_user_prompt='',  # was '''''' (an empty triple-quoted string)
    )

    return bot
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def app_gui():
    """Build the configured agents and launch the Gradio web UI (blocking).

    Reads no arguments; endpoint settings come from the DR_MODEL_* env vars
    consumed by init_dev_search_agent_service.
    """
    agents = []
    # One (name, port, desc, reasoning, max_llm_calls, tools) tuple per agent;
    # currently a single Tongyi DeepResearch instance.
    for name, port, desc, reasoning, max_llm_calls, tools in [
        ('Tongyi DeepResearch', 8004, '...', True, 50, ['search', 'visit', 'google_scholar', 'PythonInterpreter']),
    ]:
        search_bot_dev = init_dev_search_agent_service(
            name=name,
            port=port,
            desc=desc,
            reasoning=reasoning,
            max_llm_calls=max_llm_calls,
            tools=tools,
        )
        agents.append(search_bot_dev)

    # UI config: clickable example prompts shown to the user (mixed zh/en).
    chatbot_config = {
        'prompt.suggestions': [
            '中国国足的一场比赛,国足首先失球,由一名宿姓球员扳平了。后来还发生了点球。比分最终是平均。有可能是哪几场比赛',
            'When is the paper submission deadline for the ACL 2025 Industry Track, and what is the venue address for the conference?',
            'On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?',
            '有一位华语娱乐圈的重要人物,与其兄弟共同创作并主演了一部在中国南方沿海城市上映的喜剧电影,这部电影成为该类型的开山之作。与此同时,这位人物还凭借两首极具影响力的本地方言歌曲在音乐领域取得突破,极大推动了本地方言流行音乐的发展。请问,这一切发生在20世纪70年代的哪一年?',
            '有一首欧洲国家的国歌自20世纪50年代初被正式采用,并只选用了其中的一部分歌词。同一年,一位中国文艺界的重要人物创作了一部以民间传说为基础的戏曲作品,并在当年担任了多个文化领域的重要职务。请问这位中国文艺界人物是谁?',
            '有一部英国文坛上极具影响力的长篇诗歌,由一位16世纪末的著名诗人创作,这位诗人在16世纪90年代末于伦敦去世后,被安葬在一个象征英国文学传统的著名场所,与多位文学巨匠为邻。请问,这位诗人安息之地是哪里?',
            '出一份三天两夜的端午北京旅游攻略',
            '对比下最新小米汽车和保时捷性能参数,然后根据最终的结果分析下性价比最高的车型,并给出杭州的供应商',
            '量子计算突破对现有加密体系的威胁',
            '人工智能伦理框架的全球差异',
            '老龄化社会对全球养老金体系的长期冲击',
            '全球碳中和目标下的能源转型路径差异',
            '塑料污染在海洋食物链中的累积效应',
            'AI生成内容(如AI绘画)对传统艺术价值的重构'
        ],
        'user.name': 'User',
        'verbose': True
    }
    # Initial message pre-filled in the chat box (a single message dict,
    # despite the plural variable name).
    messages = {'role': 'user', 'content': '介绍下你自己'}
    WebUI(
        agent=agents,
        chatbot_config=chatbot_config,
    ).run(
        message=messages,
        share=True,
        server_name='127.0.0.1',
        server_port=7860,
        concurrency_limit=20,
        enable_mention=False,
    )
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
# Launch the Gradio web UI when this module is executed as a script.
if __name__ == '__main__':
    app_gui()
|
gui/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# coding=utf-8
|
gui/html_decorate.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from markdown_it import MarkdownIt
|
| 2 |
+
import html
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
# Maps UI style names to their CSS files; unknown names fall back to the default.
_STYLE_CSS_PATHS = {
    "Default": "assets/demo.css",
    "1": "assets/demo.1.css",
    "MBE": "assets/demo_mbe.css",
    "Glassmorphism": "assets/demo_glassmorphism.css",
    "Apple": "assets/demo_apple.css",
    "Paper": "assets/demo_paper.css",
}


def get_style_css(style_name):
    """Return the CSS file contents for the selected style.

    Args:
        style_name (str): One of "Default", "1", "MBE", "Glassmorphism",
            "Apple" or "Paper". Any other value falls back to the default CSS.

    Returns:
        str: CSS content read from the corresponding file under assets/.
    """
    path = _STYLE_CSS_PATHS.get(style_name, "assets/demo.css")
    # Context manager so handles are closed promptly — the original leaked
    # file objects via open(...).read() in every branch.
    with open(path, "r") as f:
        return f.read()
|
| 30 |
+
|
| 31 |
+
def decorate_writing(writing_result, style="Default"):
    """Render agent-produced markdown into a sandboxed iframe HTML snippet.

    Pipeline: expand custom <qwen:cite>/<qwen:takeaway> tags, convert
    ```mermaid and ```echarts fences into renderable HTML/JS, render the
    result as markdown, wrap it in a full HTML document with KaTeX/ECharts/
    mermaid assets plus the selected CSS, and return it escaped inside an
    <iframe srcdoc="...">.

    Args:
        writing_result (str): Markdown text possibly containing qwen tags
            and mermaid/echarts code fences. Falsy input is returned as-is.
        style (str): Style name forwarded to get_style_css.

    Returns:
        str: An <iframe> HTML snippet, or the unchanged falsy input.
    """
    if not writing_result:
        return writing_result

    # <qwen:cite url="u1,u2">text</qwen:cite> — url may list several sources.
    cite_pattern = r'<qwen:cite\s+url=["\']([^"\']+)["\'](?:\s+[^>]*)?>(.*?)</qwen:cite>'
    takeaway_pattern = r'<qwen:takeaway(?:\s+class=["\'](?P<class>[^"\']+)["\'])?>(?P<content>[^<]*)</qwen:takeaway>'
    # url -> 1-based citation number, assigned in first-seen order.
    citation_map = {}

    def replace_cite(match):
        """Replace one <qwen:cite> tag with text plus numbered superscript links."""
        nonlocal citation_map
        urls = match.group(1).split(',')
        content = match.group(2)
        citation_html = []

        for url in urls:
            if url not in citation_map:
                citation_map[url] = len(citation_map) + 1
            current_index = citation_map[url]
            citation_html.append((f'<a href="{url}" title="点击查看引用来源: {url}">{current_index}</a>', current_index))

        # Sort the links of this citation by their assigned number.
        citation_html = sorted(citation_html, key=lambda x: x[1])
        citation_html = ', '.join([x[0] for x in citation_html])

        cite_html = f'{content}<sup class="citation">[{citation_html}]</sup>'
        return cite_html

    decorated_result = re.sub(cite_pattern, replace_cite, writing_result, flags=re.S)

    def replace_takeaway(match):
        """Replace one <qwen:takeaway> tag with a styled <div>, keeping its class."""
        class_attr = match.group('class')
        content = match.group('content')

        if class_attr:
            return f'<div class="takeaway {class_attr}">{content}</div>'
        else:
            return f'<div class="takeaway">{content}</div>'

    decorated_result = re.sub(takeaway_pattern, replace_takeaway, decorated_result, flags=re.S)

    # ```mermaid fences become <pre class="mermaid"> blocks rendered client-side.
    mermaid_pattern = r'```mermaid\n(.*?)\n```'
    def decorate_mermaid(match):
        return f"""
<pre class="mermaid">
{match.group(1)}
</pre>
"""
    decorated_result = re.sub(mermaid_pattern, decorate_mermaid, decorated_result, flags=re.S)

    echarts_pattern = r'```echarts\n(.*?)\n```'
    # Counter used to give each chart container a unique DOM id.
    echarts_index = 0

    def replace_echarts(match):
        """Convert one ```echarts``` code block into HTML plus init JavaScript.

        Args:
            match: Regex match whose group(1) is the echarts option literal.

        Returns:
            str: Container div and <script> that instantiates the chart.
        """
        nonlocal echarts_index
        echarts_code = match.group(1)
        echarts_id = f'echarts-container-{echarts_index}'
        echarts_index += 1

        replace_code = f"""
<div class="echarts-container loading" id="{echarts_id}">Echarts Rendering...</div>
<script>
var chartDom = document.getElementById('{echarts_id}');
var myChart = echarts.init(chartDom);
var option;
option = {echarts_code};
myChart.setOption(option);
chartDom.classList.remove('loading');
</script>
"""
        return replace_code

    decorated_result = re.sub(echarts_pattern, replace_echarts, decorated_result, flags=re.S)

    md = MarkdownIt()
    body = md.render(decorated_result)

    selected_css = get_style_css(style)

    # NOTE(review): the CDN URLs below contain "[email protected]" — this looks like
    # scraper/email-obfuscation damage to "katex@<version>" etc.; confirm the
    # intended package versions against the original repository.
    html_content = """
<html>
<head>
<!-- KaTeX for mathematical formulas -->
<link rel="stylesheet" href="https://s4.zstatic.net/npm/[email protected]/dist/katex.min.css">
<script src="https://s4.zstatic.net/npm/[email protected]/dist/katex.min.js"></script>
<script src="https://s4.zstatic.net/npm/[email protected]/dist/contrib/auto-render.min.js"></script>
<script src="https://s4.zstatic.net/npm/[email protected]/dist/echarts.min.js"></script>
<style>
""" + selected_css + """
</style>
</head>
<body>
<div class="generated-content">
""" + body + """</div>
<script type="module">
import mermaid from 'https://unpkg.com/[email protected]/dist/mermaid.esm.min.mjs';
</script>
<script>
document.addEventListener('DOMContentLoaded', function() {
renderMathInElement(document.body);
});
</script>
</body>
</html>
"""
    # Escape the HTML so it can be embedded safely in the iframe's srcdoc
    # attribute (the content contains quotes and other special characters).
    escaped_html_content = html.escape(html_content)

    # Inline style attributes for the iframe element.
    iframe_style = "width: 100%; height: 1024px; transform-origin: top left; border-color: lightgrey; border-width: 1px; border-radius: 10px;"

    # Build the final iframe, injecting the escaped document via srcdoc.
    # loading="eager"/importance="high" prioritize loading;
    # pointer-events="none" prevents interaction with the iframe content.
    iframe_content = f'<iframe id="ai-ui-iframe" loading="eager" importance="high" pointer-events="none" style="{iframe_style}" srcdoc="{escaped_html_content}"></iframe>'

    # Collapse blank lines before returning the iframe HTML.
    iframe_content = re.sub(r'\n\s*\n', '\n', iframe_content)
    return iframe_content
|
gui/web_ui.py
ADDED
|
@@ -0,0 +1,389 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pprint
|
| 3 |
+
import re
|
| 4 |
+
from typing import List, Optional, Union
|
| 5 |
+
|
| 6 |
+
from qwen_agent import Agent, MultiAgentHub
|
| 7 |
+
from qwen_agent.agents.user_agent import PENDING_USER_INPUT
|
| 8 |
+
from qwen_agent.gui.gradio_utils import format_cover_html
|
| 9 |
+
from qwen_agent.gui.utils import convert_fncall_to_text, convert_history_to_chatbot, get_avatar_image
|
| 10 |
+
from qwen_agent.llm.schema import AUDIO, CONTENT, FILE, IMAGE, NAME, ROLE, USER, VIDEO, Message
|
| 11 |
+
from qwen_agent.log import logger
|
| 12 |
+
from qwen_agent.utils.utils import print_traceback
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class WebUI:
    """A common chatbot web application for agents.

    Wraps one or more agents in a Gradio-based chat interface with
    multimodal input, an optional agent selector, and a read-only
    display of each agent's plugins.
    """

    def __init__(self, agent: Union[Agent, MultiAgentHub, List[Agent]], chatbot_config: Optional[dict] = None):
        """
        Initialize the chatbot.

        Args:
            agent: The agent or a list of agents,
                supports various types of agents such as Assistant, GroupChat, Router, etc.
            chatbot_config: The chatbot configuration.
                Set the configuration as {'user.name': '', 'user.avatar': '', 'agent.avatar': '', 'input.placeholder': '', 'prompt.suggestions': []}.
        """
        chatbot_config = chatbot_config or {}

        # Normalize the three accepted forms (hub / list / single agent)
        # into `self.agent_list` plus an optional `self.agent_hub`.
        if isinstance(agent, MultiAgentHub):
            self.agent_list = [agent for agent in agent.nonuser_agents]
            self.agent_hub = agent
        elif isinstance(agent, list):
            self.agent_list = agent
            self.agent_hub = None
        else:
            self.agent_list = [agent]
            self.agent_hub = None

        user_name = chatbot_config.get('user.name', 'user')
        self.user_config = {
            'name': user_name,
            'avatar': chatbot_config.get(
                'user.avatar',
                get_avatar_image(user_name),
            ),
        }

        # One display config per agent; all agents share the configured
        # 'agent.avatar' when it is provided.
        self.agent_config_list = [{
            'name': agent.name,
            'avatar': chatbot_config.get(
                'agent.avatar',
                get_avatar_image(agent.name),
            ),
            'description': agent.description or "I'm a helpful assistant.",
        } for agent in self.agent_list]

        self.input_placeholder = chatbot_config.get('input.placeholder', '请输入需要分析的问题,尽管交给我吧~')
        self.prompt_suggestions = chatbot_config.get('prompt.suggestions', [])
        self.verbose = chatbot_config.get('verbose', False)

    def run(self,
            messages: List[Message] = None,
            share: bool = False,
            server_name: str = None,
            server_port: int = None,
            concurrency_limit: int = 10,
            enable_mention: bool = False,
            **kwargs):
        """
        Run the chatbot.

        Args:
            messages: The chat history to pre-populate the chatbot with.
            share: Whether to create a publicly shareable Gradio link.
            server_name: Host name/IP to bind the server to.
            server_port: Port to serve on.
            concurrency_limit: Maximum number of concurrent chat requests.
            enable_mention: Whether '@agent_name' mentions in the input select the agent.
            **kwargs: Extra keyword arguments forwarded to every `agent.run(...)` call.
        """
        # Note: this fix relocates the docstring above, which previously sat as a
        # stray string literal at class level between __init__ and run.
        self.run_kwargs = kwargs

        from qwen_agent.gui.gradio_dep import gr, mgr, ms

        customTheme = gr.themes.Default(
            primary_hue=gr.themes.utils.colors.blue,
            radius_size=gr.themes.utils.sizes.radius_none,
        )

        if messages is not None:
            logger.info('web-ui messages.size %s' % len(messages))

        with gr.Blocks(
                css=os.path.join(os.path.dirname(__file__), 'assets/appBot.css'),
                theme=customTheme,
        ) as demo:
            history = gr.State([])
            with ms.Application():
                with gr.Row(elem_classes='container'):
                    with gr.Column(scale=4):
                        chatbot = mgr.Chatbot(value=convert_history_to_chatbot(messages=messages),
                                              avatar_images=[
                                                  self.user_config,
                                                  self.agent_config_list,
                                              ],
                                              height=850,
                                              avatar_image_width=80,
                                              flushing=False,
                                              show_copy_button=True,
                                              latex_delimiters=[{
                                                  'left': '\\(',
                                                  'right': '\\)',
                                                  'display': True
                                              }, {
                                                  'left': '\\begin{equation}',
                                                  'right': '\\end{equation}',
                                                  'display': True
                                              }, {
                                                  'left': '\\begin{align}',
                                                  'right': '\\end{align}',
                                                  'display': True
                                              }, {
                                                  'left': '\\begin{alignat}',
                                                  'right': '\\end{alignat}',
                                                  'display': True
                                              }, {
                                                  'left': '\\begin{gather}',
                                                  'right': '\\end{gather}',
                                                  'display': True
                                              }, {
                                                  'left': '\\begin{CD}',
                                                  'right': '\\end{CD}',
                                                  'display': True
                                              }, {
                                                  'left': '\\[',
                                                  'right': '\\]',
                                                  'display': True
                                              }])

                        input = mgr.MultimodalInput(
                            placeholder=self.input_placeholder,
                            show_copy_button=True,
                        )

                    with gr.Column(scale=1):
                        if len(self.agent_list) > 1:
                            agent_selector = gr.Dropdown(
                                [(agent.name, i) for i, agent in enumerate(self.agent_list)],
                                label='Agents',
                                info='请选择一个 Agent',
                                value=0,
                                interactive=True,
                            )

                        agent_info_block = self._create_agent_info_block()

                        agent_plugins_block = self._create_agent_plugins_block()

                        if self.prompt_suggestions:
                            gr.Examples(
                                label='推荐对话',
                                examples=self.prompt_suggestions,
                                inputs=[input],
                            )

                if len(self.agent_list) > 1:
                    # Keep the info/plugin panels in sync with the chosen agent.
                    agent_selector.change(
                        fn=self.change_agent,
                        inputs=[agent_selector],
                        outputs=[agent_selector, agent_info_block, agent_plugins_block],
                        queue=False,
                    )

                input.change(
                    fn=self.change_text,
                    inputs=[input],
                )
                input_promise = input.submit(
                    fn=self.add_text,
                    inputs=[input, chatbot, history],
                    outputs=[input, chatbot, history],
                    queue=True,
                    concurrency_limit=concurrency_limit,
                )

                if len(self.agent_list) > 1:
                    if enable_mention:
                        input_promise = input_promise.then(
                            self.add_mention,
                            [chatbot, agent_selector],
                            [chatbot, agent_selector],
                        ).then(
                            self.agent_run,
                            [chatbot, history, agent_selector],
                            [chatbot, history, agent_selector],
                        )
                    else:
                        input_promise = input_promise.then(
                            self.agent_run,
                            [chatbot, history, agent_selector],
                            [chatbot, history, agent_selector],
                        )
                else:
                    input_promise = input_promise.then(
                        self.agent_run,
                        [chatbot, history],
                        [chatbot, history],
                    )

                # Re-enable the input box once the agent has finished responding.
                input_promise.then(self.flushed, None, [input])

            demo.load(None)

        demo.queue(default_concurrency_limit=concurrency_limit).launch(share=share,
                                                                       server_name=server_name,
                                                                       server_port=server_port)

    def change_agent(self, agent_selector):
        """Rebuild the info and plugin panels for the newly selected agent."""
        yield agent_selector, self._create_agent_info_block(agent_selector), self._create_agent_plugins_block(
            agent_selector)

    def change_text(self, _input):
        """Log the input box content as the user types (debug aid)."""
        logger.info(f'agent_run change_text input:{_input.text}')

    def add_text(self, _input, _chatbot, _history):
        """Append the user's message (text plus attachments) to history and chatbot."""
        _history.append({
            ROLE: USER,
            CONTENT: [{
                'text': _input.text
            }],
        })

        if self.user_config[NAME]:
            _history[-1][NAME] = self.user_config[NAME]

        logger.info('agent_run add_text input:\n' + pprint.pformat(_history, indent=2))

        if _input.files:
            # Classify each uploaded file by MIME type so the agent receives
            # the right content modality.
            for file in _input.files:
                if file.mime_type.startswith('image/'):
                    _history[-1][CONTENT].append({IMAGE: 'file://' + file.path})
                elif file.mime_type.startswith('audio/'):
                    _history[-1][CONTENT].append({AUDIO: 'file://' + file.path})
                elif file.mime_type.startswith('video/'):
                    _history[-1][CONTENT].append({VIDEO: 'file://' + file.path})
                else:
                    _history[-1][CONTENT].append({FILE: file.path})

        _chatbot.append([_input, None])

        from qwen_agent.gui.gradio_dep import gr
        # Disable the input box while the agent is responding.
        yield gr.update(interactive=False, value=''), _chatbot, _history

    def add_mention(self, _chatbot, _agent_selector):
        """Resolve an '@agent_name' mention in the latest query to an agent index.

        Also prefixes the query with the mention when routing requires it.
        """
        if len(self.agent_list) == 1:
            # Fix: the original fell through after this yield and ran the
            # mention-parsing code below, yielding a second time.
            yield _chatbot, _agent_selector
            return

        query = _chatbot[-1][0].text
        match = re.search(r'@\w+\b', query)
        if match:
            _agent_selector = self._get_agent_index_by_name(match.group()[1:])

        agent_name = self.agent_list[_agent_selector].name

        if ('@' + agent_name) not in query and self.agent_hub is None:
            _chatbot[-1][0].text = '@' + agent_name + ' ' + query

        yield _chatbot, _agent_selector

    def agent_run(self, _chatbot, _history, _agent_selector=None):
        """Stream the selected agent's responses into the chatbot.

        Yields (_chatbot, _history[, _agent_selector]) after every chunk so
        Gradio can live-update the UI.
        """
        # TODO: keep only a single round of conversation per task
        if self.verbose:
            logger.info('agent_run input[all]:\n' + pprint.pformat(_history, indent=2))
        _history = _history[-1:]
        if self.verbose:
            logger.info('agent_run input[new]:\n' + pprint.pformat(_history, indent=2))

        if len(_history) == 0:
            # Nothing to do; still yield once so the UI outputs stay consistent.
            if _agent_selector is not None:
                yield _chatbot, _history, _agent_selector
            else:
                yield _chatbot, _history
            logger.info('agent_run input with empty input, do nothing.')
            return

        num_input_bubbles = len(_chatbot) - 1
        num_output_bubbles = 1
        # Each response bubble holds one slot per agent.
        _chatbot[-1][1] = [None for _ in range(len(self.agent_list))]

        logger.info('agent_run input:_agent_selector %s' % _agent_selector)
        agent_runner = self.agent_list[_agent_selector or 0]
        if self.agent_hub:
            agent_runner = self.agent_hub
        # Fix: removed the original no-op expression statement
        # `agent_runner.function_map` here (it had no effect).

        responses = []
        for responses in agent_runner.run(_history, **self.run_kwargs):
            if not responses:
                continue
            if responses[-1][CONTENT] == PENDING_USER_INPUT:
                logger.info('Interrupted. Waiting for user input!')
                break

            display_responses = convert_fncall_to_text(responses)
            if not display_responses:
                continue
            if display_responses[-1][CONTENT] is None:
                continue

            while len(display_responses) > num_output_bubbles:
                # Create a new chat bubble
                _chatbot.append([None, None])
                _chatbot[-1][1] = [None for _ in range(len(self.agent_list))]
                num_output_bubbles += 1

            assert num_output_bubbles == len(display_responses)
            assert num_input_bubbles + num_output_bubbles == len(_chatbot)

            for i, rsp in enumerate(display_responses):
                agent_index = self._get_agent_index_by_name(rsp[NAME])
                _chatbot[num_input_bubbles + i][1][agent_index] = rsp[CONTENT]

            if len(self.agent_list) > 1:
                _agent_selector = agent_index

            if _agent_selector is not None:
                yield _chatbot, _history, _agent_selector
            else:
                yield _chatbot, _history

        if responses:
            _history.extend([res for res in responses if res[CONTENT] != PENDING_USER_INPUT])

        if _agent_selector is not None:
            yield _chatbot, _history, _agent_selector
        else:
            yield _chatbot, _history

        if self.verbose:
            logger.info('agent_run response:\n' + pprint.pformat(responses, indent=2))

    def flushed(self):
        """Re-enable the input box after a run completes."""
        logger.info('agent_run flushed')
        from qwen_agent.gui.gradio_dep import gr
        return gr.update(interactive=True, value='')

    def _get_agent_index_by_name(self, agent_name):
        """Return the index of the agent with `agent_name`, or 0 when unknown."""
        if agent_name is None:
            return 0

        try:
            agent_name = agent_name.strip()
            for i, agent in enumerate(self.agent_list):
                if agent.name == agent_name:
                    return i
            return 0
        except Exception:
            print_traceback()
            return 0

    def _create_agent_info_block(self, agent_index=0):
        """Build the HTML cover card for the agent at `agent_index`."""
        from qwen_agent.gui.gradio_dep import gr

        agent_config_interactive = self.agent_config_list[agent_index]

        return gr.HTML(
            format_cover_html(
                bot_name=agent_config_interactive['name'],
                bot_description=agent_config_interactive['description'],
                bot_avatar=agent_config_interactive['avatar'],
            ))

    def _create_agent_plugins_block(self, agent_index=0):
        """Build a read-only checkbox group listing the agent's plugins."""
        from qwen_agent.gui.gradio_dep import gr

        agent_interactive = self.agent_list[agent_index]

        if agent_interactive.function_map:
            capabilities = [key for key in agent_interactive.function_map.keys()]
            return gr.CheckboxGroup(
                label='插件',
                value=capabilities,
                choices=capabilities,
                interactive=False,
            )

        else:
            return gr.CheckboxGroup(
                label='插件',
                value=[],
                choices=[],
                interactive=False,
            )
|
llm/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# coding=utf-8
|
llm/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (154 Bytes). View file
|
|
|
llm/__pycache__/oai.cpython-310.pyc
ADDED
|
Binary file (9.96 kB). View file
|
|
|
llm/__pycache__/qwen_dashscope.cpython-310.pyc
ADDED
|
Binary file (4.6 kB). View file
|
|
|
llm/oai.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import copy
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
import os
|
| 5 |
+
from http import HTTPStatus
|
| 6 |
+
from pprint import pformat
|
| 7 |
+
from typing import Dict, Iterator, List, Optional, Literal, Union
|
| 8 |
+
|
| 9 |
+
import openai
|
| 10 |
+
|
| 11 |
+
if openai.__version__.startswith('0.'):
|
| 12 |
+
from openai.error import OpenAIError # noqa
|
| 13 |
+
else:
|
| 14 |
+
from openai import OpenAIError
|
| 15 |
+
|
| 16 |
+
from qwen_agent.llm.base import ModelServiceError, register_llm
|
| 17 |
+
from qwen_agent.llm.function_calling import BaseFnCallModel, simulate_response_completion_with_chat
|
| 18 |
+
from qwen_agent.llm.schema import ASSISTANT, Message, FunctionCall
|
| 19 |
+
from qwen_agent.log import logger
|
| 20 |
+
import datetime
|
| 21 |
+
def today_date():
    """Return today's date as an ISO 'YYYY-MM-DD' string."""
    return datetime.date.today().isoformat()
|
| 24 |
+
|
| 25 |
+
SYSTEM_PROMPT = """You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response. When you have gathered sufficient information and are ready to provide the definitive response, you must enclose the entire final answer within <answer></answer> tags.
|
| 26 |
+
|
| 27 |
+
# Tools
|
| 28 |
+
|
| 29 |
+
You may call one or more functions to assist with the user query.
|
| 30 |
+
|
| 31 |
+
You are provided with function signatures within <tools></tools> XML tags:
|
| 32 |
+
<tools>
|
| 33 |
+
{"type": "function", "function": {"name": "search", "description": "Perform Google web searches then returns a string of the top search results. Accepts multiple queries.", "parameters": {"type": "object", "properties": {"query": {"type": "array", "items": {"type": "string", "description": "The search query."}, "minItems": 1, "description": "The list of search queries."}}, "required": ["query"]}}}
|
| 34 |
+
{"type": "function", "function": {"name": "visit", "description": "Visit webpage(s) and return the summary of the content.", "parameters": {"type": "object", "properties": {"url": {"type": "array", "items": {"type": "string"}, "description": "The URL(s) of the webpage(s) to visit. Can be a single URL or an array of URLs."}, "goal": {"type": "string", "description": "The specific information goal for visiting webpage(s)."}}, "required": ["url", "goal"]}}}
|
| 35 |
+
{"type": "function", "function": {"name": "PythonInterpreter", "description": "Executes Python code in a sandboxed environment. To use this tool, you must follow this format:
|
| 36 |
+
1. The 'arguments' JSON object must be empty: {}.
|
| 37 |
+
2. The Python code to be executed must be placed immediately after the JSON block, enclosed within <code> and </code> tags.
|
| 38 |
+
|
| 39 |
+
IMPORTANT: Any output you want to see MUST be printed to standard output using the print() function.
|
| 40 |
+
|
| 41 |
+
Example of a correct call:
|
| 42 |
+
<tool_call>
|
| 43 |
+
{"name": "PythonInterpreter", "arguments": {}}
|
| 44 |
+
<code>
|
| 45 |
+
import numpy as np
|
| 46 |
+
# Your code here
|
| 47 |
+
print(f"The result is: {np.mean([1,2,3])}")
|
| 48 |
+
</code>
|
| 49 |
+
</tool_call>", "parameters": {"type": "object", "properties": {}, "required": []}}}
|
| 50 |
+
{"type": "function", "function": {"name": "google_scholar", "description": "Leverage Google Scholar to retrieve relevant information from academic publications. Accepts multiple queries. This tool will also return results from google search", "parameters": {"type": "object", "properties": {"query": {"type": "array", "items": {"type": "string", "description": "The search query."}, "minItems": 1, "description": "The list of search queries for Google Scholar."}}, "required": ["query"]}}}
|
| 51 |
+
{"type": "function", "function": {"name": "parse_file", "description": "This is a tool that can be used to parse multiple user uploaded local files such as PDF, DOCX, PPTX, TXT, CSV, XLSX, DOC, ZIP, MP4, MP3.", "parameters": {"type": "object", "properties": {"files": {"type": "array", "items": {"type": "string"}, "description": "The file name of the user uploaded local files to be parsed."}}, "required": ["files"]}}}
|
| 52 |
+
</tools>
|
| 53 |
+
|
| 54 |
+
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
| 55 |
+
<tool_call>
|
| 56 |
+
{"name": <function-name>, "arguments": <args-json-object>}
|
| 57 |
+
</tool_call>
|
| 58 |
+
|
| 59 |
+
"""
|
| 60 |
+
|
| 61 |
+
@register_llm('oai')
class TextChatAtOAI(BaseFnCallModel):
    """Chat model backend for OpenAI-compatible APIs.

    Supports both the legacy openai<1.0 module-level API and the openai>=1.0
    client API, streams reasoning/content/tool-call deltas, and injects
    SYSTEM_PROMPT into the first message before every request.
    """

    def __init__(self, cfg: Optional[Dict] = None):
        """Configure the endpoint/key and bind version-appropriate create() callables.

        Args:
            cfg: Optional config dict; recognises 'api_base'/'base_url'/'model_server'
                (endpoint aliases) and 'api_key' (falls back to $OPENAI_API_KEY).
        """
        super().__init__(cfg)
        self.model = self.model or 'gpt-4o-mini'
        cfg = cfg or {}

        # Accept several equivalent config keys for the API endpoint.
        api_base = cfg.get('api_base')
        api_base = api_base or cfg.get('base_url')
        api_base = api_base or cfg.get('model_server')
        api_base = (api_base or '').strip()

        api_key = cfg.get('api_key')
        api_key = api_key or os.getenv('OPENAI_API_KEY')
        api_key = (api_key or 'EMPTY').strip()

        if openai.__version__.startswith('0.'):
            # Legacy openai<1.0: configure the module-level globals directly.
            if api_base:
                openai.api_base = api_base
            if api_key:
                openai.api_key = api_key
            self._complete_create = openai.Completion.create
            self._chat_complete_create = openai.ChatCompletion.create
        else:
            api_kwargs = {}
            if api_base:
                api_kwargs['base_url'] = api_base
            if api_key:
                api_kwargs['api_key'] = api_key

            def _chat_complete_create(*args, **kwargs):
                # OpenAI API v1 does not allow the following args, must pass by extra_body
                extra_params = ['top_k', 'repetition_penalty']
                if any((k in kwargs) for k in extra_params):
                    kwargs['extra_body'] = copy.deepcopy(kwargs.get('extra_body', {}))
                    for k in extra_params:
                        if k in kwargs:
                            kwargs['extra_body'][k] = kwargs.pop(k)
                if 'request_timeout' in kwargs:
                    kwargs['timeout'] = kwargs.pop('request_timeout')

                client = openai.OpenAI(**api_kwargs)
                return client.chat.completions.create(*args, **kwargs)

            def _complete_create(*args, **kwargs):
                # OpenAI API v1 does not allow the following args, must pass by extra_body
                extra_params = ['top_k', 'repetition_penalty']
                if any((k in kwargs) for k in extra_params):
                    kwargs['extra_body'] = copy.deepcopy(kwargs.get('extra_body', {}))
                    for k in extra_params:
                        if k in kwargs:
                            kwargs['extra_body'][k] = kwargs.pop(k)
                if 'request_timeout' in kwargs:
                    kwargs['timeout'] = kwargs.pop('request_timeout')

                client = openai.OpenAI(**api_kwargs)
                return client.completions.create(*args, **kwargs)

            self._complete_create = _complete_create
            self._chat_complete_create = _chat_complete_create

    def _chat_stream(
        self,
        messages: List[Message],
        delta_stream: bool,
        generate_cfg: dict,
    ) -> Iterator[List[Message]]:
        """Stream chat completions.

        With delta_stream=True each yield carries only the new delta; otherwise
        each yield carries the full accumulated response so far. Tool-call
        deltas are re-serialized into inline <tool_call>{...}</tool_call> text.

        Raises:
            ModelServiceError: wrapping any OpenAIError from the service.
        """
        messages = self.convert_messages_to_dicts(messages)

        try:
            response = self._chat_complete_create(model=self.model, messages=messages, stream=True, **generate_cfg)
            if delta_stream:
                for chunk in response:
                    if chunk.choices:
                        choice = chunk.choices[0]
                        if hasattr(choice.delta, 'reasoning_content') and choice.delta.reasoning_content:
                            yield [
                                Message(
                                    role=ASSISTANT,
                                    content='',
                                    reasoning_content=choice.delta.reasoning_content
                                )
                            ]
                        if hasattr(choice.delta, 'content') and choice.delta.content:
                            yield [Message(role=ASSISTANT, content=choice.delta.content, reasoning_content='')]
                        # Compatibility with the map agent model.
                        # NOTE(review): json.loads assumes the full arguments JSON
                        # arrives in a single delta chunk — verify for this server.
                        if hasattr(choice.delta, 'tool_calls') and choice.delta.tool_calls:
                            function_name = choice.delta.tool_calls[0].function.name
                            function_call = {
                                'name': function_name,
                                'arguments': json.loads(choice.delta.tool_calls[0].function.arguments)
                            }
                            function_json = json.dumps(function_call, ensure_ascii=False)
                            yield [Message(role=ASSISTANT, content=f'<tool_call>{function_json}</tool_call>')]
                    logger.info(f'delta_stream message chunk: {chunk}')
            else:
                full_response = ''
                full_reasoning_content = ''
                for chunk in response:
                    if chunk.choices:
                        choice = chunk.choices[0]
                        if hasattr(choice.delta, 'reasoning_content') and choice.delta.reasoning_content:
                            full_reasoning_content += choice.delta.reasoning_content
                        if hasattr(choice.delta, 'content') and choice.delta.content:
                            full_response += choice.delta.content
                        # Compatibility with the map agent model.
                        if hasattr(choice.delta, 'tool_calls') and choice.delta.tool_calls:
                            function_name = choice.delta.tool_calls[0].function.name
                            # function_call = FunctionCall(
                            #     name=function_name,
                            #     arguments=choice.delta.tool_calls[0].function.arguments,
                            # )
                            # yield [Message(role=ASSISTANT, content='', function_call=function_call)]
                            function_call = {
                                'name': function_name,
                                'arguments': json.loads(choice.delta.tool_calls[0].function.arguments)
                            }
                            function_json = json.dumps(function_call, ensure_ascii=False)
                            logger.info(json.dumps(function_call, ensure_ascii=False, indent=4))
                            full_response += f'<tool_call>{function_json}</tool_call>'
                        yield [Message(role=ASSISTANT, content=full_response, reasoning_content=full_reasoning_content)]
                    logger.info(f'message chunk: {chunk}')
        except OpenAIError as ex:
            raise ModelServiceError(exception=ex)

    def _chat_no_stream(
        self,
        messages: List[Message],
        generate_cfg: dict,
    ) -> List[Message]:
        """Issue a single non-streaming chat completion.

        Returns a one-element list with the assistant message, carrying
        reasoning_content when the server provides it.

        Raises:
            ModelServiceError: wrapping any OpenAIError from the service.
        """
        messages = self.convert_messages_to_dicts(messages)
        try:
            response = self._chat_complete_create(model=self.model, messages=messages, stream=False, **generate_cfg)
            if hasattr(response.choices[0].message, 'reasoning_content'):
                return [
                    Message(role=ASSISTANT,
                            content=response.choices[0].message.content,
                            reasoning_content=response.choices[0].message.reasoning_content)
                ]
            else:
                return [Message(role=ASSISTANT, content=response.choices[0].message.content)]
        except OpenAIError as ex:
            raise ModelServiceError(exception=ex)

    def _chat_with_functions(
        self,
        messages: List[Message],
        functions: List[Dict],
        stream: bool,
        delta_stream: bool,
        generate_cfg: dict,
        lang: Literal['en', 'zh'],
    ) -> Union[List[Message], Iterator[List[Message]]]:
        """Function-calling entry point: strips fncall-only options and delegates to _chat.

        NOTE(review): `functions` is not forwarded here — tool signatures are
        expected to come from SYSTEM_PROMPT instead; confirm that is intended.
        """
        # if delta_stream:
        #     raise NotImplementedError('Please use stream=True with delta_stream=False, because delta_stream=True'
        #                               ' is not implemented for function calling due to some technical reasons.')
        generate_cfg = copy.deepcopy(generate_cfg)
        # These keys are consumed by the agent framework, not the OpenAI API.
        for k in ['parallel_function_calls', 'function_choice', 'thought_in_content']:
            if k in generate_cfg:
                del generate_cfg[k]
        messages = simulate_response_completion_with_chat(messages)
        return self._chat(messages, stream=stream, delta_stream=delta_stream, generate_cfg=generate_cfg)

    def _chat(
        self,
        messages: List[Union[Message, Dict]],
        stream: bool,
        delta_stream: bool,
        generate_cfg: dict,
    ) -> Union[List[Message], Iterator[List[Message]]]:
        """Dispatch to the streaming or non-streaming implementation."""
        if stream:
            return self._chat_stream(messages, delta_stream=delta_stream, generate_cfg=generate_cfg)
        else:
            return self._chat_no_stream(messages, generate_cfg=generate_cfg)

    @staticmethod
    def convert_messages_to_dicts(messages: List[Message]) -> List[dict]:
        """Convert Message objects to dicts, inject SYSTEM_PROMPT, and clean think tags.

        NOTE(review): assumes messages[0] exists and is the system slot, and
        that every message's content is a plain string — verify for callers
        that pass multimodal (list-typed) content.
        """
        # TODO: Change when the VLLM deployed model needs to pass reasoning_complete.
        # At this time, in order to be compatible with lower versions of vLLM,
        # and reasoning content is currently not useful

        messages = [msg.model_dump() for msg in messages]
        return_messages = []
        # Overwrite the first message's content with the research system prompt
        # plus the current date.
        messages[0]["content"] = SYSTEM_PROMPT + "Current date: " + str(today_date())
        for i in messages:
            # Collapse an accidentally doubled opening <think> tag.
            i["content"] = i["content"].replace("<think>\n<think>\n","<think>\n\n")
            return_messages.append(i)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f'LLM Input:\n{pformat(messages, indent=2)}')
        return return_messages
|
llm/qwen_dashscope.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from http import HTTPStatus
|
| 3 |
+
from pprint import pformat
|
| 4 |
+
from typing import Dict, Iterator, List, Optional
|
| 5 |
+
|
| 6 |
+
import dashscope
|
| 7 |
+
|
| 8 |
+
from qwen_agent.llm.base import ModelServiceError, register_llm
|
| 9 |
+
from qwen_agent.llm.function_calling import BaseFnCallModel
|
| 10 |
+
from qwen_agent.llm.schema import ASSISTANT, Message
|
| 11 |
+
from qwen_agent.log import logger
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@register_llm('qwen_dashscope')
|
| 15 |
+
class QwenChatAtDS(BaseFnCallModel):
|
| 16 |
+
|
| 17 |
+
def __init__(self, cfg: Optional[Dict] = None):
|
| 18 |
+
super().__init__(cfg)
|
| 19 |
+
self.model = self.model or 'qwen-max'
|
| 20 |
+
initialize_dashscope(cfg)
|
| 21 |
+
|
| 22 |
+
def _chat_stream(
|
| 23 |
+
self,
|
| 24 |
+
messages: List[Message],
|
| 25 |
+
delta_stream: bool,
|
| 26 |
+
generate_cfg: dict,
|
| 27 |
+
) -> Iterator[List[Message]]:
|
| 28 |
+
messages = [msg.model_dump() for msg in messages]
|
| 29 |
+
if messages[-1]['role'] == ASSISTANT:
|
| 30 |
+
messages[-1]['partial'] = True
|
| 31 |
+
logger.debug(f'LLM Input:\n{pformat(messages, indent=2)}')
|
| 32 |
+
response = dashscope.Generation.call(
|
| 33 |
+
self.model,
|
| 34 |
+
messages=messages, # noqa
|
| 35 |
+
result_format='message',
|
| 36 |
+
stream=True,
|
| 37 |
+
**generate_cfg)
|
| 38 |
+
if delta_stream:
|
| 39 |
+
return self._delta_stream_output(response)
|
| 40 |
+
else:
|
| 41 |
+
return self._full_stream_output(response)
|
| 42 |
+
|
| 43 |
+
def _chat_no_stream(
|
| 44 |
+
self,
|
| 45 |
+
messages: List[Message],
|
| 46 |
+
generate_cfg: dict,
|
| 47 |
+
) -> List[Message]:
|
| 48 |
+
messages = [msg.model_dump() for msg in messages]
|
| 49 |
+
if messages[-1]['role'] == ASSISTANT:
|
| 50 |
+
messages[-1]['partial'] = True
|
| 51 |
+
logger.debug(f'LLM Input:\n{pformat(messages, indent=2)}')
|
| 52 |
+
response = dashscope.Generation.call(
|
| 53 |
+
self.model,
|
| 54 |
+
messages=messages, # noqa
|
| 55 |
+
result_format='message',
|
| 56 |
+
stream=False,
|
| 57 |
+
**generate_cfg)
|
| 58 |
+
if response.status_code == HTTPStatus.OK:
|
| 59 |
+
return [
|
| 60 |
+
Message(role=ASSISTANT,
|
| 61 |
+
content=response.output.choices[0].message.content,
|
| 62 |
+
reasoning_content=response.output.choices[0].message.get('reasoning_content', ''),
|
| 63 |
+
extra={'model_service_info': response})
|
| 64 |
+
]
|
| 65 |
+
else:
|
| 66 |
+
raise ModelServiceError(code=response.code,
|
| 67 |
+
message=response.message,
|
| 68 |
+
extra={'model_service_info': response})
|
| 69 |
+
|
| 70 |
+
def _continue_assistant_response(
    self,
    messages: List[Message],
    generate_cfg: dict,
    stream: bool,
) -> Iterator[List[Message]]:
    """Resume a partially generated assistant reply.

    DashScope supports continuation natively (via the ``partial`` flag set
    by the chat methods), so this simply delegates to the regular chat
    entry point with delta streaming disabled.
    """
    continuation = self._chat(messages,
                              stream=stream,
                              delta_stream=False,
                              generate_cfg=generate_cfg)
    return continuation
+
@staticmethod
def _delta_stream_output(response) -> Iterator[List[Message]]:
    """Yield one assistant ``Message`` per streamed chunk (deltas only).

    Args:
        response: Iterable of DashScope streaming chunks.

    Yields:
        Single-element ``[Message]`` lists carrying only the new delta.

    Raises:
        ModelServiceError: If any chunk reports a non-OK status.
    """
    for chunk in response:
        # Guard clause: fail fast on a bad chunk.
        if chunk.status_code != HTTPStatus.OK:
            raise ModelServiceError(code=chunk.code, message=chunk.message, extra={'model_service_info': chunk})
        message = chunk.output.choices[0].message
        yield [
            Message(role=ASSISTANT,
                    content=message.content,
                    # Bug fix: use .get() like _chat_no_stream/_full_stream_output do.
                    # Direct attribute access crashed on chunks from models that
                    # emit no reasoning_content field at all.
                    reasoning_content=message.get('reasoning_content', ''),
                    extra={'model_service_info': chunk})
        ]
| 91 |
+
@staticmethod
def _full_stream_output(response) -> Iterator[List[Message]]:
    """Yield cumulative assistant messages from a DashScope stream.

    Each yielded message carries the full content (and reasoning content)
    accumulated so far, not just the latest delta.

    Raises:
        ModelServiceError: If any chunk reports a non-OK status.
    """
    content_so_far = ''
    reasoning_so_far = ''
    for chunk in response:
        # Guard clause: fail fast on a bad chunk.
        if chunk.status_code != HTTPStatus.OK:
            raise ModelServiceError(code=chunk.code, message=chunk.message, extra={'model_service_info': chunk})
        message = chunk.output.choices[0].message
        # Accumulate whichever fields this chunk actually carries.
        if message.get('reasoning_content', ''):
            reasoning_so_far += message.reasoning_content
        if message.content:
            content_so_far += message.content
        yield [
            Message(role=ASSISTANT,
                    content=content_so_far,
                    reasoning_content=reasoning_so_far,
                    extra={'model_service_info': chunk})
        ]
+
|
| 111 |
+
def initialize_dashscope(cfg: Optional[Dict] = None) -> None:
    """Configure the global dashscope SDK from ``cfg`` and the environment.

    Resolution order for each setting: explicit ``cfg`` value, then the
    corresponding environment variable, then whatever the dashscope module
    already holds. Only mutates module-level dashscope globals.

    Args:
        cfg: Optional dict with ``api_key``, ``base_http_api_url`` and/or
            ``base_websocket_api_url`` keys.
    """
    cfg = cfg or {}

    # cfg wins; empty/missing values fall back to the environment.
    api_key = cfg.get('api_key', '') or os.getenv('DASHSCOPE_API_KEY', 'EMPTY')
    base_http_api_url = cfg.get('base_http_api_url', None) or os.getenv('DASHSCOPE_HTTP_URL', None)
    base_websocket_api_url = cfg.get('base_websocket_api_url', None) or os.getenv('DASHSCOPE_WEBSOCKET_URL', None)

    api_key = api_key.strip()
    if api_key not in ('', 'EMPTY'):
        # A usable key was supplied; install it only when it differs.
        if api_key != dashscope.api_key:
            logger.info('Setting the dashscope api_key.')
            dashscope.api_key = api_key
    elif dashscope.api_key in (None, '', 'EMPTY'):
        logger.warning('No valid dashscope api_key found in cfg, environment variable `DASHSCOPE_API_KEY` or dashscope.api_key, the model call may raise errors.')
    else:
        logger.info('No dashscope api_key found in cfg, using the dashscope.api_key that has already been set.')

    if base_http_api_url is not None:
        dashscope.base_http_api_url = base_http_api_url.strip()
    if base_websocket_api_url is not None:
        dashscope.base_websocket_api_url = base_websocket_api_url.strip()
|
messages.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"role": "system",
|
| 4 |
+
"content": "You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response. When you have gathered sufficient information and are ready to provide the definitive response, you must enclose the entire final answer within <answer></answer> tags.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"search\", \"description\": \"Perform Google web searches then returns a string of the top search results. Accepts multiple queries.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"query\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"description\": \"The search query.\"}, \"minItems\": 1, \"description\": \"The list of search queries.\"}}, \"required\": [\"query\"]}}}\n{\"type\": \"function\", \"function\": {\"name\": \"visit\", \"description\": \"Visit webpage(s) and return the summary of the content.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"url\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"The URL(s) of the webpage(s) to visit. Can be a single URL or an array of URLs.\"}, \"goal\": {\"type\": \"string\", \"description\": \"The specific information goal for visiting webpage(s).\"}}, \"required\": [\"url\", \"goal\"]}}}\n{\"type\": \"function\", \"function\": {\"name\": \"PythonInterpreter\", \"description\": \"Executes Python code in a sandboxed environment. To use this tool, you must follow this format:\n1. The 'arguments' JSON object must be empty: {}.\n2. 
The Python code to be executed must be placed immediately after the JSON block, enclosed within <code> and </code> tags.\n\nIMPORTANT: Any output you want to see MUST be printed to standard output using the print() function.\n\nExample of a correct call:\n<tool_call>\n{\"name\": \"PythonInterpreter\", \"arguments\": {}}\n<code>\nimport numpy as np\n# Your code here\nprint(f\"The result is: {np.mean([1,2,3])}\")\n</code>\n</tool_call>\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}}\n{\"type\": \"function\", \"function\": {\"name\": \"google_scholar\", \"description\": \"Leverage Google Scholar to retrieve relevant information from academic publications. Accepts multiple queries. This tool will also return results from google search\", \"parameters\": {\"type\": \"object\", \"properties\": {\"query\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"description\": \"The search query.\"}, \"minItems\": 1, \"description\": \"The list of search queries for Google Scholar.\"}}, \"required\": [\"query\"]}}}\n{\"type\": \"function\", \"function\": {\"name\": \"parse_file\", \"description\": \"This is a tool that can be used to parse multiple user uploaded local files such as PDF, DOCX, PPTX, TXT, CSV, XLSX, DOC, ZIP, MP4, MP3.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"files\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"The file name of the user uploaded local files to be parsed.\"}}, \"required\": [\"files\"]}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n\nCurrent date: 2025-09-19"
|
| 5 |
+
},
|
| 6 |
+
{
|
| 7 |
+
"role": "user",
|
| 8 |
+
"content": "When is the paper submission deadline for the ACL 2025 Industry Track, and what is the venue address for the conference?"
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"role": "assistant",
|
| 12 |
+
"content": "<think>\n\nThe user asks: \"When is the paper submission deadline for the ACL 2025 Industry Track, and what is the venue address for the conference?\" So we need to find:\n\n- ACL 2025 (Association for Computational Linguistics) conference. Specifically the Industry Track submission deadline date.\n- Venue address for the conference (likely the location, city, country; maybe specific venue address like a convention center).\n\nWe need to browse the web to locate official information about ACL 2025 (maybe it hasn't happened yet). Possibly the site is something like https://2025.aclweb.org or similar.\n\nSearch queries could be:\n\n- \"ACL 2025 industry track submission deadline\"\n- \"ACL 2025 conference venue address\"\n- \"ACL 2025 location\"\n- \"ACL 2025 main conference venue\"\n- \"ACL 2025 call for papers industry track\"\n\nWe will search using the functions.search tool.\n</think>\n<tool_call>\n{\"name\": \"search\", \"arguments\": {\"query\": [\"ACL 2025 industry track submission deadline\", \"ACL 2025 conference venue address\", \"ACL 2025 location\", \"ACL 2025 call for papers industry track\", \"ACL 2025 conference details\"]}}\n</tool_call>"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"role": "user",
|
| 16 |
+
"content": "<tool_response>\nA Google search for 'ACL 2025 industry track submission deadline' found 10 results:\n\n## Web Results\n1. [Industry Track](https://2025.aclweb.org/calls/industry_track/)\n\nNote: All deadlines are 11:59PM UTC-12:00 (anywhere on Earth). Following the ACL and ARR Policies for Review and Citation, updated in early 2024, there is no ...\n\n2. [ACL 2025 Industry Track](https://openreview.net/group?id=aclweb.org/ACL/2025/Industry_Track)\nDate published: Jul 28, 2025\n\nPlease see the venue website for more information. Submission Start: Jan 27 2025 12:00AM UTC-0, Submission Deadline: Mar 22 2025 12:59PM UTC-0.\n\n3. [ACL 2025: The 63rd Annual Meeting of the Association for ...](https://2025.aclweb.org/)\n\nCall for Industry Track is out. Oct 7, 2024, First call for papers is out. Sep ... Commitment deadline for ACL 2025, April 20, 2025. Notification of ...\n\n4. [Call for Industry Track Papers](https://2025.naacl.org/calls/industry/)\n\nAll deadlines are 11:59PM UTC-12:00 (“anywhere on Earth”). Paper submission deadline, December 2, 2024 December 7, 2024. Notification of acceptance, February 5, ...\n\n5. [Call for Industry Track Papers](https://2025.emnlp.org/calls/industry_track/)\n\nImportant Dates · Paper submission deadline: July 4, 2025 · Reviewer-author discussion period: August 28 – September 8, 2025 · Notification: September 24, 2025 ...\n\n6. [ACL 2025 Industry Track: Overview](https://aclanthology.org/2025.acl-industry.1/)\nDate published: by G Rehm · 2025\n\nFor the 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025), it was decided once again to organise a dedicated Industry Track.\n\n7. [The 2025 Conference on Empirical Methods in Natural ...](https://2025.emnlp.org/)\n\nReview submission deadline, June 18, 2025 ; Author response and author-reviewer discussion, June 26 - July 2, 2025 ; Commitment deadline, August 1, 2025.\n\n8. 
[Acl 2025 deadline](https://assets-global.website-files.com/683fdb5e8008a99e5b00140f/6854688e54c673b87dfbc2c6_nukanavowazix.pdf)\nDate published: Jun 29, 2025\n\nAuthors of accepted papers must submit pre-recorded video presentations, poster PDFs and/or slides. CL and TACL authors also need to upload ...\n\n9. [ACL 2025 (@aclmeeting) / X](https://x.com/aclmeeting?lang=en)\n\n... submission deadline: Oct 6, 2025 • EACL commitment deadline: Dec 14, 2025. A tall building with intricate geometric patterns on its facade, featuring arched ...\n\n10. [ACL 2025 The 63rd Annual Meeting of the Association for ...](https://aclanthology.org/2025.acl-industry.0.pdf)\nDate published: by G Rehm · 2025\n\n... Industry Track submissions as well as the reviewers and area chairs for their hard and dedicated work under very tight deadlines. We would ...\n=======\nA Google search for 'ACL 2025 conference venue address' found 10 results:\n\n## Web Results\n1. [Venue](https://2025.aclweb.org/venue/)\n\nThe Austria Center Vienna is an easy-to-reach location, and right in the heart of Europe. And the only conference centre in the world that is situated right ...\n\n2. [ACL 2025: The 63rd Annual Meeting of the Association for ...](https://2025.aclweb.org/)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025) will take place in Vienna, Austria from July 27 to August 1st, 2025.\n\n3. [ACL 2025](https://www.clarin.eu/event/2025/acl-2025)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025) will take place in Vienna, Austria from July 27 to August 1st, 2025.\n\n4. [ACL 2025 Conference](https://openreview.net/group?id=aclweb.org/ACL/2025/Conference)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics. ACL 2025. Vienna, AustriaJul 27 2025https://2025.aclweb.org/[email protected].\n\n5. 
[ACL 2025 in Vienna, Austria](https://scads.ai/acl-2025-in-vienna-austria/)\nDate published: Aug 5, 2025\n\nFrom July 27 to August 1 the 63rd Annual Meeting of the Association for Computational Linguistics ACL 2025 took place in Vienna, Austria.\n\n6. [ACL 2025 (@aclmeeting) / X](https://x.com/aclmeeting?lang=en)\n\nJoin us for our 30th anniversary conference , November 4–9, 2025! Early registration ends on October 6, 2025. Space is limited, so register soon to ...\n\n7. [ACL 2025 : The 63rd Annual Meeting of the Association for ...](http://www.wikicfp.com/cfp/servlet/event.showcfp?copyownerid=90704&eventid=183095)\n\nACL 2025 Call for Papers Main Conference ACL 2025. Website: https://2025.aclweb.org/ Submission Deadline: February 15, 2025. Conference Dates: July 27 to ...\n\n8. [2025 Annual Conference of the Nations of the Americas ...](https://2025.naacl.org/)\n\n2025 Annual Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics Albuquerque, New Mexico April 29–May 4, 2025\n\n9. [Annual Meeting of the Association for Computational ...](https://aclanthology.org/events/acl-2025/)\n\nAnnual Meeting of the Association for Computational Linguistics (2025) ... address the lack of ground truth explanations to train the model, we propose ...\n\n10. [IBM at ACL 2025 - Vienna, Austria and virtual](https://research.ibm.com/events/acl-2025)\nDate published: Jul 27, 2025\n\nThis event has ended. About. IBM is proud to sponsor the 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025).\n=======\nA Google search for 'ACL 2025 location' found 10 results:\n\n## Web Results\n1. [Austin City Limits Music Festival | Austin, TX | October 3-5 & 10 ...](https://www.aclfestival.com/)\n\nAustin City Limits Music Festival will be presented at Zilker Park, in Austin, TX on October 3-5 & 10-12, 2025. Stay connected for upcoming announcements ...\n\n2. 
[ACL 2025: The 63rd Annual Meeting of the Association for ...](https://2025.aclweb.org/)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025) will take place in Vienna, Austria from July 27 to August 1st, 2025.\n\n3. [Venue](https://2025.aclweb.org/venue/)\n\nThe Austria Center Vienna is an easy-to-reach location, and right in the heart of Europe. And the only conference centre in the world that is situated right ...\n\n4. [Tickets | Austin City Limits Music Festival](https://www.aclfestival.com/tickets)\n\nBuy tickets for Austin City Limits Music Festival for October 3-5 & 10-12, 2025.\n\n5. [Austin City Limits Music Festival | Tickets, Dates & More](https://www.austintexas.org/events/acl-fest/)\n\nPlan your ACL 2025 visit! Get tickets, VIP passes, hotel options, and learn about the best activities for an unforgettable experience in Austin.\n\n6. [Austin City Limits Music Festival 2025 guide: tickets, tips ...](https://www.statesman.com/entertainment/music/article/acl-fest-guide-lineup-schedule-zilker-rules-21040936.php)\nDate published: 3 days ago\n\nACL Fest runs Oct. 3-5 and Oct. 10-12 in Zilker Park, 2100 Barton Springs Road. Gates open at noon and close at 10 p.m. on Friday, Saturday and ...\n\n7. [ACL 2025 (@aclmeeting) / X](https://x.com/aclmeeting?lang=en)\n\nWelcome to Vienna! What an incredible week it has been here in Vienna for the main conference!\n\n8. [ACL 2025](https://www.clarin.eu/event/2025/acl-2025)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025) will take place in Vienna, Austria from July 27 to August 1st, 2025.\n\n9. [Festival Map](https://support.aclfestival.com/hc/en-us/articles/4405399774484-Festival-Map)\n\nWhat are the dates of ACL Festival 2025? Where is the festival located? Can I leave and re-enter on the same day? Can I check my chair at ...\n\n10. 
[What are the dates of ACL Festival 2025?](https://support.aclfestival.com/hc/en-us/articles/4405406283924-What-are-the-dates-of-ACL-Festival-2025)\nDate published: Aug 27, 2025\n\nAustin City Limits Music Festival will take place on October 3-5 and October 10-12, 2025. Sign-up for our SMS list to be the first to know ...\n=======\nA Google search for 'ACL 2025 call for papers industry track' found 10 results:\n\n## Web Results\n1. [Industry Track](https://2025.aclweb.org/calls/industry_track/)\n\nThis track provides an opportunity to highlight the key insights and new research challenges that arise from real world implementations.\n\n2. [ACL 2025: The 63rd Annual Meeting of the Association for ...](https://2025.aclweb.org/)\n\nCall for System Demonstrations is out. Jan 20, 2025, Second call for papers is out. Dec 3, 2024, Call for Industry Track is out. Oct 7, 2024, First call for ...\n\n3. [ACL 2025 Industry Track](https://openreview.net/group?id=aclweb.org/ACL/2025/Industry_Track)\n\nPlease see the venue website for more information. Submission Start: Jan 27 2025 12:00AM UTC-0, Submission Deadline: Mar 22 2025 12:59PM UTC-0.\n\n4. [Call for Industry Track Papers](https://2025.naacl.org/calls/industry/)\n\nThe industry track welcomes submissions describing innovations and implementations of applications in all areas of speech and natural language processing.\n\n5. [ACL Industry track 2025 decisions : r/airesearch](https://www.reddit.com/r/airesearch/comments/1ki8ohz/acl_industry_track_2025_decisions/)\nDate published: 4 months ago\n\nACL indutry track reviews will be out soon. This is a thread for all types of discussions.\n\n6. [Call for Industry Track Papers](https://2025.emnlp.org/calls/industry_track/)\n\nThe EMNLP 2025 Industry Track provides the opportunity to highlight the key insights and new research challenges that arise from the development and deployment ...\n\n7. 
[ACL 2025 Industry Track: Overview](https://aclanthology.org/2025.acl-industry.1/)\nDate published: by G Rehm · 2025\n\nFor the 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025), it was decided once again to organise a dedicated Industry Track.\n\n8. [2025 Annual Conference of the Nations of the Americas ...](https://2025.naacl.org/)\n\nCall for industry track papers is out. Oct 01, 2024, Second call for papers is out. Sep 10, 2024, The ACL-NAACL-EMNLP joint call for tutorials is out. Sep 10 ...\n\n9. [Call for Industry Track](https://coling2025.org/calls/industry_track/)\n\nTopics of InterestPermalink. The purpose of this track is to showcase key insights and emerging research challenges emanating from practical applications.\n\n10. [ACL 2025 Industry Track: Overview](https://aclanthology.org/2025.acl-industry.1.pdf)\nDate published: by G Rehm · 2025\n\n3 Submissions and Results. The call for Industry Track papers attracted an un- precedented number of 421 paper submissions. A total of 453 ...\n=======\nA Google search for 'ACL 2025 conference details' found 10 results:\n\n## Web Results\n1. [ACL 2025: The 63rd Annual Meeting of the Association for ...](https://2025.aclweb.org/)\n\nImportant Dates ; Camera-ready papers due, May 30, 2025 ; Overall Conference, July 27 to August 1st, 2025 ; Tutorials & Welcome Reception, July 27, 2025 ; Main ...\n\n2. [ACL 2025 Conference](https://openreview.net/group?id=aclweb.org/ACL/2025/Conference)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics. ACL 2025. Vienna, AustriaJul 27 2025https://2025.aclweb.org/[email protected].\n\n3. [ACL 2025 (@aclmeeting) / X](https://x.com/aclmeeting?lang=en)\n\nWe look forward to seeing you at #ACL2026 in San Diego! ACL SRW 2025 updates the program schedule: acl2025-srw.github.io/schedule. Welcome to Vienna! What an ...\n\n4. [Annual Meeting of the Association for Computational ...](https://aclanthology.org/events/acl-2025/)\n\n... 
ACL 2025 1388 papers; Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025) 29 papers; Proceedings of the 2nd Workshop on ...\n\n5. [2025 Annual Conference of the Nations of the Americas ...](https://2025.naacl.org/)\n\nSubmission Deadline (via ACL Rolling Review), October 15, 2024 ; Welcome Reception, Tuesday, April 29, 2025 ; Main Conference, Wednesday–Friday, April 30–May 2, ...\n\n6. [ACL 2025](https://www.clarin.eu/event/2025/acl-2025)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025) will take place in Vienna, Austria from July 27 to August 1st, 2025.\n\n7. [2025 ACL Conference Registration](https://acl.swoogo.com/acl2025)\n\n2025 ACL Conference Registration July 27 - August 1, 2025. Early Registration: Ends Wednesday, July 2, 2025. AOE Late Registration: Ends for In-Person and ...\n\n8. [ACL 2025 The 63rd Annual Meeting of the Association for ...](https://aclanthology.org/2025.acl-long.0.pdf)\nDate published: by W Che · 2025\n\nACL 2025 hosts a rich set of 28 workshops and 8 tutorials, thanks to the dedicated efforts of our Wo- rkshop Chairs — Terra Blevins and ...\n\n9. [Dates and Venues](http://aclrollingreview.org/dates)\nDate published: May 19, 2025\n\nThis table lists all the key dates, by which the authors may need to take action, for 2025 cycles following the new schedule.\n\n10. [ACL 2025 : The 63rd Annual Meeting of the Association for ...](http://www.wikicfp.com/cfp/servlet/event.showcfp?copyownerid=90704&eventid=183095)\n\nConference July 28 - 30, 2025. Workshops July 31 - August 1, 2025. Note: All deadlines are 11:59PM UTC-12:00 (“anywhere on Earth”). Paper Submission Details\n</tool_response>"
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"role": "assistant",
|
| 20 |
+
"content": "<think>\n\nWe have search results. The relevant ones for submission deadline of Industry Track:\n\n- #1: \"Industry Track\" page at https://2025.aclweb.org/calls/industry_track/. It might have the exact deadline.\n\n- #2: OpenReview group for ACL 2025 Industry Track shows \"Submission Deadline: Mar 22 2025 12:59PM UTC-0\".\n\nBut we need to verify which is correct. Could be that the industry track call has a deadline in March (like March 22, 2025). But we also see other info from different sources showing deadlines like December 2/7, 2024 for NAACL (not relevant). The question is specifically \"paper submission deadline for the ACL 2025 Industry Track\". We should check the official ACL 2025 website's calls/industry_track page.\n\nLet's visit that URL.\n</think>\n<tool_call>\n{\"name\": \"visit\", \"arguments\": {\"url\": [\"https://2025.aclweb.org/calls/industry_track/\"], \"goal\": \"Find the paper submission deadline date for the ACL 2025 Industry Track.\"}}\n</tool_call>"
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"role": "user",
|
| 24 |
+
"content": "<tool_response>\nThe useful information in https://2025.aclweb.org/calls/industry_track/ for user goal Find the paper submission deadline date for the ACL 2025 Industry Track. as follows: \n\nEvidence in page: \nIMPORTANT DATES\n---------------\n\nPaper submission deadline: 21 March 2025\n\nNotification of acceptance: 09 May 2025\n\nCamera-ready version of papers due: 23 May 2025\n\nMain conference: 28-30 July 2025\n\nNote: All deadlines are 11:59PM UTC-12:00 (anywhere on Earth).\n\nSummary: \nThe paper submission deadline for the ACL 2025 Industry Track is 21 March 2025, at 11:59 PM UTC-12:00 (anywhere on Earth). This deadline is explicitly stated in the 'IMPORTANT DATES' section of the call for papers, which also outlines subsequent key dates including notification of acceptance (9 May 2025), camera-ready version submission (23 May 2025), and the main conference dates (28–30 July 2025).\n</tool_response>"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"role": "assistant",
|
| 28 |
+
"content": "<think>\n\nGreat. The page shows \"Paper submission deadline: 21 March 2025\". However, earlier the OpenReview group indicated March 22 2025 12:59PM UTC-0. The official website is more authoritative.\n\nNow we need venue address for the conference. Search results show a \"Venue\" page at https://2025.aclweb.org/venue/. Let's visit that.\n</think>\n<tool_call>\n{\"name\": \"visit\", \"arguments\": {\"url\": [\"https://2025.aclweb.org/venue/\"], \"goal\": \"Find the venue address for the ACL 2025 conference.\"}}\n</tool_call>"
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"role": "user",
|
| 32 |
+
"content": "<tool_response>\nThe useful information in https://2025.aclweb.org/venue/ for user goal Find the venue address for the ACL 2025 conference. as follows: \n\nEvidence in page: \nConference Venue\n----------------\n\nAustria Center Vienna Bruno-Kreisky-Platz 1, 1220 Wien, Austria\n\n +43 1 260690\n\n[https://www.acv.at/en/](https://www.acv.at/en/)\n\nThe Austria Center Vienna is an easy-to-reach location, and right in the heart of Europe. And the only conference centre in the world that is situated right next door to a UN headquarters. Austria’s largest congress center with 21 halls, 134 meeting rooms and 5 exhibition halls on 5 levels, with room for up to 22,800 people.\n\nSummary: \nThe venue for the ACL 2025 conference is the Austria Center Vienna, located at Bruno-Kreisky-Platz 1, 1220 Wien, Austria. This major congress center is situated in the heart of Europe, adjacent to a UN headquarters, and features 21 halls, 134 meeting rooms, and 5 exhibition halls, capable of accommodating up to 22,800 attendees.\n</tool_response>"
|
| 33 |
+
}
|
| 34 |
+
]
|
prompt.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Prompt template used to summarise a fetched webpage against a user goal.
# Placeholders: {webpage_content} and {goal}. The model is asked to reply
# with a JSON object containing "rational", "evidence" and "summary" fields
# (note: downstream code expects the key spelled "rational").
EXTRACTOR_PROMPT = """Please process the following webpage content and user goal to extract relevant information:

## **Webpage Content**
{webpage_content}

## **User Goal**
{goal}

## **Task Guidelines**
1. **Content Scanning for Rational**: Locate the **specific sections/data** directly related to the user's goal within the webpage content
2. **Key Extraction for Evidence**: Identify and extract the **most relevant information** from the content, you never miss any important information, output the **full original context** of the content as far as possible, it can be more than three paragraphs.
3. **Summary Output for Summary**: Organize into a concise paragraph with logical flow, prioritizing clarity and judge the contribution of the information to the goal.

**Final Output Format using JSON format has "rational", "evidence", "summary" fields**
"""
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
qwen-agent[gui,rag,code_interpreter,mcp]
|
| 2 |
+
sandbox_fusion
|
scholar.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import requests
|
| 4 |
+
from typing import Union, List
|
| 5 |
+
from qwen_agent.tools.base import BaseTool, register_tool
|
| 6 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 7 |
+
import http.client
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
SERPER_KEY=os.environ.get('SERPER_KEY_ID')
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@register_tool("google_scholar", allow_overwrite=True)
class Scholar(BaseTool):
    """Google Scholar search tool backed by the serper.dev API.

    Accepts a single query string or a list of queries; list queries are
    fetched concurrently and the formatted result blocks are joined with a
    ``=======`` separator.
    """
    name = "google_scholar"
    description = "Leverage Google Scholar to retrieve relevant information from academic publications. Accepts multiple queries."
    parameters = {
        "type": "object",
        "properties": {
            "query": {
                "type": "array",
                "items": {"type": "string", "description": "The search query."},
                "minItems": 1,
                "description": "The list of search queries for Google Scholar."
            },
        },
        "required": ["query"],
    }

    def google_scholar_with_serp(self, query: str) -> str:
        """Query the serper.dev /scholar endpoint and format the results.

        Retries the HTTP request up to 5 times. Never raises: on failure it
        returns a human-readable error string so the agent loop can continue.

        Args:
            query: A single Google Scholar search query.

        Returns:
            A markdown-ish string of numbered results, or an error message.
        """
        payload = json.dumps({
            "q": query,
        })
        headers = {
            'X-API-KEY': SERPER_KEY,
            'Content-Type': 'application/json'
        }
        res = None
        for attempt in range(5):
            # Bug fix: recreate the connection on every attempt. A failed
            # request can leave an HTTPSConnection in an unusable state, so
            # reusing one object across retries made every retry fail too.
            conn = http.client.HTTPSConnection("google.serper.dev")
            try:
                conn.request("POST", "/scholar", payload, headers)
                res = conn.getresponse()
                break
            except Exception as e:
                print(e)
                if attempt == 4:
                    return "Google Scholar Timeout, return None, Please try again later."

        data = res.read()
        results = json.loads(data.decode("utf-8"))
        try:
            if "organic" not in results:
                raise Exception(f"No results found for query: '{query}'. Use a less specific query.")

            web_snippets = []
            for idx, page in enumerate(results["organic"], start=1):
                date_published = ""
                if "year" in page:
                    date_published = "\nDate published: " + str(page["year"])

                publicationInfo = ""
                if "publicationInfo" in page:
                    publicationInfo = "\npublicationInfo: " + page["publicationInfo"]

                snippet = ""
                if "snippet" in page:
                    snippet = "\n" + page["snippet"]

                link_info = "no available link"
                if "pdfUrl" in page:
                    link_info = "pdfUrl: " + page["pdfUrl"]

                citedBy = ""
                if "citedBy" in page:
                    citedBy = "\ncitedBy: " + str(page["citedBy"])

                redacted_version = f"{idx}. [{page['title']}]({link_info}){publicationInfo}{date_published}{citedBy}\n{snippet}"
                # Strip a boilerplate phrase that sometimes leaks into snippets.
                redacted_version = redacted_version.replace("Your browser can't play this video.", "")
                web_snippets.append(redacted_version)

            content = f"A Google scholar for '{query}' found {len(web_snippets)} results:\n\n## Scholar Results\n" + "\n\n".join(web_snippets)
            return content
        except Exception:
            # Narrowed from a bare `except:` so Ctrl-C / SystemExit still work.
            return f"No results found for '{query}'. Try with a more general query."

    def call(self, params: Union[str, dict], **kwargs) -> str:
        """Tool entry point: parse params and dispatch one or many queries.

        Args:
            params: JSON string or dict containing a 'query' field (string
                or list of strings).

        Returns:
            The formatted search results, or an error string on bad input.
        """
        try:
            # Bug fix: only JSON-decode when given a string. Previously a
            # dict payload (allowed by the signature) crashed json.loads and
            # was wrongly rejected as an invalid request.
            if isinstance(params, str):
                params = json.loads(params)
            params = self._verify_json_format_args(params)
            query = params["query"]
        except Exception:
            return "[google_scholar] Invalid request format: Input must be a JSON object containing 'query' field"

        if isinstance(query, str):
            return self.google_scholar_with_serp(query)
        assert isinstance(query, List)
        # Fan out the queries concurrently; serper calls are I/O-bound.
        with ThreadPoolExecutor(max_workers=3) as executor:
            responses = list(executor.map(self.google_scholar_with_serp, query))
        return "\n=======\n".join(responses)
|
search.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 3 |
+
from typing import List, Union
|
| 4 |
+
import requests
|
| 5 |
+
from qwen_agent.tools.base import BaseTool, register_tool
|
| 6 |
+
import asyncio
|
| 7 |
+
from typing import Dict, List, Optional, Union
|
| 8 |
+
import uuid
|
| 9 |
+
import http.client
|
| 10 |
+
import json
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# serper.dev API key, read once at import time; None when unset, in which
# case every search request will fail with an auth error (X-API-KEY header).
SERPER_KEY=os.environ.get('SERPER_KEY_ID')
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@register_tool("search", allow_overwrite=True)
class Search(BaseTool):
    """Batched Google web search backed by the serper.dev API.

    Accepts one query string or an array of queries; each query returns
    the top organic results formatted as a Markdown list.
    """

    name = "search"
    description = "Performs batched web searches: supply an array 'query'; the tool retrieves the top 10 results for each query in one call."
    parameters = {
        "type": "object",
        "properties": {
            "query": {
                "type": "array",
                "items": {
                    "type": "string"
                },
                "description": "Array of query strings. Include multiple complementary search queries in a single call."
            },
        },
        "required": ["query"],
    }

    def __init__(self, cfg: Optional[dict] = None):
        super().__init__(cfg)

    def google_search_with_serp(self, query: str) -> str:
        """Run one Google search through serper.dev and format the results.

        Returns a Markdown-formatted result list, or a human-readable
        error string on failure (never raises to the caller).
        """

        def contains_chinese_basic(text: str) -> bool:
            # CJK Unified Ideographs block is sufficient to route locale.
            return any('\u4E00' <= char <= '\u9FFF' for char in text)

        # Localize the request: Chinese queries use cn/zh-cn, otherwise us/en.
        if contains_chinese_basic(query):
            payload = json.dumps({
                "q": query,
                "location": "China",
                "gl": "cn",
                "hl": "zh-cn"
            })
        else:
            payload = json.dumps({
                "q": query,
                "location": "United States",
                "gl": "us",
                "hl": "en"
            })
        headers = {
            'X-API-KEY': SERPER_KEY,
            'Content-Type': 'application/json'
        }

        res = None
        conn = None
        for i in range(5):
            try:
                # Fix: create a fresh connection per attempt — an HTTPS
                # connection that failed mid-request cannot be reused safely.
                conn = http.client.HTTPSConnection("google.serper.dev")
                conn.request("POST", "/search", payload, headers)
                res = conn.getresponse()
                break
            except Exception as e:
                print(e)
                if conn is not None:
                    conn.close()
                    conn = None
                if i == 4:
                    return f"Google search Timeout, return None, Please try again later."
                continue

        try:
            data = res.read()
        finally:
            # Fix: the original leaked the connection after reading.
            if conn is not None:
                conn.close()
        results = json.loads(data.decode("utf-8"))
        print(results)

        try:
            if "organic" not in results:
                raise Exception(f"No results found for query: '{query}'. Use a less specific query.")

            # (Removed the redundant second `if "organic" in results` check —
            # the raise above already guarantees the key exists.)
            web_snippets = []
            for idx, page in enumerate(results["organic"], start=1):
                date_published = ""
                if "date" in page:
                    date_published = "\nDate published: " + page["date"]

                source = ""
                if "source" in page:
                    source = "\nSource: " + page["source"]

                snippet = ""
                if "snippet" in page:
                    snippet = "\n" + page["snippet"]

                redacted_version = f"{idx}. [{page['title']}]({page['link']}){date_published}{source}\n{snippet}"
                # Strip a boilerplate phrase serper sometimes includes.
                redacted_version = redacted_version.replace("Your browser can't play this video.", "")
                web_snippets.append(redacted_version)

            content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets)
            return content
        except Exception as e:
            print(e)
            return f"No results found for '{query}'. Try with a more general query."

    def search_with_serp(self, query: str) -> str:
        # Thin alias kept for backward compatibility with existing callers.
        result = self.google_search_with_serp(query)
        return result

    def call(self, params: Union[str, dict], **kwargs) -> str:
        """Tool entry point: parse 'query' from params and run the search(es)."""
        try:
            print(params)
            # Fix: the signature accepts dicts, but json.loads() only takes
            # strings — parse only when a raw JSON string was provided.
            if isinstance(params, str):
                params = json.loads(params)
            print(params)
            query = params["query"]
            print("query:\n", query)
        except Exception:
            return "[Search] Invalid request format: Input must be a JSON object containing 'query' field"

        if isinstance(query, str):
            # Single query.
            response = self.search_with_serp(query)
        else:
            # Multiple queries, searched sequentially.
            assert isinstance(query, List)
            responses = [self.search_with_serp(q) for q in query]
            response = "\n=======\n".join(responses)

        return response
|
tool_python.py
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import random
import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, List, Optional, Union

import json5
from requests.exceptions import Timeout
from sandbox_fusion import run_code, RunCodeRequest, RunStatus

from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool
from qwen_agent.utils.utils import extract_code, has_chinese_chars
|
| 12 |
+
|
| 13 |
+
# Base sandbox-fusion endpoint; empty string when SANDBOX_URL is unset.
SANDBOX_URL=os.getenv('SANDBOX_URL','')
SANDBOX_FUSION_ENDPOINTS = [
    SANDBOX_URL
]

# Fallback to single endpoint if environment variable exists
# SANDBOX_FUSION_ENDPOINT may hold a comma-separated list of endpoints and
# takes precedence over SANDBOX_URL when present.
if 'SANDBOX_FUSION_ENDPOINT' in os.environ:
    SANDBOX_FUSION_ENDPOINTS = os.environ['SANDBOX_FUSION_ENDPOINT'].split(',')
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@register_tool('PythonInterpreter', allow_overwrite=True)
class PythonInterpreter(BaseToolWithFileAccess):
    """Tool that executes Python code on remote sandbox-fusion endpoints.

    Code is extracted from the tool-call arguments (either <code></code>
    XML tags, a JSON object with a 'code'/'raw' field, or a fenced
    markdown block), then submitted to a randomly chosen endpoint from
    SANDBOX_FUSION_ENDPOINTS, retrying on failure.
    """

    name = "PythonInterpreter"
    description = 'Execute Python code in a sandboxed environment. Use this to run Python code and get the execution results.\n**Make sure to use print() for any output you want to see in the results.**\nFor code parameters, use placeholders first, and then put the code within <code></code> XML tags, such as:\n<tool_call>\n{"purpose": <detailed-purpose-of-this-tool-call>, "name": <tool-name>, "arguments": {"code": ""}}\n<code>\nHere is the code.\n</code>\n</tool_call>\n'

    parameters = {
        "type": "object",
        "properties": {
            "code": {
                "type": "string",
                "description": "The Python code to execute. Must be provided within <code></code> XML tags. Remember to use print() statements for any output you want to see.",
            }
        },
        "required": ["code"],
    }

    def __init__(self, cfg: Optional[Dict] = None):
        super().__init__(cfg)
        # self.summary_mapping = SummaryMapping()

    @property
    def args_format(self) -> str:
        """Human-readable hint describing how arguments must be supplied."""
        fmt = self.cfg.get('args_format')
        if fmt is None:
            # Fix: has_chinese_chars was referenced but never imported; it is
            # now imported from qwen_agent.utils.utils at module level.
            if has_chinese_chars([self.name_for_human, self.name, self.description, self.parameters]):
                fmt = 'The input for this tool should be a Markdown code block.'
            else:
                fmt = 'Enclose the code within triple backticks (`) at the beginning and end of the code.'
        return fmt

    def observation(self, tool: dict, tool_dict: dict, tool_results, empty_mode: bool = False, readpage: bool = False, max_observation_length: int = None, tokenizer=None):
        """Pass the interpreter output through unchanged as the observation."""
        # (Removed leftover debug print.)
        assert isinstance(tool_results, str), f"result of python code should be str, instead of {type(tool_results)}. {tool_results}"
        return tool_results

    @property
    def function(self) -> dict:
        """OpenAI-style function descriptor for this tool."""
        return {
            'name': self.name,
            'description': self.description,
            'parameters': self.parameters,
        }

    @staticmethod
    def _parse_code(params) -> str:
        """Extract the Python source from *params*.

        Tries, in order: <code></code> XML tags (as the tool description
        instructs callers to use), a JSON object with a 'code' (or 'raw')
        field possibly wrapped in a markdown fence, and finally the
        generic extract_code fallback. Fix: the original computed the
        <code>-tag extraction and then unconditionally discarded it.
        """
        try:
            if isinstance(params, str) and '<code>' in params:
                # '</code' (without '>') tolerates sloppy closing tags.
                return params.split('<code>')[1].split('</code')[0]
            if isinstance(params, str):
                params = json5.loads(params)
            code = params.get('code', '')
            if not code:
                code = params.get('raw', '')
            triple_match = re.search(r'```[^\n]*\n(.+?)```', code, re.DOTALL)
            if triple_match:
                code = triple_match.group(1)
            return code
        except Exception:
            return extract_code(params)

    def call(self, params, files=None, timeout=50, **kwargs) -> str:
        """Execute the code contained in *params* and return its output.

        Args:
            params: Raw tool-call arguments (string or dict); see _parse_code.
            files: Unused; kept for interface compatibility.
            timeout: Per-run execution timeout in seconds.

        Returns:
            stdout/stderr of the run, 'Finished execution.' when there was
            no output, or an '[Python Interpreter Error] ...' string.
        """
        # Fix: the original looped range(8) but treated attempt == 4 as the
        # last attempt and printed "/5" — now a single consistent constant.
        max_attempts = 5
        try:
            # super().call(params=params, files=files)  # copy remote files to work_dir
            code = self._parse_code(params)

            if not code.strip():
                return '[Python Interpreter Error]: Empty code.'

            # Retry with a freshly sampled endpoint on each attempt.
            last_error = None
            for attempt in range(max_attempts):
                endpoint = random.choice(SANDBOX_FUSION_ENDPOINTS)
                try:
                    print(f"Attempt {attempt + 1}/{max_attempts} using endpoint: {endpoint}")

                    code_result = run_code(RunCodeRequest(code=code, language='python', run_timeout=timeout), max_attempts=1, client_timeout=timeout, endpoint=endpoint)
                    print("[Python] Code Result", code_result)
                    result = []
                    if code_result.run_result.stdout:
                        result.append(f"stdout:\n{code_result.run_result.stdout}")
                    if code_result.run_result.stderr:
                        result.append(f"stderr:\n{code_result.run_result.stderr}")
                    # Flag runs that consumed (nearly) the whole budget as timeouts.
                    if code_result.run_result.execution_time >= timeout - 1:
                        result.append("[PythonInterpreter Error] TimeoutError: Execution timed out.")
                    result = '\n'.join(result)
                    print('SUCCESS RUNNING TOOL')
                    return result if result.strip() else 'Finished execution.'

                except Timeout:
                    last_error = f'[Python Interpreter Error] TimeoutError: Execution timed out on endpoint {endpoint}.'
                    print(f"Timeout on attempt {attempt + 1}: {last_error}")
                except Exception as e:
                    last_error = f'[Python Interpreter Error]: {str(e)} on endpoint {endpoint}'
                    print(f"Error on attempt {attempt + 1}: {last_error}")

            return last_error if last_error else '[Python Interpreter Error]: All attempts failed.'

        except Exception as e:
            return f"[Python Interpreter Error]: {str(e)}"

    def call_specific_endpoint(self, params: Union[str, dict], endpoint: str, timeout: Optional[int] = 30, **kwargs) -> tuple:
        """Run *params* on one specific *endpoint*.

        Returns:
            (success, output, execution_seconds); the third element is None
            when execution did not complete. Fix: the empty-code path used
            to return a 2-tuple, which broke 3-way unpacking in callers.
        """
        code = self._parse_code(params)

        if not code.strip():
            return False, '[Python Interpreter Error]: Empty code.', None

        try:
            start_time = time.time()
            code_result = run_code(RunCodeRequest(code=code, language='python', run_timeout=timeout),
                                   max_attempts=1, client_timeout=timeout, endpoint=endpoint)
            end_time = time.time()

            result = []
            if code_result.run_result.stdout:
                result.append(f"stdout:\n{code_result.run_result.stdout}")
            if code_result.run_result.stderr:
                result.append(f"stderr:\n{code_result.run_result.stderr}")

            result = '\n'.join(result)
            execution_time = end_time - start_time
            return True, result if result.strip() else 'Finished execution.', execution_time

        except Timeout:
            return False, '[Python Interpreter Error] TimeoutError: Execution timed out.', None
        except Exception as e:
            return False, f'[Python Interpreter Error]: {str(e)}', None
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def test_single_endpoint(endpoint: str, test_cases: List[dict], timeout: int = 30) -> dict:
    """Test a single endpoint with multiple test cases.

    Args:
        endpoint: Sandbox-fusion endpoint URL to exercise.
        test_cases: Dicts with 'name', 'code' and optional 'expected_output'.
        timeout: Per-test execution timeout in seconds.

    Returns:
        A summary dict with 'status' ('healthy'/'partial'/'failed'),
        pass counts, per-test results, average execution time and errors.
    """
    executor = PythonInterpreter()
    results = {
        'endpoint': endpoint,
        'status': 'unknown',
        'passed_tests': 0,
        'total_tests': len(test_cases),
        'test_results': [],
        'avg_execution_time': 0,
        'errors': []
    }

    execution_times = []

    print(f"\n🧪 Testing endpoint: {endpoint}")

    for i, test_case in enumerate(test_cases):
        test_name = test_case['name']
        test_code = test_case['code']
        expected_output = test_case.get('expected_output')

        print(f"  ├─ Running test {i+1}/{len(test_cases)}: {test_name}")

        try:
            # call_specific_endpoint returns (success, output, exec_seconds).
            success, result, exec_time = executor.call_specific_endpoint(
                {"code": test_code}, endpoint, timeout
            )

            test_result = {
                'name': test_name,
                'success': success,
                'result': result,
                'execution_time': exec_time,
                'expected_match': False
            }

            if success and exec_time is not None:
                execution_times.append(exec_time)

                # Check if output matches expected (if provided)
                if expected_output:
                    # Clean up output for comparison — substring match is
                    # intentional so multi-line outputs still pass.
                    actual_output = result.replace('stdout:\n', '').strip()
                    if expected_output.strip() in actual_output:
                        test_result['expected_match'] = True
                        results['passed_tests'] += 1
                        print(f"  │  ✅ PASSED ({exec_time:.2f}s)")
                    else:
                        print(f"  │  ❌ OUTPUT MISMATCH ({exec_time:.2f}s)")
                        print(f"  │     Expected: {expected_output.strip()}")
                        print(f"  │     Got: {actual_output}")
                else:
                    # No expected output specified, just check if it ran successfully
                    test_result['expected_match'] = True
                    results['passed_tests'] += 1
                    print(f"  │  ✅ PASSED ({exec_time:.2f}s)")
            else:
                print(f"  │  ❌ FAILED: {result}")
                results['errors'].append(f"{test_name}: {result}")

            results['test_results'].append(test_result)

        except Exception as e:
            print(f"  │  💥 EXCEPTION: {str(e)}")
            results['errors'].append(f"{test_name}: Exception - {str(e)}")
            results['test_results'].append({
                'name': test_name,
                'success': False,
                'result': f"Exception: {str(e)}",
                'execution_time': None,
                'expected_match': False
            })

    # Calculate statistics
    if execution_times:
        results['avg_execution_time'] = sum(execution_times) / len(execution_times)

    # Determine overall status
    if results['passed_tests'] == results['total_tests']:
        results['status'] = 'healthy'
        print(f"  └─ ✅ ALL TESTS PASSED ({results['passed_tests']}/{results['total_tests']})")
    elif results['passed_tests'] > 0:
        results['status'] = 'partial'
        print(f"  └─ ⚠️ PARTIAL SUCCESS ({results['passed_tests']}/{results['total_tests']})")
    else:
        results['status'] = 'failed'
        print(f"  └─ ❌ ALL TESTS FAILED ({results['passed_tests']}/{results['total_tests']})")

    return results
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def test_all_endpoints_comprehensive():
    """Comprehensive test suite for all sandbox fusion endpoints.

    Runs every case in the fixture list below against each endpoint in
    SANDBOX_FUSION_ENDPOINTS concurrently, then prints a health summary
    and returns the per-endpoint result dicts.
    """

    # Define comprehensive test cases (each: name, code to run, and an
    # optional substring expected in stdout).
    test_cases = [
        {
            'name': 'Basic Math',
            'code': 'print(2 + 2)',
            'expected_output': '4'
        },
        {
            'name': 'String Operations',
            'code': 'print("Hello, " + "World!")',
            'expected_output': 'Hello, World!'
        },
        {
            'name': 'List Operations',
            'code': '''
numbers = [1, 2, 3, 4, 5]
print(f"Sum: {sum(numbers)}")
print(f"Length: {len(numbers)}")
''',
            'expected_output': 'Sum: 15'
        },
        {
            'name': 'Dictionary Operations',
            'code': '''
data = {"name": "Test", "value": 42}
print(f"Name: {data['name']}, Value: {data['value']}")
''',
            'expected_output': 'Name: Test, Value: 42'
        },
        {
            'name': 'Loop and Conditionals',
            'code': '''
result = []
for i in range(5):
    if i % 2 == 0:
        result.append(i)
print(f"Even numbers: {result}")
''',
            'expected_output': 'Even numbers: [0, 2, 4]'
        },
        {
            'name': 'Function Definition',
            'code': '''
def factorial(n):
    if n <= 1:
        return 1
    return n * factorial(n-1)

print(f"5! = {factorial(5)}")
''',
            'expected_output': '5! = 120'
        },
        {
            'name': 'Exception Handling',
            'code': '''
try:
    result = 10 / 2
    print(f"Division result: {result}")
except ZeroDivisionError:
    print("Cannot divide by zero")
''',
            'expected_output': 'Division result: 5.0'
        },
        {
            'name': 'Import Standard Library',
            'code': '''
import math
print(f"Pi: {math.pi:.2f}")
print(f"Square root of 16: {math.sqrt(16)}")
''',
            'expected_output': 'Pi: 3.14'
        },
        {
            'name': 'Complex Calculation',
            'code': '''
import math

# Calculate area of a circle
radius = 5
area = math.pi * radius ** 2
print(f"Area of circle with radius {radius}: {area:.2f}")

# Fibonacci sequence
def fibonacci(n):
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)

fib_10 = fibonacci(10)
print(f"10th Fibonacci number: {fib_10}")
''',
            'expected_output': 'Area of circle with radius 5: 78.54'
        },
        {
            'name': 'Error Handling Test',
            'code': '''
try:
    undefined_variable
except NameError as e:
    print("Caught NameError as expected")
print("Test passed")
''',
            'expected_output': 'Test passed'
        }
    ]

    print("🚀 Starting comprehensive endpoint testing...")
    print(f"📊 Testing {len(SANDBOX_FUSION_ENDPOINTS)} endpoints with {len(test_cases)} test cases each")
    print("=" * 80)

    # Test all endpoints concurrently
    all_results = []

    # Use ThreadPoolExecutor for concurrent testing (capped at 8 workers).
    with ThreadPoolExecutor(max_workers=min(len(SANDBOX_FUSION_ENDPOINTS), 8)) as executor:
        future_to_endpoint = {
            executor.submit(test_single_endpoint, endpoint, test_cases): endpoint
            for endpoint in SANDBOX_FUSION_ENDPOINTS
        }

        for future in as_completed(future_to_endpoint):
            endpoint = future_to_endpoint[future]
            try:
                result = future.result()
                all_results.append(result)
            except Exception as exc:
                # A crashed worker is recorded as a fully failed endpoint so
                # the summary below still accounts for it.
                print(f'❌ Endpoint {endpoint} generated an exception: {exc}')
                all_results.append({
                    'endpoint': endpoint,
                    'status': 'failed',
                    'passed_tests': 0,
                    'total_tests': len(test_cases),
                    'test_results': [],
                    'avg_execution_time': 0,
                    'errors': [f'Exception during testing: {exc}']
                })

    # Print comprehensive summary
    print("\n" + "=" * 80)
    print("📈 COMPREHENSIVE TEST RESULTS SUMMARY")
    print("=" * 80)

    healthy_endpoints = [r for r in all_results if r['status'] == 'healthy']
    partial_endpoints = [r for r in all_results if r['status'] == 'partial']
    failed_endpoints = [r for r in all_results if r['status'] == 'failed']

    print(f"✅ Healthy endpoints: {len(healthy_endpoints)}/{len(SANDBOX_FUSION_ENDPOINTS)}")
    print(f"⚠️ Partial endpoints: {len(partial_endpoints)}/{len(SANDBOX_FUSION_ENDPOINTS)}")
    print(f"❌ Failed endpoints: {len(failed_endpoints)}/{len(SANDBOX_FUSION_ENDPOINTS)}")

    if healthy_endpoints:
        print(f"\n🎉 HEALTHY ENDPOINTS ({len(healthy_endpoints)}):")
        for result in healthy_endpoints:
            print(f"   ✅ {result['endpoint']} (avg: {result['avg_execution_time']:.2f}s)")

    if partial_endpoints:
        print(f"\n⚠️ PARTIAL ENDPOINTS ({len(partial_endpoints)}):")
        for result in partial_endpoints:
            print(f"   ⚠️ {result['endpoint']} ({result['passed_tests']}/{result['total_tests']} tests passed)")

    if failed_endpoints:
        print(f"\n❌ FAILED ENDPOINTS ({len(failed_endpoints)}):")
        for result in failed_endpoints:
            print(f"   ❌ {result['endpoint']} (0/{result['total_tests']} tests passed)")
            if result['errors']:
                for error in result['errors'][:3]:  # Show first 3 errors
                    print(f"      └─ {error}")

    # Performance statistics (healthy endpoints only)
    healthy_times = [r['avg_execution_time'] for r in healthy_endpoints if r['avg_execution_time'] > 0]
    if healthy_times:
        print(f"\n⚡ PERFORMANCE STATISTICS:")
        print(f"   Fastest endpoint: {min(healthy_times):.2f}s")
        print(f"   Slowest endpoint: {max(healthy_times):.2f}s")
        print(f"   Average response time: {sum(healthy_times)/len(healthy_times):.2f}s")

    # Overall health score: fraction of all (endpoint, test) pairs that passed.
    total_tests = len(SANDBOX_FUSION_ENDPOINTS) * len(test_cases)
    passed_tests = sum(r['passed_tests'] for r in all_results)
    health_score = (passed_tests / total_tests) * 100

    print(f"\n🏥 OVERALL SYSTEM HEALTH: {health_score:.1f}%")
    print(f"   ({passed_tests}/{total_tests} total tests passed)")

    if health_score >= 90:
        print("   🟢 Excellent - System is performing well")
    elif health_score >= 70:
        print("   🟡 Good - Some endpoints may need attention")
    elif health_score >= 50:
        print("   🟠 Fair - Multiple endpoints having issues")
    else:
        print("   🔴 Poor - System requires immediate attention")

    return all_results
|
| 457 |
+
|
| 458 |
+
|
| 459 |
+
def _test():
    """Smoke test: run a trivial print through the interpreter and show the result."""
    from pprint import pprint

    interpreter = PythonInterpreter()
    output = interpreter.call("print(1234)")
    pprint(output)
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
if __name__ == '__main__':
    # Run comprehensive endpoint testing
    test_all_endpoints_comprehensive()

    print("\n" + "=" * 80)
    print("🔧 Running original test...")
    print("=" * 80)

    # Also run the original test
    _test()
|
utils/__pycache__/date.cpython-310.pyc
ADDED
|
Binary file (2.29 kB). View file
|
|
|
utils/date.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from datetime import datetime, timedelta, timezone
|
| 3 |
+
|
| 4 |
+
# Date strings are exchanged in ISO "YYYY-MM-DD" form by default.
DEFAULT_DATE_FORMAT = "%Y-%m-%d"

# Weekday display names indexed by tm_wday (0 = Monday … 6 = Sunday),
# keyed by language code.
wdays = {
    "en": [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ],
    "zh": [
        "星期一",
        "星期二",
        "星期三",
        "星期四",
        "星期五",
        "星期六",
        "星期日",
    ],
}
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_date_now() -> list[int]:
    """Return the current Beijing date as [year, month, day, weekday].

    Weekday follows the ``tm_wday`` convention: 0 is Monday, 6 is Sunday.
    """
    # Beijing time is UTC+8 year-round (no DST), so a fixed offset is safe.
    now_cst = datetime.now(timezone.utc) + timedelta(hours=8)
    tt = now_cst.timetuple()
    return [int(v) for v in (tt.tm_year, tt.tm_mon, tt.tm_mday, tt.tm_wday)]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def get_date_rand(before_days: int = 365, after_days: int = 365) -> list[int]:
    """Return a random date near today as [year, month, day, weekday].

    The date is drawn uniformly between ``before_days`` in the past and
    ``after_days`` in the future (UTC clock). Weekday: 0 = Monday.
    """
    offset = timedelta(days=random.randint(-before_days, after_days))
    tt = (datetime.now(timezone.utc) + offset).timetuple()
    return [int(v) for v in (tt.tm_year, tt.tm_mon, tt.tm_mday, tt.tm_wday)]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def str2date(date_str, date_format: str = DEFAULT_DATE_FORMAT) -> list[int]:
    """Parse *date_str* using *date_format* into [year, month, day, weekday]."""
    tt = datetime.strptime(date_str, date_format).timetuple()
    return [int(v) for v in (tt.tm_year, tt.tm_mon, tt.tm_mday, tt.tm_wday)]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def date2str(
    date,
    sep="-",
    with_week: bool = False,
    language: str = "en",
    date_format: str = DEFAULT_DATE_FORMAT,
) -> str:
    """Format a date as YYYY-MM-DD (joined by *sep*), optionally with weekday.

    *date* is either a [year, month, day, weekday] list or a string in
    *date_format* (parsed via str2date). *language* selects the weekday
    name table ('en' or 'zh') when *with_week* is set.
    """
    # Normalize a "YYYY-MM-DD"-style string into the list form first.
    if isinstance(date, str):
        date = str2date(date, date_format=date_format)

    pieces = (f"{date[0]:04d}", f"{date[1]:02d}", f"{date[2]:02d}")
    date_str = sep.join(pieces)
    if with_week:
        date_str = f"{date_str} {wdays[language][date[3]]}"
    return date_str
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
if __name__ == "__main__":
    # Demo: today's Beijing date with English and Chinese weekday names.
    # NOTE: the name 'rand_date' is historical — get_date_now() returns the
    # current date, not a random one.
    rand_date = get_date_now()
    print(date2str(rand_date, with_week=True))
    print(date2str(rand_date, with_week=True, language="zh"))
|
utils/logs.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding=utf-8
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
import platform
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def check_macos():
    """Return True when the current platform is macOS ('Darwin')."""
    return platform.system() == 'Darwin'
|
| 15 |
+
|
| 16 |
+
def setup_logger(level = None, logfile_name: str = 'search-agent'):
    """Create (or re-fetch) a logger that writes to both stderr and
    ``<log dir>/<logfile_name>.log``.

    Args:
        level: Explicit logging level. When None, DEBUG if the
            SEARCH_AGENT_DEBUG env var is '1'/'true', else INFO.
        logfile_name: Used both as the logger name and the log-file base name.

    Returns:
        The configured ``logging.Logger``.
    """
    if level is None:
        if os.getenv('SEARCH_AGENT_DEBUG', '0').strip().lower() in ('1', 'true'):
            level = logging.DEBUG
        else:
            level = logging.INFO

    formatter = logging.Formatter('%(asctime)s - %(filename)s - %(lineno)d - %(levelname)s - %(message)s')

    handler = logging.StreamHandler()
    # Do not run handler.setLevel(level) so that users can change the level via logger.setLevel later
    handler.setFormatter(formatter)

    # Resolve the directory that holds the log files.
    if platform.system() == 'Darwin':
        # macOS (local dev): always log into ./logs
        agent_path = 'logs'
    else:
        agent_path = os.getenv('AGENT_PATH', 'logs')
        if len(agent_path) > 0:
            agent_path = agent_path + '/logs'
    # exist_ok avoids a crash when several processes create the dir at once.
    os.makedirs(agent_path, exist_ok=True)
    file_handler = logging.FileHandler(f'{agent_path}/{logfile_name}.log')
    file_handler.setFormatter(formatter)

    _logger = logging.getLogger(logfile_name)
    _logger.setLevel(level)
    # Bug fix: logging.getLogger returns the SAME object for a given name, so
    # unconditionally attaching handlers duplicated every log line whenever
    # this function ran twice for one name (e.g. on module re-import/reload).
    if not _logger.handlers:
        _logger.addHandler(handler)
        _logger.addHandler(file_handler)
    return _logger
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Module-level logger singletons created at import time: one general logger,
# one for access logs, one for errors. Each writes to its own file under the
# directory resolved inside setup_logger.
logger = setup_logger(logfile_name='search-agent')
access_logger = setup_logger(logfile_name='search-agent-access')
error_logger = setup_logger(logfile_name='search-agent-error')
|
visit.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import signal
|
| 4 |
+
import threading
|
| 5 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 6 |
+
from typing import List, Union
|
| 7 |
+
import requests
|
| 8 |
+
from qwen_agent.tools.base import BaseTool, register_tool
|
| 9 |
+
from prompt import EXTRACTOR_PROMPT
|
| 10 |
+
from openai import OpenAI
|
| 11 |
+
import random
|
| 12 |
+
from urllib.parse import urlparse, unquote
|
| 13 |
+
import time
|
| 14 |
+
import tiktoken
|
| 15 |
+
|
| 16 |
+
# Hard ceiling (seconds) for a single page-visit/summarization request.
VISIT_SERVER_TIMEOUT = int(os.getenv("VISIT_SERVER_TIMEOUT", 200))
# Maximum number of characters of raw webpage content kept for processing.
WEBCONTENT_MAXLENGTH = int(os.getenv("WEBCONTENT_MAXLENGTH", 150000))

# Bearer token for the Jina reader service (https://r.jina.ai); empty if unset.
JINA_API_KEYS = os.getenv("JINA_KEY", "")
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def truncate_to_tokens(text: str, max_tokens: int = 95000) -> str:
    """Truncate *text* so it encodes to at most *max_tokens* cl100k_base tokens.

    Bug fix: the original decorated this module-level function with
    @staticmethod, which is only meaningful inside a class body (and on
    Python < 3.10 leaves an uncallable descriptor bound to the name).

    Args:
        text: The text to bound.
        max_tokens: Token budget (default 95000).

    Returns:
        *text* unchanged when already within budget, otherwise the decoded
        prefix covering the first *max_tokens* tokens.
    """
    encoding = tiktoken.get_encoding("cl100k_base")

    tokens = encoding.encode(text)
    if len(tokens) <= max_tokens:
        return text

    return encoding.decode(tokens[:max_tokens])
|
| 32 |
+
|
| 33 |
+
OSS_JSON_FORMAT = """# Response Formats
|
| 34 |
+
## visit_content
|
| 35 |
+
{"properties":{"rational":{"type":"string","description":"Locate the **specific sections/data** directly related to the user's goal within the webpage content"},"evidence":{"type":"string","description":"Identify and extract the **most relevant information** from the content, never miss any important information, output the **full original context** of the content as far as possible, it can be more than three paragraphs.","summary":{"type":"string","description":"Organize into a concise paragraph with logical flow, prioritizing clarity and judge the contribution of the information to the goal."}}}}"""
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@register_tool('visit', allow_overwrite=True)
class Visit(BaseTool):
    """Agent tool: fetch webpage(s) through the Jina reader service and
    return an LLM-extracted, goal-focused evidence/summary report."""

    # The `description` tells the agent the functionality of this tool.
    name = 'visit'
    description = 'Visit webpage(s) and return the summary of the content.'
    # The `parameters` tell the agent what input parameters the tool has.
    parameters = {
        "type": "object",
        "properties": {
            "url": {
                "type": ["string", "array"],
                "items": {
                    "type": "string"
                },
                "minItems": 1,
                "description": "The URL(s) of the webpage(s) to visit. Can be a single URL or an array of URLs."
            },
            "goal": {
                "type": "string",
                "description": "The goal of the visit for webpage(s)."
            }
        },
        "required": ["url", "goal"]
    }

    # The `call` method is the main function of the tool.
    def call(self, params: Union[str, dict], **kwargs) -> str:
        """Visit one URL (str) or several (list) and return joined summaries.

        Args:
            params: JSON string with 'url' (string or array) and 'goal'.

        Returns:
            str: The summary report(s), '======='-joined for multiple URLs,
            or an error message on malformed input.
        """
        try:
            params = json.loads(params)
            url = params["url"]
            goal = params["goal"]
        except Exception:
            return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields"

        # Create log folder if it doesn't exist
        log_folder = "log"
        os.makedirs(log_folder, exist_ok=True)

        if isinstance(url, str):
            response = self.readpage_jina(url, goal)
        else:
            response = []
            assert isinstance(url, List)
            start_time = time.time()  # shared 15-minute budget for the batch
            for u in url:
                if time.time() - start_time > 900:
                    # Budget exhausted: emit a placeholder result instead of
                    # fetching. Bug fix: interpolate the current URL `u`; the
                    # original formatted the whole `url` list into the message.
                    cur_response = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=u, goal=goal)
                    cur_response += "Evidence in page: \n" + "The provided webpage content could not be accessed. Please check the URL or file format." + "\n\n"
                    cur_response += "Summary: \n" + "The webpage content could not be processed, and therefore, no information is available." + "\n\n"
                else:
                    try:
                        cur_response = self.readpage_jina(u, goal)
                    except Exception as e:
                        cur_response = f"Error fetching {u}: {str(e)}"
                response.append(cur_response)
            response = "\n=======\n".join(response)

        print(f'Summary Length {len(response)}; Summary Content {response}')
        return response.strip()

    def call_server(self, msgs, max_retries=2):
        """Send *msgs* to the summarizer model and return its reply text.

        The reply is expected to be JSON; when it does not parse directly,
        the outermost {...} span is extracted. Returns '' when every attempt
        fails or yields empty content.
        """
        api_key = os.environ.get("API_KEY")
        url_llm = os.environ.get("API_BASE")
        model_name = "qwen/qwen3-30b-a3b-instruct-2507"
        client = OpenAI(
            api_key=api_key,
            base_url=url_llm,
        )
        for attempt in range(max_retries):
            try:
                chat_response = client.chat.completions.create(
                    model=model_name,
                    messages=msgs,
                    temperature=0.7
                )
                content = chat_response.choices[0].message.content
                if content:
                    try:
                        json.loads(content)
                    except Exception:
                        # extract json from string
                        left = content.find('{')
                        right = content.rfind('}')
                        if left != -1 and right != -1 and left <= right:
                            content = content[left:right+1]
                    return content
            except Exception as e:
                print(e)
                if attempt == (max_retries - 1):
                    return ""
                continue
        # Bug fix: the original fell off the loop and implicitly returned
        # None when the model kept answering empty content; callers run
        # len() on the result and expect a str.
        return ""

    def jina_readpage(self, url: str) -> str:
        """
        Read webpage content using Jina service.

        Args:
            url: The URL to read

        Returns:
            str: The webpage content or error message
        """
        max_retries = 3
        timeout = 50
        # Invariant headers hoisted out of the retry loop. The original also
        # printed `headers` on every attempt, leaking the bearer token into
        # logs — that debug print is removed.
        headers = {
            "Authorization": f"Bearer {JINA_API_KEYS}",
        }

        for attempt in range(max_retries):
            try:
                response = requests.get(
                    f"https://r.jina.ai/{url}",
                    headers=headers,
                    timeout=timeout
                )
                if response.status_code == 200:
                    return response.text
                # Non-200: log the body and route into the retry path.
                print(response.text)
                raise ValueError("jina readpage error")
            except Exception:
                time.sleep(0.5)
                if attempt == max_retries - 1:
                    return "[visit] Failed to read page."

        return "[visit] Failed to read page."

    def html_readpage_jina(self, url: str) -> str:
        """Fetch *url* via jina_readpage, retrying up to 8 times until the
        result looks like real page content rather than an error marker."""
        max_attempts = 8
        for attempt in range(max_attempts):
            content = self.jina_readpage(url)
            print("jina")
            if content and not content.startswith("[visit] Failed to read page.") and content != "[visit] Empty content." and not content.startswith("[document_parser]"):
                return content
        return "[visit] Failed to read page."

    def readpage_jina(self, url: str, goal: str) -> str:
        """
        Fetch *url* with the Jina reader, then ask the summarizer model to
        extract the goal-relevant evidence and a summary.

        Args:
            url: The URL to read
            goal: The goal/purpose of reading the page

        Returns:
            str: A formatted evidence+summary report, or an error message
        """
        summary_page_func = self.call_server
        max_retries = int(os.getenv('VISIT_SERVER_MAX_RETRIES', 1))

        content = self.html_readpage_jina(url)

        if content and not content.startswith("[visit] Failed to read page.") and content != "[visit] Empty content." and not content.startswith("[document_parser]"):
            content = truncate_to_tokens(content, max_tokens=95000)
            messages = [{"role": "user", "content": EXTRACTOR_PROMPT.format(webpage_content=content, goal=goal)}]
            raw = summary_page_func(messages, max_retries=max_retries)
            summary_retries = 3
            # A reply shorter than 10 chars is treated as a failed summary:
            # shrink the page content and ask again, up to 4 total attempts.
            while len(raw) < 10 and summary_retries >= 0:
                truncate_length = int(0.7 * len(content)) if summary_retries > 0 else 25000
                status_msg = (
                    f"[visit] Summary url[{url}] "
                    f"attempt {3 - summary_retries + 1}/3, "
                    f"content length: {len(content)}, "
                    f"truncating to {truncate_length} chars"
                ) if summary_retries > 0 else (
                    f"[visit] Summary url[{url}] failed after 3 attempts, "
                    f"final truncation to 25000 chars"
                )
                print(status_msg)
                content = content[:truncate_length]
                extraction_prompt = EXTRACTOR_PROMPT.format(
                    webpage_content=content,
                    goal=goal
                )
                messages = [{"role": "user", "content": extraction_prompt}]
                raw = summary_page_func(messages, max_retries=max_retries)
                summary_retries -= 1

            # NOTE(review): starting at 2 gives exactly one re-ask before the
            # `>= 3` failure branch below — presumably intentional; confirm.
            # (A dead `parse_retry_times = 0` store was removed.)
            parse_retry_times = 2
            if isinstance(raw, str):
                raw = raw.replace("```json", "").replace("```", "").strip()
            while parse_retry_times < 3:
                try:
                    raw = json.loads(raw)
                    break
                except Exception:
                    raw = summary_page_func(messages, max_retries=max_retries)
                    parse_retry_times += 1

            if parse_retry_times >= 3:
                useful_information = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=url, goal=goal)
                useful_information += "Evidence in page: \n" + "The provided webpage content could not be accessed. Please check the URL or file format." + "\n\n"
                useful_information += "Summary: \n" + "The webpage content could not be processed, and therefore, no information is available." + "\n\n"
            else:
                useful_information = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=url, goal=goal)
                useful_information += "Evidence in page: \n" + str(raw["evidence"]) + "\n\n"
                useful_information += "Summary: \n" + str(raw["summary"]) + "\n\n"

            if len(useful_information) < 10 and summary_retries < 0:
                print("[visit] Could not generate valid summary after maximum retries")
                useful_information = "[visit] Failed to read page"

            return useful_information

        # If no valid content was obtained after all retries
        else:
            useful_information = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=url, goal=goal)
            useful_information += "Evidence in page: \n" + "The provided webpage content could not be accessed. Please check the URL or file format." + "\n\n"
            useful_information += "Summary: \n" + "The webpage content could not be processed, and therefore, no information is available." + "\n\n"
            return useful_information
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
if __name__ == "__main__":
    # Smoke test: visit a real page and print the extracted summary.
    tool = Visit()
    result = tool.call('{"url": ["https://2025.aclweb.org/"], "goal": "Find the important dates page and locate the Industry Track paper submission deadline"}')
    print(result)
|