0123zzw666 committed on
Commit
97c8e77
·
1 Parent(s): 3402b69

rushed demo

Browse files
.gitingore ADDED (note: the filename is misspelled — it should be .gitignore; Git will not apply ignore rules from a file named .gitingore)
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+
204
+ # Ruff stuff:
205
+ .ruff_cache/
206
+
207
+ # PyPI configuration file
208
+ .pypirc
209
+
210
+ # Marimo
211
+ marimo/_static/
212
+ marimo/_lsp/
213
+ __marimo__/
214
+
215
+ # Streamlit
216
+ .streamlit/secrets.toml
agents/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # coding=utf-8
agents/search_agent.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ from typing import Dict, Iterator, List, Literal, Union, Optional
3
+
4
+ from qwen_agent.agents import Assistant
5
+ from qwen_agent.llm import BaseChatModel
6
+ from qwen_agent.llm.schema import USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE,SYSTEM,ROLE
7
+ from qwen_agent.tools import BaseTool
8
+ from qwen_agent.log import logger
9
+
10
+ class SearchAgent(Assistant):
11
+
12
+ def __init__(self,
13
+ function_list: Optional[List[Union[str, Dict, BaseTool]]] = None,
14
+ llm: Optional[Union[Dict, BaseChatModel]] = None,
15
+ system_message: Optional[str] = DEFAULT_SYSTEM_MESSAGE,
16
+ name: Optional[str] = None,
17
+ description: Optional[str] = None,
18
+ files: Optional[List[str]] = None,
19
+ rag_cfg: Optional[Dict] = None,
20
+ extra: Optional[Dict] = {},
21
+ custom_user_prompt: Optional[str] = '',
22
+ make_system_prompt = None,
23
+ addtional_agent = None):
24
+ super().__init__(function_list=function_list,
25
+ llm=llm,
26
+ system_message=system_message,
27
+ name=name,
28
+ description=description,
29
+ files=files,
30
+ rag_cfg=rag_cfg)
31
+ self.extra = extra
32
+ self.custom_user_prompt = custom_user_prompt
33
+ self.make_system_prompt = make_system_prompt
34
+ self.addtional_agent = addtional_agent
35
+
36
+ def insert_in_custom_user_prompt(self, messages: List[Message]) -> List[Message]:
37
+ for message in messages:
38
+ if message.role == USER:
39
+ message.content[0].text = self.custom_user_prompt + message.content[0].text
40
+ break
41
+ return messages
42
+ def _run(self,
43
+ messages: List[Message],
44
+ lang: Literal['en', 'zh'] = 'zh',
45
+ knowledge: str = '',
46
+ **kwargs) -> Iterator[List[Message]]:
47
+ messages = self._prepend_knowledge_prompt(messages=messages, lang=lang, knowledge=knowledge, **kwargs)
48
+ messages = copy.deepcopy(messages)
49
+ self.insert_in_custom_user_prompt(messages=messages)
50
+ if self.make_system_prompt:
51
+ if not messages or messages[0][ROLE] != SYSTEM:
52
+ messages.insert(0, Message(role=SYSTEM, content=self.make_system_prompt()))
53
+ for msg in messages:
54
+ if isinstance(msg.content, list):
55
+ assert len(msg.content) == 1
56
+ msg.content = msg.content[0].text
57
+ if msg.role == USER:
58
+ msg.content = msg.content.strip()
59
+
60
+ reasoning = self.extra.get('reasoning', True)
61
+ num_llm_calls_available = self.extra.get('max_llm_calls', 20)
62
+ response = []
63
+ while True and num_llm_calls_available > 0:
64
+ num_llm_calls_available -= 1
65
+
66
+ extra_generate_cfg = {'lang': lang}
67
+ if kwargs.get('seed') is not None:
68
+ extra_generate_cfg['seed'] = kwargs['seed']
69
+ output_stream = self._call_llm(messages=messages,
70
+ functions=[func.function for func in self.function_map.values()],
71
+ extra_generate_cfg=extra_generate_cfg)
72
+ output: List[Message] = []
73
+ for output in output_stream:
74
+ if output:
75
+ first_msg = output[0]
76
+ if reasoning and isinstance(first_msg.content, str):
77
+ first_msg.content = "<think>\n" + first_msg.content.strip()
78
+ yield response + output
79
+ if output:
80
+ response.extend(output)
81
+ messages.extend(output)
82
+ used_any_tool = False
83
+
84
+ for out in output:
85
+ print("out:\n",out)
86
+ print("output:\n",output)
87
+ # exit()
88
+ use_tool, tool_name, tool_args, _ = self._detect_tool(out)
89
+ logger.info(f"{self.name} use_tool: {use_tool}, tool_name: {tool_name}, tool_args: {tool_args}")
90
+ if use_tool:
91
+ tool_result = self._call_tool(tool_name, tool_args, messages=messages, **kwargs)
92
+ fn_msg = Message(
93
+ role=FUNCTION,
94
+ name=tool_name,
95
+ content=tool_result,
96
+ )
97
+ messages.append(fn_msg)
98
+ response.append(fn_msg)
99
+ yield response
100
+ used_any_tool = True
101
+ if not used_any_tool:
102
+ logger.info(f'{self.name} not used any tool, skip out')
103
+ break
104
+ yield response
105
+
106
+ if self.addtional_agent:
107
+ new_messages = copy.deepcopy(messages)
108
+ new_messages.pop()
109
+ new_response = copy.deepcopy(response)
110
+ new_response.pop()
111
+ if new_messages[0][ROLE] == SYSTEM:
112
+ # Add the system instruction to the agent
113
+ new_messages[0].content = self.addtional_agent.make_system_prompt()
114
+ for rsp in self.addtional_agent._run(messages=new_messages, **kwargs):
115
+ yield new_response + rsp
116
+
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """An image generation agent implemented by assistant with qwq"""
2
+
3
+ import os
4
+
5
+ from agents.search_agent import SearchAgent
6
+ from llm.oai import TextChatAtOAI
7
+ from gui.web_ui import WebUI
8
+ import datetime
9
+
10
+ DR_MODEL_NAME = os.getenv("DR_MODEL_NAME", "")
11
+ DR_MODEL_SERVER = os.getenv("DR_MODEL_SERVER", "")
12
+ DR_MODEL_API_KEY = os.getenv("DR_MODEL_API_KEY", "")
13
+
14
+ ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource')
15
+
16
+ def today_date():
17
+ return datetime.date.today().strftime("%Y-%m-%d")
18
+
19
+ def init_dev_search_agent_service(name: str = 'SEARCH', port: int = 8002, desc: str = '初版', reasoning: bool = True, max_llm_calls: int = 20, tools = ['search', 'visit'], addtional_agent = None):
20
+ llm_cfg = TextChatAtOAI({
21
+ 'model': DR_MODEL_NAME,
22
+ 'model_type': 'oai',
23
+ 'model_server': DR_MODEL_SERVER,
24
+ 'api_key': DR_MODEL_API_KEY,
25
+ 'generate_cfg': {
26
+ 'fncall_prompt_type': 'nous',
27
+ 'temperature': 0.85,
28
+ 'top_p': 0.95,
29
+ 'top_k': -1,
30
+ 'presence_penalty': 1.1,
31
+ 'max_tokens': 32768,
32
+ 'stream_options': {
33
+ 'include_usage': True,
34
+ },
35
+ 'timeout': 3000
36
+ },
37
+ })
38
+ def make_system_prompt():
39
+ system_message = "You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response. When you have gathered sufficient information and are ready to provide the definitive response, you must enclose the entire final answer within <answer></answer> tags.\n\n"
40
+ return system_message
41
+
42
+ bot = SearchAgent(
43
+ llm=llm_cfg,
44
+ function_list=tools,
45
+ system_message="",
46
+ name=f'Tongyi DeepResearch',
47
+ description=f"I am Tongyi DeepResearch, a leading open-source Deep Research Agent, welcome to try!",
48
+ extra={
49
+ 'reasoning': reasoning,
50
+ 'max_llm_calls': max_llm_calls,
51
+ },
52
+ addtional_agent = addtional_agent,
53
+ make_system_prompt = make_system_prompt,
54
+ custom_user_prompt=''''''
55
+ )
56
+
57
+ return bot
58
+
59
+
60
+
61
+ def app_gui():
62
+ agents = []
63
+ for name, port, desc, reasoning, max_llm_calls, tools in [
64
+ ('Tongyi DeepResearch', 8004, '...', True, 50, ['search', 'visit', 'google_scholar', 'PythonInterpreter']),
65
+ ]:
66
+ search_bot_dev = init_dev_search_agent_service(
67
+ name=name,
68
+ port=port,
69
+ desc=desc,
70
+ reasoning=reasoning,
71
+ max_llm_calls=max_llm_calls,
72
+ tools=tools,
73
+ )
74
+ agents.append(search_bot_dev)
75
+
76
+
77
+ chatbot_config = {
78
+ 'prompt.suggestions': [
79
+ '中国国足的一场比赛,国足首先失球,由一名宿姓球员扳平了。后来还发生了点球。比分最终是平均。有可能是哪几场比赛',
80
+ 'When is the paper submission deadline for the ACL 2025 Industry Track, and what is the venue address for the conference?',
81
+ 'On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?',
82
+ '有一位华语娱乐圈的重要人物,与其兄弟共同创作并主演了一部在中国南方沿海城市上映的喜剧电影,这部电影成为该类型的开山之作。与此同时,这位人物还凭借两首极具影响力的本地方言歌曲在音乐领域取得突破,极大推动了本地方言流行音乐的发展。请问,这一切发生在20世纪70年代的哪一年?',
83
+ '有一首欧洲国家的国歌自20世纪50年代初被正式采用,并只选用了其中的一部分歌词。同一年,一位中国文艺界的重要人物创作了一部以民间传说为基础的戏曲作品,并在当年担任了多个文化领域的重要职务。请问这位中国文艺界人物是谁?',
84
+ '有一部英国文坛上极具影响力的长篇诗歌,由一位16世纪末的著名诗人创作,这位诗人在16世纪90年代末于伦敦去世后,被安葬在一个象征英国文学传统的著名场所,与多位文学巨匠为邻。请问,这位诗人安息之地是哪里?',
85
+ '出一份三天两夜的端午北京旅游攻略',
86
+ '对比下最新小米汽车和保时捷性能参数,然后根据最终的结果分析下性价比最高的车型,并给出杭州的供应商',
87
+ '量子计算突破对现有加密体系的威胁',
88
+ '人工智能伦理框架的全球差异',
89
+ '老龄化社会对全球养老金体系的长期冲击',
90
+ '全球碳中和目标下的能源转型路径差异',
91
+ '塑料污染在海洋食物链中的累积效应',
92
+ 'AI生成内容(如AI绘画)对传统艺术价值的重构'
93
+ ],
94
+ 'user.name': 'User',
95
+ 'verbose': True
96
+ }
97
+ messages = {'role': 'user', 'content': '介绍下你自己'}
98
+ WebUI(
99
+ agent=agents,
100
+ chatbot_config=chatbot_config,
101
+ ).run(
102
+ message=messages,
103
+ share=True,
104
+ server_name='127.0.0.1',
105
+ server_port=7860,
106
+ concurrency_limit=20,
107
+ enable_mention=False,
108
+ )
109
+
110
+
111
+ if __name__ == '__main__':
112
+ app_gui()
gui/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # coding=utf-8
gui/html_decorate.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from markdown_it import MarkdownIt
2
+ import html
3
+ import re
4
+
5
+ def get_style_css(style_name):
6
+ """
7
+ 根据选择的样式名称获取对应的CSS样式文件
8
+
9
+ Args:
10
+ style_name (str): 样式名称,可选值为"Default"、"MBE"、"Glassmorphism"、"Apple"
11
+
12
+ Returns:
13
+ str: CSS样式内容
14
+ """
15
+
16
+ if style_name == "Default":
17
+ return open("assets/demo.css", "r").read()
18
+ elif style_name == "1":
19
+ return open("assets/demo.1.css", "r").read()
20
+ elif style_name == "MBE":
21
+ return open("assets/demo_mbe.css", "r").read()
22
+ elif style_name == "Glassmorphism":
23
+ return open("assets/demo_glassmorphism.css", "r").read()
24
+ elif style_name == "Apple":
25
+ return open("assets/demo_apple.css", "r").read()
26
+ elif style_name == "Paper":
27
+ return open("assets/demo_paper.css", "r").read()
28
+ else:
29
+ return open("assets/demo.css", "r").read()
30
+
31
+ def decorate_writing(writing_result, style="Default"):
32
+ if not writing_result:
33
+ return writing_result
34
+
35
+ cite_pattern = r'<qwen:cite\s+url=["\']([^"\']+)["\'](?:\s+[^>]*)?>(.*?)</qwen:cite>'
36
+ takeaway_pattern = r'<qwen:takeaway(?:\s+class=["\'](?P<class>[^"\']+)["\'])?>(?P<content>[^<]*)</qwen:takeaway>'
37
+ citation_map = {}
38
+
39
+ def replace_cite(match):
40
+ nonlocal citation_map
41
+ urls = match.group(1).split(',')
42
+ content = match.group(2)
43
+ citation_html = []
44
+
45
+ for url in urls:
46
+ if url not in citation_map:
47
+ citation_map[url] = len(citation_map) + 1
48
+ current_index = citation_map[url]
49
+ citation_html.append((f'<a href="{url}" title="点击查看引用来源: {url}">{current_index}</a>', current_index))
50
+
51
+ citation_html = sorted(citation_html, key=lambda x: x[1])
52
+ citation_html = ', '.join([x[0] for x in citation_html])
53
+
54
+ cite_html = f'{content}<sup class="citation">[{citation_html}]</sup>'
55
+ return cite_html
56
+
57
+ decorated_result = re.sub(cite_pattern, replace_cite, writing_result, flags=re.S)
58
+
59
+ def replace_takeaway(match):
60
+ class_attr = match.group('class')
61
+ content = match.group('content')
62
+
63
+ if class_attr:
64
+ return f'<div class="takeaway {class_attr}">{content}</div>'
65
+ else:
66
+ return f'<div class="takeaway">{content}</div>'
67
+
68
+ decorated_result = re.sub(takeaway_pattern, replace_takeaway, decorated_result, flags=re.S)
69
+
70
+ mermaid_pattern = r'```mermaid\n(.*?)\n```'
71
+ def decorate_mermaid(match):
72
+ return f"""
73
+ <pre class="mermaid">
74
+ {match.group(1)}
75
+ </pre>
76
+ """
77
+ decorated_result = re.sub(mermaid_pattern, decorate_mermaid, decorated_result, flags=re.S)
78
+
79
+ echarts_pattern = r'```echarts\n(.*?)\n```'
80
+ echarts_index = 0
81
+
82
+ def replace_echarts(match):
83
+ """
84
+ 将echarts代码块转换为HTML和JavaScript
85
+
86
+ Args:
87
+ match: 正则表达式匹配对象
88
+
89
+ Returns:
90
+ str: 包含HTML和JavaScript的echarts图表代码
91
+ """
92
+ nonlocal echarts_index
93
+ echarts_code = match.group(1)
94
+ echarts_id = f'echarts-container-{echarts_index}'
95
+ echarts_index += 1
96
+
97
+ replace_code = f"""
98
+ <div class="echarts-container loading" id="{echarts_id}">Echarts Rendering...</div>
99
+ <script>
100
+ var chartDom = document.getElementById('{echarts_id}');
101
+ var myChart = echarts.init(chartDom);
102
+ var option;
103
+ option = {echarts_code};
104
+ myChart.setOption(option);
105
+ chartDom.classList.remove('loading');
106
+ </script>
107
+ """
108
+ return replace_code
109
+
110
+ decorated_result = re.sub(echarts_pattern, replace_echarts, decorated_result, flags=re.S)
111
+
112
+ md = MarkdownIt()
113
+ body = md.render(decorated_result)
114
+
115
+ selected_css = get_style_css(style)
116
+
117
+ html_content = """
118
+ <html>
119
+ <head>
120
+ <!-- KaTeX for mathematical formulas -->
121
+ <link rel="stylesheet" href="https://s4.zstatic.net/npm/[email protected]/dist/katex.min.css">
122
+ <script src="https://s4.zstatic.net/npm/[email protected]/dist/katex.min.js"></script>
123
+ <script src="https://s4.zstatic.net/npm/[email protected]/dist/contrib/auto-render.min.js"></script>
124
+ <script src="https://s4.zstatic.net/npm/[email protected]/dist/echarts.min.js"></script>
125
+ <style>
126
+ """ + selected_css + """
127
+ </style>
128
+ </head>
129
+ <body>
130
+ <div class="generated-content">
131
+ """ + body + """</div>
132
+ <script type="module">
133
+ import mermaid from 'https://unpkg.com/[email protected]/dist/mermaid.esm.min.mjs';
134
+ </script>
135
+ <script>
136
+ document.addEventListener('DOMContentLoaded', function() {
137
+ renderMathInElement(document.body);
138
+ });
139
+ </script>
140
+ </body>
141
+ </html>
142
+ """
143
+ # 转义HTML内容以便在iframe中安全使用
144
+ # 这是必要的,因为HTML内容包含引号和其他特殊字符
145
+ escaped_html_content = html.escape(html_content)
146
+
147
+ # 定义iframe的样式属性
148
+ iframe_style = "width: 100%; height: 1024px; transform-origin: top left; border-color: lightgrey; border-width: 1px; border-radius: 10px;"
149
+
150
+ # 创建最终的iframe HTML,通过srcdoc属性注入转义后的HTML内容
151
+ # 设置loading="eager"和importance="high"以优先加载
152
+ # pointer-events="none"防止用户与iframe内容交互
153
+ iframe_content = f'<iframe id="ai-ui-iframe" loading="eager" importance="high" pointer-events="none" style="{iframe_style}" srcdoc="{escaped_html_content}"></iframe>'
154
+
155
+ # 返回最终的iframe HTML内容
156
+ iframe_content = re.sub(r'\n\s*\n', '\n', iframe_content)
157
+ return iframe_content
gui/web_ui.py ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pprint
3
+ import re
4
+ from typing import List, Optional, Union
5
+
6
+ from qwen_agent import Agent, MultiAgentHub
7
+ from qwen_agent.agents.user_agent import PENDING_USER_INPUT
8
+ from qwen_agent.gui.gradio_utils import format_cover_html
9
+ from qwen_agent.gui.utils import convert_fncall_to_text, convert_history_to_chatbot, get_avatar_image
10
+ from qwen_agent.llm.schema import AUDIO, CONTENT, FILE, IMAGE, NAME, ROLE, USER, VIDEO, Message
11
+ from qwen_agent.log import logger
12
+ from qwen_agent.utils.utils import print_traceback
13
+
14
+
15
+ class WebUI:
16
+ """A Common chatbot application for agent."""
17
+
18
+ def __init__(self, agent: Union[Agent, MultiAgentHub, List[Agent]], chatbot_config: Optional[dict] = None):
19
+ """
20
+ Initialization the chatbot.
21
+
22
+ Args:
23
+ agent: The agent or a list of agents,
24
+ supports various types of agents such as Assistant, GroupChat, Router, etc.
25
+ chatbot_config: The chatbot configuration.
26
+ Set the configuration as {'user.name': '', 'user.avatar': '', 'agent.avatar': '', 'input.placeholder': '', 'prompt.suggestions': []}.
27
+ """
28
+ chatbot_config = chatbot_config or {}
29
+
30
+ if isinstance(agent, MultiAgentHub):
31
+ self.agent_list = [agent for agent in agent.nonuser_agents]
32
+ self.agent_hub = agent
33
+ elif isinstance(agent, list):
34
+ self.agent_list = agent
35
+ self.agent_hub = None
36
+ else:
37
+ self.agent_list = [agent]
38
+ self.agent_hub = None
39
+
40
+ user_name = chatbot_config.get('user.name', 'user')
41
+ self.user_config = {
42
+ 'name': user_name,
43
+ 'avatar': chatbot_config.get(
44
+ 'user.avatar',
45
+ get_avatar_image(user_name),
46
+ ),
47
+ }
48
+
49
+ self.agent_config_list = [{
50
+ 'name': agent.name,
51
+ 'avatar': chatbot_config.get(
52
+ 'agent.avatar',
53
+ get_avatar_image(agent.name),
54
+ ),
55
+ 'description': agent.description or "I'm a helpful assistant.",
56
+ } for agent in self.agent_list]
57
+
58
+ self.input_placeholder = chatbot_config.get('input.placeholder', '请输入需要分析的问题,尽管交给我吧~')
59
+ self.prompt_suggestions = chatbot_config.get('prompt.suggestions', [])
60
+ self.verbose = chatbot_config.get('verbose', False)
61
+
62
+ """
63
+ Run the chatbot.
64
+
65
+ Args:
66
+ messages: The chat history.
67
+ """
68
+
69
+ def run(self,
70
+ messages: List[Message] = None,
71
+ share: bool = False,
72
+ server_name: str = None,
73
+ server_port: int = None,
74
+ concurrency_limit: int = 10,
75
+ enable_mention: bool = False,
76
+ **kwargs):
77
+ self.run_kwargs = kwargs
78
+
79
+ from qwen_agent.gui.gradio_dep import gr, mgr, ms
80
+
81
+ customTheme = gr.themes.Default(
82
+ primary_hue=gr.themes.utils.colors.blue,
83
+ radius_size=gr.themes.utils.sizes.radius_none,
84
+ )
85
+
86
+ if messages is not None:
87
+ logger.info('web-ui messages.size %s' % len(messages))
88
+
89
+ with gr.Blocks(
90
+ css=os.path.join(os.path.dirname(__file__), 'assets/appBot.css'),
91
+ theme=customTheme,
92
+ ) as demo:
93
+ history = gr.State([])
94
+ with ms.Application():
95
+ with gr.Row(elem_classes='container'):
96
+ with gr.Column(scale=4):
97
+ chatbot = mgr.Chatbot(value=convert_history_to_chatbot(messages=messages),
98
+ avatar_images=[
99
+ self.user_config,
100
+ self.agent_config_list,
101
+ ],
102
+ height=850,
103
+ avatar_image_width=80,
104
+ flushing=False,
105
+ show_copy_button=True,
106
+ latex_delimiters=[{
107
+ 'left': '\\(',
108
+ 'right': '\\)',
109
+ 'display': True
110
+ }, {
111
+ 'left': '\\begin{equation}',
112
+ 'right': '\\end{equation}',
113
+ 'display': True
114
+ }, {
115
+ 'left': '\\begin{align}',
116
+ 'right': '\\end{align}',
117
+ 'display': True
118
+ }, {
119
+ 'left': '\\begin{alignat}',
120
+ 'right': '\\end{alignat}',
121
+ 'display': True
122
+ }, {
123
+ 'left': '\\begin{gather}',
124
+ 'right': '\\end{gather}',
125
+ 'display': True
126
+ }, {
127
+ 'left': '\\begin{CD}',
128
+ 'right': '\\end{CD}',
129
+ 'display': True
130
+ }, {
131
+ 'left': '\\[',
132
+ 'right': '\\]',
133
+ 'display': True
134
+ }])
135
+
136
+ input = mgr.MultimodalInput(
137
+ placeholder=self.input_placeholder,
138
+ show_copy_button=True,
139
+ )
140
+
141
+ with gr.Column(scale=1):
142
+ if len(self.agent_list) > 1:
143
+ agent_selector = gr.Dropdown(
144
+ [(agent.name, i) for i, agent in enumerate(self.agent_list)],
145
+ label='Agents',
146
+ info='请选择一个 Agent',
147
+ value=0,
148
+ interactive=True,
149
+ )
150
+
151
+ agent_info_block = self._create_agent_info_block()
152
+
153
+ agent_plugins_block = self._create_agent_plugins_block()
154
+
155
+ if self.prompt_suggestions:
156
+ gr.Examples(
157
+ label='推荐对话',
158
+ examples=self.prompt_suggestions,
159
+ inputs=[input],
160
+ )
161
+
162
+ if len(self.agent_list) > 1:
163
+ agent_selector.change(
164
+ fn=self.change_agent,
165
+ inputs=[agent_selector],
166
+ outputs=[agent_selector, agent_info_block, agent_plugins_block],
167
+ queue=False,
168
+ )
169
+
170
+ input.change(
171
+ fn=self.change_text,
172
+ inputs=[input],
173
+ )
174
+ input_promise = input.submit(
175
+ fn=self.add_text,
176
+ inputs=[input, chatbot, history],
177
+ outputs=[input, chatbot, history],
178
+ queue=True,
179
+ concurrency_limit=concurrency_limit,
180
+ )
181
+
182
+ if len(self.agent_list) > 1:
183
+ if enable_mention:
184
+ input_promise = input_promise.then(
185
+ self.add_mention,
186
+ [chatbot, agent_selector],
187
+ [chatbot, agent_selector],
188
+ ).then(
189
+ self.agent_run,
190
+ [chatbot, history, agent_selector],
191
+ [chatbot, history, agent_selector],
192
+ )
193
+ else:
194
+ input_promise = input_promise.then(
195
+ self.agent_run,
196
+ [chatbot, history, agent_selector],
197
+ [chatbot, history, agent_selector],
198
+ )
199
+ else:
200
+ input_promise = input_promise.then(
201
+ self.agent_run,
202
+ [chatbot, history],
203
+ [chatbot, history],
204
+ )
205
+
206
+ input_promise.then(self.flushed, None, [input])
207
+
208
+ demo.load(None)
209
+
210
+ demo.queue(default_concurrency_limit=concurrency_limit).launch(share=share,
211
+ server_name=server_name,
212
+ server_port=server_port)
213
+
214
+ def change_agent(self, agent_selector):
215
+ yield agent_selector, self._create_agent_info_block(agent_selector), self._create_agent_plugins_block(
216
+ agent_selector)
217
+
218
+ def change_text(self, _input):
219
+ logger.info(f'agent_run change_text input:{_input.text}')
220
+
221
+ def add_text(self, _input, _chatbot, _history):
222
+ _history.append({
223
+ ROLE: USER,
224
+ CONTENT: [{
225
+ 'text': _input.text
226
+ }],
227
+ })
228
+
229
+ if self.user_config[NAME]:
230
+ _history[-1][NAME] = self.user_config[NAME]
231
+
232
+ logger.info('agent_run add_text input:\n' + pprint.pformat(_history, indent=2))
233
+
234
+ if _input.files:
235
+ for file in _input.files:
236
+ if file.mime_type.startswith('image/'):
237
+ _history[-1][CONTENT].append({IMAGE: 'file://' + file.path})
238
+ elif file.mime_type.startswith('audio/'):
239
+ _history[-1][CONTENT].append({AUDIO: 'file://' + file.path})
240
+ elif file.mime_type.startswith('video/'):
241
+ _history[-1][CONTENT].append({VIDEO: 'file://' + file.path})
242
+ else:
243
+ _history[-1][CONTENT].append({FILE: file.path})
244
+
245
+ _chatbot.append([_input, None])
246
+
247
+ from qwen_agent.gui.gradio_dep import gr
248
+ yield gr.update(interactive=False, value=''), _chatbot, _history
249
+
250
+ def add_mention(self, _chatbot, _agent_selector):
251
+ if len(self.agent_list) == 1:
252
+ yield _chatbot, _agent_selector
253
+
254
+ query = _chatbot[-1][0].text
255
+ match = re.search(r'@\w+\b', query)
256
+ if match:
257
+ _agent_selector = self._get_agent_index_by_name(match.group()[1:])
258
+
259
+ agent_name = self.agent_list[_agent_selector].name
260
+
261
+ if ('@' + agent_name) not in query and self.agent_hub is None:
262
+ _chatbot[-1][0].text = '@' + agent_name + ' ' + query
263
+
264
+ yield _chatbot, _agent_selector
265
+
266
+ def agent_run(self, _chatbot, _history, _agent_selector=None):
267
+ # TODO 仅保持任务的单论对话
268
+ if self.verbose:
269
+ logger.info('agent_run input[all]:\n' + pprint.pformat(_history, indent=2))
270
+ _history = _history[-1:]
271
+ if self.verbose:
272
+ logger.info('agent_run input[new]:\n' + pprint.pformat(_history, indent=2))
273
+
274
+ if len(_history) == 0:
275
+ if _agent_selector is not None:
276
+ yield _chatbot, _history, _agent_selector
277
+ else:
278
+ yield _chatbot, _history
279
+ logger.info('agent_run input with empty input, do nothing.')
280
+ return
281
+
282
+ num_input_bubbles = len(_chatbot) - 1
283
+ num_output_bubbles = 1
284
+ _chatbot[-1][1] = [None for _ in range(len(self.agent_list))]
285
+
286
+ logger.info('agent_run input:_agent_selector %s' % _agent_selector)
287
+ agent_runner = self.agent_list[_agent_selector or 0]
288
+ if self.agent_hub:
289
+ agent_runner = self.agent_hub
290
+ agent_runner.function_map
291
+
292
+ responses = []
293
+ for responses in agent_runner.run(_history, **self.run_kwargs):
294
+ if not responses:
295
+ continue
296
+ if responses[-1][CONTENT] == PENDING_USER_INPUT:
297
+ logger.info('Interrupted. Waiting for user input!')
298
+ break
299
+
300
+ display_responses = convert_fncall_to_text(responses)
301
+ if not display_responses:
302
+ continue
303
+ if display_responses[-1][CONTENT] is None:
304
+ continue
305
+
306
+ while len(display_responses) > num_output_bubbles:
307
+ # Create a new chat bubble
308
+ _chatbot.append([None, None])
309
+ _chatbot[-1][1] = [None for _ in range(len(self.agent_list))]
310
+ num_output_bubbles += 1
311
+
312
+ assert num_output_bubbles == len(display_responses)
313
+ assert num_input_bubbles + num_output_bubbles == len(_chatbot)
314
+
315
+ for i, rsp in enumerate(display_responses):
316
+ agent_index = self._get_agent_index_by_name(rsp[NAME])
317
+ _chatbot[num_input_bubbles + i][1][agent_index] = rsp[CONTENT]
318
+
319
+ if len(self.agent_list) > 1:
320
+ _agent_selector = agent_index
321
+
322
+ if _agent_selector is not None:
323
+ yield _chatbot, _history, _agent_selector
324
+ else:
325
+ yield _chatbot, _history
326
+
327
+ if responses:
328
+ _history.extend([res for res in responses if res[CONTENT] != PENDING_USER_INPUT])
329
+
330
+ if _agent_selector is not None:
331
+ yield _chatbot, _history, _agent_selector
332
+ else:
333
+ yield _chatbot, _history
334
+
335
+ if self.verbose:
336
+ logger.info('agent_run response:\n' + pprint.pformat(responses, indent=2))
337
+
338
+ def flushed(self):
339
+ logger.info('agent_run flushed')
340
+ from qwen_agent.gui.gradio_dep import gr
341
+ return gr.update(interactive=True, value='')
342
+
343
+ def _get_agent_index_by_name(self, agent_name):
344
+ if agent_name is None:
345
+ return 0
346
+
347
+ try:
348
+ agent_name = agent_name.strip()
349
+ for i, agent in enumerate(self.agent_list):
350
+ if agent.name == agent_name:
351
+ return i
352
+ return 0
353
+ except Exception:
354
+ print_traceback()
355
+ return 0
356
+
357
+ def _create_agent_info_block(self, agent_index=0):
358
+ from qwen_agent.gui.gradio_dep import gr
359
+
360
+ agent_config_interactive = self.agent_config_list[agent_index]
361
+
362
+ return gr.HTML(
363
+ format_cover_html(
364
+ bot_name=agent_config_interactive['name'],
365
+ bot_description=agent_config_interactive['description'],
366
+ bot_avatar=agent_config_interactive['avatar'],
367
+ ))
368
+
369
+ def _create_agent_plugins_block(self, agent_index=0):
370
+ from qwen_agent.gui.gradio_dep import gr
371
+
372
+ agent_interactive = self.agent_list[agent_index]
373
+
374
+ if agent_interactive.function_map:
375
+ capabilities = [key for key in agent_interactive.function_map.keys()]
376
+ return gr.CheckboxGroup(
377
+ label='插件',
378
+ value=capabilities,
379
+ choices=capabilities,
380
+ interactive=False,
381
+ )
382
+
383
+ else:
384
+ return gr.CheckboxGroup(
385
+ label='插件',
386
+ value=[],
387
+ choices=[],
388
+ interactive=False,
389
+ )
llm/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # coding=utf-8
llm/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes). View file
 
llm/__pycache__/oai.cpython-310.pyc ADDED
Binary file (9.96 kB). View file
 
llm/__pycache__/qwen_dashscope.cpython-310.pyc ADDED
Binary file (4.6 kB). View file
 
llm/oai.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import json
3
+ import logging
4
+ import os
5
+ from http import HTTPStatus
6
+ from pprint import pformat
7
+ from typing import Dict, Iterator, List, Optional, Literal, Union
8
+
9
+ import openai
10
+
11
+ if openai.__version__.startswith('0.'):
12
+ from openai.error import OpenAIError # noqa
13
+ else:
14
+ from openai import OpenAIError
15
+
16
+ from qwen_agent.llm.base import ModelServiceError, register_llm
17
+ from qwen_agent.llm.function_calling import BaseFnCallModel, simulate_response_completion_with_chat
18
+ from qwen_agent.llm.schema import ASSISTANT, Message, FunctionCall
19
+ from qwen_agent.log import logger
20
+ import datetime
21
+ def today_date():
22
+ return datetime.date.today().strftime("%Y-%m-%d")
23
+
24
+
25
# System prompt injected into the first message by convert_messages_to_dicts.
# NOTE: this text is runtime behavior (the model reads it verbatim); the tool
# signatures listed inside must stay in sync with the actual tool registry.
SYSTEM_PROMPT = """You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response. When you have gathered sufficient information and are ready to provide the definitive response, you must enclose the entire final answer within <answer></answer> tags.

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "search", "description": "Perform Google web searches then returns a string of the top search results. Accepts multiple queries.", "parameters": {"type": "object", "properties": {"query": {"type": "array", "items": {"type": "string", "description": "The search query."}, "minItems": 1, "description": "The list of search queries."}}, "required": ["query"]}}}
{"type": "function", "function": {"name": "visit", "description": "Visit webpage(s) and return the summary of the content.", "parameters": {"type": "object", "properties": {"url": {"type": "array", "items": {"type": "string"}, "description": "The URL(s) of the webpage(s) to visit. Can be a single URL or an array of URLs."}, "goal": {"type": "string", "description": "The specific information goal for visiting webpage(s)."}}, "required": ["url", "goal"]}}}
{"type": "function", "function": {"name": "PythonInterpreter", "description": "Executes Python code in a sandboxed environment. To use this tool, you must follow this format:
1. The 'arguments' JSON object must be empty: {}.
2. The Python code to be executed must be placed immediately after the JSON block, enclosed within <code> and </code> tags.

IMPORTANT: Any output you want to see MUST be printed to standard output using the print() function.

Example of a correct call:
<tool_call>
{"name": "PythonInterpreter", "arguments": {}}
<code>
import numpy as np
# Your code here
print(f"The result is: {np.mean([1,2,3])}")
</code>
</tool_call>", "parameters": {"type": "object", "properties": {}, "required": []}}}
{"type": "function", "function": {"name": "google_scholar", "description": "Leverage Google Scholar to retrieve relevant information from academic publications. Accepts multiple queries. This tool will also return results from google search", "parameters": {"type": "object", "properties": {"query": {"type": "array", "items": {"type": "string", "description": "The search query."}, "minItems": 1, "description": "The list of search queries for Google Scholar."}}, "required": ["query"]}}}
{"type": "function", "function": {"name": "parse_file", "description": "This is a tool that can be used to parse multiple user uploaded local files such as PDF, DOCX, PPTX, TXT, CSV, XLSX, DOC, ZIP, MP4, MP3.", "parameters": {"type": "object", "properties": {"files": {"type": "array", "items": {"type": "string"}, "description": "The file name of the user uploaded local files to be parsed."}}, "required": ["files"]}}}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>

"""
60
+
61
+ @register_llm('oai')
62
+ class TextChatAtOAI(BaseFnCallModel):
63
+
64
    def __init__(self, cfg: Optional[Dict] = None):
        """Configure an OpenAI-compatible chat backend.

        Resolves the API base URL from ``cfg`` (``api_base`` / ``base_url`` /
        ``model_server``, first non-empty wins) and the API key from ``cfg``
        or the ``OPENAI_API_KEY`` environment variable, then binds
        version-appropriate completion callables on ``self``.

        Args:
            cfg: Optional model-service configuration dict.
        """
        super().__init__(cfg)
        # Fall back to a default model when the base class resolved none.
        self.model = self.model or 'gpt-4o-mini'
        cfg = cfg or {}

        # Accept any of three equivalent config keys for the endpoint.
        api_base = cfg.get('api_base')
        api_base = api_base or cfg.get('base_url')
        api_base = api_base or cfg.get('model_server')
        api_base = (api_base or '').strip()

        # 'EMPTY' is a placeholder accepted by local OpenAI-compatible servers.
        api_key = cfg.get('api_key')
        api_key = api_key or os.getenv('OPENAI_API_KEY')
        api_key = (api_key or 'EMPTY').strip()

        if openai.__version__.startswith('0.'):
            # Legacy v0 SDK: credentials are module-level globals.
            if api_base:
                openai.api_base = api_base
            if api_key:
                openai.api_key = api_key
            self._complete_create = openai.Completion.create
            self._chat_complete_create = openai.ChatCompletion.create
        else:
            # v1 SDK: credentials are passed to a client object per call.
            api_kwargs = {}
            if api_base:
                api_kwargs['base_url'] = api_base
            if api_key:
                api_kwargs['api_key'] = api_key

            def _chat_complete_create(*args, **kwargs):
                # OpenAI API v1 does not allow the following args, must pass by extra_body
                extra_params = ['top_k', 'repetition_penalty']
                if any((k in kwargs) for k in extra_params):
                    kwargs['extra_body'] = copy.deepcopy(kwargs.get('extra_body', {}))
                    for k in extra_params:
                        if k in kwargs:
                            kwargs['extra_body'][k] = kwargs.pop(k)
                if 'request_timeout' in kwargs:
                    # v1 renamed request_timeout -> timeout.
                    kwargs['timeout'] = kwargs.pop('request_timeout')

                # A fresh client per call; api_kwargs is captured by closure.
                client = openai.OpenAI(**api_kwargs)
                return client.chat.completions.create(*args, **kwargs)

            def _complete_create(*args, **kwargs):
                # OpenAI API v1 does not allow the following args, must pass by extra_body
                extra_params = ['top_k', 'repetition_penalty']
                if any((k in kwargs) for k in extra_params):
                    kwargs['extra_body'] = copy.deepcopy(kwargs.get('extra_body', {}))
                    for k in extra_params:
                        if k in kwargs:
                            kwargs['extra_body'][k] = kwargs.pop(k)
                if 'request_timeout' in kwargs:
                    kwargs['timeout'] = kwargs.pop('request_timeout')

                client = openai.OpenAI(**api_kwargs)
                return client.completions.create(*args, **kwargs)

            self._complete_create = _complete_create
            self._chat_complete_create = _chat_complete_create
122
+
123
    def _chat_stream(
        self,
        messages: List[Message],
        delta_stream: bool,
        generate_cfg: dict,
    ) -> Iterator[List[Message]]:
        """Stream chat completions from the OpenAI-compatible backend.

        Args:
            messages: Conversation history to send.
            delta_stream: When True, yield only each chunk's increment;
                otherwise yield the accumulated text on every chunk.
            generate_cfg: Extra keyword arguments for the completion call.

        Yields:
            Single-element lists of assistant Messages. Native tool calls are
            re-encoded as ``<tool_call>{...}</tool_call>`` text in the content.

        Raises:
            ModelServiceError: wrapping any OpenAIError from the SDK.
        """
        messages = self.convert_messages_to_dicts(messages)

        try:
            response = self._chat_complete_create(model=self.model, messages=messages, stream=True, **generate_cfg)
            if delta_stream:
                for chunk in response:
                    if chunk.choices:
                        choice = chunk.choices[0]
                        if hasattr(choice.delta, 'reasoning_content') and choice.delta.reasoning_content:
                            yield [
                                Message(
                                    role=ASSISTANT,
                                    content='',
                                    reasoning_content=choice.delta.reasoning_content
                                )
                            ]
                        if hasattr(choice.delta, 'content') and choice.delta.content:
                            yield [Message(role=ASSISTANT, content=choice.delta.content, reasoning_content='')]
                        # Compatibility with the "map agent" model's native tool calls.
                        # NOTE(review): json.loads on a single delta assumes the
                        # backend sends complete tool-call arguments in one chunk;
                        # stock OpenAI streams arguments incrementally — confirm.
                        if hasattr(choice.delta, 'tool_calls') and choice.delta.tool_calls:
                            function_name = choice.delta.tool_calls[0].function.name
                            function_call = {
                                'name': function_name,
                                'arguments': json.loads(choice.delta.tool_calls[0].function.arguments)
                            }
                            function_json = json.dumps(function_call, ensure_ascii=False)
                            yield [Message(role=ASSISTANT, content=f'<tool_call>{function_json}</tool_call>')]
                    logger.info(f'delta_stream message chunk: {chunk}')
            else:
                # Cumulative mode: re-yield the full text after every chunk.
                full_response = ''
                full_reasoning_content = ''
                for chunk in response:
                    if chunk.choices:
                        choice = chunk.choices[0]
                        if hasattr(choice.delta, 'reasoning_content') and choice.delta.reasoning_content:
                            full_reasoning_content += choice.delta.reasoning_content
                        if hasattr(choice.delta, 'content') and choice.delta.content:
                            full_response += choice.delta.content
                        # Compatibility with the "map agent" model's native tool calls.
                        # NOTE(review): same single-chunk-arguments assumption as above.
                        if hasattr(choice.delta, 'tool_calls') and choice.delta.tool_calls:
                            function_name = choice.delta.tool_calls[0].function.name
                            function_call = {
                                'name': function_name,
                                'arguments': json.loads(choice.delta.tool_calls[0].function.arguments)
                            }
                            function_json = json.dumps(function_call, ensure_ascii=False)
                            logger.info(json.dumps(function_call, ensure_ascii=False, indent=4))
                            full_response += f'<tool_call>{function_json}</tool_call>'
                        yield [Message(role=ASSISTANT, content=full_response, reasoning_content=full_reasoning_content)]
                    logger.info(f'message chunk: {chunk}')
        except OpenAIError as ex:
            raise ModelServiceError(exception=ex)
186
+
187
+ def _chat_no_stream(
188
+ self,
189
+ messages: List[Message],
190
+ generate_cfg: dict,
191
+ ) -> List[Message]:
192
+ messages = self.convert_messages_to_dicts(messages)
193
+ try:
194
+ response = self._chat_complete_create(model=self.model, messages=messages, stream=False, **generate_cfg)
195
+ if hasattr(response.choices[0].message, 'reasoning_content'):
196
+ return [
197
+ Message(role=ASSISTANT,
198
+ content=response.choices[0].message.content,
199
+ reasoning_content=response.choices[0].message.reasoning_content)
200
+ ]
201
+ else:
202
+ return [Message(role=ASSISTANT, content=response.choices[0].message.content)]
203
+ except OpenAIError as ex:
204
+ raise ModelServiceError(exception=ex)
205
+
206
+ def _chat_with_functions(
207
+ self,
208
+ messages: List[Message],
209
+ functions: List[Dict],
210
+ stream: bool,
211
+ delta_stream: bool,
212
+ generate_cfg: dict,
213
+ lang: Literal['en', 'zh'],
214
+ ) -> Union[List[Message], Iterator[List[Message]]]:
215
+ # if delta_stream:
216
+ # raise NotImplementedError('Please use stream=True with delta_stream=False, because delta_stream=True'
217
+ # ' is not implemented for function calling due to some technical reasons.')
218
+ generate_cfg = copy.deepcopy(generate_cfg)
219
+ for k in ['parallel_function_calls', 'function_choice', 'thought_in_content']:
220
+ if k in generate_cfg:
221
+ del generate_cfg[k]
222
+ messages = simulate_response_completion_with_chat(messages)
223
+ return self._chat(messages, stream=stream, delta_stream=delta_stream, generate_cfg=generate_cfg)
224
+
225
+ def _chat(
226
+ self,
227
+ messages: List[Union[Message, Dict]],
228
+ stream: bool,
229
+ delta_stream: bool,
230
+ generate_cfg: dict,
231
+ ) -> Union[List[Message], Iterator[List[Message]]]:
232
+ if stream:
233
+ return self._chat_stream(messages, delta_stream=delta_stream, generate_cfg=generate_cfg)
234
+ else:
235
+ return self._chat_no_stream(messages, generate_cfg=generate_cfg)
236
+
237
+ @staticmethod
238
+ def convert_messages_to_dicts(messages: List[Message]) -> List[dict]:
239
+ # TODO: Change when the VLLM deployed model needs to pass reasoning_complete.
240
+ # At this time, in order to be compatible with lower versions of vLLM,
241
+ # and reasoning content is currently not useful
242
+
243
+ messages = [msg.model_dump() for msg in messages]
244
+ return_messages = []
245
+ messages[0]["content"] = SYSTEM_PROMPT + "Current date: " + str(today_date())
246
+ for i in messages:
247
+ i["content"] = i["content"].replace("<think>\n<think>\n","<think>\n\n")
248
+ return_messages.append(i)
249
+ if logger.isEnabledFor(logging.DEBUG):
250
+ logger.debug(f'LLM Input:\n{pformat(messages, indent=2)}')
251
+ return return_messages
llm/qwen_dashscope.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from http import HTTPStatus
3
+ from pprint import pformat
4
+ from typing import Dict, Iterator, List, Optional
5
+
6
+ import dashscope
7
+
8
+ from qwen_agent.llm.base import ModelServiceError, register_llm
9
+ from qwen_agent.llm.function_calling import BaseFnCallModel
10
+ from qwen_agent.llm.schema import ASSISTANT, Message
11
+ from qwen_agent.log import logger
12
+
13
+
14
+ @register_llm('qwen_dashscope')
15
+ class QwenChatAtDS(BaseFnCallModel):
16
+
17
+ def __init__(self, cfg: Optional[Dict] = None):
18
+ super().__init__(cfg)
19
+ self.model = self.model or 'qwen-max'
20
+ initialize_dashscope(cfg)
21
+
22
+ def _chat_stream(
23
+ self,
24
+ messages: List[Message],
25
+ delta_stream: bool,
26
+ generate_cfg: dict,
27
+ ) -> Iterator[List[Message]]:
28
+ messages = [msg.model_dump() for msg in messages]
29
+ if messages[-1]['role'] == ASSISTANT:
30
+ messages[-1]['partial'] = True
31
+ logger.debug(f'LLM Input:\n{pformat(messages, indent=2)}')
32
+ response = dashscope.Generation.call(
33
+ self.model,
34
+ messages=messages, # noqa
35
+ result_format='message',
36
+ stream=True,
37
+ **generate_cfg)
38
+ if delta_stream:
39
+ return self._delta_stream_output(response)
40
+ else:
41
+ return self._full_stream_output(response)
42
+
43
+ def _chat_no_stream(
44
+ self,
45
+ messages: List[Message],
46
+ generate_cfg: dict,
47
+ ) -> List[Message]:
48
+ messages = [msg.model_dump() for msg in messages]
49
+ if messages[-1]['role'] == ASSISTANT:
50
+ messages[-1]['partial'] = True
51
+ logger.debug(f'LLM Input:\n{pformat(messages, indent=2)}')
52
+ response = dashscope.Generation.call(
53
+ self.model,
54
+ messages=messages, # noqa
55
+ result_format='message',
56
+ stream=False,
57
+ **generate_cfg)
58
+ if response.status_code == HTTPStatus.OK:
59
+ return [
60
+ Message(role=ASSISTANT,
61
+ content=response.output.choices[0].message.content,
62
+ reasoning_content=response.output.choices[0].message.get('reasoning_content', ''),
63
+ extra={'model_service_info': response})
64
+ ]
65
+ else:
66
+ raise ModelServiceError(code=response.code,
67
+ message=response.message,
68
+ extra={'model_service_info': response})
69
+
70
+ def _continue_assistant_response(
71
+ self,
72
+ messages: List[Message],
73
+ generate_cfg: dict,
74
+ stream: bool,
75
+ ) -> Iterator[List[Message]]:
76
+ return self._chat(messages, stream=stream, delta_stream=False, generate_cfg=generate_cfg)
77
+
78
+ @staticmethod
79
+ def _delta_stream_output(response) -> Iterator[List[Message]]:
80
+ for chunk in response:
81
+ if chunk.status_code == HTTPStatus.OK:
82
+ yield [
83
+ Message(role=ASSISTANT,
84
+ content=chunk.output.choices[0].message.content,
85
+ reasoning_content=chunk.output.choices[0].message.reasoning_content,
86
+ extra={'model_service_info': chunk})
87
+ ]
88
+ else:
89
+ raise ModelServiceError(code=chunk.code, message=chunk.message, extra={'model_service_info': chunk})
90
+
91
+ @staticmethod
92
+ def _full_stream_output(response) -> Iterator[List[Message]]:
93
+ full_content = ''
94
+ full_reasoning_content = ''
95
+ for chunk in response:
96
+ if chunk.status_code == HTTPStatus.OK:
97
+ if chunk.output.choices[0].message.get('reasoning_content', ''):
98
+ full_reasoning_content += chunk.output.choices[0].message.reasoning_content
99
+ if chunk.output.choices[0].message.content:
100
+ full_content += chunk.output.choices[0].message.content
101
+ yield [
102
+ Message(role=ASSISTANT,
103
+ content=full_content,
104
+ reasoning_content=full_reasoning_content,
105
+ extra={'model_service_info': chunk})
106
+ ]
107
+ else:
108
+ raise ModelServiceError(code=chunk.code, message=chunk.message, extra={'model_service_info': chunk})
109
+
110
+
111
def initialize_dashscope(cfg: Optional[Dict] = None) -> None:
    """Apply api_key and endpoint settings to the dashscope SDK module.

    Precedence: explicit ``cfg`` values, then environment variables
    (``DASHSCOPE_API_KEY`` / ``DASHSCOPE_HTTP_URL`` / ``DASHSCOPE_WEBSOCKET_URL``),
    then whatever has already been set on the dashscope module.
    """
    cfg = cfg or {}

    api_key = cfg.get('api_key', '') or os.getenv('DASHSCOPE_API_KEY', 'EMPTY')
    base_http_api_url = cfg.get('base_http_api_url', None) or os.getenv('DASHSCOPE_HTTP_URL', None)
    base_websocket_api_url = cfg.get('base_websocket_api_url', None) or os.getenv('DASHSCOPE_WEBSOCKET_URL', None)

    api_key = api_key.strip()
    if api_key in ('', 'EMPTY'):
        # No usable key resolved here; rely on any previously-set key.
        if dashscope.api_key is None or dashscope.api_key in ('', 'EMPTY'):
            logger.warning('No valid dashscope api_key found in cfg, environment variable `DASHSCOPE_API_KEY` or dashscope.api_key, the model call may raise errors.')
        else:
            logger.info('No dashscope api_key found in cfg, using the dashscope.api_key that has already been set.')
    elif api_key != dashscope.api_key:
        logger.info('Setting the dashscope api_key.')
        dashscope.api_key = api_key
    # else: both keys are identical — nothing to do.

    if base_http_api_url is not None:
        dashscope.base_http_api_url = base_http_api_url.strip()
    if base_websocket_api_url is not None:
        dashscope.base_websocket_api_url = base_websocket_api_url.strip()
messages.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "role": "system",
4
+ "content": "You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response. When you have gathered sufficient information and are ready to provide the definitive response, you must enclose the entire final answer within <answer></answer> tags.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"search\", \"description\": \"Perform Google web searches then returns a string of the top search results. Accepts multiple queries.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"query\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"description\": \"The search query.\"}, \"minItems\": 1, \"description\": \"The list of search queries.\"}}, \"required\": [\"query\"]}}}\n{\"type\": \"function\", \"function\": {\"name\": \"visit\", \"description\": \"Visit webpage(s) and return the summary of the content.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"url\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"The URL(s) of the webpage(s) to visit. Can be a single URL or an array of URLs.\"}, \"goal\": {\"type\": \"string\", \"description\": \"The specific information goal for visiting webpage(s).\"}}, \"required\": [\"url\", \"goal\"]}}}\n{\"type\": \"function\", \"function\": {\"name\": \"PythonInterpreter\", \"description\": \"Executes Python code in a sandboxed environment. To use this tool, you must follow this format:\n1. The 'arguments' JSON object must be empty: {}.\n2. 
The Python code to be executed must be placed immediately after the JSON block, enclosed within <code> and </code> tags.\n\nIMPORTANT: Any output you want to see MUST be printed to standard output using the print() function.\n\nExample of a correct call:\n<tool_call>\n{\"name\": \"PythonInterpreter\", \"arguments\": {}}\n<code>\nimport numpy as np\n# Your code here\nprint(f\"The result is: {np.mean([1,2,3])}\")\n</code>\n</tool_call>\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}}\n{\"type\": \"function\", \"function\": {\"name\": \"google_scholar\", \"description\": \"Leverage Google Scholar to retrieve relevant information from academic publications. Accepts multiple queries. This tool will also return results from google search\", \"parameters\": {\"type\": \"object\", \"properties\": {\"query\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"description\": \"The search query.\"}, \"minItems\": 1, \"description\": \"The list of search queries for Google Scholar.\"}}, \"required\": [\"query\"]}}}\n{\"type\": \"function\", \"function\": {\"name\": \"parse_file\", \"description\": \"This is a tool that can be used to parse multiple user uploaded local files such as PDF, DOCX, PPTX, TXT, CSV, XLSX, DOC, ZIP, MP4, MP3.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"files\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"The file name of the user uploaded local files to be parsed.\"}}, \"required\": [\"files\"]}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n\nCurrent date: 2025-09-19"
5
+ },
6
+ {
7
+ "role": "user",
8
+ "content": "When is the paper submission deadline for the ACL 2025 Industry Track, and what is the venue address for the conference?"
9
+ },
10
+ {
11
+ "role": "assistant",
12
+ "content": "<think>\n\nThe user asks: \"When is the paper submission deadline for the ACL 2025 Industry Track, and what is the venue address for the conference?\" So we need to find:\n\n- ACL 2025 (Association for Computational Linguistics) conference. Specifically the Industry Track submission deadline date.\n- Venue address for the conference (likely the location, city, country; maybe specific venue address like a convention center).\n\nWe need to browse the web to locate official information about ACL 2025 (maybe it hasn't happened yet). Possibly the site is something like https://2025.aclweb.org or similar.\n\nSearch queries could be:\n\n- \"ACL 2025 industry track submission deadline\"\n- \"ACL 2025 conference venue address\"\n- \"ACL 2025 location\"\n- \"ACL 2025 main conference venue\"\n- \"ACL 2025 call for papers industry track\"\n\nWe will search using the functions.search tool.\n</think>\n<tool_call>\n{\"name\": \"search\", \"arguments\": {\"query\": [\"ACL 2025 industry track submission deadline\", \"ACL 2025 conference venue address\", \"ACL 2025 location\", \"ACL 2025 call for papers industry track\", \"ACL 2025 conference details\"]}}\n</tool_call>"
13
+ },
14
+ {
15
+ "role": "user",
16
+ "content": "<tool_response>\nA Google search for 'ACL 2025 industry track submission deadline' found 10 results:\n\n## Web Results\n1. [Industry Track](https://2025.aclweb.org/calls/industry_track/)\n\nNote: All deadlines are 11:59PM UTC-12:00 (anywhere on Earth). Following the ACL and ARR Policies for Review and Citation, updated in early 2024, there is no ...\n\n2. [ACL 2025 Industry Track](https://openreview.net/group?id=aclweb.org/ACL/2025/Industry_Track)\nDate published: Jul 28, 2025\n\nPlease see the venue website for more information. Submission Start: Jan 27 2025 12:00AM UTC-0, Submission Deadline: Mar 22 2025 12:59PM UTC-0.\n\n3. [ACL 2025: The 63rd Annual Meeting of the Association for ...](https://2025.aclweb.org/)\n\nCall for Industry Track is out. Oct 7, 2024, First call for papers is out. Sep ... Commitment deadline for ACL 2025, April 20, 2025. Notification of ...\n\n4. [Call for Industry Track Papers](https://2025.naacl.org/calls/industry/)\n\nAll deadlines are 11:59PM UTC-12:00 (“anywhere on Earth”). Paper submission deadline, December 2, 2024 December 7, 2024. Notification of acceptance, February 5, ...\n\n5. [Call for Industry Track Papers](https://2025.emnlp.org/calls/industry_track/)\n\nImportant Dates · Paper submission deadline: July 4, 2025 · Reviewer-author discussion period: August 28 – September 8, 2025 · Notification: September 24, 2025 ...\n\n6. [ACL 2025 Industry Track: Overview](https://aclanthology.org/2025.acl-industry.1/)\nDate published: by G Rehm · 2025\n\nFor the 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025), it was decided once again to organise a dedicated Industry Track.\n\n7. [The 2025 Conference on Empirical Methods in Natural ...](https://2025.emnlp.org/)\n\nReview submission deadline, June 18, 2025 ; Author response and author-reviewer discussion, June 26 - July 2, 2025 ; Commitment deadline, August 1, 2025.\n\n8. 
[Acl 2025 deadline](https://assets-global.website-files.com/683fdb5e8008a99e5b00140f/6854688e54c673b87dfbc2c6_nukanavowazix.pdf)\nDate published: Jun 29, 2025\n\nAuthors of accepted papers must submit pre-recorded video presentations, poster PDFs and/or slides. CL and TACL authors also need to upload ...\n\n9. [ACL 2025 (@aclmeeting) / X](https://x.com/aclmeeting?lang=en)\n\n... submission deadline: Oct 6, 2025 • EACL commitment deadline: Dec 14, 2025. A tall building with intricate geometric patterns on its facade, featuring arched ...\n\n10. [ACL 2025 The 63rd Annual Meeting of the Association for ...](https://aclanthology.org/2025.acl-industry.0.pdf)\nDate published: by G Rehm · 2025\n\n... Industry Track submissions as well as the reviewers and area chairs for their hard and dedicated work under very tight deadlines. We would ...\n=======\nA Google search for 'ACL 2025 conference venue address' found 10 results:\n\n## Web Results\n1. [Venue](https://2025.aclweb.org/venue/)\n\nThe Austria Center Vienna is an easy-to-reach location, and right in the heart of Europe. And the only conference centre in the world that is situated right ...\n\n2. [ACL 2025: The 63rd Annual Meeting of the Association for ...](https://2025.aclweb.org/)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025) will take place in Vienna, Austria from July 27 to August 1st, 2025.\n\n3. [ACL 2025](https://www.clarin.eu/event/2025/acl-2025)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025) will take place in Vienna, Austria from July 27 to August 1st, 2025.\n\n4. [ACL 2025 Conference](https://openreview.net/group?id=aclweb.org/ACL/2025/Conference)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics. ACL 2025. Vienna, AustriaJul 27 2025https://2025.aclweb.org/[email protected].\n\n5. 
[ACL 2025 in Vienna, Austria](https://scads.ai/acl-2025-in-vienna-austria/)\nDate published: Aug 5, 2025\n\nFrom July 27 to August 1 the 63rd Annual Meeting of the Association for Computational Linguistics ACL 2025 took place in Vienna, Austria.\n\n6. [ACL 2025 (@aclmeeting) / X](https://x.com/aclmeeting?lang=en)\n\nJoin us for our 30th anniversary conference , November 4–9, 2025! Early registration ends on October 6, 2025. Space is limited, so register soon to ...\n\n7. [ACL 2025 : The 63rd Annual Meeting of the Association for ...](http://www.wikicfp.com/cfp/servlet/event.showcfp?copyownerid=90704&eventid=183095)\n\nACL 2025 Call for Papers Main Conference ACL 2025. Website: https://2025.aclweb.org/ Submission Deadline: February 15, 2025. Conference Dates: July 27 to ...\n\n8. [2025 Annual Conference of the Nations of the Americas ...](https://2025.naacl.org/)\n\n2025 Annual Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics Albuquerque, New Mexico April 29–May 4, 2025\n\n9. [Annual Meeting of the Association for Computational ...](https://aclanthology.org/events/acl-2025/)\n\nAnnual Meeting of the Association for Computational Linguistics (2025) ... address the lack of ground truth explanations to train the model, we propose ...\n\n10. [IBM at ACL 2025 - Vienna, Austria and virtual](https://research.ibm.com/events/acl-2025)\nDate published: Jul 27, 2025\n\nThis event has ended. About. IBM is proud to sponsor the 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025).\n=======\nA Google search for 'ACL 2025 location' found 10 results:\n\n## Web Results\n1. [Austin City Limits Music Festival | Austin, TX | October 3-5 & 10 ...](https://www.aclfestival.com/)\n\nAustin City Limits Music Festival will be presented at Zilker Park, in Austin, TX on October 3-5 & 10-12, 2025. Stay connected for upcoming announcements ...\n\n2. 
[ACL 2025: The 63rd Annual Meeting of the Association for ...](https://2025.aclweb.org/)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025) will take place in Vienna, Austria from July 27 to August 1st, 2025.\n\n3. [Venue](https://2025.aclweb.org/venue/)\n\nThe Austria Center Vienna is an easy-to-reach location, and right in the heart of Europe. And the only conference centre in the world that is situated right ...\n\n4. [Tickets | Austin City Limits Music Festival](https://www.aclfestival.com/tickets)\n\nBuy tickets for Austin City Limits Music Festival for October 3-5 & 10-12, 2025.\n\n5. [Austin City Limits Music Festival | Tickets, Dates & More](https://www.austintexas.org/events/acl-fest/)\n\nPlan your ACL 2025 visit! Get tickets, VIP passes, hotel options, and learn about the best activities for an unforgettable experience in Austin.\n\n6. [Austin City Limits Music Festival 2025 guide: tickets, tips ...](https://www.statesman.com/entertainment/music/article/acl-fest-guide-lineup-schedule-zilker-rules-21040936.php)\nDate published: 3 days ago\n\nACL Fest runs Oct. 3-5 and Oct. 10-12 in Zilker Park, 2100 Barton Springs Road. Gates open at noon and close at 10 p.m. on Friday, Saturday and ...\n\n7. [ACL 2025 (@aclmeeting) / X](https://x.com/aclmeeting?lang=en)\n\nWelcome to Vienna! What an incredible week it has been here in Vienna for the main conference!\n\n8. [ACL 2025](https://www.clarin.eu/event/2025/acl-2025)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025) will take place in Vienna, Austria from July 27 to August 1st, 2025.\n\n9. [Festival Map](https://support.aclfestival.com/hc/en-us/articles/4405399774484-Festival-Map)\n\nWhat are the dates of ACL Festival 2025? Where is the festival located? Can I leave and re-enter on the same day? Can I check my chair at ...\n\n10. 
[What are the dates of ACL Festival 2025?](https://support.aclfestival.com/hc/en-us/articles/4405406283924-What-are-the-dates-of-ACL-Festival-2025)\nDate published: Aug 27, 2025\n\nAustin City Limits Music Festival will take place on October 3-5 and October 10-12, 2025. Sign-up for our SMS list to be the first to know ...\n=======\nA Google search for 'ACL 2025 call for papers industry track' found 10 results:\n\n## Web Results\n1. [Industry Track](https://2025.aclweb.org/calls/industry_track/)\n\nThis track provides an opportunity to highlight the key insights and new research challenges that arise from real world implementations.\n\n2. [ACL 2025: The 63rd Annual Meeting of the Association for ...](https://2025.aclweb.org/)\n\nCall for System Demonstrations is out. Jan 20, 2025, Second call for papers is out. Dec 3, 2024, Call for Industry Track is out. Oct 7, 2024, First call for ...\n\n3. [ACL 2025 Industry Track](https://openreview.net/group?id=aclweb.org/ACL/2025/Industry_Track)\n\nPlease see the venue website for more information. Submission Start: Jan 27 2025 12:00AM UTC-0, Submission Deadline: Mar 22 2025 12:59PM UTC-0.\n\n4. [Call for Industry Track Papers](https://2025.naacl.org/calls/industry/)\n\nThe industry track welcomes submissions describing innovations and implementations of applications in all areas of speech and natural language processing.\n\n5. [ACL Industry track 2025 decisions : r/airesearch](https://www.reddit.com/r/airesearch/comments/1ki8ohz/acl_industry_track_2025_decisions/)\nDate published: 4 months ago\n\nACL indutry track reviews will be out soon. This is a thread for all types of discussions.\n\n6. [Call for Industry Track Papers](https://2025.emnlp.org/calls/industry_track/)\n\nThe EMNLP 2025 Industry Track provides the opportunity to highlight the key insights and new research challenges that arise from the development and deployment ...\n\n7. 
[ACL 2025 Industry Track: Overview](https://aclanthology.org/2025.acl-industry.1/)\nDate published: by G Rehm · 2025\n\nFor the 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025), it was decided once again to organise a dedicated Industry Track.\n\n8. [2025 Annual Conference of the Nations of the Americas ...](https://2025.naacl.org/)\n\nCall for industry track papers is out. Oct 01, 2024, Second call for papers is out. Sep 10, 2024, The ACL-NAACL-EMNLP joint call for tutorials is out. Sep 10 ...\n\n9. [Call for Industry Track](https://coling2025.org/calls/industry_track/)\n\nTopics of InterestPermalink. The purpose of this track is to showcase key insights and emerging research challenges emanating from practical applications.\n\n10. [ACL 2025 Industry Track: Overview](https://aclanthology.org/2025.acl-industry.1.pdf)\nDate published: by G Rehm · 2025\n\n3 Submissions and Results. The call for Industry Track papers attracted an un- precedented number of 421 paper submissions. A total of 453 ...\n=======\nA Google search for 'ACL 2025 conference details' found 10 results:\n\n## Web Results\n1. [ACL 2025: The 63rd Annual Meeting of the Association for ...](https://2025.aclweb.org/)\n\nImportant Dates ; Camera-ready papers due, May 30, 2025 ; Overall Conference, July 27 to August 1st, 2025 ; Tutorials & Welcome Reception, July 27, 2025 ; Main ...\n\n2. [ACL 2025 Conference](https://openreview.net/group?id=aclweb.org/ACL/2025/Conference)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics. ACL 2025. Vienna, AustriaJul 27 2025https://2025.aclweb.org/[email protected].\n\n3. [ACL 2025 (@aclmeeting) / X](https://x.com/aclmeeting?lang=en)\n\nWe look forward to seeing you at #ACL2026 in San Diego! ACL SRW 2025 updates the program schedule: acl2025-srw.github.io/schedule. Welcome to Vienna! What an ...\n\n4. [Annual Meeting of the Association for Computational ...](https://aclanthology.org/events/acl-2025/)\n\n... 
ACL 2025 1388 papers; Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025) 29 papers; Proceedings of the 2nd Workshop on ...\n\n5. [2025 Annual Conference of the Nations of the Americas ...](https://2025.naacl.org/)\n\nSubmission Deadline (via ACL Rolling Review), October 15, 2024 ; Welcome Reception, Tuesday, April 29, 2025 ; Main Conference, Wednesday–Friday, April 30–May 2, ...\n\n6. [ACL 2025](https://www.clarin.eu/event/2025/acl-2025)\n\nThe 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025) will take place in Vienna, Austria from July 27 to August 1st, 2025.\n\n7. [2025 ACL Conference Registration](https://acl.swoogo.com/acl2025)\n\n2025 ACL Conference Registration July 27 - August 1, 2025. Early Registration: Ends Wednesday, July 2, 2025. AOE Late Registration: Ends for In-Person and ...\n\n8. [ACL 2025 The 63rd Annual Meeting of the Association for ...](https://aclanthology.org/2025.acl-long.0.pdf)\nDate published: by W Che · 2025\n\nACL 2025 hosts a rich set of 28 workshops and 8 tutorials, thanks to the dedicated efforts of our Wo- rkshop Chairs — Terra Blevins and ...\n\n9. [Dates and Venues](http://aclrollingreview.org/dates)\nDate published: May 19, 2025\n\nThis table lists all the key dates, by which the authors may need to take action, for 2025 cycles following the new schedule.\n\n10. [ACL 2025 : The 63rd Annual Meeting of the Association for ...](http://www.wikicfp.com/cfp/servlet/event.showcfp?copyownerid=90704&eventid=183095)\n\nConference July 28 - 30, 2025. Workshops July 31 - August 1, 2025. Note: All deadlines are 11:59PM UTC-12:00 (“anywhere on Earth”). Paper Submission Details\n</tool_response>"
17
+ },
18
+ {
19
+ "role": "assistant",
20
+ "content": "<think>\n\nWe have search results. The relevant ones for submission deadline of Industry Track:\n\n- #1: \"Industry Track\" page at https://2025.aclweb.org/calls/industry_track/. It might have the exact deadline.\n\n- #2: OpenReview group for ACL 2025 Industry Track shows \"Submission Deadline: Mar 22 2025 12:59PM UTC-0\".\n\nBut we need to verify which is correct. Could be that the industry track call has a deadline in March (like March 22, 2025). But we also see other info from different sources showing deadlines like December 2/7, 2024 for NAACL (not relevant). The question is specifically \"paper submission deadline for the ACL 2025 Industry Track\". We should check the official ACL 2025 website's calls/industry_track page.\n\nLet's visit that URL.\n</think>\n<tool_call>\n{\"name\": \"visit\", \"arguments\": {\"url\": [\"https://2025.aclweb.org/calls/industry_track/\"], \"goal\": \"Find the paper submission deadline date for the ACL 2025 Industry Track.\"}}\n</tool_call>"
21
+ },
22
+ {
23
+ "role": "user",
24
+ "content": "<tool_response>\nThe useful information in https://2025.aclweb.org/calls/industry_track/ for user goal Find the paper submission deadline date for the ACL 2025 Industry Track. as follows: \n\nEvidence in page: \nIMPORTANT DATES\n---------------\n\nPaper submission deadline: 21 March 2025\n\nNotification of acceptance: 09 May 2025\n\nCamera-ready version of papers due: 23 May 2025\n\nMain conference: 28-30 July 2025\n\nNote: All deadlines are 11:59PM UTC-12:00 (anywhere on Earth).\n\nSummary: \nThe paper submission deadline for the ACL 2025 Industry Track is 21 March 2025, at 11:59 PM UTC-12:00 (anywhere on Earth). This deadline is explicitly stated in the 'IMPORTANT DATES' section of the call for papers, which also outlines subsequent key dates including notification of acceptance (9 May 2025), camera-ready version submission (23 May 2025), and the main conference dates (28–30 July 2025).\n</tool_response>"
25
+ },
26
+ {
27
+ "role": "assistant",
28
+ "content": "<think>\n\nGreat. The page shows \"Paper submission deadline: 21 March 2025\". However, earlier the OpenReview group indicated March 22 2025 12:59PM UTC-0. The official website is more authoritative.\n\nNow we need venue address for the conference. Search results show a \"Venue\" page at https://2025.aclweb.org/venue/. Let's visit that.\n</think>\n<tool_call>\n{\"name\": \"visit\", \"arguments\": {\"url\": [\"https://2025.aclweb.org/venue/\"], \"goal\": \"Find the venue address for the ACL 2025 conference.\"}}\n</tool_call>"
29
+ },
30
+ {
31
+ "role": "user",
32
+ "content": "<tool_response>\nThe useful information in https://2025.aclweb.org/venue/ for user goal Find the venue address for the ACL 2025 conference. as follows: \n\nEvidence in page: \nConference Venue\n----------------\n\nAustria Center Vienna Bruno-Kreisky-Platz 1, 1220 Wien, Austria\n\n +43 1 260690\n\n[https://www.acv.at/en/](https://www.acv.at/en/)\n\nThe Austria Center Vienna is an easy-to-reach location, and right in the heart of Europe. And the only conference centre in the world that is situated right next door to a UN headquarters. Austria’s largest congress center with 21 halls, 134 meeting rooms and 5 exhibition halls on 5 levels, with room for up to 22,800 people.\n\nSummary: \nThe venue for the ACL 2025 conference is the Austria Center Vienna, located at Bruno-Kreisky-Platz 1, 1220 Wien, Austria. This major congress center is situated in the heart of Europe, adjacent to a UN headquarters, and features 21 halls, 134 meeting rooms, and 5 exhibition halls, capable of accommodating up to 22,800 attendees.\n</tool_response>"
33
+ }
34
+ ]
prompt.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template for the webpage-content extractor.
# Placeholders: {webpage_content} (raw page text) and {goal} (the user goal).
# NOTE: the output keys deliberately remain "rational"/"evidence"/"summary"
# (including the "rational" spelling) because downstream consumers parse
# those exact key names; only the surrounding instruction text was fixed
# ("feilds" typo, run-on sentences).
EXTRACTOR_PROMPT = """Please process the following webpage content and user goal to extract relevant information:

## **Webpage Content**
{webpage_content}

## **User Goal**
{goal}

## **Task Guidelines**
1. **Content Scanning for Rational**: Locate the **specific sections/data** directly related to the user's goal within the webpage content
2. **Key Extraction for Evidence**: Identify and extract the **most relevant information** from the content; never miss any important information, and output the **full original context** of the content as far as possible (it can be more than three paragraphs).
3. **Summary Output for Summary**: Organize into a concise paragraph with logical flow, prioritizing clarity, and judge the contribution of the information to the goal.

**Final Output Format: use JSON format with "rational", "evidence", "summary" fields**
"""
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ qwen-agent[gui,rag,code_interpreter,mcp]
2
+ sandbox_fusion
scholar.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import requests
4
+ from typing import Union, List
5
+ from qwen_agent.tools.base import BaseTool, register_tool
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ import http.client
8
+
9
+
10
# serper.dev API key, read from the SERPER_KEY_ID env var (None when unset;
# requests sent with a None key will fail server-side with an auth error).
SERPER_KEY=os.environ.get('SERPER_KEY_ID')
11
+
12
+
13
+ @register_tool("google_scholar", allow_overwrite=True)
14
+ class Scholar(BaseTool):
15
+ name = "google_scholar"
16
+ description = "Leverage Google Scholar to retrieve relevant information from academic publications. Accepts multiple queries."
17
+ parameters = {
18
+ "type": "object",
19
+ "properties": {
20
+ "query": {
21
+ "type": "array",
22
+ "items": {"type": "string", "description": "The search query."},
23
+ "minItems": 1,
24
+ "description": "The list of search queries for Google Scholar."
25
+ },
26
+ },
27
+ "required": ["query"],
28
+ }
29
+
30
+ def google_scholar_with_serp(self, query: str):
31
+ conn = http.client.HTTPSConnection("google.serper.dev")
32
+ payload = json.dumps({
33
+ "q": query,
34
+ })
35
+ headers = {
36
+ 'X-API-KEY': SERPER_KEY,
37
+ 'Content-Type': 'application/json'
38
+ }
39
+ for i in range(5):
40
+ try:
41
+ conn.request("POST", "/scholar", payload, headers)
42
+ res = conn.getresponse()
43
+ break
44
+ except Exception as e:
45
+ print(e)
46
+ if i == 4:
47
+ return f"Google Scholar Timeout, return None, Please try again later."
48
+ continue
49
+
50
+
51
+ data = res.read()
52
+
53
+ results = json.loads(data.decode("utf-8"))
54
+ try:
55
+ if "organic" not in results:
56
+ raise Exception(f"No results found for query: '{query}'. Use a less specific query.")
57
+
58
+ web_snippets = list()
59
+ idx = 0
60
+ if "organic" in results:
61
+ for page in results["organic"]:
62
+ idx += 1
63
+ date_published = ""
64
+ if "year" in page:
65
+ date_published = "\nDate published: " + str(page["year"])
66
+
67
+ publicationInfo = ""
68
+ if "publicationInfo" in page:
69
+ publicationInfo = "\npublicationInfo: " + page["publicationInfo"]
70
+
71
+ snippet = ""
72
+ if "snippet" in page:
73
+ snippet = "\n" + page["snippet"]
74
+
75
+ link_info = "no available link"
76
+ if "pdfUrl" in page:
77
+ link_info = "pdfUrl: " + page["pdfUrl"]
78
+
79
+ citedBy = ""
80
+ if "citedBy" in page:
81
+ citedBy = "\ncitedBy: " + str(page["citedBy"])
82
+
83
+ redacted_version = f"{idx}. [{page['title']}]({link_info}){publicationInfo}{date_published}{citedBy}\n{snippet}"
84
+
85
+ redacted_version = redacted_version.replace("Your browser can't play this video.", "")
86
+ web_snippets.append(redacted_version)
87
+
88
+ content = f"A Google scholar for '{query}' found {len(web_snippets)} results:\n\n## Scholar Results\n" + "\n\n".join(web_snippets)
89
+ return content
90
+ except:
91
+ return f"No results found for '{query}'. Try with a more general query."
92
+
93
+
94
+ def call(self, params: Union[str, dict], **kwargs) -> str:
95
+ # assert GOOGLE_SEARCH_KEY is not None, "Please set the IDEALAB_SEARCH_KEY environment variable."
96
+ try:
97
+ params = json.loads(params)
98
+ params = self._verify_json_format_args(params)
99
+ query = params["query"]
100
+ except:
101
+ return "[google_scholar] Invalid request format: Input must be a JSON object containing 'query' field"
102
+
103
+ if isinstance(query, str):
104
+ response = self.google_scholar_with_serp(query)
105
+ else:
106
+ assert isinstance(query, List)
107
+ with ThreadPoolExecutor(max_workers=3) as executor:
108
+
109
+ response = list(executor.map(self.google_scholar_with_serp, query))
110
+ response = "\n=======\n".join(response)
111
+ return response
search.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from concurrent.futures import ThreadPoolExecutor
3
+ from typing import List, Union
4
+ import requests
5
+ from qwen_agent.tools.base import BaseTool, register_tool
6
+ import asyncio
7
+ from typing import Dict, List, Optional, Union
8
+ import uuid
9
+ import http.client
10
+ import json
11
+
12
+ import os
13
+
14
+
15
# serper.dev API key, read from the SERPER_KEY_ID env var (None when unset;
# requests sent with a None key will fail server-side with an auth error).
SERPER_KEY=os.environ.get('SERPER_KEY_ID')
16
+
17
+
18
@register_tool("search", allow_overwrite=True)
class Search(BaseTool):
    """Batched Google web search tool backed by the serper.dev "/search" API."""
    name = "search"
    description = "Performs batched web searches: supply an array 'query'; the tool retrieves the top 10 results for each query in one call."
    parameters = {
        "type": "object",
        "properties": {
            "query": {
                "type": "array",
                "items": {
                    "type": "string"
                },
                "description": "Array of query strings. Include multiple complementary search queries in a single call."
            },
        },
        "required": ["query"],
    }

    def __init__(self, cfg: Optional[dict] = None):
        super().__init__(cfg)

    def google_search_with_serp(self, query: str) -> str:
        """Run one query through serper.dev and format the top results.

        Chinese queries are routed through the China locale, all others
        through the US locale. The HTTP request is retried up to 5 times.
        """
        def contains_chinese_basic(text: str) -> bool:
            # CJK Unified Ideographs block is a good-enough heuristic here.
            return any('\u4E00' <= char <= '\u9FFF' for char in text)

        if contains_chinese_basic(query):
            payload = json.dumps({
                "q": query,
                "location": "China",
                "gl": "cn",
                "hl": "zh-cn"
            })
        else:
            payload = json.dumps({
                "q": query,
                "location": "United States",
                "gl": "us",
                "hl": "en"
            })
        headers = {
            'X-API-KEY': SERPER_KEY,
            'Content-Type': 'application/json'
        }

        data = None
        for attempt in range(5):
            # Fresh connection per attempt: a connection that failed
            # mid-request must not be reused.
            conn = http.client.HTTPSConnection("google.serper.dev")
            try:
                conn.request("POST", "/search", payload, headers)
                data = conn.getresponse().read()
                break
            except Exception as e:
                print(e)
                if attempt == 4:
                    return "Google search Timeout, return None, Please try again later."
            finally:
                # Bug fix: the original never closed the connection.
                conn.close()

        results = json.loads(data.decode("utf-8"))

        try:
            if "organic" not in results:
                raise Exception(f"No results found for query: '{query}'. Use a less specific query.")

            web_snippets = []
            for idx, page in enumerate(results["organic"], start=1):
                date_published = ""
                if "date" in page:
                    date_published = "\nDate published: " + page["date"]

                source = ""
                if "source" in page:
                    source = "\nSource: " + page["source"]

                snippet = ""
                if "snippet" in page:
                    snippet = "\n" + page["snippet"]

                redacted_version = f"{idx}. [{page['title']}]({page['link']}){date_published}{source}\n{snippet}"
                redacted_version = redacted_version.replace("Your browser can't play this video.", "")
                web_snippets.append(redacted_version)

            return f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets)
        except Exception as e:
            print(e)
            return f"No results found for '{query}'. Try with a more general query."

    def search_with_serp(self, query: str) -> str:
        # Thin alias kept for backward compatibility with existing callers.
        return self.google_search_with_serp(query)

    def call(self, params: Union[str, dict], **kwargs) -> str:
        """Tool entry point.

        `params` may be a JSON string or an already-decoded dict. Bug fix:
        the original unconditionally called json.loads(params) and therefore
        rejected dict inputs despite the declared Union[str, dict] type.
        Debug prints of raw params/results were removed.
        """
        try:
            if isinstance(params, str):
                params = json.loads(params)
            query = params["query"]
        except Exception:
            return "[Search] Invalid request format: Input must be a JSON object containing 'query' field"

        if isinstance(query, str):
            # Single query.
            return self.search_with_serp(query)

        # Multiple queries, executed in order so output order is stable.
        assert isinstance(query, List)
        responses = [self.search_with_serp(q) for q in query]
        return "\n=======\n".join(responses)
tool_python.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Dict, List, Optional, Union
3
+ import json5
4
+ from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool
5
+ from qwen_agent.utils.utils import extract_code
6
+ from sandbox_fusion import run_code, RunCodeRequest, RunStatus
7
+ from requests.exceptions import Timeout
8
+ import os
9
+ import random
10
+ import time
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
+
13
# Base sandbox-fusion endpoint read from SANDBOX_URL (empty string if unset).
SANDBOX_URL=os.getenv('SANDBOX_URL','')
SANDBOX_FUSION_ENDPOINTS = [
    SANDBOX_URL
]

# Fallback to single endpoint if environment variable exists
# NOTE(review): despite the comment above, SANDBOX_FUSION_ENDPOINT *overrides*
# the list and may contain several comma-separated endpoints.
if 'SANDBOX_FUSION_ENDPOINT' in os.environ:
    SANDBOX_FUSION_ENDPOINTS = os.environ['SANDBOX_FUSION_ENDPOINT'].split(',')
21
+
22
+
23
@register_tool('PythonInterpreter', allow_overwrite=True)
class PythonInterpreter(BaseToolWithFileAccess):
    """Execute Python code in a remote sandbox-fusion sandbox.

    Code may arrive either inside <code></code> XML tags (per `description`)
    or as a JSON object with a 'code'/'raw' field, optionally fenced in
    triple backticks.
    """
    name = "PythonInterpreter"
    description = 'Execute Python code in a sandboxed environment. Use this to run Python code and get the execution results.\n**Make sure to use print() for any output you want to see in the results.**\nFor code parameters, use placeholders first, and then put the code within <code></code> XML tags, such as:\n<tool_call>\n{"purpose": <detailed-purpose-of-this-tool-call>, "name": <tool-name>, "arguments": {"code": ""}}\n<code>\nHere is the code.\n</code>\n</tool_call>\n'

    parameters = {
        "type": "object",
        "properties": {
            "code": {
                "type": "string",
                "description": "The Python code to execute. Must be provided within <code></code> XML tags. Remember to use print() statements for any output you want to see.",
            }
        },
        "required": ["code"],
    }

    # Number of sandbox attempts before giving up. Bug fix: the original mixed
    # range(8), "/5" log messages, and an `attempt == 4` bail-out; the loop
    # effectively ran 5 times, which is now stated in one place.
    MAX_ATTEMPTS = 5

    def __init__(self, cfg: Optional[Dict] = None):
        super().__init__(cfg)

    @property
    def args_format(self) -> str:
        """Human-readable instruction describing how to pass code arguments."""
        fmt = self.cfg.get('args_format')
        if fmt is None:
            # Bug fix: has_chinese_chars was referenced but never imported, so
            # this property always raised NameError. Imported locally to avoid
            # touching the module import block.
            from qwen_agent.utils.utils import has_chinese_chars
            if has_chinese_chars([self.name_for_human, self.name, self.description, self.parameters]):
                fmt = 'The input for this tool should be a Markdown code block.'
            else:
                fmt = 'Enclose the code within triple backticks (`) at the beginning and end of the code.'
        return fmt

    def observation(self, tool: dict, tool_dict: dict, tool_results, empty_mode: bool = False, readpage: bool = False, max_observation_length: int = None, tokenizer=None):
        """Pass the raw string result through unchanged, asserting its type."""
        assert isinstance(tool_results, str), f"result of python code should be str, instead of {type(tool_results)}. {tool_results}"
        return tool_results

    @property
    def function(self) -> dict:
        """OpenAI-style function descriptor for this tool."""
        return {
            'name': self.name,
            'description': self.description,
            'parameters': self.parameters,
        }

    @staticmethod
    def _extract_code(params) -> str:
        """Extract Python source from `params` (str or dict).

        Priority: <code></code> tags, then JSON5 'code'/'raw' fields
        (unwrapping a triple-backtick fence), then qwen_agent's extract_code.
        Bug fix: the original computed the <code> split and then
        unconditionally clobbered it with the JSON parse (which raised for
        tag-bearing input), so the documented tag format never worked; and
        pure-JSON input crashed on the split before ever reaching json5.
        """
        try:
            if isinstance(params, str) and '<code>' in params:
                return params.split('<code>', 1)[1].split('</code', 1)[0]
            if isinstance(params, str):
                params = json5.loads(params)
            code = params.get('code', '') or params.get('raw', '')
            triple_match = re.search(r'```[^\n]*\n(.+?)```', code, re.DOTALL)
            if triple_match:
                code = triple_match.group(1)
            return code
        except Exception:
            return extract_code(params)

    def call(self, params, files=None, timeout=50, **kwargs) -> str:
        """Run the extracted code in the sandbox, retrying across endpoints.

        Returns captured stdout/stderr (or 'Finished execution.' when the run
        produced no output), or an error string.
        """
        try:
            code = self._extract_code(params)
            if not code.strip():
                return '[Python Interpreter Error]: Empty code.'

            last_error = None
            for attempt in range(self.MAX_ATTEMPTS):
                # Randomly sample an endpoint for each attempt.
                endpoint = random.choice(SANDBOX_FUSION_ENDPOINTS)
                print(f"Attempt {attempt + 1}/{self.MAX_ATTEMPTS} using endpoint: {endpoint}")
                try:
                    code_result = run_code(RunCodeRequest(code=code, language='python', run_timeout=timeout), max_attempts=1, client_timeout=timeout, endpoint=endpoint)
                    print("[Python] Code Result", code_result)
                    result = []
                    if code_result.run_result.stdout:
                        result.append(f"stdout:\n{code_result.run_result.stdout}")
                    if code_result.run_result.stderr:
                        result.append(f"stderr:\n{code_result.run_result.stderr}")
                    # The sandbox does not flag timeouts explicitly; infer one
                    # from the elapsed execution time.
                    if code_result.run_result.execution_time >= timeout - 1:
                        result.append("[PythonInterpreter Error] TimeoutError: Execution timed out.")
                    result = '\n'.join(result)
                    return result if result.strip() else 'Finished execution.'

                except Timeout:
                    last_error = f'[Python Interpreter Error] TimeoutError: Execution timed out on endpoint {endpoint}.'
                    print(f"Timeout on attempt {attempt + 1}: {last_error}")
                except Exception as e:
                    last_error = f'[Python Interpreter Error]: {str(e)} on endpoint {endpoint}'
                    print(f"Error on attempt {attempt + 1}: {last_error}")

            return last_error if last_error else '[Python Interpreter Error]: All attempts failed.'

        except Exception as e:
            return f"[Python Interpreter Error]: {str(e)}"

    def call_specific_endpoint(self, params: Union[str, dict], endpoint: str, timeout: Optional[int] = 30, **kwargs) -> tuple:
        """Run code against one specific endpoint (used by the health checks).

        Returns (success, output, elapsed_seconds_or_None). Bug fix: the
        original returned a 2-tuple for the empty-code case, which crashed
        its only caller (test_single_endpoint unpacks three values);
        normalized to always return a 3-tuple.
        """
        code = self._extract_code(params)
        if not code.strip():
            return False, '[Python Interpreter Error]: Empty code.', None

        try:
            start_time = time.time()
            code_result = run_code(RunCodeRequest(code=code, language='python', run_timeout=timeout),
                                   max_attempts=1, client_timeout=timeout, endpoint=endpoint)
            elapsed = time.time() - start_time

            result = []
            if code_result.run_result.stdout:
                result.append(f"stdout:\n{code_result.run_result.stdout}")
            if code_result.run_result.stderr:
                result.append(f"stderr:\n{code_result.run_result.stderr}")

            result = '\n'.join(result)
            return True, result if result.strip() else 'Finished execution.', elapsed

        except Timeout:
            return False, '[Python Interpreter Error] TimeoutError: Execution timed out.', None
        except Exception as e:
            return False, f'[Python Interpreter Error]: {str(e)}', None
165
+
166
+
167
+
168
def test_single_endpoint(endpoint: str, test_cases: List[dict], timeout: int = 30) -> dict:
    """Run every test case in `test_cases` against one sandbox endpoint.

    Args:
        endpoint: sandbox-fusion endpoint URL to exercise.
        test_cases: dicts with 'name', 'code' and optional 'expected_output'.
        timeout: per-test execution timeout in seconds.

    Returns:
        Summary dict: 'status' ('healthy'/'partial'/'failed'), pass counts,
        per-test results, average execution time, and collected errors.
    """
    executor = PythonInterpreter()
    results = {
        'endpoint': endpoint,
        'status': 'unknown',
        'passed_tests': 0,
        'total_tests': len(test_cases),
        'test_results': [],
        'avg_execution_time': 0,
        'errors': []
    }

    # Only successful runs contribute to the timing average.
    execution_times = []

    print(f"\n🧪 Testing endpoint: {endpoint}")

    for i, test_case in enumerate(test_cases):
        test_name = test_case['name']
        test_code = test_case['code']
        expected_output = test_case.get('expected_output')

        print(f" ├─ Running test {i+1}/{len(test_cases)}: {test_name}")

        try:
            # call_specific_endpoint returns (success, output, elapsed).
            success, result, exec_time = executor.call_specific_endpoint(
                {"code": test_code}, endpoint, timeout
            )

            test_result = {
                'name': test_name,
                'success': success,
                'result': result,
                'execution_time': exec_time,
                'expected_match': False
            }

            if success and exec_time is not None:
                execution_times.append(exec_time)

                # Check if output matches expected (if provided)
                if expected_output:
                    # Clean up output for comparison: a substring match is
                    # enough — the sandbox may print extra lines.
                    actual_output = result.replace('stdout:\n', '').strip()
                    if expected_output.strip() in actual_output:
                        test_result['expected_match'] = True
                        results['passed_tests'] += 1
                        print(f" │ ✅ PASSED ({exec_time:.2f}s)")
                    else:
                        print(f" │ ❌ OUTPUT MISMATCH ({exec_time:.2f}s)")
                        print(f" │ Expected: {expected_output.strip()}")
                        print(f" │ Got: {actual_output}")
                else:
                    # No expected output specified, just check if it ran successfully
                    test_result['expected_match'] = True
                    results['passed_tests'] += 1
                    print(f" │ ✅ PASSED ({exec_time:.2f}s)")
            else:
                print(f" │ ❌ FAILED: {result}")
                results['errors'].append(f"{test_name}: {result}")

            results['test_results'].append(test_result)

        except Exception as e:
            # A crash in the harness itself is recorded as a failed test.
            print(f" │ 💥 EXCEPTION: {str(e)}")
            results['errors'].append(f"{test_name}: Exception - {str(e)}")
            results['test_results'].append({
                'name': test_name,
                'success': False,
                'result': f"Exception: {str(e)}",
                'execution_time': None,
                'expected_match': False
            })

    # Calculate statistics
    if execution_times:
        results['avg_execution_time'] = sum(execution_times) / len(execution_times)

    # Determine overall status
    if results['passed_tests'] == results['total_tests']:
        results['status'] = 'healthy'
        print(f" └─ ✅ ALL TESTS PASSED ({results['passed_tests']}/{results['total_tests']})")
    elif results['passed_tests'] > 0:
        results['status'] = 'partial'
        print(f" └─ ⚠️ PARTIAL SUCCESS ({results['passed_tests']}/{results['total_tests']})")
    else:
        results['status'] = 'failed'
        print(f" └─ ❌ ALL TESTS FAILED ({results['passed_tests']}/{results['total_tests']})")

    return results
258
+
259
+
260
def test_all_endpoints_comprehensive():
    """Run the full health-check suite against every configured endpoint.

    Fans out over SANDBOX_FUSION_ENDPOINTS concurrently, prints a formatted
    report, and returns the list of per-endpoint result dicts produced by
    test_single_endpoint.
    """

    # Define comprehensive test cases. Each 'code' string is executed inside
    # the remote sandbox; 'expected_output' (when set) is matched as a
    # substring of the captured stdout.
    test_cases = [
        {
            'name': 'Basic Math',
            'code': 'print(2 + 2)',
            'expected_output': '4'
        },
        {
            'name': 'String Operations',
            'code': 'print("Hello, " + "World!")',
            'expected_output': 'Hello, World!'
        },
        {
            'name': 'List Operations',
            'code': '''
numbers = [1, 2, 3, 4, 5]
print(f"Sum: {sum(numbers)}")
print(f"Length: {len(numbers)}")
''',
            'expected_output': 'Sum: 15'
        },
        {
            'name': 'Dictionary Operations',
            'code': '''
data = {"name": "Test", "value": 42}
print(f"Name: {data['name']}, Value: {data['value']}")
''',
            'expected_output': 'Name: Test, Value: 42'
        },
        {
            'name': 'Loop and Conditionals',
            'code': '''
result = []
for i in range(5):
    if i % 2 == 0:
        result.append(i)
print(f"Even numbers: {result}")
''',
            'expected_output': 'Even numbers: [0, 2, 4]'
        },
        {
            'name': 'Function Definition',
            'code': '''
def factorial(n):
    if n <= 1:
        return 1
    return n * factorial(n-1)

print(f"5! = {factorial(5)}")
''',
            'expected_output': '5! = 120'
        },
        {
            'name': 'Exception Handling',
            'code': '''
try:
    result = 10 / 2
    print(f"Division result: {result}")
except ZeroDivisionError:
    print("Cannot divide by zero")
''',
            'expected_output': 'Division result: 5.0'
        },
        {
            'name': 'Import Standard Library',
            'code': '''
import math
print(f"Pi: {math.pi:.2f}")
print(f"Square root of 16: {math.sqrt(16)}")
''',
            'expected_output': 'Pi: 3.14'
        },
        {
            'name': 'Complex Calculation',
            'code': '''
import math

# Calculate area of a circle
radius = 5
area = math.pi * radius ** 2
print(f"Area of circle with radius {radius}: {area:.2f}")

# Fibonacci sequence
def fibonacci(n):
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)

fib_10 = fibonacci(10)
print(f"10th Fibonacci number: {fib_10}")
''',
            'expected_output': 'Area of circle with radius 5: 78.54'
        },
        {
            'name': 'Error Handling Test',
            'code': '''
try:
    undefined_variable
except NameError as e:
    print("Caught NameError as expected")
print("Test passed")
''',
            'expected_output': 'Test passed'
        }
    ]

    print("🚀 Starting comprehensive endpoint testing...")
    print(f"📊 Testing {len(SANDBOX_FUSION_ENDPOINTS)} endpoints with {len(test_cases)} test cases each")
    print("=" * 80)

    # Test all endpoints concurrently
    all_results = []

    # Use ThreadPoolExecutor for concurrent testing (capped at 8 workers).
    with ThreadPoolExecutor(max_workers=min(len(SANDBOX_FUSION_ENDPOINTS), 8)) as executor:
        future_to_endpoint = {
            executor.submit(test_single_endpoint, endpoint, test_cases): endpoint
            for endpoint in SANDBOX_FUSION_ENDPOINTS
        }

        for future in as_completed(future_to_endpoint):
            endpoint = future_to_endpoint[future]
            try:
                result = future.result()
                all_results.append(result)
            except Exception as exc:
                # A worker crash is recorded as a fully-failed endpoint so the
                # summary below still covers every endpoint.
                print(f'❌ Endpoint {endpoint} generated an exception: {exc}')
                all_results.append({
                    'endpoint': endpoint,
                    'status': 'failed',
                    'passed_tests': 0,
                    'total_tests': len(test_cases),
                    'test_results': [],
                    'avg_execution_time': 0,
                    'errors': [f'Exception during testing: {exc}']
                })

    # Print comprehensive summary
    print("\n" + "=" * 80)
    print("📈 COMPREHENSIVE TEST RESULTS SUMMARY")
    print("=" * 80)

    healthy_endpoints = [r for r in all_results if r['status'] == 'healthy']
    partial_endpoints = [r for r in all_results if r['status'] == 'partial']
    failed_endpoints = [r for r in all_results if r['status'] == 'failed']

    print(f"✅ Healthy endpoints: {len(healthy_endpoints)}/{len(SANDBOX_FUSION_ENDPOINTS)}")
    print(f"⚠️ Partial endpoints: {len(partial_endpoints)}/{len(SANDBOX_FUSION_ENDPOINTS)}")
    print(f"❌ Failed endpoints: {len(failed_endpoints)}/{len(SANDBOX_FUSION_ENDPOINTS)}")

    if healthy_endpoints:
        print(f"\n🎉 HEALTHY ENDPOINTS ({len(healthy_endpoints)}):")
        for result in healthy_endpoints:
            print(f" ✅ {result['endpoint']} (avg: {result['avg_execution_time']:.2f}s)")

    if partial_endpoints:
        print(f"\n⚠️ PARTIAL ENDPOINTS ({len(partial_endpoints)}):")
        for result in partial_endpoints:
            print(f" ⚠️ {result['endpoint']} ({result['passed_tests']}/{result['total_tests']} tests passed)")

    if failed_endpoints:
        print(f"\n❌ FAILED ENDPOINTS ({len(failed_endpoints)}):")
        for result in failed_endpoints:
            print(f" ❌ {result['endpoint']} (0/{result['total_tests']} tests passed)")
            if result['errors']:
                for error in result['errors'][:3]:  # Show first 3 errors
                    print(f" └─ {error}")

    # Performance statistics (healthy endpoints only).
    healthy_times = [r['avg_execution_time'] for r in healthy_endpoints if r['avg_execution_time'] > 0]
    if healthy_times:
        print(f"\n⚡ PERFORMANCE STATISTICS:")
        print(f" Fastest endpoint: {min(healthy_times):.2f}s")
        print(f" Slowest endpoint: {max(healthy_times):.2f}s")
        print(f" Average response time: {sum(healthy_times)/len(healthy_times):.2f}s")

    # Overall health score: fraction of all (endpoint, test) pairs that passed.
    total_tests = len(SANDBOX_FUSION_ENDPOINTS) * len(test_cases)
    passed_tests = sum(r['passed_tests'] for r in all_results)
    health_score = (passed_tests / total_tests) * 100

    print(f"\n🏥 OVERALL SYSTEM HEALTH: {health_score:.1f}%")
    print(f" ({passed_tests}/{total_tests} total tests passed)")

    if health_score >= 90:
        print(" 🟢 Excellent - System is performing well")
    elif health_score >= 70:
        print(" 🟡 Good - Some endpoints may need attention")
    elif health_score >= 50:
        print(" 🟠 Fair - Multiple endpoints having issues")
    else:
        print(" 🔴 Poor - System requires immediate attention")

    return all_results
457
+
458
+
459
def _test():
    """Smoke-test PythonInterpreter.call with a trivial print statement."""
    from pprint import pprint

    sample_params = "print(1234)"
    interpreter = PythonInterpreter()
    output = interpreter.call(sample_params)
    pprint(output)
470
+
471
+
472
# Script entry point: first exercise every configured sandbox endpoint, then
# run the original single-call smoke test.
if __name__ == '__main__':
    # Run comprehensive endpoint testing
    test_all_endpoints_comprehensive()

    print("\n" + "=" * 80)
    print("🔧 Running original test...")
    print("=" * 80)

    # Also run the original test
    _test()
utils/__pycache__/date.cpython-310.pyc ADDED
Binary file (2.29 kB). View file
 
utils/date.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import random
from datetime import datetime, timedelta, timezone

# Default string layout for parsing/formatting dates: YYYY-MM-DD.
DEFAULT_DATE_FORMAT = "%Y-%m-%d"

# Weekday names indexed by `time.struct_time.tm_wday` (0 == Monday),
# keyed by language code.
wdays = {
    "en": [
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday",
    ],
    "zh": [
        "星期一", "星期二", "星期三", "星期四",
        "星期五", "星期六", "星期日",
    ],
}
26
+
27
+
28
def get_date_now() -> list[int]:
    """Return the current Beijing (UTC+8) date as [year, month, day, weekday].

    ``weekday`` follows ``time.struct_time.tm_wday`` semantics: 0 == Monday.
    """
    # Use a real UTC+8 tzinfo instead of shifting a UTC-labelled datetime:
    # the original added timedelta(hours=8) while keeping tzinfo=UTC, which
    # produced the right calendar fields but a mislabelled datetime object.
    beijing_time = datetime.now(timezone(timedelta(hours=8)))
    tt = beijing_time.timetuple()
    return [int(v) for v in (tt.tm_year, tt.tm_mon, tt.tm_mday, tt.tm_wday)]
33
+
34
+
35
def get_date_rand(before_days: int = 365, after_days: int = 365) -> list[int]:
    """Return a random UTC date within [-before_days, +after_days] of today.

    Output format matches get_date_now(): [year, month, day, weekday].
    """
    offset = timedelta(days=random.randint(-before_days, after_days))
    moment = datetime.now(timezone.utc) + offset
    tt = moment.timetuple()
    return [int(v) for v in (tt.tm_year, tt.tm_mon, tt.tm_mday, tt.tm_wday)]
41
+
42
+
43
def str2date(date_str, date_format: str = DEFAULT_DATE_FORMAT) -> list[int]:
    """Parse `date_str` using `date_format` into [year, month, day, weekday]."""
    parsed = datetime.strptime(date_str, date_format).timetuple()
    fields = (parsed.tm_year, parsed.tm_mon, parsed.tm_mday, parsed.tm_wday)
    return [int(v) for v in fields]
48
+
49
+
50
def date2str(
    date,
    sep="-",
    with_week: bool = False,
    language: str = "en",
    date_format: str = DEFAULT_DATE_FORMAT,
) -> str:
    """Format a date as YYYY-MM-DD (joined by `sep`), optionally with weekday.

    `date` may be a [year, month, day, weekday] list or a string parseable
    with `date_format` (delegated to str2date). When `with_week` is set, the
    weekday name for `language` is appended after a space.
    """
    if isinstance(date, str):
        date = str2date(date, date_format=date_format)

    pieces = [f"{date[0]:04d}", f"{date[1]:02d}", f"{date[2]:02d}"]
    result = sep.join(pieces)
    if with_week:
        result = f"{result} {wdays[language][date[3]]}"
    return result
66
+
67
+
68
if __name__ == "__main__":
    # Demo: print today's date with the weekday in each supported language.
    today = get_date_now()
    for lang in ("en", "zh"):
        print(date2str(today, with_week=True, language=lang))
utils/logs.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ import os
3
+ import sys
4
+ import logging
5
+
6
+
7
+ import platform
8
+
9
+
10
def check_macos():
    """Return True when running on macOS (platform name 'Darwin')."""
    return platform.system() == 'Darwin'
15
+
16
def setup_logger(level=None, logfile_name: str = 'search-agent'):
    """Create (or fetch) a logger writing both to the console and to a file.

    Args:
        level: Logging level; when None, DEBUG if the SEARCH_AGENT_DEBUG env
            var is truthy ('1'/'true'), otherwise INFO.
        logfile_name: Used both as the logger name and the log-file basename.

    Returns:
        The configured `logging.Logger` instance.
    """
    if level is None:
        debug_flag = os.getenv('SEARCH_AGENT_DEBUG', '0').strip().lower()
        level = logging.DEBUG if debug_flag in ('1', 'true') else logging.INFO

    _logger = logging.getLogger(logfile_name)
    _logger.setLevel(level)

    # BUGFIX: logging.getLogger caches by name, so calling setup_logger twice
    # with the same name previously attached a second stream/file handler
    # pair, duplicating every log line. Reuse the already-configured logger.
    if _logger.handlers:
        return _logger

    formatter = logging.Formatter('%(asctime)s - %(filename)s - %(lineno)d - %(levelname)s - %(message)s')

    handler = logging.StreamHandler()
    # Do not run handler.setLevel(level) so that users can change the level via logger.setLevel later
    handler.setFormatter(formatter)

    # Resolve the log directory: plain ./logs on macOS, AGENT_PATH/logs elsewhere.
    if check_macos():
        agent_path = 'logs'
    else:
        agent_path = os.getenv('AGENT_PATH', 'logs')
        if len(agent_path) > 0:
            agent_path = agent_path + '/logs'
        else:
            # BUGFIX: an empty AGENT_PATH would make makedirs('') raise.
            agent_path = 'logs'
    # exist_ok avoids the exists()/makedirs() race of the original.
    os.makedirs(agent_path, exist_ok=True)

    file_handler = logging.FileHandler(f'{agent_path}/{logfile_name}.log')
    file_handler.setFormatter(formatter)

    _logger.addHandler(handler)
    _logger.addHandler(file_handler)
    return _logger
47
+
48
+
49
# Module-wide logger instances shared across the application: general,
# access, and error streams, each with its own log file.
logger, access_logger, error_logger = (
    setup_logger(logfile_name=name)
    for name in ('search-agent', 'search-agent-access', 'search-agent-error')
)
visit.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import signal
4
+ import threading
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ from typing import List, Union
7
+ import requests
8
+ from qwen_agent.tools.base import BaseTool, register_tool
9
+ from prompt import EXTRACTOR_PROMPT
10
+ from openai import OpenAI
11
+ import random
12
+ from urllib.parse import urlparse, unquote
13
+ import time
14
+ import tiktoken
15
+
16
+ VISIT_SERVER_TIMEOUT = int(os.getenv("VISIT_SERVER_TIMEOUT", 200))
17
+ WEBCONTENT_MAXLENGTH = int(os.getenv("WEBCONTENT_MAXLENGTH", 150000))
18
+
19
+ JINA_API_KEYS = os.getenv("JINA_KEY", "")
20
+
21
+
22
# BUGFIX: the original decorated this *module-level* function with
# @staticmethod, which is only incidentally callable on Python >= 3.10 and
# raises TypeError on earlier versions; the decorator is removed.
def truncate_to_tokens(text: str, max_tokens: int = 95000) -> str:
    """Truncate `text` to at most `max_tokens` tokens (cl100k_base encoding).

    Returns the text unchanged when it already fits within the budget.
    """
    encoding = tiktoken.get_encoding("cl100k_base")

    tokens = encoding.encode(text)
    if len(tokens) <= max_tokens:
        return text

    return encoding.decode(tokens[:max_tokens])
32
+
33
# Prompt snippet describing the JSON the extractor LLM must return.
# BUGFIX: the original schema nested "summary" inside the "evidence"
# description object by mistake; Visit.readpage_jina reads raw["evidence"]
# and raw["summary"] as *top-level* keys, so all three properties are now
# declared as siblings (and the schema line is valid JSON).
OSS_JSON_FORMAT = """# Response Formats
## visit_content
{"properties":{"rational":{"type":"string","description":"Locate the **specific sections/data** directly related to the user's goal within the webpage content"},"evidence":{"type":"string","description":"Identify and extract the **most relevant information** from the content, never miss any important information, output the **full original context** of the content as far as possible, it can be more than three paragraphs."},"summary":{"type":"string","description":"Organize into a concise paragraph with logical flow, prioritizing clarity and judge the contribution of the information to the goal."}}}"""
36
+
37
+
38
@register_tool('visit', allow_overwrite=True)
class Visit(BaseTool):
    """Tool that fetches webpage(s) via the Jina Reader and distills the
    content relevant to a caller-supplied goal with a summarization LLM."""

    # The `description` tells the agent the functionality of this tool.
    name = 'visit'
    description = 'Visit webpage(s) and return the summary of the content.'
    # The `parameters` tell the agent what input parameters the tool has.
    parameters = {
        "type": "object",
        "properties": {
            "url": {
                "type": ["string", "array"],
                "items": {
                    "type": "string"
                },
                "minItems": 1,
                "description": "The URL(s) of the webpage(s) to visit. Can be a single URL or an array of URLs."
            },
            "goal": {
                "type": "string",
                "description": "The goal of the visit for webpage(s)."
            }
        },
        "required": ["url", "goal"]
    }

    def call(self, params: Union[str, dict], **kwargs) -> str:
        """Entry point: visit one URL or a list of URLs and return summaries.

        Args:
            params: JSON string (or, per the annotation, an already-decoded
                dict) containing 'url' and 'goal'.

        Returns:
            The joined per-URL summaries, or an error message on bad input.
        """
        try:
            # BUGFIX: the signature accepts Union[str, dict], but the original
            # always ran json.loads and therefore rejected dict input.
            if isinstance(params, str):
                params = json.loads(params)
            url = params["url"]
            goal = params["goal"]
        except Exception:  # was a bare `except:`; narrowed
            return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields"

        start_time = time.time()

        # Create log folder if it doesn't exist
        log_folder = "log"
        os.makedirs(log_folder, exist_ok=True)

        if isinstance(url, str):
            response = self.readpage_jina(url, goal)
        else:
            response = []
            assert isinstance(url, List)
            start_time = time.time()
            for u in url:
                # Hard budget of 15 minutes across the whole batch; any URL
                # reached after that gets a canned "could not access" entry.
                if time.time() - start_time > 900:
                    # BUGFIX: format with the *current* URL `u`; the original
                    # interpolated the entire list.
                    cur_response = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=u, goal=goal)
                    cur_response += "Evidence in page: \n" + "The provided webpage content could not be accessed. Please check the URL or file format." + "\n\n"
                    cur_response += "Summary: \n" + "The webpage content could not be processed, and therefore, no information is available." + "\n\n"
                else:
                    try:
                        cur_response = self.readpage_jina(u, goal)
                    except Exception as e:
                        cur_response = f"Error fetching {u}: {str(e)}"
                response.append(cur_response)
            response = "\n=======\n".join(response)

        print(f'Summary Length {len(response)}; Summary Content {response}')
        return response.strip()

    def call_server(self, msgs, max_retries=2):
        """Ask the summarization LLM to process `msgs`.

        Returns the model's text (trimmed to the outermost {...} span when it
        is not directly valid JSON), or '' after exhausting retries.
        """
        client = OpenAI(
            api_key=os.environ.get("API_KEY"),
            base_url=os.environ.get("API_BASE"),
        )
        model_name = "qwen/qwen3-30b-a3b-instruct-2507"
        for attempt in range(max_retries):
            try:
                chat_response = client.chat.completions.create(
                    model=model_name,
                    messages=msgs,
                    temperature=0.7
                )
                content = chat_response.choices[0].message.content
                if content:
                    try:
                        json.loads(content)
                    except Exception:  # was a bare `except:`
                        # Not valid JSON as-is: extract the outermost {...}.
                        left = content.find('{')
                        right = content.rfind('}')
                        if left != -1 and right != -1 and left <= right:
                            content = content[left:right + 1]
                    return content
                # Empty content: fall through and retry on the next attempt.
            except Exception as e:
                print(e)
                if attempt == (max_retries - 1):
                    return ""
        # BUGFIX: the original could implicitly return None (e.g. when every
        # attempt produced empty content); callers run len() on the result.
        return ""

    def jina_readpage(self, url: str) -> str:
        """
        Read webpage content using Jina service.

        Args:
            url: The URL to read

        Returns:
            str: The webpage content or error message
        """
        max_retries = 3
        timeout = 50

        headers = {
            "Authorization": f"Bearer {JINA_API_KEYS}",
        }
        for attempt in range(max_retries):
            print(headers)
            try:
                response = requests.get(
                    f"https://r.jina.ai/{url}",
                    headers=headers,
                    timeout=timeout
                )
                if response.status_code == 200:
                    return response.text
                print(response.text)
                raise ValueError("jina readpage error")
            except Exception:  # was a bare `except:`; retry after a pause
                time.sleep(0.5)
                if attempt == max_retries - 1:
                    return "[visit] Failed to read page."

        return "[visit] Failed to read page."

    def html_readpage_jina(self, url: str) -> str:
        """Retry jina_readpage until it yields real content (up to 8 tries)."""
        max_attempts = 8
        for attempt in range(max_attempts):
            content = self.jina_readpage(url)
            service = "jina"
            print(service)
            if content and not content.startswith("[visit] Failed to read page.") and content != "[visit] Empty content." and not content.startswith("[document_parser]"):
                return content
        return "[visit] Failed to read page."

    def readpage_jina(self, url: str, goal: str) -> str:
        """
        Fetch a webpage via Jina and distill the parts relevant to `goal`.

        Args:
            url: The URL to read
            goal: The goal/purpose of reading the page

        Returns:
            str: A formatted "useful information" report, or an error message.
        """
        summary_page_func = self.call_server
        max_retries = int(os.getenv('VISIT_SERVER_MAX_RETRIES', 1))

        content = self.html_readpage_jina(url)

        # Only attempt summarization when the fetch produced real content.
        if content and not content.startswith("[visit] Failed to read page.") and content != "[visit] Empty content." and not content.startswith("[document_parser]"):
            content = truncate_to_tokens(content, max_tokens=95000)
            messages = [{"role": "user", "content": EXTRACTOR_PROMPT.format(webpage_content=content, goal=goal)}]
            raw = summary_page_func(messages, max_retries=max_retries)

            # Re-query with progressively truncated content while the model's
            # answer is too short to be useful.
            summary_retries = 3
            while len(raw) < 10 and summary_retries >= 0:
                truncate_length = int(0.7 * len(content)) if summary_retries > 0 else 25000
                status_msg = (
                    f"[visit] Summary url[{url}] "
                    f"attempt {3 - summary_retries + 1}/3, "
                    f"content length: {len(content)}, "
                    f"truncating to {truncate_length} chars"
                ) if summary_retries > 0 else (
                    f"[visit] Summary url[{url}] failed after 3 attempts, "
                    f"final truncation to 25000 chars"
                )
                print(status_msg)
                content = content[:truncate_length]
                extraction_prompt = EXTRACTOR_PROMPT.format(
                    webpage_content=content,
                    goal=goal
                )
                messages = [{"role": "user", "content": extraction_prompt}]
                raw = summary_page_func(messages, max_retries=max_retries)
                summary_retries -= 1

            # Parse the model output as JSON, re-querying on failure.
            # BUGFIX: start the counter at 0 — the original assigned 0 and
            # then immediately overwrote it with 2, allowing only a single
            # re-request instead of the intended three parse attempts.
            parse_retry_times = 0
            if isinstance(raw, str):
                raw = raw.replace("```json", "").replace("```", "").strip()
            while parse_retry_times < 3:
                try:
                    raw = json.loads(raw)
                    break
                except Exception:  # was a bare `except:`
                    raw = summary_page_func(messages, max_retries=max_retries)
                    parse_retry_times += 1

            if parse_retry_times >= 3:
                useful_information = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=url, goal=goal)
                useful_information += "Evidence in page: \n" + "The provided webpage content could not be accessed. Please check the URL or file format." + "\n\n"
                useful_information += "Summary: \n" + "The webpage content could not be processed, and therefore, no information is available." + "\n\n"
            else:
                useful_information = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=url, goal=goal)
                useful_information += "Evidence in page: \n" + str(raw["evidence"]) + "\n\n"
                useful_information += "Summary: \n" + str(raw["summary"]) + "\n\n"

            if len(useful_information) < 10 and summary_retries < 0:
                print("[visit] Could not generate valid summary after maximum retries")
                useful_information = "[visit] Failed to read page"

            return useful_information

        # If no valid content was obtained after all retries
        else:
            useful_information = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=url, goal=goal)
            useful_information += "Evidence in page: \n" + "The provided webpage content could not be accessed. Please check the URL or file format." + "\n\n"
            useful_information += "Summary: \n" + "The webpage content could not be processed, and therefore, no information is available." + "\n\n"
            return useful_information
+ return useful_information
256
+
257
+
258
if __name__ == "__main__":
    # Manual smoke test: visit a real page and print the distilled summary.
    tool = Visit()
    demo_request = '{"url": ["https://2025.aclweb.org/"], "goal": "Find the important dates page and locate the Industry Track paper submission deadline"}'
    print(tool.call(demo_request))