Commit d0252db committed by tech-envision
Parent: 4477cf4

Add asynchronous message streaming

Files changed:
- README.md +2 -1
- bot/discord_bot.py +2 -3
- run.py +2 -2
- src/chat.py +125 -4
README.md
CHANGED

````diff
@@ -31,7 +31,8 @@ Uploaded files are stored under the `uploads` directory and mounted inside the VM
 ```python
 async with ChatSession() as chat:
     path_in_vm = chat.upload_document("path/to/file.pdf")
-    answer = await chat.chat(f"Summarize {path_in_vm}")
+    async for part in chat.chat_stream(f"Summarize {path_in_vm}"):
+        print(part)
 ```
 
 When using the Discord bot, attach one or more text files to a message to
````
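The README snippet above runs inside an event loop; a minimal way to drive it from a script, as a sketch assuming the `src.chat` import path used elsewhere in this repo:

```python
import asyncio

from src.chat import ChatSession  # import path assumed from the repo layout


async def main() -> None:
    async with ChatSession() as chat:
        path_in_vm = chat.upload_document("path/to/file.pdf")
        # chat_stream yields each assistant reply as soon as it is ready,
        # instead of blocking until the final answer the way chat() does.
        async for part in chat.chat_stream(f"Summarize {path_in_vm}"):
            print(part)


if __name__ == "__main__":
    asyncio.run(main())
```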
bot/discord_bot.py
CHANGED

```diff
@@ -71,12 +71,11 @@ async def on_message(message: discord.Message) -> None:
 
     if message.content.strip():
         try:
-            reply = await chat.chat(message.content)
+            async for part in chat.chat_stream(message.content):
+                await message.reply(part, mention_author=False)
         except Exception as exc:  # pragma: no cover - runtime errors
             _LOG.error("Failed to process message: %s", exc)
             await message.reply(f"Error: {exc}", mention_author=False)
-        else:
-            await message.reply(reply, mention_author=False)
 
 
 def main() -> None:
```
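Read as a whole, the new handler replies once per streamed part instead of once at the end. A condensed, self-contained sketch of that shape, assuming discord.py 2.x and a per-message `ChatSession` (the real bot's client wiring and session lifetime may differ):

```python
import logging

import discord

from src.chat import ChatSession  # import path assumed from the repo layout

_LOG = logging.getLogger(__name__)

intents = discord.Intents.default()
intents.message_content = True  # needed to read message text in discord.py 2.x
client = discord.Client(intents=intents)


@client.event
async def on_message(message: discord.Message) -> None:
    if message.author == client.user or not message.content.strip():
        return
    async with ChatSession() as chat:
        try:
            # Each streamed part becomes its own reply, so users see
            # intermediate assistant output while tool calls still run.
            async for part in chat.chat_stream(message.content):
                await message.reply(part, mention_author=False)
        except Exception as exc:
            _LOG.error("Failed to process message: %s", exc)
            await message.reply(f"Error: {exc}", mention_author=False)
```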
run.py
CHANGED

```diff
@@ -10,8 +10,8 @@ async def _main() -> None:
         doc_path = chat.upload_document("test.txt")
         # print(f"Document uploaded to VM at: {doc_path}")
         # answer = await chat.chat(f"Remove all contents of test.txt and add the text 'Hello, World!' to it.")
-        answer = await chat.chat("What is in /data directory?")
-        print("\n>>>", answer)
+        async for resp in chat.chat_stream("What is in /data directory?"):
+            print("\n>>>", resp)
 
 
 if __name__ == "__main__":
```
src/chat.py
CHANGED

```diff
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import List
+from typing import List, AsyncIterator
 import json
 import asyncio
 import shutil
@@ -49,6 +49,9 @@ class ChatSession:
         )
         self._vm = None
         self._messages: List[Msg] = self._load_history()
+        self._lock = asyncio.Lock()
+        self._state = "idle"
+        self._tool_task: asyncio.Task | None = None
 
     async def __aenter__(self) -> "ChatSession":
         self._vm = VMRegistry.acquire(self._user.username)
@@ -131,13 +134,13 @@ class ChatSession:
             options={"num_ctx": NUM_CTX},
         )
 
-    async def _handle_tool_calls(
+    async def _handle_tool_calls_stream(
         self,
         messages: List[Msg],
         response: ChatResponse,
         conversation: Conversation,
         depth: int = 0,
-    ) -> ChatResponse:
+    ) -> AsyncIterator[ChatResponse]:
         while depth < MAX_TOOL_CALL_DEPTH and response.message.tool_calls:
             for call in response.message.tool_calls:
                 if call.function.name != "execute_terminal":
@@ -162,6 +165,10 @@ class ChatSession:
             )
             follow_task = asyncio.create_task(self.ask(messages, think=True))
 
+            async with self._lock:
+                self._state = "awaiting_tool"
+                self._tool_task = exec_task
+
             done, _ = await asyncio.wait(
                 {exec_task, follow_task},
                 return_when=asyncio.FIRST_COMPLETED,
@@ -186,13 +193,19 @@ class ChatSession:
                     role="tool",
                     content=result,
                 )
+                async with self._lock:
+                    self._state = "generating"
+                    self._tool_task = None
                 nxt = await self.ask(messages, think=True)
                 self._store_assistant_message(conversation, nxt.message)
+                messages.append(nxt.message.model_dump())
                 response = nxt
+                yield nxt
             else:
                 followup = await follow_task
                 self._store_assistant_message(conversation, followup.message)
                 messages.append(followup.message.model_dump())
+                yield followup
                 result = await exec_task
                 messages.append(
                     {
@@ -206,13 +219,32 @@ class ChatSession:
                     role="tool",
                     content=result,
                 )
+                async with self._lock:
+                    self._state = "generating"
+                    self._tool_task = None
                 nxt = await self.ask(messages, think=True)
                 self._store_assistant_message(conversation, nxt.message)
+                messages.append(nxt.message.model_dump())
                 response = nxt
+                yield nxt
 
             depth += 1
 
-        return response
+        async with self._lock:
+            self._state = "idle"
+
+    async def _handle_tool_calls(
+        self,
+        messages: List[Msg],
+        response: ChatResponse,
+        conversation: Conversation,
+        depth: int = 0,
+    ) -> ChatResponse:
+        final = response
+        gen = self._handle_tool_calls_stream(messages, response, conversation, depth)
+        async for final in gen:
+            pass
+        return final
 
     async def chat(self, prompt: str) -> str:
         DBMessage.create(conversation=self._conversation, role="user", content=prompt)
@@ -228,3 +260,92 @@ class ChatSession:
             self._messages, response, self._conversation
         )
         return final_resp.message.content
+
+    async def chat_stream(self, prompt: str) -> AsyncIterator[str]:
+        async with self._lock:
+            if self._state == "generating":
+                _LOG.info("Ignoring message while generating")
+                return
+            if self._state == "awaiting_tool" and self._tool_task:
+                async for part in self._chat_during_tool(prompt):
+                    yield part
+                return
+            self._state = "generating"
+
+        DBMessage.create(conversation=self._conversation, role="user", content=prompt)
+        self._messages.append({"role": "user", "content": prompt})
+
+        response = await self.ask(self._messages)
+        self._messages.append(response.message.model_dump())
+        self._store_assistant_message(self._conversation, response.message)
+
+        _LOG.info("Thinking:\n%s", response.message.thinking or "<no thinking trace>")
+
+        async for resp in self._handle_tool_calls_stream(
+            self._messages, response, self._conversation
+        ):
+            yield resp.message.content
+
+    async def _chat_during_tool(self, prompt: str) -> AsyncIterator[str]:
+        DBMessage.create(conversation=self._conversation, role="user", content=prompt)
+        self._messages.append({"role": "user", "content": prompt})
+
+        user_task = asyncio.create_task(self.ask(self._messages))
+        exec_task = self._tool_task
+
+        done, _ = await asyncio.wait(
+            {exec_task, user_task},
+            return_when=asyncio.FIRST_COMPLETED,
+        )
+
+        if exec_task in done:
+            user_task.cancel()
+            try:
+                await user_task
+            except asyncio.CancelledError:
+                pass
+            result = await exec_task
+            self._tool_task = None
+            self._messages.append(
+                {"role": "tool", "name": "execute_terminal", "content": result}
+            )
+            DBMessage.create(
+                conversation=self._conversation, role="tool", content=result
+            )
+            async with self._lock:
+                self._state = "generating"
+            nxt = await self.ask(self._messages, think=True)
+            self._store_assistant_message(self._conversation, nxt.message)
+            self._messages.append(nxt.message.model_dump())
+            yield nxt.message.content
+            async for part in self._handle_tool_calls_stream(
+                self._messages, nxt, self._conversation
+            ):
+                yield part.message.content
+        else:
+            resp = await user_task
+            self._store_assistant_message(self._conversation, resp.message)
+            self._messages.append(resp.message.model_dump())
+            async with self._lock:
+                self._state = "awaiting_tool"
+            yield resp.message.content
+            result = await exec_task
+            self._tool_task = None
+            self._messages.append(
+                {"role": "tool", "name": "execute_terminal", "content": result}
+            )
+            DBMessage.create(
+                conversation=self._conversation, role="tool", content=result
+            )
+            async with self._lock:
+                self._state = "generating"
+            nxt = await self.ask(self._messages, think=True)
+            self._store_assistant_message(self._conversation, nxt.message)
+            self._messages.append(nxt.message.model_dump())
+            yield nxt.message.content
+            async for part in self._handle_tool_calls_stream(
+                self._messages, nxt, self._conversation
+            ):
+                yield part.message.content
+
```
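Both `_handle_tool_calls_stream` and `_chat_during_tool` hinge on the same pattern: run the terminal tool and the next model turn as concurrent tasks, then branch on whichever finishes first. A self-contained sketch of that race, with `asyncio.sleep` standing in for the tool call and the model request (both stand-ins are assumptions for illustration):

```python
import asyncio


async def run_tool() -> str:
    await asyncio.sleep(2)  # stand-in for the execute_terminal call
    return "tool output"


async def ask_model() -> str:
    await asyncio.sleep(1)  # stand-in for the follow-up model turn
    return "interim assistant text"


async def race() -> None:
    exec_task = asyncio.create_task(run_tool())
    follow_task = asyncio.create_task(ask_model())

    done, _ = await asyncio.wait(
        {exec_task, follow_task},
        return_when=asyncio.FIRST_COMPLETED,
    )

    if exec_task in done:
        # Tool finished first: the interim turn is stale, so cancel it
        # and continue from the tool result (mirrors the first branch).
        follow_task.cancel()
        try:
            await follow_task
        except asyncio.CancelledError:
            pass
        print("tool first:", exec_task.result())
    else:
        # Model finished first: surface the interim message right away,
        # then still await the tool result before the next turn.
        print("interim:", follow_task.result())
        print("tool:", await exec_task)


if __name__ == "__main__":
    asyncio.run(race())
```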