feat(chat): enable streaming for chat responses
- [feat] Refactor handle_chat_submit to be a generator (chat_handler.py:handle_chat_submit())
- [feat] Implement iteration and yielding of partial responses (chat_handler.py:129-131)
- [refactor] Remove final response accumulation logic (chat_handler.py:124-127)
- [docs] Update docstring and comment for streaming (chat_handler.py:105,114)
- [feat] Add stream=True to chat_submit.click() (ui_components.py:75)
- [feat] Add stream=True to chat_input.submit() (ui_components.py:81)
- [docs] Update comment for chat event connection (ui_components.py:69)
- chat_handler.py +8 -9
- ui_components.py +5 -3
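
The bullets above all serve one pattern: the submit handler stops returning a single final (history, "") pair and instead becomes a generator that yields a fresh history as each partial response arrives. A minimal self-contained sketch of that pattern (token_source is a hypothetical stand-in for the model's streaming API, not code from this Space):

def token_source(message):
    # Hypothetical streaming source: yields one token at a time.
    for token in f"Echo: {message}".split():
        yield token + " "

def streaming_submit(message, history):
    # Generator handler: each yield is a (chatbot_history, cleared_input) pair.
    history = history + [{"role": "user", "content": message}]
    partial = ""
    for token in token_source(message):
        partial += token
        # Re-yield the whole history with the growing assistant turn appended.
        yield history + [{"role": "assistant", "content": partial}], ""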
chat_handler.py
CHANGED
@@ -102,15 +102,16 @@ def chat_respond(
 
 def handle_chat_submit(message, history, system_msg, model_name, max_tokens, temperature, top_p):
     """
-    Handle chat submission and manage conversation history.
+    Handle chat submission and manage conversation history with streaming.
     """
     if not message.strip():
-        return history, ""
+        yield history, ""
+        return
 
     # Add user message to history
     history = history + [{"role": "user", "content": message}]
 
-    # Generate response
+    # Generate response with streaming
     response_generator = chat_respond(
         message,
         history[:-1],  # Don't include the current message in history for the function
@@ -121,12 +122,10 @@ def handle_chat_submit(message, history, system_msg, model_name, max_tokens, tem
         top_p
     )
 
-    #
+    # Stream the assistant response token by token
    assistant_response = ""
    for partial_response in response_generator:
        assistant_response = partial_response
-
-    # Add assistant response to history
-    history = history + [{"role": "assistant", "content": assistant_response}]
-
-    return history, ""
+        # Update history with the current partial response and yield it
+        current_history = history + [{"role": "assistant", "content": assistant_response}]
+        yield current_history, ""
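Note the contract implied by the loop: assistant_response = partial_response replaces the value wholesale, so chat_respond must yield the accumulated text so far, not per-token deltas. chat_respond itself is outside this diff; a compatible sketch under that assumption (fake_model_stream is hypothetical):

def fake_model_stream(message):
    # Hypothetical stand-in for the underlying model's token stream.
    for word in ("Sure,", " here", " is", " a", " streamed", " reply."):
        yield word

def chat_respond(message, history, system_msg, model_name, max_tokens, temperature, top_p):
    # Sketch only: yields the cumulative response after each new token,
    # which is what handle_chat_submit's loop expects.
    response = ""
    for delta in fake_model_stream(message):
        response += delta
        yield response
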
ui_components.py
CHANGED
@@ -66,19 +66,21 @@ def create_chat_tab(handle_chat_submit_fn):
     # Configuration tips below the chat
     create_chat_tips()
 
-    # Connect chat events
+    # Connect chat events with streaming enabled
     chat_submit.click(
         fn=handle_chat_submit_fn,
         inputs=[chat_input, chatbot_display, chat_system_message, chat_model_name,
                 chat_max_tokens, chat_temperature, chat_top_p],
-        outputs=[chatbot_display, chat_input]
+        outputs=[chatbot_display, chat_input],
+        stream=True
     )
 
     chat_input.submit(
         fn=handle_chat_submit_fn,
         inputs=[chat_input, chatbot_display, chat_system_message, chat_model_name,
                 chat_max_tokens, chat_temperature, chat_top_p],
-        outputs=[chatbot_display, chat_input]
+        outputs=[chatbot_display, chat_input],
+        stream=True
     )
 
 
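
For context, the generator-driven streaming can be exercised end to end in a stock Gradio app: in current Gradio releases the yield loop alone drives the streaming, so treat the stream=True kwarg above as specific to whatever Gradio version this Space pins rather than a documented part of the classic event-listener signature. A minimal runnable demo (all names here are illustrative, not this Space's code):

import gradio as gr

def respond(message, history):
    # Generator handler: each yield streams an update to the Chatbot.
    history = history + [{"role": "user", "content": message}]
    partial = ""
    for word in ("This", " reply", " streams", " word", " by", " word."):
        partial += word
        yield history + [{"role": "assistant", "content": partial}], ""

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    box = gr.Textbox(placeholder="Say something...")
    box.submit(respond, inputs=[box, chatbot], outputs=[chatbot, box])

demo.launch()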