Apply history flattening before it goes into the prompt
Browse files
app.py
CHANGED
|
@@ -109,6 +109,30 @@ def retrieve_context(query, max_results=6, max_chars_per_result=600):
|
|
| 109 |
except Exception:
|
| 110 |
return ""
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# ------------------------------
|
| 113 |
# Chat Response Generation with ZeroGPU using Pipeline
|
| 114 |
# ------------------------------
|
|
@@ -120,7 +144,8 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
|
|
| 120 |
|
| 121 |
- Appends the user's message to the conversation history.
|
| 122 |
- Optionally retrieves web search context and inserts it as an additional system message.
|
| 123 |
-
-
|
|
|
|
| 124 |
- Returns the updated conversation history and a debug message.
|
| 125 |
"""
|
| 126 |
cancel_event.clear()
|
|
@@ -131,7 +156,6 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
|
|
| 131 |
|
| 132 |
# Retrieve web search context if enabled.
|
| 133 |
debug_message = ""
|
| 134 |
-
retrieved_context = ""
|
| 135 |
if enable_search:
|
| 136 |
debug_message = "Initiating web search..."
|
| 137 |
yield conversation, debug_message
|
|
@@ -155,23 +179,26 @@ def chat_response(user_message, chat_history, system_prompt, enable_search,
|
|
| 155 |
conversation.append({"role": "assistant", "content": ""})
|
| 156 |
|
| 157 |
try:
|
|
|
|
|
|
|
|
|
|
| 158 |
# Load the pipeline (cached) for the selected model.
|
| 159 |
pipe = load_pipeline(model_name)
|
| 160 |
|
| 161 |
-
#
|
| 162 |
response = pipe(
|
| 163 |
-
|
| 164 |
max_new_tokens=max_tokens,
|
| 165 |
temperature=temperature,
|
| 166 |
top_k=top_k,
|
| 167 |
top_p=top_p,
|
| 168 |
repetition_penalty=repeat_penalty,
|
| 169 |
)
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
|
| 176 |
# Update the conversation history.
|
| 177 |
conversation[-1]["content"] = assistant_text
|
|
|
|
| 109 |
except Exception:
|
| 110 |
return ""
|
| 111 |
|
| 112 |
+
# ----------------------------------------------------------------------------
|
| 113 |
+
# NEW HELPER FUNCTION: Format Conversation History into a Clean Prompt
|
| 114 |
+
# ----------------------------------------------------------------------------
|
| 115 |
+
def format_conversation(conversation, system_prompt):
    """Flatten a chat history into a single plain-text prompt string.

    Args:
        conversation: List of message dicts, each with a 'role' key
            ('user', 'assistant', or 'system') and a 'content' key.
            Messages with any other role are silently skipped, matching
            the original behavior.
        system_prompt: Instruction text prepended to the prompt. If it
            is empty (or whitespace-only), no leading blank line is
            emitted.

    Returns:
        A single prompt string with "User: " / "Assistant: " labelled
        turns, always ending with the cue "Assistant: " so the model
        continues as the assistant.

    Notes:
        Empty-content messages are skipped. This fixes the case where
        the caller appends a blank assistant placeholder
        ({"role": "assistant", "content": ""}) before generation: the
        original code emitted a dangling "Assistant: \n" line and then
        suppressed the final cue, so the prompt ended with a newline
        after "Assistant:" instead of with the cue itself.
    """
    # Start with the system prompt only when it is non-empty, so an
    # empty prompt does not produce a spurious leading newline.
    system_text = system_prompt.strip() if system_prompt else ""
    prompt = system_text + "\n" if system_text else ""

    for msg in conversation:
        content = msg.get("content", "").strip()
        # Skip blank placeholder messages (e.g. the empty assistant
        # turn appended just before generation).
        if not content:
            continue
        role = msg.get("role")
        if role == "user":
            prompt += "User: " + content + "\n"
        elif role == "assistant":
            prompt += "Assistant: " + content + "\n"
        elif role == "system":
            prompt += content + "\n"

    # Always end with the assistant cue so the model's continuation is
    # the new reply; callers slice it off with len(prompt).
    if not prompt.endswith("Assistant: "):
        prompt += "Assistant: "
    return prompt
|
| 135 |
+
|
| 136 |
# ------------------------------
|
| 137 |
# Chat Response Generation with ZeroGPU using Pipeline
|
| 138 |
# ------------------------------
|
|
|
|
| 144 |
|
| 145 |
- Appends the user's message to the conversation history.
|
| 146 |
- Optionally retrieves web search context and inserts it as an additional system message.
|
| 147 |
+
- Converts the conversation into a formatted prompt to avoid leaking role labels.
|
| 148 |
+
- Uses the cached pipeline to generate a response.
|
| 149 |
- Returns the updated conversation history and a debug message.
|
| 150 |
"""
|
| 151 |
cancel_event.clear()
|
|
|
|
| 156 |
|
| 157 |
# Retrieve web search context if enabled.
|
| 158 |
debug_message = ""
|
|
|
|
| 159 |
if enable_search:
|
| 160 |
debug_message = "Initiating web search..."
|
| 161 |
yield conversation, debug_message
|
|
|
|
| 179 |
conversation.append({"role": "assistant", "content": ""})
|
| 180 |
|
| 181 |
try:
|
| 182 |
+
# Format the entire conversation into a single prompt (this fixes both issues).
|
| 183 |
+
prompt_text = format_conversation(conversation, system_prompt)
|
| 184 |
+
|
| 185 |
# Load the pipeline (cached) for the selected model.
|
| 186 |
pipe = load_pipeline(model_name)
|
| 187 |
|
| 188 |
+
# Generate a response using the formatted prompt.
|
| 189 |
response = pipe(
|
| 190 |
+
prompt_text,
|
| 191 |
max_new_tokens=max_tokens,
|
| 192 |
temperature=temperature,
|
| 193 |
top_k=top_k,
|
| 194 |
top_p=top_p,
|
| 195 |
repetition_penalty=repeat_penalty,
|
| 196 |
)
|
| 197 |
+
|
| 198 |
+
# Extract the generated text.
|
| 199 |
+
generated = response[0]["generated_text"]
|
| 200 |
+
# Remove the prompt portion so we only keep the new assistant reply.
|
| 201 |
+
assistant_text = generated[len(prompt_text):].strip()
|
| 202 |
|
| 203 |
# Update the conversation history.
|
| 204 |
conversation[-1]["content"] = assistant_text
|