Add dynamic duration calculation for ZeroGPU acceleration
Browse files
app.py
CHANGED
|
@@ -344,7 +344,13 @@ def format_conversation(history, system_prompt, tokenizer):
|
|
| 344 |
prompt += "Assistant: "
|
| 345 |
return prompt
|
| 346 |
|
| 347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
def chat_response(user_msg, chat_history, system_prompt,
|
| 349 |
enable_search, max_results, max_chars,
|
| 350 |
model_name, max_tokens, temperature,
|
|
|
|
| 344 |
prompt += "Assistant: "
|
| 345 |
return prompt
|
| 346 |
|
| 347 |
+
def get_duration(user_msg, chat_history, system_prompt, enable_search, max_results, max_chars, model_name, max_tokens, temperature, top_k, top_p, repeat_penalty, search_timeout):
    """Estimate the GPU time (in seconds) to reserve for one chat_response call.

    The signature mirrors chat_response so spaces.GPU(duration=...) can
    forward the identical argument list; only enable_search and max_tokens
    actually influence the estimate — the rest are accepted and ignored.

    Returns:
        float: estimated seconds of GPU time to request.
    """
    # Fixed overhead (model load / warm-up) plus ~0.1 s per generated token.
    estimate = 60 + 0.1 * max_tokens
    # Web search, when enabled, gets a flat 30 s allowance on top.
    if enable_search:
        estimate += 30
    return estimate
|
| 352 |
+
|
| 353 |
+
@spaces.GPU(duration=get_duration)
|
| 354 |
def chat_response(user_msg, chat_history, system_prompt,
|
| 355 |
enable_search, max_results, max_chars,
|
| 356 |
model_name, max_tokens, temperature,
|