Luigi committed on
Commit
6073cc2
·
1 Parent(s): d3726c6

Add dynamic duration calculation for ZeroGPU acceleration

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -344,7 +344,13 @@ def format_conversation(history, system_prompt, tokenizer):
344
  prompt += "Assistant: "
345
  return prompt
346
 
347
- @spaces.GPU(duration=120)
 
 
 
 
 
 
348
  def chat_response(user_msg, chat_history, system_prompt,
349
  enable_search, max_results, max_chars,
350
  model_name, max_tokens, temperature,
 
344
  prompt += "Assistant: "
345
  return prompt
346
 
347
def get_duration(user_msg, chat_history, system_prompt, enable_search, max_results, max_chars, model_name, max_tokens, temperature, top_k, top_p, repeat_penalty, search_timeout):
    """Estimate how many seconds of GPU time to request for one chat call.

    Used as the ``duration`` callable of ``@spaces.GPU`` on ``chat_response``.
    The parameter list presumably mirrors ``chat_response`` so the decorator
    can forward the same arguments -- confirm against the full signature.
    Only ``max_tokens`` and ``enable_search`` influence the estimate; the
    remaining parameters are accepted and ignored.

    Args:
        enable_search: When truthy, a fixed allowance for web search is added.
        max_tokens: Upper bound on generated tokens. ``None`` is treated as 0
            so that a missing value cannot crash the duration estimate.

    Returns:
        The estimated duration in seconds (base + per-token + search).
    """
    base_duration = 60  # fixed allowance added to every request
    seconds_per_token = 0.1  # Estimate 0.1 seconds per token
    search_duration = 30 if enable_search else 0
    # Guard against a missing max_tokens; numeric values are used unchanged.
    token_duration = (max_tokens or 0) * seconds_per_token
    return base_duration + token_duration + search_duration
352
+
353
+ @spaces.GPU(duration=get_duration)
354
  def chat_response(user_msg, chat_history, system_prompt,
355
  enable_search, max_results, max_chars,
356
  model_name, max_tokens, temperature,