bhardwaj08sarthak committed on
Commit
6837d1e
·
verified ·
1 Parent(s): 6adb80e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -156,7 +156,13 @@ def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temper
156
  agent = CodeAgent(model=model, tools=[classify_and_score])
157
  agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens} # attach for reference
158
  return agent
159
-
 
 
 
 
 
 
160
 
161
  # ------------------------ Agent task template -----------------------------
162
  TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".
@@ -243,7 +249,7 @@ with gr.Blocks() as demo:
243
 
244
  with gr.Accordion("API Settings", open=False):
245
  hf_token = gr.Textbox(label="Hugging Face Token (required if the endpoint needs auth)", type="password")
246
- model_id = gr.Textbox(value="meta-llama/Llama-4-Scout-17B-16E-Instruct", label="Model ID")
247
  provider = gr.Textbox(value="novita", label="Provider")
248
  timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")
249
 
@@ -285,6 +291,11 @@ with gr.Blocks() as demo:
285
  outputs=[final_json, transcript]
286
  )
287
 
288
- if __name__ == "__main__":
 
 
 
 
 
289
  demo.launch()
290
 
 
156
  agent = CodeAgent(model=model, tools=[classify_and_score])
157
  agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens} # attach for reference
158
  return agent
159
@spaces.GPU(duration=20)
def load_model():
    """Build and return the local ``TransformersModel`` for Apertus-70B-Instruct.

    The model is created with ``device_map="auto"``, so device placement is
    requested at construction time rather than by moving the object afterwards.

    Returns:
        The constructed ``TransformersModel`` instance.
    """
    # No trailing ``.to("cuda")``: placement is already requested through
    # ``device_map="auto"``.
    # NOTE(review): the ``TransformersModel`` wrapper does not appear to expose
    # a ``.to()`` method, so the chained call would raise AttributeError --
    # confirm against the installed smolagents version.
    transformers_model = TransformersModel(
        model_id='swiss-ai/Apertus-70B-Instruct-2509',
        device_map="auto",
    )
    return transformers_model
166
 
167
  # ------------------------ Agent task template -----------------------------
168
  TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".
 
249
 
250
  with gr.Accordion("API Settings", open=False):
251
  hf_token = gr.Textbox(label="Hugging Face Token (required if the endpoint needs auth)", type="password")
252
+ model_id = gr.Textbox(value=transformers_model, label="Model ID")
253
  provider = gr.Textbox(value="novita", label="Provider")
254
  timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")
255
 
 
291
  outputs=[final_json, transcript]
292
  )
293
 
294
# Start the app when run as a script, or when the SYSTEM environment variable
# is set to "spaces".
if __name__ == "__main__" or os.environ.get("SYSTEM") == "spaces":
    # Warm-up call made before the UI starts; its return value is discarded.
    try:
        load_model()
    except Exception as warmup_error:
        # Best-effort: a failed warm-up is reported but must not stop the launch.
        print("Warmup failed:", warmup_error)
    demo.launch()
301