Spaces:

bhardwaj08sarthak
/

STEM-Question-Generator

Running

bhardwaj08sarthak committed on Sep 15

Commit

6837d1e

verified ·

1 Parent(s): 6adb80e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -156,7 +156,13 @@ def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temper
     agent = CodeAgent(model=model, tools=[classify_and_score])
     agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}  # attach for reference
     return agent
 # ------------------------ Agent task template -----------------------------
 TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".
@@ -243,7 +249,7 @@ with gr.Blocks() as demo:
     with gr.Accordion("API Settings", open=False):
         hf_token = gr.Textbox(label="Hugging Face Token (required if the endpoint needs auth)", type="password")
-        model_id = gr.Textbox(value="meta-llama/Llama-4-Scout-17B-16E-Instruct", label="Model ID")
         provider = gr.Textbox(value="novita", label="Provider")
         timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")
@@ -285,6 +291,11 @@ with gr.Blocks() as demo:
         outputs=[final_json, transcript]
     )
-if __name__ == "__main__":
     demo.launch()

     agent = CodeAgent(model=model, tools=[classify_and_score])
     agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}  # attach for reference
     return agent
+@spaces.GPU(duration=20)
+def load_model():
+    transformers_model = TransformersModel(
+        model_id='swiss-ai/Apertus-70B-Instruct-2509',
+        device_map="auto"
+    ).to("cuda")
+    return transformers_model
 # ------------------------ Agent task template -----------------------------
 TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".
     with gr.Accordion("API Settings", open=False):
         hf_token = gr.Textbox(label="Hugging Face Token (required if the endpoint needs auth)", type="password")
+        model_id = gr.Textbox(value=transformers_model, label="Model ID")
         provider = gr.Textbox(value="novita", label="Provider")
         timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")
         outputs=[final_json, transcript]
     )
+# Entry point: runs when the file is executed as a script, or when the
+# SYSTEM environment variable equals "spaces" (presumably set by the
+# Hugging Face Spaces runtime -- confirm).
+if __name__ == "__main__" or os.getenv("SYSTEM") == "spaces":
+    try:
+        # NOTE(review): the value returned by load_model() is discarded here,
+        # so this call only serves as a warm-up.
+        load_model()  # triggers GPU allocation during startup
+    except Exception as e:
+        # don't crash the app if warmup fails; logs will show details
+        print("Warmup failed:", e)
     demo.launch()