DeepSeek-R1-Chatbot

Running

App Files Community

ruslanmv committed on Jan 28

Commit

54fe9a3

verified ·

1 Parent(s): 1472595

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -22

app.py CHANGED Viewed

@@ -4,18 +4,13 @@ from functools import lru_cache
 # Cache model loading to optimize performance
 @lru_cache(maxsize=3)
 def load_hf_model(model_name):
-    # Use the Gradio-built huggingface loader instead of transformers_gradio
-    return gr.load(
-        name=f"deepseek-ai/{model_name}",
-        src="huggingface",  # Changed from transformers_gradio.registry
-        api_name="/chat"
-    )
 # Load all models at startup
 MODELS = {
-    "DeepSeek-R1-Distill-Qwen-32B": load_hf_model("DeepSeek-R1-Distill-Qwen-32B"),
-    "DeepSeek-R1": load_hf_model("DeepSeek-R1"),
-    "DeepSeek-R1-Zero": load_hf_model("DeepSeek-R1-Zero")
 }
 # --- Chatbot function ---
@@ -27,22 +22,21 @@ def chatbot(input_text, history, model_choice, system_message, max_new_tokens, t
     # Create payload for the model
     payload = {
-        "messages": [{"role": "user", "content": input_text}],
-        "system": system_message,
-        "max_tokens": max_new_tokens,
-        "temperature": temperature,
-        "top_p": top_p
     }
     # Run inference using the selected model
     try:
-        response = model_component(payload)  # The response is likely a dictionary
-        if isinstance(response, dict) and "choices" in response:
-            # Assuming the response structure is similar to OpenAI's API
-            assistant_response = response["choices"][0]["message"]["content"]
-        elif isinstance(response, dict) and "generated_text" in response:
-            # If the response is in a different format, adjust accordingly
-            assistant_response = response["generated_text"]
         else:
             assistant_response = "Unexpected model response format."
     except Exception as e:
@@ -77,7 +71,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
             model_choice = gr.Radio(
                 choices=list(MODELS.keys()),
                 label="Choose a Model",
-                value="DeepSeek-R1"
             )
             with gr.Accordion("Optional Parameters", open=False):
                 system_message = gr.Textbox(

 # Cache model loading to optimize performance
 @lru_cache(maxsize=3)
 def load_hf_model(model_name):
+    return gr.load(f"models/{model_name}", src="huggingface")
 # Load all models at startup
 MODELS = {
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": load_hf_model("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"),
+    "deepseek-ai/DeepSeek-R1": load_hf_model("deepseek-ai/DeepSeek-R1"),
+    "deepseek-ai/DeepSeek-R1-Zero": load_hf_model("deepseek-ai/DeepSeek-R1-Zero")
 }
 # --- Chatbot function ---
     # Create payload for the model
     payload = {
+        "inputs": input_text,  # Directly pass the input text
+        "parameters": {
+            "max_new_tokens": max_new_tokens,
+            "temperature": temperature,
+            "top_p": top_p,
+            "return_full_text": False  # Only return the generated text
+        }
     }
     # Run inference using the selected model
     try:
+        response = model_component(**payload)  # Pass payload as keyword arguments
+        if isinstance(response, list) and len(response) > 0:
+            # Extract the generated text from the response
+            assistant_response = response[0].get("generated_text", "No response generated.")
         else:
             assistant_response = "Unexpected model response format."
     except Exception as e:
             model_choice = gr.Radio(
                 choices=list(MODELS.keys()),
                 label="Choose a Model",
+                value="deepseek-ai/DeepSeek-R1"
             )
             with gr.Accordion("Optional Parameters", open=False):
                 system_message = gr.Textbox(