Spaces:

astegaras
/

iris

Sleeping

astegaras committed on 18 days ago

Commit

e104971

verified ·

1 Parent(s): 5587610

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,43 +1,49 @@
 import gradio as gr
 from llama_cpp import Llama
-# ----------------------------------------------------
-# Load GGUF model
-# ----------------------------------------------------
-repo = "astegaras/merged_kaggle"
-# llama_cpp automatically downloads from HF Hub if you provide the repo
-llm = Llama.from_pretrained(
-	repo_id="astegaras/merged_kaggle",
-	filename="llama-3.2-3b-instruct.Q4_K_M.gguf",
 )
-# ----------------------------------------------------
-# Chat function
-# ----------------------------------------------------
-def respond(message, history):
     messages = []
     for user, assistant in history:
         messages.append({"role": "user", "content": user})
         messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": message})
-    output = llm.create_chat_completion(messages=messages)
-    reply = output["choices"][0]["message"]["content"]
     return reply
-# ----------------------------------------------------
-# Launch Gradio app
-# ----------------------------------------------------
-gr.ChatInterface(
-    respond,
-    title="My Llama 3.2 GGUF Chatbot",
-    description="Running GGUF with llama.cpp inside a HuggingFace Space",
-).launch()

 import gradio as gr
 from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+# Download your GGUF model from HF Hub
+model_path = hf_hub_download(
+    repo_id="astegaras/merged_kaggle",
+    filename="llama-3.2-3b-instruct.Q4_K_M.gguf"
 )
+# Load the GGUF model with llama.cpp
+llm = Llama(
+    model_path=model_path,
+    n_ctx=4096,       # Context window for inference
+    n_threads=8,      # Adjust to HF hardware
+    n_batch=512,
+    verbose=False
+)
+def chat_fn(message, history):
+    # Reformat history for llama.cpp chat template
     messages = []
     for user, assistant in history:
         messages.append({"role": "user", "content": user})
         messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": message})
+    output = llm.create_chat_completion(
+        messages=messages,
+        max_tokens=512,
+        temperature=0.7,
+        top_p=0.9
+    )
+    reply = output["choices"][0]["message"]["content"]
     return reply
+# Gradio UI
+chatbot = gr.ChatInterface(
+    fn=chat_fn,
+    title="Merged Kaggle Model (GGUF)",
+    description="Running llama.cpp inference on GGUF model",
+)
+chatbot.launch()