Update app.py
app.py CHANGED
@@ -2,55 +2,42 @@ import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 
-# Download GGUF from
+# Download GGUF file from HuggingFace
 model_path = hf_hub_download(
     repo_id="astegaras/Llama3.2_3B",
-    filename="model-Q2_K.gguf"
+    filename="model-Q2_K.gguf",
 )
 
-# Load model
+# Load model
 llm = Llama(
     model_path=model_path,
     n_ctx=4096,
-    chat_format=None,
     n_gpu_layers=0,
+    chat_format=None,
     add_bos_token=False,
     add_eos_token=False,
 )
 
-#
-def format_prompt(user_message):
-    return (
-        "<|begin_of_text|>"
-        "<|start_header_id|>system<|end_header_id|>\n"
-        "You are a helpful assistant.\n"
-        "<|start_header_id|>user<|end_header_id|>\n"
-        f"{user_message}\n"
-        "<|start_header_id|>assistant<|end_header_id|>\n"
-    )
-
-
+# EXACT SAME BEHAVIOR AS mlx_lm.generate
 def respond(user_input):
-    prompt = format_prompt(user_input)
-
     output = llm(
-        prompt,
-        max_tokens=
+        user_input,  # <-- only this!
+        max_tokens=256,
         temperature=0.7,
         top_p=0.9,
-        stop=
+        stop=None,
     )
 
-    return output["choices"][0]["text"]
+    return output["choices"][0]["text"].strip()
 
-# Gradio UI
 gr.Interface(
     fn=respond,
-    inputs=
-    outputs=
-    title="Llama3.2-3B Fine-tuned
+    inputs="text",
+    outputs="text",
+    title="Llama3.2-3B Fine-tuned Model"
 ).launch()
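Note on the change: the removed format_prompt helper hand-built Llama 3 header tokens, but it put a single "\n" after each header and never closed turns with "<|eot_id|>", so it did not match the official Llama 3 chat template in any case. The new code drops templating entirely and passes the raw string to llm() as a plain completion, in line with the commit's stated intent of mirroring mlx_lm.generate. A minimal local sanity check, assuming the GGUF has finished downloading; run it in place of the .launch() call (the prompt string here is only an illustration):

    if __name__ == "__main__":
        # Plain completion: the model simply continues the input string.
        print(respond("The capital of France is"))

If chat-style turns are wanted again later, a sketch of the usual alternative (not what this commit does): llama-cpp-python's llm.create_chat_completion(messages=[...]) applies a chat template for you, and with chat_format=None the library falls back to the template embedded in the GGUF metadata when one is present, avoiding hand-built header strings.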