import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
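
# Dependencies implied by the imports above; in a Space these would normally
# be listed in requirements.txt:
#   pip install gradio huggingface_hub llama-cpp-python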
# Replace with your actual HF model repo and filename
model_repo = "AravindKumarRajendran/WhiZ-gemma-3n-4b"
model_filename = "gemma-3n-4b-it-finetune.Q8_0.gguf" # Exact GGUF file name in repo
# Download GGUF model from HF Hub (caches locally)
model_path = hf_hub_download(repo_id=model_repo, filename=model_filename)
# Load model with llama-cpp
llm = Llama(
    model_path=model_path,
    n_ctx=2048,    # context window in tokens
    n_threads=4,   # CPU threads; tune to the available cores
    n_batch=64,    # prompt-processing batch size
    verbose=False,
)
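
# A minimal sketch (an assumption, not part of the original app): Gemma
# instruction-tuned checkpoints are typically prompted with the
# <start_of_turn>/<end_of_turn> chat template. If this fine-tune preserved
# that template, a helper like build_gemma_prompt (hypothetical name) could
# replace the plain prompt used in chat_with_model below.
def build_gemma_prompt(user_input: str) -> str:
    # Wrap the user message in Gemma's chat markers and open the model turn
    return (
        "<start_of_turn>user\n"
        f"{user_input}<end_of_turn>\n"
        "<start_of_turn>model\n"
    )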
# Chat handler
def chat_with_model(history, user_input):
    history.append(("🧑‍💻: " + user_input, ""))
    # "தமிழில் பதிலளி" = "reply in Tamil"; the suffix steers the response language
    prompt = f"{user_input} தமிழில் பதிலளி:"
    output = llm(
        prompt,
        max_tokens=128,
        temperature=0.7,
        stop=["</s>", "<end_of_turn>"],  # <end_of_turn> is Gemma's usual end marker (assumed for this fine-tune)
    )
    reply = output["choices"][0]["text"].strip()
    history[-1] = (history[-1][0], "🤖: " + reply)
    # Return the history for both the chatbot display and the session state,
    # plus an empty string to clear the textbox
    return history, "", history
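
# Quick sanity check outside the UI (hypothetical input; "வணக்கம்" = "hello"):
#   history, _, _ = chat_with_model([], "வணக்கம்")
#   print(history[-1][1])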
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🗣️ தமிழில் உரையாடல் (Tamil Chatbot - GGUF on CPU)")  # heading: "Conversation in Tamil"
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="உங்கள் செய்தி", placeholder="Type your message...")  # label: "Your message"
    clear = gr.Button("🧹 Clear Chat")
    state = gr.State([])
    msg.submit(chat_with_model, [state, msg], [chatbot, msg, state])
    # Reset chatbot, textbox, and state: one return value per output component
    clear.click(lambda: ([], "", []), None, [chatbot, msg, state])
demo.launch()
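
# Running locally, assuming the dependencies above are installed:
#   python app.py
# demo.launch() serves the UI on http://127.0.0.1:7860 by default.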