import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Replace with your actual HF model repo and filename
model_repo = "AravindKumarRajendran/WhiZ-gemma-3n-4b"
model_filename = "gemma-3n-4b-it-finetune.Q8_0.gguf" # Exact GGUF file name in repo
# Download GGUF model from HF Hub (caches locally)
model_path = hf_hub_download(repo_id=model_repo, filename=model_filename)
# Load model with llama-cpp
llm = Llama(
    model_path=model_path,
    n_ctx=2048,        # context window (tokens)
    n_threads=4,       # CPU threads used for inference
    n_batch=64,        # prompt-processing batch size
    verbose=False,
)
# Chat handler: append the user turn, query the model, then fill in the reply
def chat_with_model(history, user_input):
    history.append(("🧑‍💻: " + user_input, ""))
    # Suffix asks the model to answer in Tamil ("தமிழில் பதிலளி" = "reply in Tamil")
    prompt = f"{user_input} தமிழில் பதிலளி:"
    output = llm(
        prompt,
        max_tokens=128,
        temperature=0.7,
        # Gemma-family models typically mark the end of a turn with <end_of_turn>;
        # "</s>" is kept as a fallback stop sequence
        stop=["</s>", "<end_of_turn>"],
    )
    reply = output["choices"][0]["text"].strip()
    history[-1] = (history[-1][0], "🤖: " + reply)
    return history, ""
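
# The handler above uses raw text completion. llama-cpp-python also exposes a
# chat-style API that applies the chat template embedded in the GGUF, which
# instruction-tuned Gemma checkpoints generally expect. A minimal sketch, not
# wired into the UI, and assuming this GGUF ships a usable chat template:
def chat_completion(user_input):
    result = llm.create_chat_completion(
        messages=[{"role": "user", "content": user_input + " தமிழில் பதிலளி:"}],
        max_tokens=128,
        temperature=0.7,
    )
    return result["choices"][0]["message"]["content"].strip()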
# Gradio UI
with gr.Blocks() as demo:
    # Heading reads "Conversation in Tamil"
    gr.Markdown("## 🗣️ தமிழில் உரையாடல் (Tamil Chatbot - GGUF on CPU)")
    chatbot = gr.Chatbot()
    # Textbox label reads "Your message"
    msg = gr.Textbox(label="உங்கள் செய்தி", placeholder="Type your message...")
    clear = gr.Button("🧹 Clear Chat")
    state = gr.State([])

    msg.submit(chat_with_model, [state, msg], [chatbot, msg])
    # Reset all three outputs: chat display, input box, and history state
    clear.click(lambda: ([], "", []), None, [chatbot, msg, state])

demo.launch()
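
# To run locally (package names inferred from the imports above):
#   pip install gradio huggingface_hub llama-cpp-python
#   python app.py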