import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download GGUF to a local file
# Fetch the quantized GGUF weights from the Hugging Face Hub.
# hf_hub_download returns the local cache path of the file and
# re-uses the cached copy on subsequent runs.
model_path = hf_hub_download(
    repo_id="astegaras/Llama3.2_3B",
    filename="model-Q4_K_M.gguf",
)
# Load the model for CPU-only inference (n_gpu_layers=0) with a
# 2048-token context window.
llm = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=0)
def respond(prompt):
    """Generate a completion for *prompt* with the loaded model.

    Runs the module-level ``llm`` on the raw prompt text, capping the
    generation at 256 tokens, and returns the text of the first choice.
    """
    completion = llm(prompt, max_tokens=256)
    first_choice = completion["choices"][0]
    return first_choice["text"]
# Expose the model through a minimal text-in/text-out web UI and start
# the Gradio server (blocks until shut down).
demo = gr.Interface(fn=respond, inputs="text", outputs="text")
demo.launch()