# app.py — Gradio Space "iris": turns an English task description into Python
# code using a quantized GGUF Llama model served via llama-cpp-python.
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Download the quantized GGUF model from the Hugging Face Hub.
# hf_hub_download caches the file locally and returns the resolved local path,
# so repeated startups do not re-download the ~GB model file.
model_path = hf_hub_download(
    repo_id="astegaras/lora_python_converter",
    filename="llama-3.2-3b-instruct.Q2_K.gguf"
)
# Load the GGUF model with conservative settings suited to a small shared
# CPU host (e.g. a free Hugging Face Space).
llm = Llama(
    model_path=model_path,
    n_ctx=4096,        # context window (tokens)
    n_threads=4,       # CPU threads for inference
    n_batch=64,        # small prompt batch to limit peak memory
    n_gpu_layers=0,    # IMPORTANT: CPU-only — offload no layers to GPU
    use_mmap=False,    # IMPORTANT: read the model into RAM instead of mmap
    use_mlock=False,   # IMPORTANT: don't pin pages (mlock often disallowed)
    low_vram=True,     # IMPORTANT: minimize VRAM/scratch usage
    verbose=False
)
def generate_code(
    instruction: str,
    *,
    max_tokens: int = 512,
    temperature: float = 0.2,
    top_p: float = 0.5,
) -> str:
    """Generate Python code for a plain-English *instruction*.

    Runs a chat completion against the module-level ``llm`` with a system
    prompt that asks for code only.

    Args:
        instruction: Plain-English description of the task.
        max_tokens: Generation cap passed to the model (default 512).
        temperature: Sampling temperature; low for deterministic code.
        top_p: Nucleus-sampling cutoff.

    Returns:
        The model's reply text (expected to be Python source).
    """
    messages = [
        {"role": "system", "content": "You are a Python code generator. Return only code."},
        {"role": "user", "content": instruction},
    ]
    out = llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    # llama-cpp-python mirrors the OpenAI response shape.
    return out["choices"][0]["message"]["content"]
# ---- GRADIO UI ----
# Two-column layout: instruction textbox + button on the left,
# syntax-highlighted code output on the right.
with gr.Blocks(theme="gradio/soft") as demo:
    gr.Markdown(
        """
        # Python Code Generator
        Enter a task in plain English and receive executable Python code.
        Example:
        *"Help me set up my to-do list"*
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            instruction = gr.Textbox(
                label="Describe what you want to build",
                placeholder="Example: Help me set up my to-do list",
                lines=3,
            )
            submit = gr.Button("Generate Python Code", variant="primary")
        with gr.Column(scale=1):
            code_output = gr.Code(
                label="Generated Python Code",
                language="python"
            )
    # Wire the button: textbox value in, generated code out.
    submit.click(fn=generate_code, inputs=instruction, outputs=code_output)

# NOTE(review): share=True is ignored (with a warning) when running inside a
# Hugging Face Space — confirm whether it's needed for local runs.
demo.launch(share=True)