import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Fetch the quantized GGUF weights from the Hugging Face Hub.
# hf_hub_download hands back the local filesystem path of the downloaded file.
MODEL_REPO_ID = "astegaras/lora_python_converter"
MODEL_FILENAME = "llama-3.2-3b-instruct.Q2_K.gguf"
model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME)

# Load the GGUF model once at import time so every request reuses it.
# The options below keep inference on the CPU with conservative memory
# behaviour (presumably tuned for a CPU-only Hugging Face Space -- confirm).
LLAMA_OPTIONS = dict(
    n_ctx=4096,        # context window size, in tokens
    n_threads=4,       # CPU threads used for generation
    n_batch=64,        # prompt-processing batch size
    n_gpu_layers=0,    # offload nothing to a GPU
    use_mmap=False,    # do not memory-map the model file
    use_mlock=False,   # do not ask the OS to lock the model in RAM
    # NOTE(review): low_vram comes from older llama-cpp-python releases;
    # confirm the installed version still honours it.
    low_vram=True,
    verbose=False,     # keep llama.cpp's verbose output off
)
llm = Llama(model_path=model_path, **LLAMA_OPTIONS)

def _strip_code_fences(text):
    """Return the contents of the first Markdown code fence in *text*.

    Text that does not start with a fence (after trimming whitespace), or
    that consists of a single line, is returned unchanged.
    """
    lines = text.strip().splitlines()
    if len(lines) < 2 or not lines[0].startswith("```"):
        return text

    body = []
    # Skip the opening fence line (it may carry a language tag such as
    # "```python") and stop at the closing fence. If the closing fence is
    # missing -- e.g. the reply hit max_tokens -- keep everything after the
    # opening line.
    for line in lines[1:]:
        if line.strip() == "```":
            break
        body.append(line)
    return "\n".join(body)


def generate_code(instruction):
    """Generate Python code for a plain-English *instruction*.

    Args:
        instruction: Task description typed by the user. ``None``, empty, or
            whitespace-only input is treated as "nothing to do".

    Returns:
        The model's reply as a string, with a surrounding Markdown code fence
        removed if the model added one. Returns ``""`` for blank input or
        when the model returns no content.
    """
    # Guard clause: don't spend an inference call on a blank prompt.
    if not instruction or not instruction.strip():
        return ""

    messages = [
        {"role": "system", "content": "You are a Python code generator. Return only code."},
        {"role": "user", "content": instruction},
    ]

    # Low temperature / top_p keep the sampling close to the most likely
    # tokens; max_tokens caps the length of the reply.
    out = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.2,
        top_p=0.5,
    )

    # The message content may be missing; normalise it to an empty string.
    content = out["choices"][0]["message"]["content"] or ""

    # Chat-tuned models often wrap answers in ``` fences even when asked for
    # code only; the output panel expects raw source, so unwrap it.
    return _strip_code_fences(content)

# ---- GRADIO UI ----
# Intro text rendered at the top of the page.
INTRO_MARKDOWN = """
        # Python Code Generator  
        Enter a task in plain English and receive executable Python code.

        Example:  
        *"Help me set up my to-do list"*
        """

with gr.Blocks(theme="gradio/soft") as demo:
    gr.Markdown(INTRO_MARKDOWN)

    # Two columns with equal scale: the prompt on one side, the result on the other.
    with gr.Row():
        with gr.Column(scale=1):
            instruction = gr.Textbox(
                lines=3,
                label="Describe what you want to build",
                placeholder="Example: Help me set up my to-do list",
            )
            submit = gr.Button(value="Generate Python Code", variant="primary")

        with gr.Column(scale=1):
            code_output = gr.Code(language="python", label="Generated Python Code")

    # Clicking the button feeds the textbox value to generate_code and puts
    # the returned string into the code panel.
    submit.click(generate_code, inputs=instruction, outputs=code_output)

# share=True asks Gradio to create a public share link in addition to the
# local server.
demo.launch(share=True)