import os
import subprocess
import sys

# Install minijinja at startup. Runtime installs are a Hugging Face Spaces
# idiom; presumably minijinja is required by a downstream dependency of the
# inference client — TODO confirm. subprocess.run with an argument list (not
# os.system with a shell string) avoids the shell entirely and pins the
# install to the current interpreter's pip. check=False preserves the
# original best-effort behavior (os.system ignored failures too).
subprocess.run([sys.executable, "-m", "pip", "install", "minijinja"], check=False)

import gradio as gr
from huggingface_hub import InferenceClient
import torch
import spaces

# Client for the hosted llm.c GPT-2 1.5B checkpoint; all generation requests
# in this app go through this single instance.
client = InferenceClient("karpathy/gpt2_1558M_final2_hf")
@spaces.GPU
def generate_text(prompt, max_tokens, temperature, top_p):
    """Stream a completion for *prompt* from the hosted model.

    Yields the text accumulated so far after every streamed chunk, so the
    bound Gradio textbox updates progressively.

    Args:
        prompt: Input text to complete.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        str: the response accumulated so far, once per chunk; a fallback
        apology message if the stream produced no text at all.
    """
    accumulated = ""
    stream = client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    for piece in stream:
        # Chunks may arrive as plain strings or as objects exposing either
        # a .token or a .generated_text attribute; take whichever form this
        # chunk uses (unrecognized chunks contribute nothing).
        if isinstance(piece, str):
            accumulated += piece
        elif hasattr(piece, 'token'):
            accumulated += piece.token.text
        elif hasattr(piece, 'generated_text'):
            accumulated += piece.generated_text
        yield accumulated
    if not accumulated:
        yield "I apologize, but I couldn't generate a response."
def clear_input():
    """Return an empty string; wired to the Clear button to reset the prompt box."""
    return ""
# Define example prompts
# Open-ended story-continuation prompt (the classic GPT-2 "unicorns" demo text).
unicorn_example = "In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English."
# Instruction-style prompt exercising explanatory generation.
time_travel_example = "Explain the grandfather paradox in time travel and propose a potential resolution."
with gr.Blocks() as demo:
    # Title. The original literal was split across physical lines without
    # triple quotes (a syntax error) and its emoji was mojibake; rendered
    # here as a single markdown heading.
    gr.Markdown("# LLM.C 1.5B Demo 🤗")

    gr.Markdown(
        """
        ## About LLM.C
        Quick demo of the model trained https://github.com/karpathy/llm.c/discussions/677 (add more info)
        """
    )

    # Sampling controls, collapsed by default.
    with gr.Accordion("Advanced Settings", open=False):
        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens")
        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)")

    gr.Markdown("### Example prompts")
    with gr.Row():
        example1 = gr.Button("🦄 Unicorn Discovery")
        example2 = gr.Button("⏳ Time Travel Paradox")

    prompt = gr.Textbox(lines=3, label='Enter your prompt')
    output = gr.Textbox(lines=10, label='Generated text')

    with gr.Row():
        clear_button = gr.Button("🧹 Clear input")
        submit = gr.Button("🚀 Generate")
        stop_button = gr.Button("🛑 Stop")

    # Set up event handlers.
    # Keep a handle to the generation event so the Stop button can cancel
    # the in-flight streaming job via `cancels=`.
    submit_event = submit.click(generate_text, inputs=[prompt, max_tokens, temperature, top_p], outputs=output)
    stop_button.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])
    clear_button.click(clear_input, inputs=[], outputs=prompt)
    # Example buttons just copy their canned text into the prompt box.
    example1.click(lambda: unicorn_example, inputs=[], outputs=prompt)
    example2.click(lambda: time_travel_example, inputs=[], outputs=prompt)
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()