import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Specify the model ID
model_id = "MBZUAI-Paris/Atlas-Chat-2B"
# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # Automatically place the model on the available device
    torch_dtype=torch.bfloat16  # Use bfloat16 for memory and speed efficiency
)
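# Optional alternative (an assumption, not part of the original app): on a
# memory-constrained GPU, the model could instead be loaded in 4-bit via
# bitsandbytes, for example:
#
#   from transformers import BitsAndBytesConfig
#   quant_config = BitsAndBytesConfig(
#       load_in_4bit=True,
#       bnb_4bit_compute_dtype=torch.bfloat16
#   )
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id,
#       device_map="auto",
#       quantization_config=quant_config
#   )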
# Define the text generation function
def generate_text(prompt, max_length=100, temperature=0.7):
    # Wrap the prompt in the chat format expected by the model
    messages = [{"role": "user", "content": prompt}]
    # Tokenize the input with the model's chat template
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        return_dict=True,
        add_generation_prompt=True
    ).to(model.device)
    # Generate the response
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,  # caps the number of newly generated tokens
        temperature=temperature,
        top_k=50,
        top_p=0.95,
        do_sample=True,
        num_return_sequences=1
    )
    # Decode only the newly generated tokens, skipping the echoed prompt
    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)
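# Quick sanity check (illustrative; the prompt below is a hypothetical example,
# as Atlas-Chat is tuned for Moroccan Darija):
#
#   print(generate_text("Salam, labas?", max_length=50, temperature=0.7))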
# Create the Gradio interface
interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=4, label="Enter Prompt"),
        gr.Slider(minimum=50, maximum=300, step=10, value=100, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=1.5, step=0.1, value=0.7, label="Temperature")
    ],
    outputs="text",
    title="Atlas-Chat-2B Text Generator",
    description="Powered by the MBZUAI-Paris/Atlas-Chat-2B model."
)
# Launch the interface
interface.launch()
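# Usage note (a sketch, assuming a standard local setup): install the
# dependencies and run the script; Gradio serves the UI on
# http://127.0.0.1:7860 by default.
#
#   pip install gradio torch transformers accelerate
#   python app.py
#
# To expose a temporary public URL instead, call interface.launch(share=True).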