import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Specify the model ID
model_id = "MBZUAI-Paris/Atlas-Chat-2B"
# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # Automatically place the model on the available device
    torch_dtype=torch.bfloat16  # Use bfloat16 for memory and speed efficiency
)
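# Optional alternative (an assumption, not part of the original app): on a
# memory-constrained GPU, the model could instead be loaded in 4-bit via
# bitsandbytes, for example:
#
#   from transformers import BitsAndBytesConfig
#   quant_config = BitsAndBytesConfig(
#       load_in_4bit=True,
#       bnb_4bit_compute_dtype=torch.bfloat16
#   )
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id,
#       device_map="auto",
#       quantization_config=quant_config
#   )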
# Define the text generation function
def generate_text(prompt, max_length=100, temperature=0.7):
    # Wrap the prompt in the chat format expected by the model
    messages = [{"role": "user", "content": prompt}]
    # Tokenize the input with the model's chat template
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        return_dict=True,
        add_generation_prompt=True
    ).to(model.device)
    # Generate the response
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,  # caps the number of newly generated tokens
        temperature=temperature,
        top_k=50,
        top_p=0.95,
        do_sample=True,
        num_return_sequences=1
    )
    # Decode only the newly generated tokens, skipping the echoed prompt
    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)
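# Quick sanity check (illustrative; the prompt below is a hypothetical example,
# as Atlas-Chat is tuned for Moroccan Darija):
#
#   print(generate_text("Salam, labas?", max_length=50, temperature=0.7))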
# Create the Gradio interface
interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=4, label="Enter Prompt"),
        gr.Slider(minimum=50, maximum=300, step=10, value=100, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=1.5, step=0.1, value=0.7, label="Temperature")
    ],
    outputs="text",
    title="Atlas-Chat-2B Text Generator",
    description="Powered by the MBZUAI-Paris/Atlas-Chat-2B model."
)
# Launch the interface
interface.launch()
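# Usage note (a sketch, assuming a standard local setup): install the
# dependencies and run the script; Gradio serves the UI on
# http://127.0.0.1:7860 by default.
#
#   pip install gradio torch transformers accelerate
#   python app.py
#
# To expose a temporary public URL instead, call interface.launch(share=True).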