Spaces:
Runtime error
Runtime error
| import spaces | |
| import gradio as gr | |
| from cartesia_pytorch import ReneLMHeadModel | |
| from transformers import AutoTokenizer | |
| #import subprocess | |
| #subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True) | |
| # Load model and tokenizer | |
| model = ReneLMHeadModel.from_pretrained("cartesia-ai/Rene-v0.1-1.3b-pytorch").half().cuda() | |
| tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-1B-hf") | |
| # Define the function to generate text | |
| def generate_text(input_text): | |
| inputs = tokenizer([input_text], return_tensors="pt") | |
| outputs = model.generate(inputs.input_ids.cuda(), max_length=50, top_k=100, top_p=0.99) | |
| out_message = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0] | |
| return out_message | |
| # Create Gradio interface | |
| interface = gr.Interface( | |
| fn=generate_text, | |
| inputs="text", | |
| outputs="text", | |
| title="ReneLM Text Generator", | |
| description="Generate text using ReneLMHeadModel from a prompt." | |
| ) | |
| # Launch the Gradio app | |
| interface.launch() | |