import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Log which CPU this Space is running on (Linux-specific: reads /proc/cpuinfo).
cpu_info = os.popen("grep 'model name' /proc/cpuinfo | head -1").read().strip()
print(f"🖥️ Running on: {cpu_info}")

# Model checkpoint to load from the Hugging Face Hub.
MODEL_ID = "LiquidAI/LFM2-1.2B"

# Load the tokenizer and weights once at startup; float32 is the safe default
# for CPU-only inference.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
)
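
# Optional tuning sketch (an assumption about the host, not something the model
# requires): pin the PyTorch CPU thread count so generation uses the available
# cores predictably instead of relying on the default heuristic.
torch.set_num_threads(os.cpu_count() or 1)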


def chat_with_ai(user_input):
    # Format the input with the model's chat template so the instruction-tuned
    # model sees a proper conversation turn.
    messages = [{"role": "user", "content": user_input}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    outputs = model.generate(input_ids, max_new_tokens=100)
    # Decode only the newly generated tokens, not the echoed prompt.
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response
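
# Quick sanity check (optional): uncomment to try one generation locally
# before launching the UI.
# print(chat_with_ai("Hello! What can you do?"))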


# Simple single-turn UI: one text box in, one text box out.
demo = gr.Interface(
    fn=chat_with_ai,
    inputs=gr.Textbox(label="Your Message"),
    outputs=gr.Textbox(label="AI Response"),
    title="Test AI Chat Model",
    description="Type a message to chat with an LLM hosted on Hugging Face.",
)

# 0.0.0.0 makes the server reachable from outside the container (e.g. on
# Hugging Face Spaces); 7860 is the port Spaces expects for Gradio apps.
demo.launch(server_name="0.0.0.0", server_port=7860)