import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Log CPU info at startup (Linux-only; Spaces run on Linux)
cpu_info = os.popen("cat /proc/cpuinfo | grep 'model name' | head -1").read().strip()
print(f"🖥️ Running on: {cpu_info}")

# Pick any model you want to test (small enough for free CPU Spaces)
MODEL_ID = "LiquidAI/LFM2-1.2B"

# Load tokenizer + model
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # use float32 for CPU (no GPU)
)


def chat_with_ai(user_input):
    # Tokenize the prompt, generate up to 100 new tokens, and decode the result
    inputs = tokenizer(user_input, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=100)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


# Simple Gradio UI: one textbox in, one textbox out
demo = gr.Interface(
    fn=chat_with_ai,
    inputs=gr.Textbox(label="Your Message"),
    outputs=gr.Textbox(label="AI Response"),
    title="Test AI Chat Model",
    description="Type a message to chat with an LLM hosted on Hugging Face.",
)

# Bind to all interfaces on port 7860 (the port Hugging Face Spaces expects)
demo.launch(server_name="0.0.0.0", server_port=7860)
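
# Optional variant (an assumption, not part of the original app): LFM2 is an
# instruction-tuned model, so formatting the prompt with the tokenizer's chat
# template usually yields better replies than feeding raw text. A minimal
# sketch of a drop-in replacement for chat_with_ai:
#
# def chat_with_ai(user_input):
#     messages = [{"role": "user", "content": user_input}]
#     # Build the model's expected chat prompt and return input_ids as a tensor
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     )
#     outputs = model.generate(input_ids, max_new_tokens=100)
#     # Decode only the newly generated tokens, skipping the prompt
#     return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)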