import os import gradio as gr from huggingface_hub import InferenceClient import time def respond(message, history): client = InferenceClient(token=os.environ["HF_TOKEN"], model="openai/gpt-oss-20b") system_message = "You are BitAI (V1), a friendly chatbot..." messages = [{"role":"system","content":system_message}] messages.extend(history) messages.append({"role":"user","content":message}) yield "⏳ BitAI is typing..." # aqui mostra o loader # Simulando streaming real response = "" for m in client.chat_completion(messages, stream=True): token = m.choices[0].delta.content if m.choices else "" response += token yield response with gr.Blocks() as demo: with gr.Column(): gr.HTML("