"""BitAI — minimal Gradio chat UI streaming replies from a Hugging Face model.

NOTE(review): this file was recovered from a whitespace-mangled source; the
inline HTML fragments were garbled and have been reconstructed as plausible
markup — confirm against the original design before shipping.
"""

import os
import time  # NOTE(review): unused in the visible code — kept, not removing file-level imports

import gradio as gr
from huggingface_hub import InferenceClient

# System prompt sent as the first message of every conversation.
SYSTEM_MESSAGE = "You are BitAI (V1), a friendly chatbot..."


def respond(message, history):
    """Yield a progressively growing assistant reply for gr.ChatInterface.

    Parameters:
        message: the new user message (str).
        history: prior turns as a list of {"role": ..., "content": ...}
            dicts (the ChatInterface ``type="messages"`` format).

    Yields:
        First a "typing" placeholder, then the accumulated partial response
        after each streamed token.

    Raises:
        KeyError: if the HF_TOKEN environment variable is not set.
    """
    client = InferenceClient(token=os.environ["HF_TOKEN"], model="openai/gpt-oss-20b")

    messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    # Shown until the first token arrives (the loader).
    yield "⏳ BitAI is typing..."

    # Stream the completion token by token.
    response = ""
    for chunk in client.chat_completion(messages, stream=True):
        # BUG FIX: delta.content may be None on role-only or finish chunks;
        # the original `response += token` raised TypeError in that case.
        token = ""
        if chunk.choices:
            token = chunk.choices[0].delta.content or ""
        response += token
        yield response


# CSS for the loader animation.
# BUG FIX: the original defined this string *after* building the UI and never
# passed it to gr.Blocks, so the @keyframes were never applied.
CSS_LOADER = """
@keyframes moveLoader {
  0%   { transform: translateY(0px); }
  50%  { transform: translateY(5px); }
  100% { transform: translateY(0px); }
}
"""

with gr.Blocks(css=CSS_LOADER) as demo:
    with gr.Column():
        # Header banner. NOTE(review): original HTML was garbled in the
        # recovered source; reconstructed as a simple title — confirm markup.
        gr.HTML("<div style='text-align:center'><h1>BitAI</h1></div>")

        chatbot = gr.ChatInterface(respond, type="messages")

        # Loader element outside the chat, animated via the moveLoader
        # keyframes defined in CSS_LOADER.
        loader = gr.HTML(
            "<div class='loader' style='animation: moveLoader 1s ease-in-out infinite;'>"
            "</div>"
        )

    # Placeholder callback to trigger the animation on page load (returning
    # None leaves the component's content unchanged).
    demo.load(lambda: None, [], loader)

demo.launch()