|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os, io, time, threading, logging |
|
|
from typing import Any, List, Tuple, Optional |
|
|
import requests, gradio as gr |
|
|
from gtts import gTTS |
|
|
from fastapi import Request, UploadFile, File |
|
|
from starlette.responses import JSONResponse, Response |
|
|
|
|
|
logging.basicConfig(level=logging.INFO) |
|
|
logger = logging.getLogger("kcrobot.v4.2.cloud") |
|
|
|
|
|
HF_API_TOKEN = os.getenv("HF_API_TOKEN", "").strip() |
|
|
HF_MODEL = os.getenv("HF_MODEL", "google/flan-t5-large").strip() |
|
|
HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small").strip() |
|
|
|
|
|
TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip() |
|
|
TELEGRAM_CHATID = os.getenv("TELEGRAM_CHATID", "").strip() |
|
|
|
|
|
HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {} |
|
|
|
|
|
CONVERSATION: List[Tuple[str, str]] = [] |
|
|
DISPLAY_BUFFER: List[str] = [] |
|
|
DISPLAY_LIMIT = 16 |
|
|
|
|
|
def push_display(line: str): |
|
|
DISPLAY_BUFFER.append(line) |
|
|
if len(DISPLAY_BUFFER) > DISPLAY_LIMIT: |
|
|
DISPLAY_BUFFER.pop(0) |
|
|
|
|
|
def detect_vi_or_en(text: str) -> str: |
|
|
if not text: return "en" |
|
|
vi_chars = "ăâđêôơưáàảãạắằẳẵặấầẩẫậéèẻẽẹíìỉĩịóòỏõọúùủũụýỳỷỹỵ" |
|
|
for ch in text.lower(): |
|
|
if ch in vi_chars: |
|
|
return "vi" |
|
|
return "en" |
|
|
|
|
|
def _parse_hf_text_response(data: Any) -> str: |
|
|
try: |
|
|
if isinstance(data, list) and data and isinstance(data[0], dict): |
|
|
return data[0].get("generated_text", "") or str(data[0]) |
|
|
if isinstance(data, dict) and "generated_text" in data: |
|
|
return data.get("generated_text", "") |
|
|
if isinstance(data, dict) and "text" in data: |
|
|
return data.get("text", "") |
|
|
if isinstance(data, dict) and "choices" in data: |
|
|
c0 = data["choices"][0] |
|
|
return c0.get("text") or c0.get("message", {}).get("content", "") or str(c0) |
|
|
return str(data) |
|
|
except Exception: |
|
|
return str(data) |
|
|
|
|
|
def hf_text_generate(prompt: str, model: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str: |
|
|
if not HF_API_TOKEN: |
|
|
return "[ERROR] HF_API_TOKEN not configured in Space Secrets." |
|
|
model = model or HF_MODEL |
|
|
url = f"https://api-inference.huggingface.co/models/{model}" |
|
|
payload = {"inputs": prompt, "parameters": {"max_new_tokens": int(max_new_tokens), "temperature": float(temperature)}, "options": {"wait_for_model": True}} |
|
|
try: |
|
|
r = requests.post(url, headers=HF_HEADERS, json=payload, timeout=120) |
|
|
if r.status_code != 200: |
|
|
logger.error("HF text gen failed %s: %s", r.status_code, r.text[:400]) |
|
|
return f"[ERROR] HF text gen {r.status_code}: {r.text[:300]}" |
|
|
return _parse_hf_text_response(r.json()) |
|
|
except Exception as e: |
|
|
logger.exception("HF text exception") |
|
|
return f"[ERROR] HF text exception: {e}" |
|
|
|
|
|
def hf_stt_from_bytes(audio_bytes: bytes, model: Optional[str] = None) -> str: |
|
|
if not HF_API_TOKEN: |
|
|
return "[ERROR] HF_API_TOKEN not configured." |
|
|
model = model or HF_STT_MODEL |
|
|
url = f"https://api-inference.huggingface.co/models/{model}" |
|
|
headers = dict(HF_HEADERS); headers["Content-Type"] = "application/octet-stream" |
|
|
try: |
|
|
r = requests.post(url, headers=headers, data=audio_bytes, timeout=180) |
|
|
if r.status_code != 200: |
|
|
logger.error("HF STT failed %s: %s", r.status_code, r.text[:400]) |
|
|
return f"[ERROR] HF STT {r.status_code}: {r.text[:300]}" |
|
|
out = r.json() |
|
|
if isinstance(out, dict) and "text" in out: |
|
|
return out["text"] |
|
|
return _parse_hf_text_response(out) |
|
|
except Exception as e: |
|
|
logger.exception("HF STT exception") |
|
|
return f"[ERROR] HF STT exception: {e}" |
|
|
|
|
|
def tts_gtts_bytes(text: str) -> bytes: |
|
|
if not text: return b"" |
|
|
lang = detect_vi_or_en(text) |
|
|
try: |
|
|
tts = gTTS(text=text, lang="vi" if lang == "vi" else "en") |
|
|
bio = io.BytesIO(); tts.write_to_fp(bio); bio.seek(0) |
|
|
return bio.read() |
|
|
except Exception as e: |
|
|
logger.exception("gTTS error") |
|
|
return b"" |
|
|
|
|
|
def send_telegram_message(text: str): |
|
|
if not TELEGRAM_TOKEN or not TELEGRAM_CHATID: |
|
|
logger.debug("Telegram not configured") |
|
|
return |
|
|
try: |
|
|
url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage" |
|
|
requests.post(url, json={"chat_id": TELEGRAM_CHATID, "text": text}, timeout=10) |
|
|
except Exception: |
|
|
logger.exception("send_telegram_message failed") |
|
|
|
|
|
def _start_telegram_poller(): |
|
|
if not TELEGRAM_TOKEN: |
|
|
logger.info("Telegram poll disabled"); return |
|
|
base = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}"; offset = None |
|
|
logger.info("Telegram poller started") |
|
|
while True: |
|
|
try: |
|
|
params = {"timeout":30} |
|
|
if offset: params["offset"] = offset |
|
|
r = requests.get(base + "/getUpdates", params=params, timeout=35) |
|
|
if r.status_code != 200: |
|
|
time.sleep(2); continue |
|
|
data = r.json() |
|
|
for upd in data.get("result", []): |
|
|
offset = upd.get("update_id", 0) + 1 |
|
|
msg = upd.get("message") or {} |
|
|
chat = msg.get("chat", {}); chat_id = chat.get("id"); text = (msg.get("text") or "").strip() |
|
|
if not text: continue |
|
|
logger.info("TG msg: %s", text) |
|
|
if text.lower().startswith("/ask "): |
|
|
q = text[5:].strip(); ans = hf_text_generate(q) |
|
|
requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": ans}, timeout=10) |
|
|
elif text.lower().startswith("/say "): |
|
|
phrase = text[5:].strip() |
|
|
audio = tts_gtts_bytes(phrase) |
|
|
if audio: |
|
|
files = {"audio": ("reply.mp3", audio, "audio/mpeg")} |
|
|
requests.post(base + "/sendAudio", files=files, data={"chat_id": chat_id}, timeout=30) |
|
|
else: |
|
|
requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "[TTS failed]"}, timeout=10) |
|
|
elif text.lower().startswith("/status"): |
|
|
requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "KC Robot brain running"}, timeout=10) |
|
|
else: |
|
|
requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "Commands: /ask <q> | /say <text> | /status"}, timeout=10) |
|
|
except Exception: |
|
|
logger.exception("Telegram poller exception") |
|
|
time.sleep(3) |
|
|
|
|
|
if TELEGRAM_TOKEN: |
|
|
t = threading.Thread(target=_start_telegram_poller, daemon=True); t.start() |
|
|
|
|
|
|
|
|
with gr.Blocks(title="KC Robot AI v4.2 — Cloud Brain") as demo: |
|
|
gr.Markdown("## 🤖 KC Robot AI v4.2 — Cloud Brain\n(Requires HF_API_TOKEN in Secrets for full AI/STT)") |
|
|
with gr.Row(): |
|
|
with gr.Column(scale=2): |
|
|
chatbot = gr.Chatbot(height=440, type="messages", elem_id="chatbot") |
|
|
text_in = gr.Textbox(lines=2, placeholder="Nhập câu (VN/EN)...", label="Text input") |
|
|
mic = gr.Audio(source="microphone", type="filepath", label="Record voice (browser mic)") |
|
|
send = gr.Button("Send") |
|
|
with gr.Row(): |
|
|
temp = gr.Slider(0.0, 1.0, value=0.7, label="Temperature") |
|
|
tokens = gr.Slider(32, 1024, value=256, step=16, label="Max tokens") |
|
|
model_override = gr.Textbox(label="HF model override (optional)") |
|
|
with gr.Column(scale=1): |
|
|
gr.Markdown("### TTS / STT") |
|
|
tts_box = gr.Textbox(lines=2, label="Text → TTS") |
|
|
tts_btn = gr.Button("Create TTS") |
|
|
tts_audio = gr.Audio(label="TTS audio", interactive=False) |
|
|
gr.Markdown("Upload audio for STT") |
|
|
up = gr.Audio(source="upload", type="filepath", label="Upload audio") |
|
|
stt_btn = gr.Button("Transcribe") |
|
|
stt_out = gr.Textbox(label="Transcription") |
|
|
|
|
|
def chat_fn(audio_file, typed_text, temperature, max_tokens, model_override_val, history): |
|
|
user_text = (typed_text or "").strip() |
|
|
if audio_file: |
|
|
try: |
|
|
with open(audio_file, "rb") as f: b = f.read() |
|
|
stt = hf_stt_from_bytes(b) |
|
|
if stt and not stt.startswith("[ERROR]"): user_text = stt |
|
|
except Exception: |
|
|
logger.exception("STT from audio failed") |
|
|
if not user_text: return history or [], "" |
|
|
prompt = f"You are KC Robot AI, bilingual assistant. Answer in the same language as the user.\n\nUser: {user_text}\nAssistant:" |
|
|
model = model_override_val.strip() if model_override_val else HF_MODEL |
|
|
ans = hf_text_generate(prompt, model=model, max_new_tokens=int(max_tokens), temperature=float(temperature)) |
|
|
CONVERSATION.append((user_text, ans)); push_display("YOU: "+user_text[:80]); push_display("BOT: "+ans[:80]) |
|
|
if TELEGRAM_TOKEN and TELEGRAM_CHATID: |
|
|
try: send_telegram_message(f"You: {user_text}\nBot: {ans}") |
|
|
except: logger.exception("telegram notify failed") |
|
|
history = history or []; history.append(("You", user_text)); history.append(("Bot", ans)) |
|
|
return history, "" |
|
|
|
|
|
def tts_fn(text_in, model_override_val): |
|
|
if not text_in or not text_in.strip(): return None |
|
|
audio = tts_gtts_bytes(text_in) |
|
|
if audio == b"": raise gr.Error("TTS generation failed (gTTS).") |
|
|
return (audio, "audio/mpeg") |
|
|
|
|
|
def stt_fn(local_path, model_override_val): |
|
|
if not local_path: return "" |
|
|
with open(local_path, "rb") as f: b = f.read() |
|
|
txt = hf_stt_from_bytes(b); push_display("Voice: "+(txt[:80] if isinstance(txt,str) else str(txt))) |
|
|
return txt |
|
|
|
|
|
send.click(chat_fn, inputs=[mic, text_in, temp, tokens, model_override], outputs=[chatbot, text_in]) |
|
|
tts_btn.click(tts_fn, inputs=[tts_box, model_override], outputs=[tts_audio]) |
|
|
stt_btn.click(stt_fn, inputs=[up, model_override], outputs=[stt_out]) |
|
|
|
|
|
|
|
|
app = demo.app |
|
|
|
|
|
@app.post("/api/ask") |
|
|
async def api_ask(req: Request): |
|
|
try: j = await req.json() |
|
|
except: return JSONResponse({"error":"invalid json"}, status_code=400) |
|
|
text = (j.get("text","") or "").strip(); lang = (j.get("lang","auto") or "auto").strip().lower() |
|
|
if not text: return JSONResponse({"error":"no text"}, status_code=400) |
|
|
if not HF_API_TOKEN: return JSONResponse({"error":"HF_API_TOKEN not configured in Space Secrets."}, status_code=500) |
|
|
if lang == "vi": prompt = "Bạn là trợ lý thông minh. Trả lời bằng tiếng Việt, rõ ràng:\n\n"+text |
|
|
elif lang == "en": prompt = "You are a helpful assistant. Answer in English:\n\n"+text |
|
|
else: prompt = "You are bilingual. Answer in the language of the question.\n\n"+text |
|
|
ans = hf_text_generate(prompt); CONVERSATION.append((text, ans)); push_display("YOU: "+text[:80]); push_display("BOT: "+ans[:80]) |
|
|
return {"answer": ans} |
|
|
|
|
|
@app.post("/api/tts") |
|
|
async def api_tts(req: Request): |
|
|
try: j = await req.json() |
|
|
except: return JSONResponse({"error":"invalid json"}, status_code=400) |
|
|
text = (j.get("text","") or "").strip() |
|
|
if not text: return JSONResponse({"error":"no text"}, status_code=400) |
|
|
audio = tts_gtts_bytes(text) |
|
|
if audio == b"": return JSONResponse({"error":"TTS generation failed (gTTS)."}, status_code=500) |
|
|
return Response(content=audio, media_type="audio/mpeg") |
|
|
|
|
|
@app.post("/api/stt") |
|
|
async def api_stt(file: UploadFile = File(...)): |
|
|
try: content = await file.read() |
|
|
except: return JSONResponse({"error":"file read error"}, status_code=400) |
|
|
if not content: return JSONResponse({"error":"no audio content"}, status_code=400) |
|
|
if not HF_API_TOKEN: return JSONResponse({"error":"HF_API_TOKEN not configured in Space Secrets."}, status_code=500) |
|
|
txt = hf_stt_from_bytes(content) |
|
|
CONVERSATION.append((f"[voice] {txt}", "")); push_display("Voice: "+(txt[:80] if isinstance(txt,str) else str(txt))) |
|
|
return {"text": txt} |
|
|
|
|
|
@app.post("/api/presence") |
|
|
async def api_presence(req: Request): |
|
|
try: j = await req.json() |
|
|
except: return JSONResponse({"error":"invalid json"}, status_code=400) |
|
|
note = (j.get("note","Có người phía trước") or "").strip() |
|
|
greeting = f"Xin chào! {note}" |
|
|
push_display("RADAR: "+note[:80]); CONVERSATION.append(("__presence__", greeting)) |
|
|
if TELEGRAM_TOKEN and TELEGRAM_CHATID: |
|
|
try: send_telegram_message(f"⚠️ Robot: Phát hiện người - {note}") |
|
|
except: logger.exception("telegram notify failed") |
|
|
|
|
|
|
|
|
return {"greeting": greeting} |
|
|
|
|
|
@app.get("/api/display") |
|
|
async def api_display(): |
|
|
return {"lines": DISPLAY_BUFFER.copy(), "conv_len": len(CONVERSATION)} |
|
|
|
|
|
@app.post("/api/config") |
|
|
async def api_config(req: Request): |
|
|
try: j = await req.json() |
|
|
except: return JSONResponse({"error":"invalid json"}, status_code=400) |
|
|
changed = {}; global HF_MODEL, HF_STT_MODEL |
|
|
if "hf_model" in j: HF_MODEL = j["hf_model"]; changed["hf_model"]=HF_MODEL |
|
|
if "hf_stt_model" in j: HF_STT_MODEL = j["hf_stt_model"]; changed["hf_stt_model"]=HF_STT_MODEL |
|
|
return {"changed": changed} |
|
|
|
|
|
if __name__ == "__main__": |
|
|
demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860))) |
|
|
|