from flask import Flask, request, jsonify, send_file
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import time

app = Flask(__name__)
start_time = time.time()

# NOTE: the original repo "TheBloke/Qwen2.5-1.8B-Chat-GGUF" does not exist
# (Qwen2.5 has no 1.8B variant); the official Qwen1.5 1.8B chat GGUF is
# assumed here instead. Verify the repo and filename before deploying.
REPO = "Qwen/Qwen1.5-1.8B-Chat-GGUF"
FILE = "qwen1_5-1_8b-chat-q4_k_m.gguf"

print("🔽 Downloading model...")
MODEL_PATH = hf_hub_download(REPO, FILE, local_dir=".", local_dir_use_symlinks=False)

print("🔄 Loading model...")
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=8)

@app.route("/", methods=["GET"])
def root():
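    """Serve the chat UI; index.html is assumed to sit next to this file."""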
    return send_file("index.html")

@app.route("/chat", methods=["POST"])
def chat():
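    """Generate one chat reply. Expects JSON like {"message": "..."}."""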
    data = request.get_json(silent=True) or {}
    msg = (data.get("message") or "").strip()
    if not msg:
        return jsonify({"error": "Empty message"}), 400
    prompt = f"<|user|>\n{msg}\n<|assistant|>"
    out = llm(prompt, max_tokens=300, temperature=0.7, stop=["<|user|>", "<|assistant|>"])
    return jsonify({"reply": out["choices"][0]["text"].strip()})

@app.route("/status")
def status():
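    """Report server uptime in seconds and the loaded model filename."""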
    return jsonify({
        "uptime": round(time.time() - start_time),
        "model": FILE
    })

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
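
# Example request once the server is up (a sketch; port matches app.run above):
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello!"}'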