import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# --- Load tokenizer and model for CPU ---
tokenizer = AutoTokenizer.from_pretrained("unsloth/Qwen3-1.7B")

base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/Qwen3-1.7B",
    torch_dtype=torch.float32,  # full precision; fp16 is poorly supported on CPU
    device_map={"": "cpu"},
)

# Attach the Nizami LoRA adapter on top of the Qwen3 base weights.
model = PeftModel.from_pretrained(base_model, "khazarai/Nizami-1.7B").to("cpu")
model.eval()  # inference only
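
# Optional speed-up (a sketch, not part of the original app): PEFT can fold the
# LoRA weights into the base model, removing the adapter indirection at
# inference time, via merge_and_unload():
#
#     model = model.merge_and_unload()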


# --- Chatbot logic ---
def to_pairs(chat_history):
    """Convert the role/content message history into the (user, assistant)
    tuples that gr.Chatbot renders."""
    return [
        (m["content"], chat_history[i + 1]["content"])
        for i, m in enumerate(chat_history[:-1])
        if m["role"] == "user"
    ]


def generate_response(user_input, chat_history):
    if not user_input.strip():
        # Nothing to send: re-render the existing history unchanged.
        return to_pairs(chat_history), chat_history

    chat_history.append({"role": "user", "content": user_input})

    # Qwen3 chat templates accept enable_thinking; False suppresses the
    # model's <think> reasoning block so replies come back directly.
    text = tokenizer.apply_chat_template(
        chat_history,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    inputs = tokenizer(text, return_tensors="pt").to("cpu")

    with torch.no_grad():  # no gradients needed at inference time
        output_tokens = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.7,  # Qwen3's suggested sampling settings
            top_p=0.8,        # for non-thinking mode
            top_k=20,
            do_sample=True,
        )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on the raw user input is fragile: it breaks whenever the reply quotes
    # the question or the chat template reshapes the prompt.
    new_tokens = output_tokens[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    chat_history.append({"role": "assistant", "content": response})

    return to_pairs(chat_history), chat_history
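
# A possible extension (sketch, using only transformers' public streaming API):
# stream tokens with TextIteratorStreamer so replies render incrementally
# instead of only after the full CPU generation finishes.
#
#     from threading import Thread
#     from transformers import TextIteratorStreamer
#
#     streamer = TextIteratorStreamer(
#         tokenizer, skip_prompt=True, skip_special_tokens=True
#     )
#     Thread(
#         target=model.generate,
#         kwargs=dict(**inputs, max_new_tokens=1024, streamer=streamer),
#     ).start()
#     for chunk in streamer:
#         ...  # accumulate chunk and yield the updated history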


# --- UI Design ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="yellow", secondary_hue="slate")) as demo:
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 20px;">
        <h1 style="font-family: 'Inter', sans-serif; font-weight: 800; color: #FACC15; font-size: 2.2em;">
            πŸ“š Nizami-1.7B
        </h1>
        <p style="color: #FDE047; font-size: 1.05em; margin-top: -10px;">
            Academic style comprehension and reasoning in Azerbaijani.
        </p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=6):
            chatbot = gr.Chatbot(
                label="Academic-style Chat",
                height=600,
                bubble_full_width=True,
                show_copy_button=True,
                avatar_images=(
                    "https://cdn-icons-png.flaticon.com/512/1077/1077012.png",  # user icon
                    "https://cdn-icons-png.flaticon.com/512/4140/4140048.png",  # bot icon
                ),
            )
            user_input = gr.Textbox(
                placeholder="Ask me...",
                label="πŸ’¬ Your question",
                lines=3,
                autofocus=True,
            )
            with gr.Row():
                send_btn = gr.Button("πŸš€ Send", variant="primary")
                clear_btn = gr.Button("🧹 Clear Chat")

    state = gr.State([])

    send_btn.click(generate_response, [user_input, state], [chatbot, state])
    user_input.submit(generate_response, [user_input, state], [chatbot, state])
    clear_btn.click(lambda: ([], []), None, [chatbot, state])
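
    # Optional refinement (sketch, wiring not in the original app): clear the
    # textbox after sending by chaining Gradio's .then() onto the event, e.g.
    #
    #     send_btn.click(generate_response, [user_input, state],
    #                    [chatbot, state]).then(lambda: "", None, user_input)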

    gr.HTML("""
    <div style="text-align: center; margin-top: 25px; color: #6B7280; font-size: 0.9em;">
        Powered by <b>Qwen3-1.7B + Nizami-1.7B</b> | Built with ❀️ using Gradio
    </div>
    """)

demo.launch(share=True)