import gradio as gr from transformers import AutoTokenizer, AutoModelForCausalLM from peft import PeftModel import torch # --- Load tokenizer and model for CPU --- tokenizer = AutoTokenizer.from_pretrained("unsloth/Qwen3-1.7B") base_model = AutoModelForCausalLM.from_pretrained( "unsloth/Qwen3-1.7B", dtype=torch.float32, device_map={"": "cpu"}, ) model = PeftModel.from_pretrained(base_model, "khazarai/Nizami-1.7B").to("cpu") # --- Chatbot logic --- def generate_response(user_input, chat_history): if not user_input.strip(): return chat_history, chat_history chat_history.append({"role": "user", "content": user_input}) text = tokenizer.apply_chat_template( chat_history, tokenize=False, add_generation_prompt=True, enable_thinking=False, ) inputs = tokenizer(text, return_tensors="pt").to("cpu") output_tokens = model.generate( **inputs, max_new_tokens=1024, temperature=0.7, top_p=0.8, top_k=20, do_sample=True ) response = tokenizer.decode(output_tokens[0], skip_special_tokens=True) response = response.split(user_input)[-1].strip() chat_history.append({"role": "assistant", "content": response}) gr_chat_history = [ (m["content"], chat_history[i + 1]["content"]) for i, m in enumerate(chat_history[:-1]) if m["role"] == "user" ] return gr_chat_history, chat_history # --- UI Design --- with gr.Blocks(theme=gr.themes.Soft(primary_hue="yellow", secondary_hue="slate")) as demo: gr.HTML("""
Academic style comprehension and reasoning in Azerbaijani.