Qwen-Training

Sleeping

App Files Files Community

rahul7star committed on Oct 13

Commit

829e77a

verified ·

1 Parent(s): 817ccbc

Create app_train.py

Browse files

Files changed (1) hide show

app_train.py +232 -0

app_train.py ADDED Viewed

	@@ -0,0 +1,232 @@

+"""
+PromptWizard Qwen Training — Configurable Dataset & Repo
+Fine-tunes Qwen using a user-selected dataset and optionally uploads
+the trained model to a Hugging Face Hub repo asynchronously with logs.
+"""
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
+from datasets import load_dataset
+from peft import LoraConfig, get_peft_model, TaskType
+from huggingface_hub import upload_folder, HfFolder
+import os, asyncio, threading
+from datetime import datetime
+# ==== Async upload wrapper ====
+def start_async_upload(local_dir, hf_repo, output_log):
+    """Starts async model upload in a background thread."""
+    def runner():
+        output_log.append(f"[INFO] 🚀 Async upload thread started for repo: {hf_repo}")
+        asyncio.run(async_upload_model(local_dir, hf_repo, output_log))
+        output_log.append(f"[INFO] 🛑 Async upload thread finished for repo: {hf_repo}")
+    threading.Thread(target=runner, daemon=True).start()
+async def async_upload_model(local_dir, hf_repo, output_log, max_retries=3):
+    """Upload model folder to HF Hub via HTTP API."""
+    try:
+        token = HfFolder.get_token()
+        output_log.append(f"[INFO] ☁️ Preparing to upload to repo: {hf_repo}")
+        attempt = 0
+        while attempt < max_retries:
+            try:
+                output_log.append(f"[INFO] 🔄 Attempt {attempt+1} to upload folder via HTTP API...")
+                upload_folder(folder_path=local_dir, repo_id=hf_repo, repo_type="model", token=token, ignore_patterns=["*.lock","*.tmp"], create_pr=False)
+                output_log.append("[SUCCESS] ✅ Model successfully uploaded to HF Hub!")
+                break
+            except Exception as e:
+                attempt += 1
+                output_log.append(f"[ERROR] Upload attempt {attempt} failed: {e}")
+                if attempt < max_retries:
+                    output_log.append("[INFO] Retrying in 5 seconds...")
+                    await asyncio.sleep(5)
+                else:
+                    output_log.append("[ERROR] ❌ Max retries reached. Upload failed.")
+    except Exception as e:
+        output_log.append(f"[ERROR] ❌ Unexpected error during upload: {e}")
+# ==== GPU check ====
+def check_gpu_status():
+    return "🚀 Zero GPU Ready - GPU will be allocated when training starts"
+# ==== Logging helper ====
+def log_message(output_log, msg):
+    line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
+    print(line)
+    output_log.append(line)
+# ==== Train model ====
+@spaces.GPU(duration=300)
+def train_model(base_model, dataset_name, num_epochs, batch_size, learning_rate, hf_repo):
+    output_log = []
+    test_split = 0.2
+    mock_question = "Who is referred to as 'O best of Brahmanas' in the Bhagavad Gita?"
+    try:
+        log_message(output_log, "🔍 Initializing training sequence...")
+        # ===== Device =====
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        log_message(output_log, f"🎮 Using device: {device}")
+        if device == "cuda":
+            log_message(output_log, f"✅ GPU: {torch.cuda.get_device_name(0)}")
+        # ===== Load dataset =====
+        log_message(output_log, f"\n📚 Loading dataset: {dataset_name} ...")
+        dataset = load_dataset(dataset_name)
+        dataset = dataset["train"].train_test_split(test_size=test_split)
+        train_dataset = dataset["train"]
+        test_dataset = dataset["test"]
+        log_message(output_log, f"   Training samples: {len(train_dataset)}")
+        log_message(output_log, f"   Test samples: {len(test_dataset)}")
+        # ===== Format examples =====
+        def format_example(item):
+            text = item.get("text") or item.get("content") or " ".join(str(v) for v in item.values())
+            prompt = f"""<|system|>
+You are a wise teacher interpreting Bhagavad Gita with deep insights.
+<|user|>
+{text}
+<|assistant|>
+"""
+            return {"text": prompt}
+        train_dataset = train_dataset.map(format_example)
+        test_dataset = test_dataset.map(format_example)
+        log_message(output_log, f"✅ Formatted {len(train_dataset)} train + {len(test_dataset)} test examples")
+        # ===== Load model & tokenizer =====
+        log_message(output_log, f"\n🤖 Loading model: {base_model}")
+        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+        model = AutoModelForCausalLM.from_pretrained(
+            base_model,
+            trust_remote_code=True,
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+            low_cpu_mem_usage=True,
+        )
+        if device == "cuda":
+            model = model.to(device)
+        log_message(output_log, "✅ Model and tokenizer loaded successfully")
+        # ===== LoRA configuration =====
+        log_message(output_log, "\n⚙️ Configuring LoRA for efficient fine-tuning...")
+        lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, r=8, lora_alpha=16, lora_dropout=0.1, target_modules=["q_proj","v_proj"], bias="none")
+        model = get_peft_model(model, lora_config)
+        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+        log_message(output_log, f"Trainable params after LoRA: {trainable_params:,}")
+        # ===== Tokenization + labels =====
+        def tokenize_fn(examples):
+            tokenized = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=256)
+            tokenized["labels"] = tokenized["input_ids"].copy()
+            return tokenized
+        train_dataset = train_dataset.map(tokenize_fn, batched=True)
+        test_dataset = test_dataset.map(tokenize_fn, batched=True)
+        log_message(output_log, "✅ Tokenization + labels done")
+        # ===== Training arguments =====
+        output_dir = "./qwen-gita-lora"
+        training_args = TrainingArguments(
+            output_dir=output_dir,
+            num_train_epochs=num_epochs,
+            per_device_train_batch_size=batch_size,
+            gradient_accumulation_steps=2,
+            warmup_steps=10,
+            logging_steps=5,
+            save_strategy="epoch",
+            fp16=device=="cuda",
+            optim="adamw_torch",
+            learning_rate=learning_rate,
+            max_steps=100,
+        )
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=train_dataset,
+            eval_dataset=test_dataset,
+            tokenizer=tokenizer,
+        )
+        # ===== Train =====
+        log_message(output_log, "\n🚀 Starting training...")
+        trainer.train()
+        log_message(output_log, "\n✅ Training finished!")
+        # ===== Test with mock question =====
+        inputs = tokenizer(f"<|system|>\nYou are a wise teacher interpreting Bhagavad Gita.\n<|user|>\n{mock_question}\n<|assistant|>\n", return_tensors="pt").to(device)
+        outputs = model.generate(**inputs, max_new_tokens=100)
+        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        log_message(output_log, f"\n🧪 Mock Question Test:\nQ: {mock_question}\nA: {answer}")
+        # ===== Save locally (optional upload later) =====
+        trainer.save_model(output_dir)
+        tokenizer.save_pretrained(output_dir)
+        log_message(output_log, "\n✅ Model saved locally. You can now review the mock answer before uploading.")
+    except Exception as e:
+        log_message(output_log, f"\n❌ Error during training: {e}")
+    return "\n".join(output_log), output_dir, mock_question
+# ==== Gradio Interface ====
+def create_interface():
+    with gr.Blocks(title="PromptWizard — Qwen Trainer") as demo:
+        gr.Markdown("""
+        # 🧘 PromptWizard Qwen Fine-tuning
+        Fine-tune Qwen on any dataset and optionally upload to HF Hub.
+        """)
+        with gr.Row():
+            with gr.Column():
+                gr.Textbox(label="GPU Status", value=check_gpu_status(), interactive=False)
+                base_model = gr.Textbox(label="Base Model", value="Qwen/Qwen2.5-0.5B")
+                dataset_name = gr.Textbox(label="Dataset Name", value="rahul7star/Gita")
+                hf_repo = gr.Textbox(label="HF Repo for Upload", value="rahul7star/Qwen0.5-3B-Gita")
+                num_epochs = gr.Slider(1, 3, value=1, step=1, label="Epochs")
+                batch_size = gr.Slider(1, 4, value=2, step=1, label="Batch Size")
+                learning_rate = gr.Number(value=5e-5, label="Learning Rate")
+                train_btn = gr.Button("🚀 Start Fine-tuning", variant="primary")
+                upload_btn = gr.Button("☁️ Upload Model to HF Hub", variant="secondary", interactive=False)
+            with gr.Column():
+                output = gr.Textbox(label="Training Log", lines=25, max_lines=40,
+                                    value="Click 'Start Fine-tuning' to train your model.")
+        # ==== Train button ====
+        def train_click(base_model, dataset_name, num_epochs, batch_size, learning_rate, hf_repo):
+            log, output_dir, mock_question = train_model(base_model, dataset_name, num_epochs, batch_size, learning_rate, hf_repo)
+            return log, True, output_dir
+        train_btn.click(
+            fn=train_click,
+            inputs=[base_model, dataset_name, num_epochs, batch_size, learning_rate, hf_repo],
+            outputs=[output, upload_btn, hf_repo],
+        )
+        # ==== Upload button ====
+        def upload_click(hf_repo):
+            output_log = []
+            start_async_upload("./qwen-gita-lora", hf_repo, output_log)
+            return "\n".join(output_log)
+        upload_btn.click(
+            fn=upload_click,
+            inputs=[hf_repo],
+            outputs=output,
+        )
+    return demo
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch(server_name="0.0.0.0", server_port=7860)