| """ | |
| PromptWizard Qwen Training โ Gita Edition | |
| Fine-tunes Qwen using rahul7star/Gita dataset (.csv) | |
| Uploads trained model to rahul7star/Qwen0.5-3B-Gita on Hugging Face Hub | |
| """ | |

import gradio as gr
import spaces
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from huggingface_hub import HfApi

# === GPU check (Zero GPU compatible) ===
def check_gpu_status():
    return "🚀 Zero GPU Ready - GPU will be allocated when training starts"

# === Main Training ===
@spaces.GPU(duration=300)  # on ZeroGPU Spaces this decorator is what actually allocates the GPU
def train_model(model_name, num_epochs, batch_size, learning_rate, progress=gr.Progress()):
    progress(0, desc="Initializing...")
    output_log = []
    try:
        # ==== Device ====
        device = "cuda" if torch.cuda.is_available() else "cpu"
        output_log.append(f"🎮 Using device: {device}")
        if device == "cuda":
            output_log.append(f"⚡ GPU: {torch.cuda.get_device_name(0)}")

        # ==== Load dataset ====
        progress(0.1, desc="Loading rahul7star/Gita dataset...")
        output_log.append("\n📊 Loading dataset from rahul7star/Gita...")
        dataset = load_dataset("rahul7star/Gita", split="train")
        output_log.append(f"   Loaded {len(dataset)} samples from CSV")
        output_log.append(f"   Columns: {dataset.column_names}")

        # ==== Format data ====
        def format_example(item):
            # Use the "text" or "content" column if available; otherwise join all values
            text = (
                item.get("text")
                or item.get("content")
                or " ".join(str(v) for v in item.values())
            )
            prompt = f"""<|system|>
You are a wise teacher interpreting the Bhagavad Gita with deep insights.
<|user|>
{text}
<|assistant|>
"""
            return {"text": prompt}

        dataset = dataset.map(format_example)
        output_log.append(f"   ✅ Formatted {len(dataset)} examples")
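
        # Note (an assumption, not from the original script): the
        # <|system|>/<|user|>/<|assistant|> tags above are plain text to Qwen's
        # tokenizer; Qwen2.5 chat checkpoints use ChatML (<|im_start|>...<|im_end|>).
        # If the tokenizer ships a chat template, a sketch of the canonical route
        # (usable once the tokenizer is loaded below) would be:
        #   prompt = tokenizer.apply_chat_template(
        #       [{"role": "system", "content": "You are a wise teacher..."},
        #        {"role": "user", "content": text}],
        #       tokenize=False,
        #   )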

        # ==== Model ====
        progress(0.3, desc="Loading model & tokenizer...")
        # model_name comes from the UI textbox (default "Qwen/Qwen2.5-0.5B")
        output_log.append(f"\n🤖 Loading model: {model_name}")
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
            low_cpu_mem_usage=True,
        )
        if device == "cuda":
            model = model.to(device)
        output_log.append("   ✅ Model loaded successfully")

        # ==== LoRA ====
        progress(0.4, desc="Configuring LoRA...")
        output_log.append("\n⚙️ Setting up LoRA for efficient fine-tuning...")
        lora_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            r=8,
            lora_alpha=16,
            lora_dropout=0.1,
            target_modules=["q_proj", "v_proj"],
            bias="none",
        )
        model = get_peft_model(model, lora_config)
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        output_log.append(f"   Trainable params: {trainable_params:,}")
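
        # Hedged note: with r=8 adapters on q_proj/v_proj only, the trainable
        # share of a 0.5B model is typically well under 1% of all weights. For
        # more adapter capacity one could (an assumption, not in the original
        # script) also target the remaining attention and MLP projections,
        # keeping lora_alpha at roughly 2*r, e.g.:
        #   target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
        #                   "gate_proj", "up_proj", "down_proj"]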

        # ==== Tokenization ====
        progress(0.5, desc="Tokenizing dataset...")
        def tokenize_fn(examples):
            return tokenizer(
                examples["text"],
                padding="max_length",
                truncation=True,
                max_length=256,
            )
        # Drop the raw text columns so only token tensors reach the collator
        dataset = dataset.map(tokenize_fn, batched=True, remove_columns=dataset.column_names)
        output_log.append("   ✅ Tokenization done")

        # ==== Training arguments ====
        progress(0.6, desc="Setting up training...")
        output_dir = "./qwen-gita-lora"
        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=num_epochs,
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=2,
            warmup_steps=10,
            logging_steps=5,
            save_strategy="epoch",
            fp16=(device == "cuda"),
            optim="adamw_torch",
            learning_rate=learning_rate,
            max_steps=100,  # hard cap: overrides num_train_epochs when reached first
            report_to="none",  # keep demo runs free of external logger setup
        )
        # Causal-LM collator (mlm=False) copies input_ids into labels so the
        # Trainer can compute a loss; without it this dataset has no labels.
        data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset,
            data_collator=data_collator,
            tokenizer=tokenizer,
        )

        # ==== Train ====
        progress(0.7, desc="Training...")
        output_log.append("\n🚀 Starting training...\n" + "=" * 50)
        train_result = trainer.train()

        progress(0.85, desc="Saving model...")
        output_log.append("\n💾 Saving model locally...")
        trainer.save_model(output_dir)  # for a PEFT model this saves the LoRA adapter files
        tokenizer.save_pretrained(output_dir)

        # ==== Upload to HF Hub ====
        progress(0.9, desc="Uploading to Hugging Face Hub...")
        hf_repo = "rahul7star/Qwen0.5-3B-Gita"
        output_log.append(f"\n☁️ Uploading fine-tuned model to: {hf_repo}")
        api = HfApi()  # picks up the token from the HF_TOKEN env var or local login
        # Create the repo if it does not exist yet
        api.create_repo(repo_id=hf_repo, exist_ok=True)
        # Push the saved adapter + tokenizer files in a single commit
        api.upload_folder(
            folder_path=output_dir,
            repo_id=hf_repo,
            commit_message="Upload fine-tuned Qwen-Gita LoRA model",
        )
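
        # Hedged note: the upload above publishes the LoRA adapter, not a merged
        # checkpoint. To publish a standalone model instead, one could (an
        # assumption, not part of the original flow) merge before saving:
        #   merged = model.merge_and_unload()
        #   merged.save_pretrained(output_dir)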

        progress(1.0, desc="Complete!")
        output_log.append("\n✅ Training complete & model uploaded successfully!")
    except Exception as e:
        output_log.append(f"\n❌ Error: {e}")
    return "\n".join(output_log)

# === Gradio Interface ===
def create_interface():
    with gr.Blocks(title="PromptWizard - Qwen Gita Trainer") as demo:
        gr.Markdown("""
        # 🧠 PromptWizard Qwen Fine-tuning - Gita Edition
        Fine-tune **Qwen 0.5B** on your dataset [rahul7star/Gita](https://huggingface.co/datasets/rahul7star/Gita)
        and auto-upload to your model repo **rahul7star/Qwen0.5-3B-Gita**.
        """)
        with gr.Row():
            with gr.Column():
                gpu_status = gr.Textbox(
                    label="GPU Status",
                    value=check_gpu_status(),
                    interactive=False,
                )
                model_name = gr.Textbox(
                    label="Base Model",
                    value="Qwen/Qwen2.5-0.5B",
                    interactive=False,
                )
                num_epochs = gr.Slider(1, 3, value=1, step=1, label="Epochs")
                batch_size = gr.Slider(1, 4, value=2, step=1, label="Batch Size")
                learning_rate = gr.Number(value=5e-5, label="Learning Rate")
                train_btn = gr.Button("🚀 Start Fine-tuning", variant="primary")
            with gr.Column():
                output = gr.Textbox(
                    label="Training Log",
                    lines=25,
                    max_lines=40,
                    value="Click 'Start Fine-tuning' to train on the Gita dataset and upload to your model repo.",
                )
        train_btn.click(
            fn=train_model,
            inputs=[model_name, num_epochs, batch_size, learning_rate],
            outputs=output,
        )
    return demo

if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
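
# --- Hedged usage sketch: loading the uploaded adapter for inference ---
# Assumes the Hub repo holds the LoRA adapter and tokenizer saved above; the
# prompt text is illustrative and not part of the original script.
#
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   from peft import PeftModel
#
#   base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B")
#   model = PeftModel.from_pretrained(base, "rahul7star/Qwen0.5-3B-Gita")
#   tok = AutoTokenizer.from_pretrained("rahul7star/Qwen0.5-3B-Gita")
#   inputs = tok("<|user|>\nWhat does Chapter 2 teach?\n<|assistant|>\n", return_tensors="pt")
#   print(tok.decode(model.generate(**inputs, max_new_tokens=120)[0]))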