"""
PromptWizard Qwen Training — Gita Edition
Fine-tunes Qwen using rahul7star/Gita dataset (.csv)
Uploads trained model to rahul7star/Qwen0.5-3B-Gita on Hugging Face Hub
"""
import gradio as gr
import spaces
import torch
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
Trainer,
TrainingArguments,
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from huggingface_hub import HfApi, HfFolder
import asyncio
async def async_upload_model(local_dir, hf_repo, output_log):
    """Create the target repo if needed and push the trained files to the Hub."""
    try:
        token = HfFolder.get_token()
        api = HfApi(token=token)
        api.create_repo(repo_id=hf_repo, exist_ok=True)
        output_log.append(f"\n☁️ Starting upload to: {hf_repo}")
        # Push the saved adapter and tokenizer files straight from the output directory
        api.upload_folder(
            folder_path=local_dir,
            repo_id=hf_repo,
            commit_message="Upload fine-tuned model",
        )
        output_log.append("\n✅ Upload complete!")
    except Exception as e:
        output_log.append(f"\n❌ Upload error: {e}")
# === GPU check (Zero GPU compatible) ===
def check_gpu_status():
return "🚀 Zero GPU Ready - GPU will be allocated when training starts"
# === Main Training ===
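# @spaces.GPU requests a ZeroGPU allocation only while this function runs
# (up to 300 seconds here); the rest of the Space stays on CPU.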
@spaces.GPU(duration=300)
def train_model(model_name, num_epochs, batch_size, learning_rate, progress=gr.Progress()):
progress(0, desc="Initializing...")
output_log = []
try:
# ==== Device ====
device = "cuda" if torch.cuda.is_available() else "cpu"
output_log.append(f"🎮 Using device: {device}")
if device == "cuda":
output_log.append(f"✅ GPU: {torch.cuda.get_device_name(0)}")
# ==== Load dataset ====
progress(0.1, desc="Loading rahul7star/Gita dataset...")
output_log.append("\n📚 Loading dataset from rahul7star/Gita...")
dataset = load_dataset("rahul7star/Gita", split="train")
output_log.append(f" Loaded {len(dataset)} samples from CSV")
output_log.append(f" Columns: {dataset.column_names}")
# ==== Format data ====
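        # The Gita CSV may expose the passage under "text" or "content";
        # otherwise all column values are joined into a single string.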
def format_example(item):
text = (
item.get("text")
or item.get("content")
or " ".join(str(v) for v in item.values())
)
prompt = f"""<|system|>
You are a wise teacher interpreting Bhagavad Gita with deep insights.
<|user|>
{text}
<|assistant|>
"""
return {"text": prompt}
dataset = dataset.map(format_example)
output_log.append(f" ✅ Formatted {len(dataset)} examples")
# ==== Model & Tokenizer ====
progress(0.3, desc="Loading model & tokenizer...")
        # Fall back to the default base model if the UI field is empty
        model_name = model_name or "Qwen/Qwen2.5-0.5B"
output_log.append(f"\n🤖 Loading model: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
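        # Some causal-LM tokenizers ship without a pad token; reuse EOS so padding works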
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
model_name,
trust_remote_code=True,
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
low_cpu_mem_usage=True,
)
if device == "cuda":
model = model.to(device)
output_log.append(" ✅ Model loaded successfully")
# ==== LoRA ====
progress(0.4, desc="Configuring LoRA...")
output_log.append("\n⚙️ Setting up LoRA for efficient fine-tuning...")
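        # Low-rank adapters on the attention query/value projections keep the
        # trainable parameter count tiny compared to full fine-tuning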
lora_config = LoraConfig(
task_type=TaskType.CAUSAL_LM,
r=8,
lora_alpha=16,
lora_dropout=0.1,
target_modules=["q_proj", "v_proj"],
bias="none",
)
model = get_peft_model(model, lora_config)
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
output_log.append(f" Trainable params: {trainable_params:,}")
# ==== Tokenization + Labels ====
progress(0.5, desc="Tokenizing dataset...")
def tokenize_fn(examples):
tokenized = tokenizer(
examples["text"],
padding="max_length",
truncation=True,
max_length=256,
)
# Add labels for causal LM
tokenized["labels"] = tokenized["input_ids"].copy()
return tokenized
dataset = dataset.map(tokenize_fn, batched=True)
output_log.append(" ✅ Tokenization + labels done")
# ==== Training arguments ====
progress(0.6, desc="Setting up training...")
output_dir = "./qwen-gita-lora"
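        # Note: max_steps=100 below caps training and overrides num_train_epochs when hit first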
training_args = TrainingArguments(
output_dir=output_dir,
            num_train_epochs=int(num_epochs),
            per_device_train_batch_size=int(batch_size),  # Gradio sliders may return floats
gradient_accumulation_steps=2,
warmup_steps=10,
logging_steps=5,
save_strategy="epoch",
fp16=device == "cuda",
optim="adamw_torch",
learning_rate=learning_rate,
max_steps=100,
)
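        # No custom data collator is needed: every example is already padded to a
        # fixed max_length and carries its own labels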
trainer = Trainer(
model=model,
args=training_args,
train_dataset=dataset,
tokenizer=tokenizer,
)
# ==== Train ====
progress(0.7, desc="Training...")
output_log.append("\n🚀 Starting training...\n" + "=" * 50)
train_result = trainer.train()
# ==== Save model locally ====
progress(0.85, desc="Saving model...")
output_log.append("\n💾 Saving model locally...")
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)
        # ==== Upload to Hugging Face Hub ====
        hf_repo = "rahul7star/Qwen0.5-3B-Gita"
        # Run the upload coroutine to completion before returning the log
        asyncio.run(async_upload_model(output_dir, hf_repo, output_log))
progress(1.0, desc="Complete!")
        output_log.append("\n✅ Training complete!")
except Exception as e:
output_log.append(f"\n❌ Error: {e}")
return "\n".join(output_log)
# === Gradio Interface ===
def create_interface():
with gr.Blocks(title="PromptWizard — Qwen Gita Trainer") as demo:
gr.Markdown("""
# 🧘 PromptWizard Qwen Fine-tuning — Gita Edition
Fine-tune **Qwen 0.5B** on your dataset [rahul7star/Gita](https://huggingface.co/datasets/rahul7star/Gita)
and auto-upload to your model repo **rahul7star/Qwen0.5-3B-Gita**.
""")
with gr.Row():
with gr.Column():
gpu_status = gr.Textbox(
label="GPU Status",
value=check_gpu_status(),
interactive=False,
)
model_name = gr.Textbox(
label="Base Model",
value="Qwen/Qwen2.5-0.5B",
interactive=False,
)
num_epochs = gr.Slider(1, 3, value=1, step=1, label="Epochs")
batch_size = gr.Slider(1, 4, value=2, step=1, label="Batch Size")
learning_rate = gr.Number(value=5e-5, label="Learning Rate")
train_btn = gr.Button("🚀 Start Fine-tuning", variant="primary")
with gr.Column():
output = gr.Textbox(
label="Training Log",
lines=25,
max_lines=40,
value="Click 'Start Fine-tuning' to train on the Gita dataset and upload to your model repo.",
)
train_btn.click(
fn=train_model,
inputs=[model_name, num_epochs, batch_size, learning_rate],
outputs=output,
)
return demo
if __name__ == "__main__":
demo = create_interface()
demo.launch()