# PromptWizard Qwen Fine-tuning on HuggingFace

This notebook fine-tunes a Qwen model on the GSM8K dataset using the PromptWizard methodology.
Run it on Hugging Face or Google Colab with GPU support.

In [None]:
# Install required packages
!pip install -q transformers==4.36.2 datasets==2.16.1 peft==0.7.1 accelerate==0.25.0 bitsandbytes==0.41.3

In [None]:
import json

import torch
from datasets import Dataset, load_dataset
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# Report which CUDA device (if any) is visible to the kernel
if not torch.cuda.is_available():
    print("⚠️ No GPU detected. Training will be slow.")
else:
    print(f"✅ GPU Available: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; show it in GB
    print(f" Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

In [None]:
# Fetch the GSM8K dataset ("main" configuration)
print("Loading GSM8K dataset...")
dataset = load_dataset(path="openai/gsm8k", name="main")

def format_prompt(item):
    """Render one GSM8K record as a single training string.

    Args:
        item: Mapping with a 'question' entry and an 'answer' entry.

    Returns:
        A dict with one key, "text", holding the system / user / assistant
        transcript built from the record.
    """
    question, answer = item['question'], item['answer']
    # One entry per output line; the empty entry yields the blank line
    # between the assistant's lead-in sentence and the reference answer.
    sections = (
        "<|system|>",
        "You are a mathematics expert. Solve grade school math problems step by step.",
        "<|user|>",
        f"{question}",
        "<|assistant|>",
        "Let me solve this step by step.",
        "",
        f"{answer}",
    )
    return {"text": "\n".join(sections)}

# Keep the demo small: at most 1000 training and 100 evaluation problems
train_split, eval_split = dataset['train'], dataset['test']
train_data = train_split.select(range(min(1000, len(train_split))))
eval_data = eval_split.select(range(min(100, len(eval_split))))

# Apply format_prompt to every record of both subsets
train_dataset, eval_dataset = train_data.map(format_prompt), eval_data.map(format_prompt)

print(f"Train samples: {len(train_dataset)}")
print(f"Eval samples: {len(eval_dataset)}")

In [None]:
# Load model and tokenizer
# A small checkpoint keeps training fast for this demo.
MODEL_NAME = "Qwen/Qwen2.5-1.5B"

print(f"Loading {MODEL_NAME}...")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left", trust_remote_code=True)

# Reuse the end-of-sequence token for padding when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the base model with 8-bit weights, placed automatically on the available device(s)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_8bit=True
)

# Prepare the quantized model for training BEFORE attaching the adapters.
# The training arguments enable gradient checkpointing; with a frozen base
# model the checkpointed segments otherwise receive inputs that do not
# require grad, and the backward pass fails. PEFT's helper makes the inputs
# require grad and enables gradient checkpointing on the model.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# Configure LoRA
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,  # Lower rank for faster training
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],  # adapt only the query/value projections
    bias="none"
)

# Wrap the base model with the LoRA adapters and report how many parameters will train
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
# Tokenize datasets
def tokenize_function(examples, max_length=512):
    """Tokenize a batch of formatted examples.

    Args:
        examples: Batch mapping as passed by ``Dataset.map(..., batched=True)``;
            only its "text" entry is read.
        max_length: Sequences longer than this many tokens are truncated.
            Defaults to 512.

    Returns:
        The tokenizer's output for the batch (unpadded, truncated sequences).
    """
    # No padding here: the DataCollatorForLanguageModeling used by this
    # notebook pads each batch when it is assembled, so padding every
    # sample to max_length up front only adds wasted compute.
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=max_length,
    )

print("Tokenizing datasets...")
# Run the batched tokenizer over the training split first, then the evaluation split
train_dataset, eval_dataset = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, eval_dataset)
)

# Causal-LM collator (mlm disabled); batches are padded to a multiple of 8
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, pad_to_multiple_of=8, mlm=False
)

In [None]:
# Training arguments
# Step budget for the demo subset: 1000 examples / (batch 4 x accumulation 4)
# is roughly 62 optimizer steps per epoch on a single GPU. Warmup and the
# eval/save cadence are sized to fit inside that budget: a fixed 100-step
# warmup would never finish, and saving every 100 steps would never write a
# checkpoint for load_best_model_at_end to restore.
training_args = TrainingArguments(
    output_dir="./qwen-promptwizard",
    num_train_epochs=1,  # Quick training for demo
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    warmup_ratio=0.1,  # scales with the actual number of training steps
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=20,
    save_steps=20,  # must be a multiple of eval_steps when loading the best model
    save_total_limit=2,
    load_best_model_at_end=True,
    fp16=True,
    push_to_hub=False,  # Set to True to push to HF Hub
    gradient_checkpointing=True,
    optim="adamw_torch",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
)

# Wire the model, data, collator and arguments into a Trainer
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

In [None]:
# Announce the run and the number of visible CUDA devices, then train
print("Starting training...", f"Using {torch.cuda.device_count()} GPU(s)", sep="\n")

trainer.train()

print("\n✅ Training complete!")

In [None]:
# Write the trained model and the tokenizer into the same output folder
print("Saving model...")
for persist in (trainer.save_model, tokenizer.save_pretrained):
    persist("./qwen-promptwizard-final")

print("Model saved to ./qwen-promptwizard-final")

In [None]:
# Smoke-test the fine-tuned model with one sample problem
from transformers import pipeline

# Build a text-generation pipeline from the folder written by the save step
generator = pipeline(
    task="text-generation",
    model="./qwen-promptwizard-final",
    device_map="auto",
    tokenizer=tokenizer,
)

# Same system / user / assistant layout as the training examples,
# stopping right after the assistant tag so the model writes the solution.
test_prompt = """<|system|>
You are a mathematics expert. Solve grade school math problems step by step.
<|user|>
Janet has 3 apples. She buys 5 more apples from the store. How many apples does she have now?
<|assistant|>"""

# Sample up to 200 new tokens
response = generator(
    test_prompt,
    do_sample=True,
    temperature=0.7,
    max_new_tokens=200,
)

print("Test Response:", response[0]['generated_text'], sep="\n")

## Next Steps

1. **Push to the Hugging Face Hub**: Set `push_to_hub=True` in the training arguments
2. **Increase Training**: Use more epochs and a larger dataset for better results
3. **Use a Larger Model**: Try Qwen2.5-7B for better performance (needs more GPU memory)
4. **Tune Hyperparameters**: Adjust the learning rate, LoRA rank, etc.

The trained model can now be used with PromptWizard for enhanced prompt optimization!