Spaces:
Running
Running
PromptWizard Bot
committed on
Commit
·
ea0a42e
1
Parent(s):
543b65a
Use smaller model and simpler config for Zero GPU compatibility
Browse files
app.py
CHANGED
|
@@ -75,10 +75,9 @@ You are a mathematics expert. Solve grade school math problems step by step.
|
|
| 75 |
progress(0.3, desc="Loading model and tokenizer...")
|
| 76 |
output_log.append(f"\n🤖 Loading {model_name}...")
|
| 77 |
|
| 78 |
-
#
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
output_log.append(" Note: Using 1.5B model for Zero GPU compatibility")
|
| 82 |
|
| 83 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 84 |
if tokenizer.pad_token is None:
|
|
@@ -138,14 +137,15 @@ You are a mathematics expert. Solve grade school math problems step by step.
|
|
| 138 |
output_dir="./qwen-promptwizard-zerogpu",
|
| 139 |
num_train_epochs=num_epochs,
|
| 140 |
per_device_train_batch_size=batch_size,
|
| 141 |
-
gradient_accumulation_steps=
|
| 142 |
-
warmup_steps=
|
| 143 |
-
logging_steps=
|
| 144 |
save_strategy="no", # Don't save during demo
|
| 145 |
-
fp16=
|
| 146 |
-
gradient_checkpointing=
|
| 147 |
optim="adamw_torch",
|
| 148 |
learning_rate=learning_rate,
|
|
|
|
| 149 |
)
|
| 150 |
|
| 151 |
# Create trainer
|
|
@@ -209,11 +209,11 @@ def create_interface():
|
|
| 209 |
|
| 210 |
model_name = gr.Dropdown(
|
| 211 |
choices=[
|
|
|
|
| 212 |
"Qwen/Qwen2.5-1.5B",
|
| 213 |
-
"Qwen/Qwen2.5-7B",
|
| 214 |
],
|
| 215 |
-
value="Qwen/Qwen2.5-
|
| 216 |
-
label="Model (
|
| 217 |
)
|
| 218 |
|
| 219 |
num_epochs = gr.Slider(
|
|
|
|
| 75 |
progress(0.3, desc="Loading model and tokenizer...")
|
| 76 |
output_log.append(f"\n🤖 Loading {model_name}...")
|
| 77 |
|
| 78 |
+
# Always use smaller model for Zero GPU demo
|
| 79 |
+
model_name = "Qwen/Qwen2.5-0.5B" # Use smallest model for Zero GPU
|
| 80 |
+
output_log.append(" Note: Using 0.5B model for Zero GPU compatibility")
|
|
|
|
| 81 |
|
| 82 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 83 |
if tokenizer.pad_token is None:
|
|
|
|
| 137 |
output_dir="./qwen-promptwizard-zerogpu",
|
| 138 |
num_train_epochs=num_epochs,
|
| 139 |
per_device_train_batch_size=batch_size,
|
| 140 |
+
gradient_accumulation_steps=2, # Reduced for Zero GPU
|
| 141 |
+
warmup_steps=10, # Reduced warmup
|
| 142 |
+
logging_steps=5,
|
| 143 |
save_strategy="no", # Don't save during demo
|
| 144 |
+
fp16=device == "cuda", # Only use fp16 on GPU
|
| 145 |
+
gradient_checkpointing=False, # Disable for simplicity
|
| 146 |
optim="adamw_torch",
|
| 147 |
learning_rate=learning_rate,
|
| 148 |
+
max_steps=50, # Limit steps for demo
|
| 149 |
)
|
| 150 |
|
| 151 |
# Create trainer
|
|
|
|
| 209 |
|
| 210 |
model_name = gr.Dropdown(
|
| 211 |
choices=[
|
| 212 |
+
"Qwen/Qwen2.5-0.5B",
|
| 213 |
"Qwen/Qwen2.5-1.5B",
|
|
|
|
| 214 |
],
|
| 215 |
+
value="Qwen/Qwen2.5-0.5B",
|
| 216 |
+
label="Model (0.5B works best for Zero GPU)"
|
| 217 |
)
|
| 218 |
|
| 219 |
num_epochs = gr.Slider(
|