PromptWizard Bot committed on
Commit
ea0a42e
·
1 Parent(s): 543b65a

Use smaller model and simpler config for Zero GPU compatibility

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -75,10 +75,9 @@ You are a mathematics expert. Solve grade school math problems step by step.
75
  progress(0.3, desc="Loading model and tokenizer...")
76
  output_log.append(f"\n🤖 Loading {model_name}...")
77
 
78
- # Use smaller model for demo
79
- if "7B" in model_name:
80
- model_name = "Qwen/Qwen2.5-1.5B" # Use smaller model for Zero GPU demo
81
- output_log.append(" Note: Using 1.5B model for Zero GPU compatibility")
82
 
83
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
84
  if tokenizer.pad_token is None:
@@ -138,14 +137,15 @@ You are a mathematics expert. Solve grade school math problems step by step.
138
  output_dir="./qwen-promptwizard-zerogpu",
139
  num_train_epochs=num_epochs,
140
  per_device_train_batch_size=batch_size,
141
- gradient_accumulation_steps=4,
142
- warmup_steps=50,
143
- logging_steps=10,
144
  save_strategy="no", # Don't save during demo
145
- fp16=True,
146
- gradient_checkpointing=True,
147
  optim="adamw_torch",
148
  learning_rate=learning_rate,
 
149
  )
150
 
151
  # Create trainer
@@ -209,11 +209,11 @@ def create_interface():
209
 
210
  model_name = gr.Dropdown(
211
  choices=[
 
212
  "Qwen/Qwen2.5-1.5B",
213
- "Qwen/Qwen2.5-7B",
214
  ],
215
- value="Qwen/Qwen2.5-1.5B",
216
- label="Model (1.5B recommended for Zero GPU)"
217
  )
218
 
219
  num_epochs = gr.Slider(
 
75
  progress(0.3, desc="Loading model and tokenizer...")
76
  output_log.append(f"\n🤖 Loading {model_name}...")
77
 
78
+ # Always use smaller model for Zero GPU demo
79
+ model_name = "Qwen/Qwen2.5-0.5B" # Use smallest model for Zero GPU
80
+ output_log.append(" Note: Using 0.5B model for Zero GPU compatibility")
 
81
 
82
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
83
  if tokenizer.pad_token is None:
 
137
  output_dir="./qwen-promptwizard-zerogpu",
138
  num_train_epochs=num_epochs,
139
  per_device_train_batch_size=batch_size,
140
+ gradient_accumulation_steps=2, # Reduced for Zero GPU
141
+ warmup_steps=10, # Reduced warmup
142
+ logging_steps=5,
143
  save_strategy="no", # Don't save during demo
144
+ fp16=device == "cuda", # Only use fp16 on GPU
145
+ gradient_checkpointing=False, # Disable for simplicity
146
  optim="adamw_torch",
147
  learning_rate=learning_rate,
148
+ max_steps=50, # Limit steps for demo
149
  )
150
 
151
  # Create trainer
 
209
 
210
  model_name = gr.Dropdown(
211
  choices=[
212
+ "Qwen/Qwen2.5-0.5B",
213
  "Qwen/Qwen2.5-1.5B",
 
214
  ],
215
+ value="Qwen/Qwen2.5-0.5B",
216
+ label="Model (0.5B works best for Zero GPU)"
217
  )
218
 
219
  num_epochs = gr.Slider(