PromptWizard Bot committed
Commit 543b65a · Parent: 4bdca86

Fix Zero GPU compatibility - remove bitsandbytes and 8-bit quantization

Files changed (2):
  1. app.py +10 -9
  2. requirements.txt +0 -1
app.py CHANGED
@@ -12,12 +12,10 @@ from peft import LoraConfig, get_peft_model, TaskType
 import json
 import os
 
-# Check if GPU is available
+# Check if GPU is available (Zero GPU safe)
 def check_gpu_status():
-    if torch.cuda.is_available():
-        return f" GPU Available: {torch.cuda.get_device_name(0)} ({torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB)"
-    else:
-        return "⚠️ No GPU detected - Zero GPU will allocate when training starts"
+    # Don't check CUDA at module load time for Zero GPU compatibility
+    return "🚀 Zero GPU Ready - GPU will be allocated when training starts"
 
 @spaces.GPU(duration=300)  # Request GPU for 5 minutes (can extend if needed)
 def train_model(model_name, num_epochs, batch_size, learning_rate, progress=gr.Progress()):
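
The removed check_gpu_status() body queried CUDA at module import time, which is exactly what Zero GPU Spaces cannot tolerate: the process boots with no GPU attached, and hardware is only granted for the duration of a @spaces.GPU-decorated call. A minimal sketch of that pattern, assuming the standard spaces package API (the helper name gpu_probe is hypothetical, not from app.py):

import spaces
import torch

@spaces.GPU(duration=60)  # Zero GPU attaches a device only for this call
def gpu_probe():
    # Safe here: CUDA is actually available inside the decorated call
    return "cuda" if torch.cuda.is_available() else "cpu"

At module scope, the same torch.cuda.is_available() check typically reports no device on a Zero GPU Space, which is why the new check_gpu_status() no longer touches CUDA at all.
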
@@ -86,15 +84,18 @@ You are a mathematics expert. Solve grade school math problems step by step.
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
 
-    # Load model with 8-bit quantization
+    # Load model without quantization for Zero GPU compatibility
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         trust_remote_code=True,
-        load_in_8bit=True,
-        device_map="auto",
-        torch_dtype=torch.float16
+        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+        low_cpu_mem_usage=True
     )
 
+    # Move model to GPU if available
+    if device == "cuda":
+        model = model.to(device)
+
     output_log.append(" Model loaded successfully")
 
     # Configure LoRA
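
Note that the new loading code references a device variable that is defined in neither hunk; presumably it is set earlier inside train_model, after the @spaces.GPU allocation, along the lines of device = "cuda" if torch.cuda.is_available() else "cpu" (an assumption, not shown in this diff). The hunk ends at the "# Configure LoRA" context line; a hypothetical sketch of that step, consistent with the peft 0.7.1 imports at the top of app.py (the actual rank, alpha, and target modules used by app.py are not visible here):

from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,  # causal language modeling
    r=8,                # hypothetical LoRA rank
    lora_alpha=16,      # hypothetical scaling factor
    lora_dropout=0.05,  # hypothetical dropout
)
model = get_peft_model(model, lora_config)  # wraps the base model with adapters
model.print_trainable_parameters()          # LoRA trains only a small fraction
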
requirements.txt CHANGED
@@ -5,6 +5,5 @@ transformers==4.36.2
 datasets==2.16.1
 peft==0.7.1
 accelerate==0.25.0
-bitsandbytes==0.41.3
 numpy==1.24.3
 sentencepiece==0.1.99
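
Dropping the pin matters because bitsandbytes expects a working CUDA runtime when it loads its kernels, which a Zero GPU Space does not have at startup. If 8-bit loading is ever wanted again on hardware with a persistent GPU, a sketch using the transformers quantization config (this assumes bitsandbytes is reinstated; it is not part of this commit):

from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(load_in_8bit=True)  # needs bitsandbytes installed
model = AutoModelForCausalLM.from_pretrained(
    "gpt2",  # hypothetical model id
    quantization_config=quant_config,
    device_map="auto",  # requires accelerate, already pinned above
)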