Remove learning rate scheduler in deepspeed config to avoid conflict (#909)
Browse files
- deepspeed/zero1.json +0 -10
- deepspeed/zero2.json +0 -10
- deepspeed/zero3.json +0 -10
deepspeed/zero1.json
CHANGED
|
@@ -24,16 +24,6 @@
|
|
| 24 |
"weight_decay": "auto"
|
| 25 |
}
|
| 26 |
},
|
| 27 |
-
"scheduler": {
|
| 28 |
-
"type": "WarmupDecayLR",
|
| 29 |
-
"params": {
|
| 30 |
-
"warmup_min_lr": "auto",
|
| 31 |
-
"warmup_max_lr": "auto",
|
| 32 |
-
"warmup_num_steps": "auto",
|
| 33 |
-
"warmup_type": "linear",
|
| 34 |
-
"total_num_steps": "auto"
|
| 35 |
-
}
|
| 36 |
-
},
|
| 37 |
"gradient_accumulation_steps": "auto",
|
| 38 |
"train_batch_size": "auto",
|
| 39 |
"train_micro_batch_size_per_gpu": "auto",
|
|
|
|
| 24 |
"weight_decay": "auto"
|
| 25 |
}
|
| 26 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
"gradient_accumulation_steps": "auto",
|
| 28 |
"train_batch_size": "auto",
|
| 29 |
"train_micro_batch_size_per_gpu": "auto",
|
deepspeed/zero2.json
CHANGED
|
@@ -28,16 +28,6 @@
|
|
| 28 |
"weight_decay": "auto"
|
| 29 |
}
|
| 30 |
},
|
| 31 |
-
"scheduler": {
|
| 32 |
-
"type": "WarmupDecayLR",
|
| 33 |
-
"params": {
|
| 34 |
-
"warmup_min_lr": "auto",
|
| 35 |
-
"warmup_max_lr": "auto",
|
| 36 |
-
"warmup_num_steps": "auto",
|
| 37 |
-
"warmup_type": "linear",
|
| 38 |
-
"total_num_steps": "auto"
|
| 39 |
-
}
|
| 40 |
-
},
|
| 41 |
"gradient_accumulation_steps": "auto",
|
| 42 |
"train_batch_size": "auto",
|
| 43 |
"train_micro_batch_size_per_gpu": "auto",
|
|
|
|
| 28 |
"weight_decay": "auto"
|
| 29 |
}
|
| 30 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
"gradient_accumulation_steps": "auto",
|
| 32 |
"train_batch_size": "auto",
|
| 33 |
"train_micro_batch_size_per_gpu": "auto",
|
deepspeed/zero3.json
CHANGED
|
@@ -32,16 +32,6 @@
|
|
| 32 |
"weight_decay": "auto"
|
| 33 |
}
|
| 34 |
},
|
| 35 |
-
"scheduler": {
|
| 36 |
-
"type": "WarmupDecayLR",
|
| 37 |
-
"params": {
|
| 38 |
-
"warmup_min_lr": "auto",
|
| 39 |
-
"warmup_max_lr": "auto",
|
| 40 |
-
"warmup_num_steps": "auto",
|
| 41 |
-
"warmup_type": "linear",
|
| 42 |
-
"total_num_steps": "auto"
|
| 43 |
-
}
|
| 44 |
-
},
|
| 45 |
"gradient_accumulation_steps": "auto",
|
| 46 |
"train_batch_size": "auto",
|
| 47 |
"train_micro_batch_size_per_gpu": "auto",
|
|
|
|
| 32 |
"weight_decay": "auto"
|
| 33 |
}
|
| 34 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
"gradient_accumulation_steps": "auto",
|
| 36 |
"train_batch_size": "auto",
|
| 37 |
"train_micro_batch_size_per_gpu": "auto",
|