# File size: 336 Bytes
# Revision: d25b671
# Run configuration with three sections: dataset, model, and transcoding.
# Nesting is significant: both `dataset` and `model` define their own `name`
# key, so these settings must stay indented under their section headers.

# Dataset selection.
dataset:
  # NOTE(review): presumably the maximum sequence length per example;
  # units (tokens vs. characters) are not visible here -- confirm in the loader.
  max_length: 128
  name: monology/pile-uncopyrighted
  split: train

# Model selection and placement.
model:
  device: cuda
  name: EleutherAI/pythia-410m

# Transcoding settings.
transcoding:
  batch_size: 512
  bias: true
  debug: false
  # NOTE(review): presumably a width multiplier relative to the model's
  # hidden size -- confirm against the consuming code.
  hidden_multiplier: 4
  # NOTE(review): whether this index is 0- or 1-based is defined by the
  # consumer -- confirm.
  layer_idx: 16
  learning_rate: 0.02
  model_type: Bilinear
  # NOTE(review): `n_batches` and `n_batches_full` look like short-run and
  # full-run batch counts; which one applies (possibly tied to `debug`) is
  # decided by the consuming code -- confirm.
  n_batches: 20
  n_batches_full: 3000
  optimizer_type: Muon