dataset:
  max_length: 128
  name: monology/pile-uncopyrighted
  split: train
model:
  device: cuda
  name: EleutherAI/pythia-410m
transcoding:
  batch_size: 512
  bias: true
  debug: false
  hidden_multiplier: 4
  layer_idx: 22
  learning_rate: 0.02
  model_type: Bilinear
  n_batches: 20
  n_batches_full: 3000
  optimizer_type: Muon