| { | |
| "acc_batch_size": -1, | |
| "adam_betas": "0.9,0.999", | |
| "architectures": [ | |
| "TransformerForNLI" | |
| ], | |
| "attention_probs_dropout_prob": 0.1, | |
| "bias_proj": "", | |
| "bias_v1": "", | |
| "bias_v2": "", | |
| "bos_token_id": 0, | |
| "clip": 1.0, | |
| "cls": "linear", | |
| "dir": "data/snli_1.0/", | |
| "dropout": 0.1, | |
| "enc": "transformer", | |
| "eos_token_id": 2, | |
| "epochs": 5, | |
| "freeze_transformer": 0, | |
| "gpuid": 1, | |
| "gradient_checkpointing": false, | |
| "hidden_act": "gelu", | |
| "hidden_dropout_prob": 0.1, | |
| "hidden_size": 768, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 3072, | |
| "label_dict": "data/snli_1.0/snli.label.dict", | |
| "label_map_inv": { | |
| "0": "entailment", | |
| "1": "neutral", | |
| "2": "contradiction" | |
| }, | |
| "labels": [ | |
| "entailment", | |
| "neutral", | |
| "contradiction" | |
| ], | |
| "layer_norm_eps": 1e-05, | |
| "learning_rate": 3e-05, | |
| "load_file": "", | |
| "loss": "multiclass", | |
| "max_position_embeddings": 514, | |
| "model_type": "transformerfornli", | |
| "num_attention_heads": 12, | |
| "num_hidden_layers": 12, | |
| "num_label": 3, | |
| "optim": "adamw_fp16", | |
| "pad_token_id": 1, | |
| "param_init_type": "xavier_normal", | |
| "percent": 1.0, | |
| "print_every": 500, | |
| "save_file": "./models/robertabase_snli_seed1", | |
| "seed": 1, | |
| "train_data": "data/snli_1.0/snli.train.hdf5", | |
| "train_res": "", | |
| "transformer_type": "roberta-base", | |
| "type_vocab_size": 1, | |
| "val_data": "data/snli_1.0/snli.val.hdf5", | |
| "val_res": "", | |
| "vocab_size": 50265, | |
| "warmup": "linear", | |
| "warmup_perc": 0.1, | |
| "weight_decay": 0.01 | |
| } | |