| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 96, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.010416666666666666, | |
| "grad_norm": 23.252944189766428, | |
| "learning_rate": 8.639811904061041e-08, | |
| "logits/chosen": -2.590585231781006, | |
| "logits/rejected": -2.5664222240448, | |
| "logps/chosen": -80.29847717285156, | |
| "logps/rejected": -53.10200881958008, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.10416666666666667, | |
| "grad_norm": 21.182382477045586, | |
| "learning_rate": 8.639811904061041e-07, | |
| "logits/chosen": -2.5559909343719482, | |
| "logits/rejected": -2.5379226207733154, | |
| "logps/chosen": -87.82003021240234, | |
| "logps/rejected": -80.9332046508789, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.2013888955116272, | |
| "rewards/chosen": 0.00335866492241621, | |
| "rewards/margins": -0.0003586374514270574, | |
| "rewards/rejected": 0.003717302344739437, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.20833333333333334, | |
| "grad_norm": 17.233720974528225, | |
| "learning_rate": 7.635182612891153e-07, | |
| "logits/chosen": -2.5788445472717285, | |
| "logits/rejected": -2.528242588043213, | |
| "logps/chosen": -101.24139404296875, | |
| "logps/rejected": -88.24673461914062, | |
| "loss": 0.6743, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": 0.07447633892297745, | |
| "rewards/margins": 0.016661062836647034, | |
| "rewards/rejected": 0.057815272361040115, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.3125, | |
| "grad_norm": 25.6903065091126, | |
| "learning_rate": 6.630553321721264e-07, | |
| "logits/chosen": -2.4120750427246094, | |
| "logits/rejected": -2.4285130500793457, | |
| "logps/chosen": -67.22891235351562, | |
| "logps/rejected": -77.24456787109375, | |
| "loss": 0.6697, | |
| "rewards/accuracies": 0.32499998807907104, | |
| "rewards/chosen": 0.007355662528425455, | |
| "rewards/margins": 0.08921505510807037, | |
| "rewards/rejected": -0.08185939490795135, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 19.22554951858603, | |
| "learning_rate": 5.625924030551376e-07, | |
| "logits/chosen": -2.4893252849578857, | |
| "logits/rejected": -2.4751393795013428, | |
| "logps/chosen": -74.36286163330078, | |
| "logps/rejected": -75.44730377197266, | |
| "loss": 0.6618, | |
| "rewards/accuracies": 0.28125, | |
| "rewards/chosen": -0.07824570685625076, | |
| "rewards/margins": 0.08888493478298187, | |
| "rewards/rejected": -0.16713064908981323, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5208333333333334, | |
| "grad_norm": 19.08251678108842, | |
| "learning_rate": 4.6212947393814867e-07, | |
| "logits/chosen": -2.4168150424957275, | |
| "logits/rejected": -2.4291889667510986, | |
| "logps/chosen": -52.590057373046875, | |
| "logps/rejected": -62.784461975097656, | |
| "loss": 0.6552, | |
| "rewards/accuracies": 0.23125000298023224, | |
| "rewards/chosen": -0.059627026319503784, | |
| "rewards/margins": 0.07718921452760696, | |
| "rewards/rejected": -0.13681624829769135, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 21.66166836395161, | |
| "learning_rate": 3.6166654482115984e-07, | |
| "logits/chosen": -2.489243984222412, | |
| "logits/rejected": -2.4673056602478027, | |
| "logps/chosen": -82.04798889160156, | |
| "logps/rejected": -87.74610137939453, | |
| "loss": 0.6569, | |
| "rewards/accuracies": 0.33125001192092896, | |
| "rewards/chosen": -0.10516528785228729, | |
| "rewards/margins": 0.19169361889362335, | |
| "rewards/rejected": -0.29685890674591064, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.7291666666666666, | |
| "grad_norm": 19.133621385765668, | |
| "learning_rate": 2.6120361570417096e-07, | |
| "logits/chosen": -2.452115535736084, | |
| "logits/rejected": -2.4336700439453125, | |
| "logps/chosen": -96.10133361816406, | |
| "logps/rejected": -90.39111328125, | |
| "loss": 0.6395, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": 0.008597126230597496, | |
| "rewards/margins": 0.1830345243215561, | |
| "rewards/rejected": -0.17443740367889404, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 34.22517043366701, | |
| "learning_rate": 1.6074068658718216e-07, | |
| "logits/chosen": -2.4384665489196777, | |
| "logits/rejected": -2.3800644874572754, | |
| "logps/chosen": -83.1066665649414, | |
| "logps/rejected": -85.15121459960938, | |
| "loss": 0.6206, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": 0.0896192193031311, | |
| "rewards/margins": 0.29328036308288574, | |
| "rewards/rejected": -0.20366115868091583, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.9375, | |
| "grad_norm": 28.270052144663545, | |
| "learning_rate": 6.027775747019331e-08, | |
| "logits/chosen": -2.3832895755767822, | |
| "logits/rejected": -2.3764870166778564, | |
| "logps/chosen": -53.896759033203125, | |
| "logps/rejected": -69.18147277832031, | |
| "loss": 0.6399, | |
| "rewards/accuracies": 0.28125, | |
| "rewards/chosen": 0.0029330668039619923, | |
| "rewards/margins": 0.20463672280311584, | |
| "rewards/rejected": -0.20170363783836365, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 96, | |
| "total_flos": 0.0, | |
| "train_loss": 0.6540692721803983, | |
| "train_runtime": 977.0227, | |
| "train_samples_per_second": 6.257, | |
| "train_steps_per_second": 0.098 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 96, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |