| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 1560, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.016025641025641024, | |
| "grad_norm": 6.815881626720253, | |
| "learning_rate": 1.0256410256410257e-06, | |
| "loss": 0.9447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4387686252593994, | |
| "step": 5, | |
| "valid_targets_mean": 2775.8, | |
| "valid_targets_min": 999 | |
| }, | |
| { | |
| "epoch": 0.03205128205128205, | |
| "grad_norm": 5.611249301686906, | |
| "learning_rate": 2.307692307692308e-06, | |
| "loss": 0.9287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4332679808139801, | |
| "step": 10, | |
| "valid_targets_mean": 3038.2, | |
| "valid_targets_min": 1540 | |
| }, | |
| { | |
| "epoch": 0.04807692307692308, | |
| "grad_norm": 4.11592117125762, | |
| "learning_rate": 3.58974358974359e-06, | |
| "loss": 0.9119, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4442380368709564, | |
| "step": 15, | |
| "valid_targets_mean": 2672.2, | |
| "valid_targets_min": 1311 | |
| }, | |
| { | |
| "epoch": 0.0641025641025641, | |
| "grad_norm": 2.4171662234913085, | |
| "learning_rate": 4.871794871794872e-06, | |
| "loss": 0.8644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46687430143356323, | |
| "step": 20, | |
| "valid_targets_mean": 3144.1, | |
| "valid_targets_min": 1686 | |
| }, | |
| { | |
| "epoch": 0.08012820512820513, | |
| "grad_norm": 1.5505428723718302, | |
| "learning_rate": 6.153846153846155e-06, | |
| "loss": 0.8381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4025786817073822, | |
| "step": 25, | |
| "valid_targets_mean": 2429.6, | |
| "valid_targets_min": 1163 | |
| }, | |
| { | |
| "epoch": 0.09615384615384616, | |
| "grad_norm": 1.0869371534735193, | |
| "learning_rate": 7.435897435897437e-06, | |
| "loss": 0.814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4357997179031372, | |
| "step": 30, | |
| "valid_targets_mean": 3333.8, | |
| "valid_targets_min": 2413 | |
| }, | |
| { | |
| "epoch": 0.11217948717948718, | |
| "grad_norm": 0.950971876737434, | |
| "learning_rate": 8.717948717948719e-06, | |
| "loss": 0.7732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4076507091522217, | |
| "step": 35, | |
| "valid_targets_mean": 3007.8, | |
| "valid_targets_min": 885 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 0.7582939151199518, | |
| "learning_rate": 1e-05, | |
| "loss": 0.7849, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4013994336128235, | |
| "step": 40, | |
| "valid_targets_mean": 2966.4, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 0.14423076923076922, | |
| "grad_norm": 0.6819204506609364, | |
| "learning_rate": 1.1282051282051283e-05, | |
| "loss": 0.7513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3817106783390045, | |
| "step": 45, | |
| "valid_targets_mean": 2599.5, | |
| "valid_targets_min": 1385 | |
| }, | |
| { | |
| "epoch": 0.16025641025641027, | |
| "grad_norm": 0.6391357961237331, | |
| "learning_rate": 1.2564102564102565e-05, | |
| "loss": 0.74, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37557607889175415, | |
| "step": 50, | |
| "valid_targets_mean": 2784.7, | |
| "valid_targets_min": 1317 | |
| }, | |
| { | |
| "epoch": 0.1762820512820513, | |
| "grad_norm": 0.5551476820310136, | |
| "learning_rate": 1.3846153846153847e-05, | |
| "loss": 0.718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3204777240753174, | |
| "step": 55, | |
| "valid_targets_mean": 2567.1, | |
| "valid_targets_min": 1304 | |
| }, | |
| { | |
| "epoch": 0.19230769230769232, | |
| "grad_norm": 0.6077071082054613, | |
| "learning_rate": 1.5128205128205129e-05, | |
| "loss": 0.7196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35047832131385803, | |
| "step": 60, | |
| "valid_targets_mean": 2480.4, | |
| "valid_targets_min": 1230 | |
| }, | |
| { | |
| "epoch": 0.20833333333333334, | |
| "grad_norm": 0.6028149033641808, | |
| "learning_rate": 1.641025641025641e-05, | |
| "loss": 0.7065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3641282916069031, | |
| "step": 65, | |
| "valid_targets_mean": 2665.4, | |
| "valid_targets_min": 1816 | |
| }, | |
| { | |
| "epoch": 0.22435897435897437, | |
| "grad_norm": 0.5364533780203461, | |
| "learning_rate": 1.7692307692307694e-05, | |
| "loss": 0.6856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3332011103630066, | |
| "step": 70, | |
| "valid_targets_mean": 2644.1, | |
| "valid_targets_min": 1139 | |
| }, | |
| { | |
| "epoch": 0.2403846153846154, | |
| "grad_norm": 0.5296770070879561, | |
| "learning_rate": 1.8974358974358975e-05, | |
| "loss": 0.6856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3156450092792511, | |
| "step": 75, | |
| "valid_targets_mean": 2270.6, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 0.5261553976863995, | |
| "learning_rate": 2.025641025641026e-05, | |
| "loss": 0.6877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30606338381767273, | |
| "step": 80, | |
| "valid_targets_mean": 2451.2, | |
| "valid_targets_min": 1292 | |
| }, | |
| { | |
| "epoch": 0.2724358974358974, | |
| "grad_norm": 0.49894510222939537, | |
| "learning_rate": 2.153846153846154e-05, | |
| "loss": 0.6759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28691911697387695, | |
| "step": 85, | |
| "valid_targets_mean": 2653.8, | |
| "valid_targets_min": 1086 | |
| }, | |
| { | |
| "epoch": 0.28846153846153844, | |
| "grad_norm": 0.5019026732938922, | |
| "learning_rate": 2.2820512820512822e-05, | |
| "loss": 0.6572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3042421042919159, | |
| "step": 90, | |
| "valid_targets_mean": 2732.5, | |
| "valid_targets_min": 1498 | |
| }, | |
| { | |
| "epoch": 0.30448717948717946, | |
| "grad_norm": 0.5087948648074874, | |
| "learning_rate": 2.4102564102564103e-05, | |
| "loss": 0.6372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3090476393699646, | |
| "step": 95, | |
| "valid_targets_mean": 2741.7, | |
| "valid_targets_min": 1313 | |
| }, | |
| { | |
| "epoch": 0.32051282051282054, | |
| "grad_norm": 0.5093262106093662, | |
| "learning_rate": 2.5384615384615386e-05, | |
| "loss": 0.654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3319040834903717, | |
| "step": 100, | |
| "valid_targets_mean": 3045.6, | |
| "valid_targets_min": 1493 | |
| }, | |
| { | |
| "epoch": 0.33653846153846156, | |
| "grad_norm": 0.5216909158213553, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.6542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3026030361652374, | |
| "step": 105, | |
| "valid_targets_mean": 2332.0, | |
| "valid_targets_min": 1348 | |
| }, | |
| { | |
| "epoch": 0.3525641025641026, | |
| "grad_norm": 0.5594308657153152, | |
| "learning_rate": 2.794871794871795e-05, | |
| "loss": 0.6469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3094126582145691, | |
| "step": 110, | |
| "valid_targets_mean": 2481.8, | |
| "valid_targets_min": 1253 | |
| }, | |
| { | |
| "epoch": 0.3685897435897436, | |
| "grad_norm": 0.5697299366312509, | |
| "learning_rate": 2.923076923076923e-05, | |
| "loss": 0.6457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.345976859331131, | |
| "step": 115, | |
| "valid_targets_mean": 2500.8, | |
| "valid_targets_min": 1002 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.481422178674973, | |
| "learning_rate": 3.0512820512820514e-05, | |
| "loss": 0.6334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3132162094116211, | |
| "step": 120, | |
| "valid_targets_mean": 3228.7, | |
| "valid_targets_min": 1906 | |
| }, | |
| { | |
| "epoch": 0.40064102564102566, | |
| "grad_norm": 0.4861239944985147, | |
| "learning_rate": 3.1794871794871795e-05, | |
| "loss": 0.6196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32609227299690247, | |
| "step": 125, | |
| "valid_targets_mean": 3174.4, | |
| "valid_targets_min": 1632 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 0.5222849869950157, | |
| "learning_rate": 3.307692307692308e-05, | |
| "loss": 0.6388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31350448727607727, | |
| "step": 130, | |
| "valid_targets_mean": 2809.1, | |
| "valid_targets_min": 1061 | |
| }, | |
| { | |
| "epoch": 0.4326923076923077, | |
| "grad_norm": 0.5379788899851837, | |
| "learning_rate": 3.435897435897436e-05, | |
| "loss": 0.6313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3066648244857788, | |
| "step": 135, | |
| "valid_targets_mean": 2802.4, | |
| "valid_targets_min": 1190 | |
| }, | |
| { | |
| "epoch": 0.44871794871794873, | |
| "grad_norm": 0.5420454343087308, | |
| "learning_rate": 3.5641025641025646e-05, | |
| "loss": 0.6318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30834197998046875, | |
| "step": 140, | |
| "valid_targets_mean": 2687.2, | |
| "valid_targets_min": 1391 | |
| }, | |
| { | |
| "epoch": 0.46474358974358976, | |
| "grad_norm": 0.5214859365271105, | |
| "learning_rate": 3.692307692307693e-05, | |
| "loss": 0.6175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33880215883255005, | |
| "step": 145, | |
| "valid_targets_mean": 2832.4, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 0.4807692307692308, | |
| "grad_norm": 0.46621326586782824, | |
| "learning_rate": 3.820512820512821e-05, | |
| "loss": 0.6109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2916651666164398, | |
| "step": 150, | |
| "valid_targets_mean": 3312.8, | |
| "valid_targets_min": 1660 | |
| }, | |
| { | |
| "epoch": 0.4967948717948718, | |
| "grad_norm": 0.5182511120208516, | |
| "learning_rate": 3.948717948717949e-05, | |
| "loss": 0.6284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3359749913215637, | |
| "step": 155, | |
| "valid_targets_mean": 2667.7, | |
| "valid_targets_min": 1182 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.5076341907952538, | |
| "learning_rate": 3.999954938420724e-05, | |
| "loss": 0.6205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29317110776901245, | |
| "step": 160, | |
| "valid_targets_mean": 2856.9, | |
| "valid_targets_min": 1035 | |
| }, | |
| { | |
| "epoch": 0.5288461538461539, | |
| "grad_norm": 0.4968550139113613, | |
| "learning_rate": 3.9996795694563096e-05, | |
| "loss": 0.6311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3237057626247406, | |
| "step": 165, | |
| "valid_targets_mean": 3255.5, | |
| "valid_targets_min": 930 | |
| }, | |
| { | |
| "epoch": 0.5448717948717948, | |
| "grad_norm": 0.5432075062569038, | |
| "learning_rate": 3.9991539001644015e-05, | |
| "loss": 0.6201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3103916049003601, | |
| "step": 170, | |
| "valid_targets_mean": 2773.2, | |
| "valid_targets_min": 1355 | |
| }, | |
| { | |
| "epoch": 0.5608974358974359, | |
| "grad_norm": 0.5320854842566946, | |
| "learning_rate": 3.998377996343139e-05, | |
| "loss": 0.6195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30836841464042664, | |
| "step": 175, | |
| "valid_targets_mean": 2700.5, | |
| "valid_targets_min": 1612 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 0.4787446461742092, | |
| "learning_rate": 3.9973519551125746e-05, | |
| "loss": 0.6079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2759506106376648, | |
| "step": 180, | |
| "valid_targets_mean": 3075.6, | |
| "valid_targets_min": 1686 | |
| }, | |
| { | |
| "epoch": 0.592948717948718, | |
| "grad_norm": 0.5135765340971998, | |
| "learning_rate": 3.99607590490251e-05, | |
| "loss": 0.6211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3288421928882599, | |
| "step": 185, | |
| "valid_targets_mean": 3388.7, | |
| "valid_targets_min": 1348 | |
| }, | |
| { | |
| "epoch": 0.6089743589743589, | |
| "grad_norm": 0.7381909636900029, | |
| "learning_rate": 3.994550005436431e-05, | |
| "loss": 0.6308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31162500381469727, | |
| "step": 190, | |
| "valid_targets_mean": 2567.4, | |
| "valid_targets_min": 1067 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 0.526645919780682, | |
| "learning_rate": 3.992774447711503e-05, | |
| "loss": 0.6138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31996357440948486, | |
| "step": 195, | |
| "valid_targets_mean": 2892.8, | |
| "valid_targets_min": 1456 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 0.5012857363859938, | |
| "learning_rate": 3.990749453974676e-05, | |
| "loss": 0.6045, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2793130874633789, | |
| "step": 200, | |
| "valid_targets_mean": 2781.1, | |
| "valid_targets_min": 1162 | |
| }, | |
| { | |
| "epoch": 0.657051282051282, | |
| "grad_norm": 0.5871837857805097, | |
| "learning_rate": 3.9884752776948564e-05, | |
| "loss": 0.6005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.309184730052948, | |
| "step": 205, | |
| "valid_targets_mean": 2420.6, | |
| "valid_targets_min": 900 | |
| }, | |
| { | |
| "epoch": 0.6730769230769231, | |
| "grad_norm": 0.5541790395378353, | |
| "learning_rate": 3.985952203531184e-05, | |
| "loss": 0.6318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2788581848144531, | |
| "step": 210, | |
| "valid_targets_mean": 2503.8, | |
| "valid_targets_min": 1417 | |
| }, | |
| { | |
| "epoch": 0.6891025641025641, | |
| "grad_norm": 0.5136635755544906, | |
| "learning_rate": 3.983180547297404e-05, | |
| "loss": 0.6149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2936289310455322, | |
| "step": 215, | |
| "valid_targets_mean": 2670.8, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 0.7051282051282052, | |
| "grad_norm": 0.5526474169879116, | |
| "learning_rate": 3.9801606559223286e-05, | |
| "loss": 0.6043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29138144850730896, | |
| "step": 220, | |
| "valid_targets_mean": 2094.0, | |
| "valid_targets_min": 669 | |
| }, | |
| { | |
| "epoch": 0.7211538461538461, | |
| "grad_norm": 0.5005790560858441, | |
| "learning_rate": 3.9768929074064206e-05, | |
| "loss": 0.6046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3046284317970276, | |
| "step": 225, | |
| "valid_targets_mean": 2856.6, | |
| "valid_targets_min": 804 | |
| }, | |
| { | |
| "epoch": 0.7371794871794872, | |
| "grad_norm": 0.5319298199331578, | |
| "learning_rate": 3.973377710774474e-05, | |
| "loss": 0.5987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2717939019203186, | |
| "step": 230, | |
| "valid_targets_mean": 2771.9, | |
| "valid_targets_min": 1624 | |
| }, | |
| { | |
| "epoch": 0.7532051282051282, | |
| "grad_norm": 0.5462975979383605, | |
| "learning_rate": 3.9696155060244166e-05, | |
| "loss": 0.5994, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2797490358352661, | |
| "step": 235, | |
| "valid_targets_mean": 2263.8, | |
| "valid_targets_min": 463 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.5625047884049433, | |
| "learning_rate": 3.965606764072237e-05, | |
| "loss": 0.6007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2672920227050781, | |
| "step": 240, | |
| "valid_targets_mean": 2372.8, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 0.7852564102564102, | |
| "grad_norm": 0.5159405507851967, | |
| "learning_rate": 3.96135198669304e-05, | |
| "loss": 0.588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26713523268699646, | |
| "step": 245, | |
| "valid_targets_mean": 2816.5, | |
| "valid_targets_min": 1356 | |
| }, | |
| { | |
| "epoch": 0.8012820512820513, | |
| "grad_norm": 0.55075615565925, | |
| "learning_rate": 3.956851706458236e-05, | |
| "loss": 0.5946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32864856719970703, | |
| "step": 250, | |
| "valid_targets_mean": 3047.9, | |
| "valid_targets_min": 1028 | |
| }, | |
| { | |
| "epoch": 0.8173076923076923, | |
| "grad_norm": 0.4623296031936575, | |
| "learning_rate": 3.952106486668884e-05, | |
| "loss": 0.596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26747170090675354, | |
| "step": 255, | |
| "valid_targets_mean": 2893.2, | |
| "valid_targets_min": 1626 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 0.553339516613882, | |
| "learning_rate": 3.9471169212851774e-05, | |
| "loss": 0.5915, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3047725260257721, | |
| "step": 260, | |
| "valid_targets_mean": 2358.8, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 0.8493589743589743, | |
| "grad_norm": 0.4923474896251558, | |
| "learning_rate": 3.9418836348521045e-05, | |
| "loss": 0.5974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2745221257209778, | |
| "step": 265, | |
| "valid_targets_mean": 2917.7, | |
| "valid_targets_min": 1276 | |
| }, | |
| { | |
| "epoch": 0.8653846153846154, | |
| "grad_norm": 0.5493468058767709, | |
| "learning_rate": 3.936407282421267e-05, | |
| "loss": 0.6092, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29619723558425903, | |
| "step": 270, | |
| "valid_targets_mean": 2358.4, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 0.8814102564102564, | |
| "grad_norm": 0.5064322514275459, | |
| "learning_rate": 3.930688549468894e-05, | |
| "loss": 0.5912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2947567105293274, | |
| "step": 275, | |
| "valid_targets_mean": 2665.6, | |
| "valid_targets_min": 1349 | |
| }, | |
| { | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 0.5308193147953131, | |
| "learning_rate": 3.924728151810034e-05, | |
| "loss": 0.6003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29639333486557007, | |
| "step": 280, | |
| "valid_targets_mean": 2355.9, | |
| "valid_targets_min": 875 | |
| }, | |
| { | |
| "epoch": 0.9134615384615384, | |
| "grad_norm": 0.5219547698552953, | |
| "learning_rate": 3.9185268355089606e-05, | |
| "loss": 0.6013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3138948082923889, | |
| "step": 285, | |
| "valid_targets_mean": 2612.1, | |
| "valid_targets_min": 1453 | |
| }, | |
| { | |
| "epoch": 0.9294871794871795, | |
| "grad_norm": 0.5477432963074507, | |
| "learning_rate": 3.912085376785788e-05, | |
| "loss": 0.5957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2890666723251343, | |
| "step": 290, | |
| "valid_targets_mean": 2642.9, | |
| "valid_targets_min": 1224 | |
| }, | |
| { | |
| "epoch": 0.9455128205128205, | |
| "grad_norm": 0.530177701496871, | |
| "learning_rate": 3.9054045819193074e-05, | |
| "loss": 0.5888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2837531864643097, | |
| "step": 295, | |
| "valid_targets_mean": 2790.3, | |
| "valid_targets_min": 723 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 0.5225578306865748, | |
| "learning_rate": 3.898485287146068e-05, | |
| "loss": 0.5877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26135194301605225, | |
| "step": 300, | |
| "valid_targets_mean": 2409.1, | |
| "valid_targets_min": 1671 | |
| }, | |
| { | |
| "epoch": 0.9775641025641025, | |
| "grad_norm": 0.8252101210922115, | |
| "learning_rate": 3.8913283585557054e-05, | |
| "loss": 0.597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28889599442481995, | |
| "step": 305, | |
| "valid_targets_mean": 2582.6, | |
| "valid_targets_min": 1479 | |
| }, | |
| { | |
| "epoch": 0.9935897435897436, | |
| "grad_norm": 0.49951767985407336, | |
| "learning_rate": 3.8839346919825304e-05, | |
| "loss": 0.5756, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2502233386039734, | |
| "step": 310, | |
| "valid_targets_mean": 2504.4, | |
| "valid_targets_min": 905 | |
| }, | |
| { | |
| "epoch": 1.0096153846153846, | |
| "grad_norm": 0.4813342649549324, | |
| "learning_rate": 3.876305212893399e-05, | |
| "loss": 0.5868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26282885670661926, | |
| "step": 315, | |
| "valid_targets_mean": 2381.0, | |
| "valid_targets_min": 1152 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 0.5610493952941918, | |
| "learning_rate": 3.868440876271871e-05, | |
| "loss": 0.565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28068363666534424, | |
| "step": 320, | |
| "valid_targets_mean": 2664.7, | |
| "valid_targets_min": 920 | |
| }, | |
| { | |
| "epoch": 1.0416666666666667, | |
| "grad_norm": 0.5516739165526598, | |
| "learning_rate": 3.860342666498677e-05, | |
| "loss": 0.5807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28632718324661255, | |
| "step": 325, | |
| "valid_targets_mean": 2595.9, | |
| "valid_targets_min": 1536 | |
| }, | |
| { | |
| "epoch": 1.0576923076923077, | |
| "grad_norm": 0.7325610152519612, | |
| "learning_rate": 3.8520115972284975e-05, | |
| "loss": 0.5688, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2912096083164215, | |
| "step": 330, | |
| "valid_targets_mean": 2864.9, | |
| "valid_targets_min": 1002 | |
| }, | |
| { | |
| "epoch": 1.0737179487179487, | |
| "grad_norm": 0.5182656532810745, | |
| "learning_rate": 3.843448711263089e-05, | |
| "loss": 0.5613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29608041048049927, | |
| "step": 335, | |
| "valid_targets_mean": 2716.9, | |
| "valid_targets_min": 1625 | |
| }, | |
| { | |
| "epoch": 1.0897435897435896, | |
| "grad_norm": 0.5019346432841351, | |
| "learning_rate": 3.8346550804207544e-05, | |
| "loss": 0.5701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2656922936439514, | |
| "step": 340, | |
| "valid_targets_mean": 2720.6, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 1.1057692307692308, | |
| "grad_norm": 0.520447499485171, | |
| "learning_rate": 3.825631805402182e-05, | |
| "loss": 0.5678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27221739292144775, | |
| "step": 345, | |
| "valid_targets_mean": 2582.6, | |
| "valid_targets_min": 1527 | |
| }, | |
| { | |
| "epoch": 1.1217948717948718, | |
| "grad_norm": 0.5141310110105949, | |
| "learning_rate": 3.816380015652672e-05, | |
| "loss": 0.5816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3048447370529175, | |
| "step": 350, | |
| "valid_targets_mean": 2909.9, | |
| "valid_targets_min": 1531 | |
| }, | |
| { | |
| "epoch": 1.1378205128205128, | |
| "grad_norm": 0.5273829773882149, | |
| "learning_rate": 3.806900869220765e-05, | |
| "loss": 0.5783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32696932554244995, | |
| "step": 355, | |
| "valid_targets_mean": 2834.4, | |
| "valid_targets_min": 1602 | |
| }, | |
| { | |
| "epoch": 1.1538461538461537, | |
| "grad_norm": 0.5074759814713276, | |
| "learning_rate": 3.797195552613284e-05, | |
| "loss": 0.5603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.277593731880188, | |
| "step": 360, | |
| "valid_targets_mean": 3161.1, | |
| "valid_targets_min": 1578 | |
| }, | |
| { | |
| "epoch": 1.169871794871795, | |
| "grad_norm": 0.5468399046124904, | |
| "learning_rate": 3.787265280646825e-05, | |
| "loss": 0.5523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32178542017936707, | |
| "step": 365, | |
| "valid_targets_mean": 3033.4, | |
| "valid_targets_min": 1516 | |
| }, | |
| { | |
| "epoch": 1.185897435897436, | |
| "grad_norm": 0.5410326948331472, | |
| "learning_rate": 3.7771112962956936e-05, | |
| "loss": 0.5578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24614498019218445, | |
| "step": 370, | |
| "valid_targets_mean": 2371.9, | |
| "valid_targets_min": 1089 | |
| }, | |
| { | |
| "epoch": 1.2019230769230769, | |
| "grad_norm": 0.5374891279143514, | |
| "learning_rate": 3.7667348705363227e-05, | |
| "loss": 0.5558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27958446741104126, | |
| "step": 375, | |
| "valid_targets_mean": 2468.5, | |
| "valid_targets_min": 1160 | |
| }, | |
| { | |
| "epoch": 1.217948717948718, | |
| "grad_norm": 0.5332748254418984, | |
| "learning_rate": 3.7561373021881885e-05, | |
| "loss": 0.5655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3065238893032074, | |
| "step": 380, | |
| "valid_targets_mean": 3105.9, | |
| "valid_targets_min": 1637 | |
| }, | |
| { | |
| "epoch": 1.233974358974359, | |
| "grad_norm": 0.5173446868519098, | |
| "learning_rate": 3.745319917751229e-05, | |
| "loss": 0.579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3173753619194031, | |
| "step": 385, | |
| "valid_targets_mean": 2959.2, | |
| "valid_targets_min": 1637 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.529087530825308, | |
| "learning_rate": 3.734284071239811e-05, | |
| "loss": 0.5773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3165820837020874, | |
| "step": 390, | |
| "valid_targets_mean": 2732.4, | |
| "valid_targets_min": 1148 | |
| }, | |
| { | |
| "epoch": 1.266025641025641, | |
| "grad_norm": 0.5487377230267219, | |
| "learning_rate": 3.7230311440132494e-05, | |
| "loss": 0.5721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27153822779655457, | |
| "step": 395, | |
| "valid_targets_mean": 2447.1, | |
| "valid_targets_min": 789 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 0.5533430874813212, | |
| "learning_rate": 3.711562544602895e-05, | |
| "loss": 0.5671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30601072311401367, | |
| "step": 400, | |
| "valid_targets_mean": 2748.7, | |
| "valid_targets_min": 1332 | |
| }, | |
| { | |
| "epoch": 1.2980769230769231, | |
| "grad_norm": 0.48601670442445527, | |
| "learning_rate": 3.699879708535838e-05, | |
| "loss": 0.5651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2588644325733185, | |
| "step": 405, | |
| "valid_targets_mean": 2897.8, | |
| "valid_targets_min": 707 | |
| }, | |
| { | |
| "epoch": 1.314102564102564, | |
| "grad_norm": 0.504375358351585, | |
| "learning_rate": 3.687984098155212e-05, | |
| "loss": 0.577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2488366812467575, | |
| "step": 410, | |
| "valid_targets_mean": 2508.9, | |
| "valid_targets_min": 1226 | |
| }, | |
| { | |
| "epoch": 1.330128205128205, | |
| "grad_norm": 0.4906340385259277, | |
| "learning_rate": 3.6758772024371626e-05, | |
| "loss": 0.563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2608366310596466, | |
| "step": 415, | |
| "valid_targets_mean": 2512.8, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 1.3461538461538463, | |
| "grad_norm": 0.5421789236972442, | |
| "learning_rate": 3.663560536804465e-05, | |
| "loss": 0.5626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2818584740161896, | |
| "step": 420, | |
| "valid_targets_mean": 2951.8, | |
| "valid_targets_min": 1396 | |
| }, | |
| { | |
| "epoch": 1.3621794871794872, | |
| "grad_norm": 0.5810036665050738, | |
| "learning_rate": 3.65103564293684e-05, | |
| "loss": 0.5765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30127882957458496, | |
| "step": 425, | |
| "valid_targets_mean": 2694.9, | |
| "valid_targets_min": 1274 | |
| }, | |
| { | |
| "epoch": 1.3782051282051282, | |
| "grad_norm": 0.47903358727394846, | |
| "learning_rate": 3.638304088577984e-05, | |
| "loss": 0.5727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.256487101316452, | |
| "step": 430, | |
| "valid_targets_mean": 2993.9, | |
| "valid_targets_min": 1075 | |
| }, | |
| { | |
| "epoch": 1.3942307692307692, | |
| "grad_norm": 0.498296258427307, | |
| "learning_rate": 3.625367467339329e-05, | |
| "loss": 0.5612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2921409606933594, | |
| "step": 435, | |
| "valid_targets_mean": 3052.1, | |
| "valid_targets_min": 1811 | |
| }, | |
| { | |
| "epoch": 1.4102564102564101, | |
| "grad_norm": 0.5769087450477887, | |
| "learning_rate": 3.612227398500575e-05, | |
| "loss": 0.5717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29023826122283936, | |
| "step": 440, | |
| "valid_targets_mean": 2366.1, | |
| "valid_targets_min": 575 | |
| }, | |
| { | |
| "epoch": 1.4262820512820513, | |
| "grad_norm": 0.5460088607922002, | |
| "learning_rate": 3.598885526807003e-05, | |
| "loss": 0.5783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28189247846603394, | |
| "step": 445, | |
| "valid_targets_mean": 2592.5, | |
| "valid_targets_min": 1339 | |
| }, | |
| { | |
| "epoch": 1.4423076923076923, | |
| "grad_norm": 0.5847261044409127, | |
| "learning_rate": 3.585343522263599e-05, | |
| "loss": 0.5667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2702714204788208, | |
| "step": 450, | |
| "valid_targets_mean": 2488.9, | |
| "valid_targets_min": 1134 | |
| }, | |
| { | |
| "epoch": 1.4583333333333333, | |
| "grad_norm": 0.5145747032617356, | |
| "learning_rate": 3.571603079926024e-05, | |
| "loss": 0.5757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27167177200317383, | |
| "step": 455, | |
| "valid_targets_mean": 2837.7, | |
| "valid_targets_min": 1581 | |
| }, | |
| { | |
| "epoch": 1.4743589743589745, | |
| "grad_norm": 0.5168757591741376, | |
| "learning_rate": 3.5576659196884395e-05, | |
| "loss": 0.5769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2823517322540283, | |
| "step": 460, | |
| "valid_targets_mean": 2455.3, | |
| "valid_targets_min": 1044 | |
| }, | |
| { | |
| "epoch": 1.4903846153846154, | |
| "grad_norm": 0.5330528082012648, | |
| "learning_rate": 3.5435337860682304e-05, | |
| "loss": 0.5621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27999985218048096, | |
| "step": 465, | |
| "valid_targets_mean": 2777.6, | |
| "valid_targets_min": 998 | |
| }, | |
| { | |
| "epoch": 1.5064102564102564, | |
| "grad_norm": 0.5412004827054049, | |
| "learning_rate": 3.529208447987641e-05, | |
| "loss": 0.57, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2921620309352875, | |
| "step": 470, | |
| "valid_targets_mean": 2736.0, | |
| "valid_targets_min": 1853 | |
| }, | |
| { | |
| "epoch": 1.5224358974358974, | |
| "grad_norm": 0.5405160852428765, | |
| "learning_rate": 3.5146916985523604e-05, | |
| "loss": 0.5548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28153955936431885, | |
| "step": 475, | |
| "valid_targets_mean": 2858.3, | |
| "valid_targets_min": 1417 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.49547704458976594, | |
| "learning_rate": 3.499985354827079e-05, | |
| "loss": 0.5638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2561967074871063, | |
| "step": 480, | |
| "valid_targets_mean": 2891.4, | |
| "valid_targets_min": 1198 | |
| }, | |
| { | |
| "epoch": 1.5544871794871795, | |
| "grad_norm": 0.565058238537899, | |
| "learning_rate": 3.485091257608047e-05, | |
| "loss": 0.5656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2924017906188965, | |
| "step": 485, | |
| "valid_targets_mean": 2463.4, | |
| "valid_targets_min": 463 | |
| }, | |
| { | |
| "epoch": 1.5705128205128205, | |
| "grad_norm": 0.497498184926292, | |
| "learning_rate": 3.4700112711926574e-05, | |
| "loss": 0.5619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27482593059539795, | |
| "step": 490, | |
| "valid_targets_mean": 2584.8, | |
| "valid_targets_min": 1259 | |
| }, | |
| { | |
| "epoch": 1.5865384615384617, | |
| "grad_norm": 0.5287840023150424, | |
| "learning_rate": 3.4547472831460976e-05, | |
| "loss": 0.5585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.297797828912735, | |
| "step": 495, | |
| "valid_targets_mean": 2593.3, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 1.6025641025641026, | |
| "grad_norm": 0.4679531756830481, | |
| "learning_rate": 3.439301204065077e-05, | |
| "loss": 0.5583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25573623180389404, | |
| "step": 500, | |
| "valid_targets_mean": 2744.5, | |
| "valid_targets_min": 1001 | |
| }, | |
| { | |
| "epoch": 1.6185897435897436, | |
| "grad_norm": 0.5475195616735163, | |
| "learning_rate": 3.423674967338681e-05, | |
| "loss": 0.5775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3060113191604614, | |
| "step": 505, | |
| "valid_targets_mean": 2838.1, | |
| "valid_targets_min": 1170 | |
| }, | |
| { | |
| "epoch": 1.6346153846153846, | |
| "grad_norm": 0.5235316750346353, | |
| "learning_rate": 3.407870528906366e-05, | |
| "loss": 0.5745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.305141419172287, | |
| "step": 510, | |
| "valid_targets_mean": 2863.4, | |
| "valid_targets_min": 847 | |
| }, | |
| { | |
| "epoch": 1.6506410256410255, | |
| "grad_norm": 0.4938595683656762, | |
| "learning_rate": 3.391889867013134e-05, | |
| "loss": 0.5709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28400900959968567, | |
| "step": 515, | |
| "valid_targets_mean": 2622.2, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.4946310276214785, | |
| "learning_rate": 3.375734981961918e-05, | |
| "loss": 0.5653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2853955626487732, | |
| "step": 520, | |
| "valid_targets_mean": 2732.2, | |
| "valid_targets_min": 1004 | |
| }, | |
| { | |
| "epoch": 1.6826923076923077, | |
| "grad_norm": 0.4903094742766738, | |
| "learning_rate": 3.359407895863199e-05, | |
| "loss": 0.5541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.273275226354599, | |
| "step": 525, | |
| "valid_targets_mean": 2855.8, | |
| "valid_targets_min": 1604 | |
| }, | |
| { | |
| "epoch": 1.6987179487179487, | |
| "grad_norm": 0.5052281623241814, | |
| "learning_rate": 3.342910652381902e-05, | |
| "loss": 0.5421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2506556808948517, | |
| "step": 530, | |
| "valid_targets_mean": 2644.6, | |
| "valid_targets_min": 1101 | |
| }, | |
| { | |
| "epoch": 1.7147435897435899, | |
| "grad_norm": 0.5072487823218924, | |
| "learning_rate": 3.326245316481591e-05, | |
| "loss": 0.5595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2633092999458313, | |
| "step": 535, | |
| "valid_targets_mean": 2397.3, | |
| "valid_targets_min": 1313 | |
| }, | |
| { | |
| "epoch": 1.7307692307692308, | |
| "grad_norm": 0.5195874820272381, | |
| "learning_rate": 3.30941397416599e-05, | |
| "loss": 0.5556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26943325996398926, | |
| "step": 540, | |
| "valid_targets_mean": 2366.9, | |
| "valid_targets_min": 1427 | |
| }, | |
| { | |
| "epoch": 1.7467948717948718, | |
| "grad_norm": 0.5142077283080907, | |
| "learning_rate": 3.2924187322178865e-05, | |
| "loss": 0.5569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27412715554237366, | |
| "step": 545, | |
| "valid_targets_mean": 2490.5, | |
| "valid_targets_min": 980 | |
| }, | |
| { | |
| "epoch": 1.7628205128205128, | |
| "grad_norm": 0.47875545252541785, | |
| "learning_rate": 3.275261717935417e-05, | |
| "loss": 0.5694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30112797021865845, | |
| "step": 550, | |
| "valid_targets_mean": 3342.2, | |
| "valid_targets_min": 1873 | |
| }, | |
| { | |
| "epoch": 1.7788461538461537, | |
| "grad_norm": 0.4754964525049478, | |
| "learning_rate": 3.2579450788657997e-05, | |
| "loss": 0.5531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3025818467140198, | |
| "step": 555, | |
| "valid_targets_mean": 2941.9, | |
| "valid_targets_min": 1454 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 0.5071018173857419, | |
| "learning_rate": 3.2404709825365204e-05, | |
| "loss": 0.5759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25834736227989197, | |
| "step": 560, | |
| "valid_targets_mean": 2617.4, | |
| "valid_targets_min": 1589 | |
| }, | |
| { | |
| "epoch": 1.810897435897436, | |
| "grad_norm": 0.4734235774166826, | |
| "learning_rate": 3.222841616184025e-05, | |
| "loss": 0.5564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2764870524406433, | |
| "step": 565, | |
| "valid_targets_mean": 2861.0, | |
| "valid_targets_min": 1255 | |
| }, | |
| { | |
| "epoch": 1.8269230769230769, | |
| "grad_norm": 0.44588023212202244, | |
| "learning_rate": 3.2050591864799406e-05, | |
| "loss": 0.5423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.248623788356781, | |
| "step": 570, | |
| "valid_targets_mean": 2789.8, | |
| "valid_targets_min": 842 | |
| }, | |
| { | |
| "epoch": 1.842948717948718, | |
| "grad_norm": 0.5165230352269572, | |
| "learning_rate": 3.187125919254869e-05, | |
| "loss": 0.5627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2903827428817749, | |
| "step": 575, | |
| "valid_targets_mean": 2986.1, | |
| "valid_targets_min": 1230 | |
| }, | |
| { | |
| "epoch": 1.858974358974359, | |
| "grad_norm": 0.4542582048942946, | |
| "learning_rate": 3.169044059219778e-05, | |
| "loss": 0.5585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2745620310306549, | |
| "step": 580, | |
| "valid_targets_mean": 3092.4, | |
| "valid_targets_min": 1156 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": 0.47805109284658576, | |
| "learning_rate": 3.1508158696850275e-05, | |
| "loss": 0.5582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2748388648033142, | |
| "step": 585, | |
| "valid_targets_mean": 2798.6, | |
| "valid_targets_min": 1635 | |
| }, | |
| { | |
| "epoch": 1.891025641025641, | |
| "grad_norm": 0.5557249435844636, | |
| "learning_rate": 3.132443632277075e-05, | |
| "loss": 0.5595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3172360956668854, | |
| "step": 590, | |
| "valid_targets_mean": 2392.4, | |
| "valid_targets_min": 1421 | |
| }, | |
| { | |
| "epoch": 1.907051282051282, | |
| "grad_norm": 0.5342438102011375, | |
| "learning_rate": 3.113929646652879e-05, | |
| "loss": 0.5497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28413185477256775, | |
| "step": 595, | |
| "valid_targets_mean": 2749.2, | |
| "valid_targets_min": 1624 | |
| }, | |
| { | |
| "epoch": 1.9230769230769231, | |
| "grad_norm": 0.4903784085042197, | |
| "learning_rate": 3.095276230212056e-05, | |
| "loss": 0.5767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24444545805454254, | |
| "step": 600, | |
| "valid_targets_mean": 2513.0, | |
| "valid_targets_min": 1198 | |
| }, | |
| { | |
| "epoch": 1.939102564102564, | |
| "grad_norm": 0.4919753494428372, | |
| "learning_rate": 3.076485717806808e-05, | |
| "loss": 0.5701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2963429093360901, | |
| "step": 605, | |
| "valid_targets_mean": 3101.6, | |
| "valid_targets_min": 1700 | |
| }, | |
| { | |
| "epoch": 1.9551282051282053, | |
| "grad_norm": 0.5341843461050885, | |
| "learning_rate": 3.057560461449665e-05, | |
| "loss": 0.5727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2879267930984497, | |
| "step": 610, | |
| "valid_targets_mean": 2705.4, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 1.9711538461538463, | |
| "grad_norm": 0.4695007929493055, | |
| "learning_rate": 3.038502830019092e-05, | |
| "loss": 0.5563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27306631207466125, | |
| "step": 615, | |
| "valid_targets_mean": 2854.8, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 1.9871794871794872, | |
| "grad_norm": 0.47019937309397336, | |
| "learning_rate": 3.019315208962968e-05, | |
| "loss": 0.5622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25543850660324097, | |
| "step": 620, | |
| "valid_targets_mean": 2876.4, | |
| "valid_targets_min": 435 | |
| }, | |
| { | |
| "epoch": 2.003205128205128, | |
| "grad_norm": 0.5385068085538951, | |
| "learning_rate": 3.0000000000000004e-05, | |
| "loss": 0.5497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3040393888950348, | |
| "step": 625, | |
| "valid_targets_mean": 2746.8, | |
| "valid_targets_min": 1239 | |
| }, | |
| { | |
| "epoch": 2.019230769230769, | |
| "grad_norm": 0.5370993791663188, | |
| "learning_rate": 2.9805596208191056e-05, | |
| "loss": 0.5329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2809440791606903, | |
| "step": 630, | |
| "valid_targets_mean": 2666.8, | |
| "valid_targets_min": 1709 | |
| }, | |
| { | |
| "epoch": 2.03525641025641, | |
| "grad_norm": 0.5066634114490087, | |
| "learning_rate": 2.960996504776783e-05, | |
| "loss": 0.5337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2636590600013733, | |
| "step": 635, | |
| "valid_targets_mean": 2582.0, | |
| "valid_targets_min": 1011 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 0.4662311214684164, | |
| "learning_rate": 2.9413131005925296e-05, | |
| "loss": 0.5285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2907227575778961, | |
| "step": 640, | |
| "valid_targets_mean": 3295.8, | |
| "valid_targets_min": 842 | |
| }, | |
| { | |
| "epoch": 2.0673076923076925, | |
| "grad_norm": 0.5210975495484659, | |
| "learning_rate": 2.9215118720423375e-05, | |
| "loss": 0.5335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26075279712677, | |
| "step": 645, | |
| "valid_targets_mean": 2613.9, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 2.0833333333333335, | |
| "grad_norm": 0.5089974927966768, | |
| "learning_rate": 2.9015952976502994e-05, | |
| "loss": 0.5387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2646489143371582, | |
| "step": 650, | |
| "valid_targets_mean": 2547.9, | |
| "valid_targets_min": 1075 | |
| }, | |
| { | |
| "epoch": 2.0993589743589745, | |
| "grad_norm": 0.45259300115190093, | |
| "learning_rate": 2.8815658703783715e-05, | |
| "loss": 0.5369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29911428689956665, | |
| "step": 655, | |
| "valid_targets_mean": 3205.0, | |
| "valid_targets_min": 1595 | |
| }, | |
| { | |
| "epoch": 2.1153846153846154, | |
| "grad_norm": 0.6431099125108005, | |
| "learning_rate": 2.8614260973143318e-05, | |
| "loss": 0.5274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2597063183784485, | |
| "step": 660, | |
| "valid_targets_mean": 2840.9, | |
| "valid_targets_min": 547 | |
| }, | |
| { | |
| "epoch": 2.1314102564102564, | |
| "grad_norm": 0.5633027672264547, | |
| "learning_rate": 2.8411784993579633e-05, | |
| "loss": 0.5305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2584155201911926, | |
| "step": 665, | |
| "valid_targets_mean": 2480.5, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 2.1474358974358974, | |
| "grad_norm": 0.5062724588765886, | |
| "learning_rate": 2.820825610905514e-05, | |
| "loss": 0.5279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2673356533050537, | |
| "step": 670, | |
| "valid_targets_mean": 2614.7, | |
| "valid_targets_min": 1225 | |
| }, | |
| { | |
| "epoch": 2.1634615384615383, | |
| "grad_norm": 0.6211215546739832, | |
| "learning_rate": 2.8003699795324674e-05, | |
| "loss": 0.5296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.267412394285202, | |
| "step": 675, | |
| "valid_targets_mean": 2339.8, | |
| "valid_targets_min": 1190 | |
| }, | |
| { | |
| "epoch": 2.1794871794871793, | |
| "grad_norm": 0.47260767768104733, | |
| "learning_rate": 2.7798141656746606e-05, | |
| "loss": 0.5383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26123547554016113, | |
| "step": 680, | |
| "valid_targets_mean": 2792.8, | |
| "valid_targets_min": 1836 | |
| }, | |
| { | |
| "epoch": 2.1955128205128207, | |
| "grad_norm": 0.5167646182153408, | |
| "learning_rate": 2.7591607423077932e-05, | |
| "loss": 0.5229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2804722487926483, | |
| "step": 685, | |
| "valid_targets_mean": 2891.1, | |
| "valid_targets_min": 1396 | |
| }, | |
| { | |
| "epoch": 2.2115384615384617, | |
| "grad_norm": 0.45992088740325227, | |
| "learning_rate": 2.738412294625369e-05, | |
| "loss": 0.5256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2086869180202484, | |
| "step": 690, | |
| "valid_targets_mean": 2911.8, | |
| "valid_targets_min": 1076 | |
| }, | |
| { | |
| "epoch": 2.2275641025641026, | |
| "grad_norm": 0.5124490642725669, | |
| "learning_rate": 2.717571419715107e-05, | |
| "loss": 0.523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25613564252853394, | |
| "step": 695, | |
| "valid_targets_mean": 2460.9, | |
| "valid_targets_min": 1244 | |
| }, | |
| { | |
| "epoch": 2.2435897435897436, | |
| "grad_norm": 0.47586846790245935, | |
| "learning_rate": 2.69664072623386e-05, | |
| "loss": 0.5218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24780923128128052, | |
| "step": 700, | |
| "valid_targets_mean": 2644.0, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 2.2596153846153846, | |
| "grad_norm": 0.5114331024168999, | |
| "learning_rate": 2.6756228340810946e-05, | |
| "loss": 0.534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31844857335090637, | |
| "step": 705, | |
| "valid_targets_mean": 3366.5, | |
| "valid_targets_min": 1157 | |
| }, | |
| { | |
| "epoch": 2.2756410256410255, | |
| "grad_norm": 0.5412665077335189, | |
| "learning_rate": 2.6545203740709502e-05, | |
| "loss": 0.5414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2947283089160919, | |
| "step": 710, | |
| "valid_targets_mean": 2735.8, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 2.2916666666666665, | |
| "grad_norm": 0.48466975498534615, | |
| "learning_rate": 2.6333359876029455e-05, | |
| "loss": 0.5434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28271210193634033, | |
| "step": 715, | |
| "valid_targets_mean": 2636.4, | |
| "valid_targets_min": 1655 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 0.5118067123694199, | |
| "learning_rate": 2.612072326331351e-05, | |
| "loss": 0.533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27647602558135986, | |
| "step": 720, | |
| "valid_targets_mean": 2821.6, | |
| "valid_targets_min": 1448 | |
| }, | |
| { | |
| "epoch": 2.323717948717949, | |
| "grad_norm": 0.49715857445422257, | |
| "learning_rate": 2.5907320518332827e-05, | |
| "loss": 0.5356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3041874170303345, | |
| "step": 725, | |
| "valid_targets_mean": 3134.6, | |
| "valid_targets_min": 1678 | |
| }, | |
| { | |
| "epoch": 2.33974358974359, | |
| "grad_norm": 0.5298322972875337, | |
| "learning_rate": 2.5693178352755497e-05, | |
| "loss": 0.5266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25558072328567505, | |
| "step": 730, | |
| "valid_targets_mean": 2448.6, | |
| "valid_targets_min": 573 | |
| }, | |
| { | |
| "epoch": 2.355769230769231, | |
| "grad_norm": 0.5333606639898194, | |
| "learning_rate": 2.547832357080305e-05, | |
| "loss": 0.5215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27683860063552856, | |
| "step": 735, | |
| "valid_targets_mean": 2680.8, | |
| "valid_targets_min": 1742 | |
| }, | |
| { | |
| "epoch": 2.371794871794872, | |
| "grad_norm": 0.48051460695506026, | |
| "learning_rate": 2.5262783065895377e-05, | |
| "loss": 0.5202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2652096450328827, | |
| "step": 740, | |
| "valid_targets_mean": 3207.2, | |
| "valid_targets_min": 1816 | |
| }, | |
| { | |
| "epoch": 2.3878205128205128, | |
| "grad_norm": 0.4783808612705691, | |
| "learning_rate": 2.5046583817284437e-05, | |
| "loss": 0.532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22841182351112366, | |
| "step": 745, | |
| "valid_targets_mean": 2587.9, | |
| "valid_targets_min": 823 | |
| }, | |
| { | |
| "epoch": 2.4038461538461537, | |
| "grad_norm": 0.5391709249007991, | |
| "learning_rate": 2.48297528866773e-05, | |
| "loss": 0.5261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2468860149383545, | |
| "step": 750, | |
| "valid_targets_mean": 2376.3, | |
| "valid_targets_min": 892 | |
| }, | |
| { | |
| "epoch": 2.4198717948717947, | |
| "grad_norm": 0.5263511548452783, | |
| "learning_rate": 2.4612317414848804e-05, | |
| "loss": 0.5353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2790845036506653, | |
| "step": 755, | |
| "valid_targets_mean": 2924.8, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 2.435897435897436, | |
| "grad_norm": 0.49865538126497133, | |
| "learning_rate": 2.4394304618244346e-05, | |
| "loss": 0.5187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2641284465789795, | |
| "step": 760, | |
| "valid_targets_mean": 2560.9, | |
| "valid_targets_min": 1055 | |
| }, | |
| { | |
| "epoch": 2.451923076923077, | |
| "grad_norm": 0.4919863527000147, | |
| "learning_rate": 2.4175741785573177e-05, | |
| "loss": 0.5287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2962533235549927, | |
| "step": 765, | |
| "valid_targets_mean": 2848.9, | |
| "valid_targets_min": 1604 | |
| }, | |
| { | |
| "epoch": 2.467948717948718, | |
| "grad_norm": 0.5579768978590288, | |
| "learning_rate": 2.39566562743927e-05, | |
| "loss": 0.5379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2721223831176758, | |
| "step": 770, | |
| "valid_targets_mean": 2622.4, | |
| "valid_targets_min": 1480 | |
| }, | |
| { | |
| "epoch": 2.483974358974359, | |
| "grad_norm": 0.4917371422140305, | |
| "learning_rate": 2.3737075507684103e-05, | |
| "loss": 0.5519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2823132872581482, | |
| "step": 775, | |
| "valid_targets_mean": 3060.8, | |
| "valid_targets_min": 1002 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.5183572497009322, | |
| "learning_rate": 2.3517026970419786e-05, | |
| "loss": 0.5285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.243934765458107, | |
| "step": 780, | |
| "valid_targets_mean": 2466.1, | |
| "valid_targets_min": 1244 | |
| }, | |
| { | |
| "epoch": 2.516025641025641, | |
| "grad_norm": 0.5689569336565411, | |
| "learning_rate": 2.3296538206123134e-05, | |
| "loss": 0.5385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29714637994766235, | |
| "step": 785, | |
| "valid_targets_mean": 2579.8, | |
| "valid_targets_min": 769 | |
| }, | |
| { | |
| "epoch": 2.532051282051282, | |
| "grad_norm": 0.4852903583594617, | |
| "learning_rate": 2.307563681342081e-05, | |
| "loss": 0.5375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30132395029067993, | |
| "step": 790, | |
| "valid_targets_mean": 2919.2, | |
| "valid_targets_min": 1185 | |
| }, | |
| { | |
| "epoch": 2.5480769230769234, | |
| "grad_norm": 0.5048000072384919, | |
| "learning_rate": 2.285435044258829e-05, | |
| "loss": 0.5226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26704657077789307, | |
| "step": 795, | |
| "valid_targets_mean": 3074.5, | |
| "valid_targets_min": 970 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 0.49317222290343393, | |
| "learning_rate": 2.263270679208883e-05, | |
| "loss": 0.5175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24643100798130035, | |
| "step": 800, | |
| "valid_targets_mean": 2436.0, | |
| "valid_targets_min": 585 | |
| }, | |
| { | |
| "epoch": 2.5801282051282053, | |
| "grad_norm": 0.4737520930741298, | |
| "learning_rate": 2.2410733605106462e-05, | |
| "loss": 0.5415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26149970293045044, | |
| "step": 805, | |
| "valid_targets_mean": 2902.1, | |
| "valid_targets_min": 1304 | |
| }, | |
| { | |
| "epoch": 2.5961538461538463, | |
| "grad_norm": 0.5121758444175616, | |
| "learning_rate": 2.2188458666073382e-05, | |
| "loss": 0.5318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2612760066986084, | |
| "step": 810, | |
| "valid_targets_mean": 3133.7, | |
| "valid_targets_min": 1373 | |
| }, | |
| { | |
| "epoch": 2.6121794871794872, | |
| "grad_norm": 0.5045481361963446, | |
| "learning_rate": 2.1965909797192143e-05, | |
| "loss": 0.5313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25052016973495483, | |
| "step": 815, | |
| "valid_targets_mean": 2793.5, | |
| "valid_targets_min": 1061 | |
| }, | |
| { | |
| "epoch": 2.628205128205128, | |
| "grad_norm": 0.49866393126043274, | |
| "learning_rate": 2.174311485495317e-05, | |
| "loss": 0.5437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26840710639953613, | |
| "step": 820, | |
| "valid_targets_mean": 2718.1, | |
| "valid_targets_min": 1183 | |
| }, | |
| { | |
| "epoch": 2.644230769230769, | |
| "grad_norm": 0.5328425522492785, | |
| "learning_rate": 2.1520101726647922e-05, | |
| "loss": 0.5389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2338663637638092, | |
| "step": 825, | |
| "valid_targets_mean": 2070.3, | |
| "valid_targets_min": 924 | |
| }, | |
| { | |
| "epoch": 2.66025641025641, | |
| "grad_norm": 0.4935178661564159, | |
| "learning_rate": 2.1296898326878282e-05, | |
| "loss": 0.5293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2643035054206848, | |
| "step": 830, | |
| "valid_targets_mean": 2890.3, | |
| "valid_targets_min": 966 | |
| }, | |
| { | |
| "epoch": 2.676282051282051, | |
| "grad_norm": 0.5137500688356019, | |
| "learning_rate": 2.1073532594062432e-05, | |
| "loss": 0.5259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31183022260665894, | |
| "step": 835, | |
| "valid_targets_mean": 2903.3, | |
| "valid_targets_min": 1411 | |
| }, | |
| { | |
| "epoch": 2.6923076923076925, | |
| "grad_norm": 0.5790430984783882, | |
| "learning_rate": 2.0850032486937838e-05, | |
| "loss": 0.5397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2541824281215668, | |
| "step": 840, | |
| "valid_targets_mean": 2222.1, | |
| "valid_targets_min": 1056 | |
| }, | |
| { | |
| "epoch": 2.7083333333333335, | |
| "grad_norm": 0.5089728268620257, | |
| "learning_rate": 2.0626425981061608e-05, | |
| "loss": 0.5361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26173079013824463, | |
| "step": 845, | |
| "valid_targets_mean": 3020.2, | |
| "valid_targets_min": 1193 | |
| }, | |
| { | |
| "epoch": 2.7243589743589745, | |
| "grad_norm": 0.5063766494827193, | |
| "learning_rate": 2.0402741065308808e-05, | |
| "loss": 0.5235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28143495321273804, | |
| "step": 850, | |
| "valid_targets_mean": 2642.5, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 2.7403846153846154, | |
| "grad_norm": 0.5309100876021802, | |
| "learning_rate": 2.0179005738369098e-05, | |
| "loss": 0.5509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.267280638217926, | |
| "step": 855, | |
| "valid_targets_mean": 2583.9, | |
| "valid_targets_min": 1500 | |
| }, | |
| { | |
| "epoch": 2.7564102564102564, | |
| "grad_norm": 0.5281817313172733, | |
| "learning_rate": 1.995524800524211e-05, | |
| "loss": 0.5513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2864803671836853, | |
| "step": 860, | |
| "valid_targets_mean": 2709.2, | |
| "valid_targets_min": 963 | |
| }, | |
| { | |
| "epoch": 2.7724358974358974, | |
| "grad_norm": 0.4852294842051727, | |
| "learning_rate": 1.9731495873732055e-05, | |
| "loss": 0.5443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24208587408065796, | |
| "step": 865, | |
| "valid_targets_mean": 2688.0, | |
| "valid_targets_min": 1304 | |
| }, | |
| { | |
| "epoch": 2.7884615384615383, | |
| "grad_norm": 0.623697654841149, | |
| "learning_rate": 1.9507777350941996e-05, | |
| "loss": 0.5374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2755206227302551, | |
| "step": 870, | |
| "valid_targets_mean": 2876.7, | |
| "valid_targets_min": 1219 | |
| }, | |
| { | |
| "epoch": 2.8044871794871797, | |
| "grad_norm": 0.47700443954173843, | |
| "learning_rate": 1.9284120439768192e-05, | |
| "loss": 0.5417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26930078864097595, | |
| "step": 875, | |
| "valid_targets_mean": 2995.6, | |
| "valid_targets_min": 1731 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 0.5340023662623198, | |
| "learning_rate": 1.9060553135394957e-05, | |
| "loss": 0.527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22173257172107697, | |
| "step": 880, | |
| "valid_targets_mean": 2209.4, | |
| "valid_targets_min": 1215 | |
| }, | |
| { | |
| "epoch": 2.8365384615384617, | |
| "grad_norm": 0.49369388993999563, | |
| "learning_rate": 1.8837103421790486e-05, | |
| "loss": 0.5235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2612769901752472, | |
| "step": 885, | |
| "valid_targets_mean": 2726.4, | |
| "valid_targets_min": 520 | |
| }, | |
| { | |
| "epoch": 2.8525641025641026, | |
| "grad_norm": 0.5014272176624708, | |
| "learning_rate": 1.861379926820414e-05, | |
| "loss": 0.552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28455230593681335, | |
| "step": 890, | |
| "valid_targets_mean": 2690.3, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 2.8685897435897436, | |
| "grad_norm": 0.5121877748715592, | |
| "learning_rate": 1.8390668625665483e-05, | |
| "loss": 0.5258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2517583966255188, | |
| "step": 895, | |
| "valid_targets_mean": 2481.9, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 2.8846153846153846, | |
| "grad_norm": 0.46919528692963297, | |
| "learning_rate": 1.8167739423485668e-05, | |
| "loss": 0.5261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25650352239608765, | |
| "step": 900, | |
| "valid_targets_mean": 2941.4, | |
| "valid_targets_min": 1404 | |
| }, | |
| { | |
| "epoch": 2.9006410256410255, | |
| "grad_norm": 0.5019379963438835, | |
| "learning_rate": 1.794503956576152e-05, | |
| "loss": 0.5485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27527838945388794, | |
| "step": 905, | |
| "valid_targets_mean": 2664.9, | |
| "valid_targets_min": 1325 | |
| }, | |
| { | |
| "epoch": 2.9166666666666665, | |
| "grad_norm": 0.4855003398140857, | |
| "learning_rate": 1.7722596927882758e-05, | |
| "loss": 0.5284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23271912336349487, | |
| "step": 910, | |
| "valid_targets_mean": 2628.2, | |
| "valid_targets_min": 1311 | |
| }, | |
| { | |
| "epoch": 2.9326923076923075, | |
| "grad_norm": 0.4877223370952336, | |
| "learning_rate": 1.7500439353042834e-05, | |
| "loss": 0.5624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26730355620384216, | |
| "step": 915, | |
| "valid_targets_mean": 2775.1, | |
| "valid_targets_min": 756 | |
| }, | |
| { | |
| "epoch": 2.948717948717949, | |
| "grad_norm": 0.5401641959987764, | |
| "learning_rate": 1.727859464875381e-05, | |
| "loss": 0.542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27316930890083313, | |
| "step": 920, | |
| "valid_targets_mean": 2610.7, | |
| "valid_targets_min": 669 | |
| }, | |
| { | |
| "epoch": 2.96474358974359, | |
| "grad_norm": 0.467235720702022, | |
| "learning_rate": 1.7057090583365678e-05, | |
| "loss": 0.5179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26495471596717834, | |
| "step": 925, | |
| "valid_targets_mean": 3275.5, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 2.980769230769231, | |
| "grad_norm": 0.49024323938310016, | |
| "learning_rate": 1.6835954882590567e-05, | |
| "loss": 0.5313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27755841612815857, | |
| "step": 930, | |
| "valid_targets_mean": 2761.4, | |
| "valid_targets_min": 1173 | |
| }, | |
| { | |
| "epoch": 2.996794871794872, | |
| "grad_norm": 0.48260465618458004, | |
| "learning_rate": 1.6615215226032332e-05, | |
| "loss": 0.5237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2788291275501251, | |
| "step": 935, | |
| "valid_targets_mean": 3261.9, | |
| "valid_targets_min": 1666 | |
| }, | |
| { | |
| "epoch": 3.0128205128205128, | |
| "grad_norm": 0.4901412147381482, | |
| "learning_rate": 1.6394899243721887e-05, | |
| "loss": 0.5119, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22973302006721497, | |
| "step": 940, | |
| "valid_targets_mean": 3016.4, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 3.0288461538461537, | |
| "grad_norm": 0.5016178718406432, | |
| "learning_rate": 1.6175034512658753e-05, | |
| "loss": 0.522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2631143629550934, | |
| "step": 945, | |
| "valid_targets_mean": 3169.8, | |
| "valid_targets_min": 1670 | |
| }, | |
| { | |
| "epoch": 3.0448717948717947, | |
| "grad_norm": 0.5207415731021814, | |
| "learning_rate": 1.5955648553359247e-05, | |
| "loss": 0.5081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2500995099544525, | |
| "step": 950, | |
| "valid_targets_mean": 2769.1, | |
| "valid_targets_min": 1530 | |
| }, | |
| { | |
| "epoch": 3.0608974358974357, | |
| "grad_norm": 0.4952883381542296, | |
| "learning_rate": 1.5736768826411683e-05, | |
| "loss": 0.5318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25875523686408997, | |
| "step": 955, | |
| "valid_targets_mean": 2645.2, | |
| "valid_targets_min": 1112 | |
| }, | |
| { | |
| "epoch": 3.076923076923077, | |
| "grad_norm": 0.5381528529606109, | |
| "learning_rate": 1.5518422729039188e-05, | |
| "loss": 0.5134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2681347727775574, | |
| "step": 960, | |
| "valid_targets_mean": 2322.7, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 3.092948717948718, | |
| "grad_norm": 0.5019141279039122, | |
| "learning_rate": 1.5300637591670357e-05, | |
| "loss": 0.5126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23699134588241577, | |
| "step": 965, | |
| "valid_targets_mean": 2605.8, | |
| "valid_targets_min": 1386 | |
| }, | |
| { | |
| "epoch": 3.108974358974359, | |
| "grad_norm": 0.5284324796248476, | |
| "learning_rate": 1.5083440674518302e-05, | |
| "loss": 0.5212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25518831610679626, | |
| "step": 970, | |
| "valid_targets_mean": 2437.3, | |
| "valid_targets_min": 1194 | |
| }, | |
| { | |
| "epoch": 3.125, | |
| "grad_norm": 0.5545288550997762, | |
| "learning_rate": 1.4866859164168466e-05, | |
| "loss": 0.524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25043103098869324, | |
| "step": 975, | |
| "valid_targets_mean": 2586.2, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 3.141025641025641, | |
| "grad_norm": 0.5150871184607682, | |
| "learning_rate": 1.4650920170175704e-05, | |
| "loss": 0.5181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24903708696365356, | |
| "step": 980, | |
| "valid_targets_mean": 2856.2, | |
| "valid_targets_min": 1416 | |
| }, | |
| { | |
| "epoch": 3.157051282051282, | |
| "grad_norm": 0.51903581494101, | |
| "learning_rate": 1.443565072167095e-05, | |
| "loss": 0.5278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26731231808662415, | |
| "step": 985, | |
| "valid_targets_mean": 2566.1, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 3.173076923076923, | |
| "grad_norm": 0.5466632124851736, | |
| "learning_rate": 1.4221077763977984e-05, | |
| "loss": 0.5165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24252647161483765, | |
| "step": 990, | |
| "valid_targets_mean": 2442.4, | |
| "valid_targets_min": 659 | |
| }, | |
| { | |
| "epoch": 3.189102564102564, | |
| "grad_norm": 0.5466556370515516, | |
| "learning_rate": 1.4007228155240696e-05, | |
| "loss": 0.5124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26542800664901733, | |
| "step": 995, | |
| "valid_targets_mean": 2529.8, | |
| "valid_targets_min": 1625 | |
| }, | |
| { | |
| "epoch": 3.2051282051282053, | |
| "grad_norm": 0.5069311466573636, | |
| "learning_rate": 1.37941286630612e-05, | |
| "loss": 0.5114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2801700830459595, | |
| "step": 1000, | |
| "valid_targets_mean": 2792.9, | |
| "valid_targets_min": 1377 | |
| }, | |
| { | |
| "epoch": 3.2211538461538463, | |
| "grad_norm": 0.6250859912027822, | |
| "learning_rate": 1.3581805961149371e-05, | |
| "loss": 0.508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25172659754753113, | |
| "step": 1005, | |
| "valid_targets_mean": 2743.6, | |
| "valid_targets_min": 1259 | |
| }, | |
| { | |
| "epoch": 3.2371794871794872, | |
| "grad_norm": 0.5199458882968508, | |
| "learning_rate": 1.3370286625984089e-05, | |
| "loss": 0.5113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2833164632320404, | |
| "step": 1010, | |
| "valid_targets_mean": 2770.8, | |
| "valid_targets_min": 1114 | |
| }, | |
| { | |
| "epoch": 3.253205128205128, | |
| "grad_norm": 0.4596497760610937, | |
| "learning_rate": 1.3159597133486628e-05, | |
| "loss": 0.503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23313070833683014, | |
| "step": 1015, | |
| "valid_targets_mean": 2712.9, | |
| "valid_targets_min": 842 | |
| }, | |
| { | |
| "epoch": 3.269230769230769, | |
| "grad_norm": 0.5167847220688774, | |
| "learning_rate": 1.2949763855706678e-05, | |
| "loss": 0.5055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2619097828865051, | |
| "step": 1020, | |
| "valid_targets_mean": 2816.8, | |
| "valid_targets_min": 1647 | |
| }, | |
| { | |
| "epoch": 3.28525641025641, | |
| "grad_norm": 0.4909239667561551, | |
| "learning_rate": 1.274081305752135e-05, | |
| "loss": 0.516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2434234470129013, | |
| "step": 1025, | |
| "valid_targets_mean": 2543.8, | |
| "valid_targets_min": 1362 | |
| }, | |
| { | |
| "epoch": 3.301282051282051, | |
| "grad_norm": 0.46584291867859573, | |
| "learning_rate": 1.2532770893347582e-05, | |
| "loss": 0.5095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23388712108135223, | |
| "step": 1030, | |
| "valid_targets_mean": 2714.8, | |
| "valid_targets_min": 1576 | |
| }, | |
| { | |
| "epoch": 3.3173076923076925, | |
| "grad_norm": 0.502132146117417, | |
| "learning_rate": 1.2325663403868406e-05, | |
| "loss": 0.4977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2469586580991745, | |
| "step": 1035, | |
| "valid_targets_mean": 2671.7, | |
| "valid_targets_min": 1413 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 0.5571585854450248, | |
| "learning_rate": 1.2119516512773424e-05, | |
| "loss": 0.5168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2683925926685333, | |
| "step": 1040, | |
| "valid_targets_mean": 2520.4, | |
| "valid_targets_min": 756 | |
| }, | |
| { | |
| "epoch": 3.3493589743589745, | |
| "grad_norm": 0.5336762575664108, | |
| "learning_rate": 1.1914356023513904e-05, | |
| "loss": 0.5198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2457447350025177, | |
| "step": 1045, | |
| "valid_targets_mean": 2380.1, | |
| "valid_targets_min": 1242 | |
| }, | |
| { | |
| "epoch": 3.3653846153846154, | |
| "grad_norm": 0.5667115296323573, | |
| "learning_rate": 1.1710207616073001e-05, | |
| "loss": 0.514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28504765033721924, | |
| "step": 1050, | |
| "valid_targets_mean": 2624.0, | |
| "valid_targets_min": 567 | |
| }, | |
| { | |
| "epoch": 3.3814102564102564, | |
| "grad_norm": 0.5099600194173104, | |
| "learning_rate": 1.1507096843751372e-05, | |
| "loss": 0.5127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2691630721092224, | |
| "step": 1055, | |
| "valid_targets_mean": 2777.8, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 3.3974358974358974, | |
| "grad_norm": 0.533137614413181, | |
| "learning_rate": 1.1305049129968637e-05, | |
| "loss": 0.5129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21904993057250977, | |
| "step": 1060, | |
| "valid_targets_mean": 2296.4, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 3.4134615384615383, | |
| "grad_norm": 0.5146746955617865, | |
| "learning_rate": 1.110408976508118e-05, | |
| "loss": 0.5059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2554003596305847, | |
| "step": 1065, | |
| "valid_targets_mean": 2615.2, | |
| "valid_targets_min": 1338 | |
| }, | |
| { | |
| "epoch": 3.4294871794871793, | |
| "grad_norm": 0.569499082119217, | |
| "learning_rate": 1.090424390321648e-05, | |
| "loss": 0.5266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2377270758152008, | |
| "step": 1070, | |
| "valid_targets_mean": 2141.8, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 3.4455128205128207, | |
| "grad_norm": 0.4835581841975967, | |
| "learning_rate": 1.070553655912463e-05, | |
| "loss": 0.5122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2490512728691101, | |
| "step": 1075, | |
| "valid_targets_mean": 2829.5, | |
| "valid_targets_min": 1276 | |
| }, | |
| { | |
| "epoch": 3.4615384615384617, | |
| "grad_norm": 0.5182915999209446, | |
| "learning_rate": 1.0507992605047193e-05, | |
| "loss": 0.5121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28138262033462524, | |
| "step": 1080, | |
| "valid_targets_mean": 3092.6, | |
| "valid_targets_min": 1552 | |
| }, | |
| { | |
| "epoch": 3.4775641025641026, | |
| "grad_norm": 0.5102091196096896, | |
| "learning_rate": 1.0311636767603952e-05, | |
| "loss": 0.5219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24811017513275146, | |
| "step": 1085, | |
| "valid_targets_mean": 2473.1, | |
| "valid_targets_min": 1263 | |
| }, | |
| { | |
| "epoch": 3.4935897435897436, | |
| "grad_norm": 0.5505424626120781, | |
| "learning_rate": 1.0116493624697862e-05, | |
| "loss": 0.5251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25646162033081055, | |
| "step": 1090, | |
| "valid_targets_mean": 2409.8, | |
| "valid_targets_min": 1381 | |
| }, | |
| { | |
| "epoch": 3.5096153846153846, | |
| "grad_norm": 0.5453130427654166, | |
| "learning_rate": 9.922587602438657e-06, | |
| "loss": 0.5186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.267898827791214, | |
| "step": 1095, | |
| "valid_targets_mean": 2698.6, | |
| "valid_targets_min": 751 | |
| }, | |
| { | |
| "epoch": 3.5256410256410255, | |
| "grad_norm": 0.5011760790811203, | |
| "learning_rate": 9.729942972085401e-06, | |
| "loss": 0.5121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2532099187374115, | |
| "step": 1100, | |
| "valid_targets_mean": 2729.1, | |
| "valid_targets_min": 1471 | |
| }, | |
| { | |
| "epoch": 3.5416666666666665, | |
| "grad_norm": 0.5063405039437282, | |
| "learning_rate": 9.538583847008452e-06, | |
| "loss": 0.5183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2585946321487427, | |
| "step": 1105, | |
| "valid_targets_mean": 2670.7, | |
| "valid_targets_min": 1420 | |
| }, | |
| { | |
| "epoch": 3.5576923076923075, | |
| "grad_norm": 0.4878211464301314, | |
| "learning_rate": 9.348534179671202e-06, | |
| "loss": 0.4917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25911808013916016, | |
| "step": 1110, | |
| "valid_targets_mean": 3231.8, | |
| "valid_targets_min": 1715 | |
| }, | |
| { | |
| "epoch": 3.573717948717949, | |
| "grad_norm": 0.48958046086249296, | |
| "learning_rate": 9.159817758631923e-06, | |
| "loss": 0.5104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26951271295547485, | |
| "step": 1115, | |
| "valid_targets_mean": 2618.6, | |
| "valid_targets_min": 1116 | |
| }, | |
| { | |
| "epoch": 3.58974358974359, | |
| "grad_norm": 0.4839908494296597, | |
| "learning_rate": 8.972458205566168e-06, | |
| "loss": 0.5116, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25361812114715576, | |
| "step": 1120, | |
| "valid_targets_mean": 2789.6, | |
| "valid_targets_min": 1187 | |
| }, | |
| { | |
| "epoch": 3.605769230769231, | |
| "grad_norm": 0.5535168080897457, | |
| "learning_rate": 8.786478972310023e-06, | |
| "loss": 0.5066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26389190554618835, | |
| "step": 1125, | |
| "valid_targets_mean": 2648.0, | |
| "valid_targets_min": 1379 | |
| }, | |
| { | |
| "epoch": 3.621794871794872, | |
| "grad_norm": 0.5080355494863202, | |
| "learning_rate": 8.601903337924646e-06, | |
| "loss": 0.5118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2588704526424408, | |
| "step": 1130, | |
| "valid_targets_mean": 2575.2, | |
| "valid_targets_min": 1334 | |
| }, | |
| { | |
| "epoch": 3.6378205128205128, | |
| "grad_norm": 0.49125868635329756, | |
| "learning_rate": 8.418754405782423e-06, | |
| "loss": 0.5154, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24010632932186127, | |
| "step": 1135, | |
| "valid_targets_mean": 3124.4, | |
| "valid_targets_min": 1320 | |
| }, | |
| { | |
| "epoch": 3.6538461538461537, | |
| "grad_norm": 0.522430677893815, | |
| "learning_rate": 8.237055100675092e-06, | |
| "loss": 0.5173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2740614414215088, | |
| "step": 1140, | |
| "valid_targets_mean": 2463.3, | |
| "valid_targets_min": 956 | |
| }, | |
| { | |
| "epoch": 3.6698717948717947, | |
| "grad_norm": 0.5095945655848667, | |
| "learning_rate": 8.056828165944282e-06, | |
| "loss": 0.5138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24778661131858826, | |
| "step": 1145, | |
| "valid_targets_mean": 2634.4, | |
| "valid_targets_min": 1133 | |
| }, | |
| { | |
| "epoch": 3.685897435897436, | |
| "grad_norm": 0.5205507571372584, | |
| "learning_rate": 7.878096160634675e-06, | |
| "loss": 0.5065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2700071632862091, | |
| "step": 1150, | |
| "valid_targets_mean": 2605.7, | |
| "valid_targets_min": 961 | |
| }, | |
| { | |
| "epoch": 3.7019230769230766, | |
| "grad_norm": 0.5022199361254235, | |
| "learning_rate": 7.700881456670342e-06, | |
| "loss": 0.5224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25807714462280273, | |
| "step": 1155, | |
| "valid_targets_mean": 2745.6, | |
| "valid_targets_min": 1467 | |
| }, | |
| { | |
| "epoch": 3.717948717948718, | |
| "grad_norm": 0.5204165070548322, | |
| "learning_rate": 7.525206236054385e-06, | |
| "loss": 0.5144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26541176438331604, | |
| "step": 1160, | |
| "valid_targets_mean": 2881.2, | |
| "valid_targets_min": 1531 | |
| }, | |
| { | |
| "epoch": 3.733974358974359, | |
| "grad_norm": 0.4847397653951549, | |
| "learning_rate": 7.3510924880924575e-06, | |
| "loss": 0.5147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2076713740825653, | |
| "step": 1165, | |
| "valid_targets_mean": 2522.8, | |
| "valid_targets_min": 1030 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 0.49758404101693104, | |
| "learning_rate": 7.178562006640337e-06, | |
| "loss": 0.5296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2572706639766693, | |
| "step": 1170, | |
| "valid_targets_mean": 2866.5, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 3.766025641025641, | |
| "grad_norm": 0.4865801190784059, | |
| "learning_rate": 7.0076363873759865e-06, | |
| "loss": 0.5041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2694791853427887, | |
| "step": 1175, | |
| "valid_targets_mean": 3112.3, | |
| "valid_targets_min": 1177 | |
| }, | |
| { | |
| "epoch": 3.782051282051282, | |
| "grad_norm": 0.4289474023794107, | |
| "learning_rate": 6.838337025096424e-06, | |
| "loss": 0.4976, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24875801801681519, | |
| "step": 1180, | |
| "valid_targets_mean": 3378.5, | |
| "valid_targets_min": 2046 | |
| }, | |
| { | |
| "epoch": 3.7980769230769234, | |
| "grad_norm": 0.49994513336481994, | |
| "learning_rate": 6.67068511103971e-06, | |
| "loss": 0.5071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25759273767471313, | |
| "step": 1185, | |
| "valid_targets_mean": 2810.8, | |
| "valid_targets_min": 1317 | |
| }, | |
| { | |
| "epoch": 3.814102564102564, | |
| "grad_norm": 0.5145087645637461, | |
| "learning_rate": 6.504701630232475e-06, | |
| "loss": 0.5001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24269188940525055, | |
| "step": 1190, | |
| "valid_targets_mean": 2315.5, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 3.8301282051282053, | |
| "grad_norm": 0.5053964635592612, | |
| "learning_rate": 6.340407358863167e-06, | |
| "loss": 0.5106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27107882499694824, | |
| "step": 1195, | |
| "valid_targets_mean": 2939.9, | |
| "valid_targets_min": 1553 | |
| }, | |
| { | |
| "epoch": 3.8461538461538463, | |
| "grad_norm": 0.48788700328753865, | |
| "learning_rate": 6.177822861681557e-06, | |
| "loss": 0.512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24716894328594208, | |
| "step": 1200, | |
| "valid_targets_mean": 2915.2, | |
| "valid_targets_min": 1249 | |
| }, | |
| { | |
| "epoch": 3.8621794871794872, | |
| "grad_norm": 0.5375256238281615, | |
| "learning_rate": 6.016968489424572e-06, | |
| "loss": 0.5223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2848437428474426, | |
| "step": 1205, | |
| "valid_targets_mean": 2627.2, | |
| "valid_targets_min": 1600 | |
| }, | |
| { | |
| "epoch": 3.878205128205128, | |
| "grad_norm": 0.44335134280100075, | |
| "learning_rate": 5.857864376269051e-06, | |
| "loss": 0.5028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24449265003204346, | |
| "step": 1210, | |
| "valid_targets_mean": 3266.6, | |
| "valid_targets_min": 1885 | |
| }, | |
| { | |
| "epoch": 3.894230769230769, | |
| "grad_norm": 0.4877168445363961, | |
| "learning_rate": 5.700530437311509e-06, | |
| "loss": 0.5187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2589573860168457, | |
| "step": 1215, | |
| "valid_targets_mean": 2644.8, | |
| "valid_targets_min": 707 | |
| }, | |
| { | |
| "epoch": 3.91025641025641, | |
| "grad_norm": 0.5115202360691214, | |
| "learning_rate": 5.544986366075371e-06, | |
| "loss": 0.5202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2709648311138153, | |
| "step": 1220, | |
| "valid_targets_mean": 3078.7, | |
| "valid_targets_min": 1332 | |
| }, | |
| { | |
| "epoch": 3.926282051282051, | |
| "grad_norm": 0.46525331541379866, | |
| "learning_rate": 5.39125163204594e-06, | |
| "loss": 0.503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2524239420890808, | |
| "step": 1225, | |
| "valid_targets_mean": 3194.6, | |
| "valid_targets_min": 1622 | |
| }, | |
| { | |
| "epoch": 3.9423076923076925, | |
| "grad_norm": 0.5134129988969063, | |
| "learning_rate": 5.239345478233364e-06, | |
| "loss": 0.5013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27747344970703125, | |
| "step": 1230, | |
| "valid_targets_mean": 2960.1, | |
| "valid_targets_min": 1354 | |
| }, | |
| { | |
| "epoch": 3.9583333333333335, | |
| "grad_norm": 0.490211374373267, | |
| "learning_rate": 5.089286918764031e-06, | |
| "loss": 0.5082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24609319865703583, | |
| "step": 1235, | |
| "valid_targets_mean": 3089.4, | |
| "valid_targets_min": 2019 | |
| }, | |
| { | |
| "epoch": 3.9743589743589745, | |
| "grad_norm": 0.468208756136862, | |
| "learning_rate": 4.941094736500522e-06, | |
| "loss": 0.4994, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23819640278816223, | |
| "step": 1240, | |
| "valid_targets_mean": 2995.7, | |
| "valid_targets_min": 1084 | |
| }, | |
| { | |
| "epoch": 3.9903846153846154, | |
| "grad_norm": 0.5090017684601932, | |
| "learning_rate": 4.794787480690597e-06, | |
| "loss": 0.5204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2769983112812042, | |
| "step": 1245, | |
| "valid_targets_mean": 2845.6, | |
| "valid_targets_min": 918 | |
| }, | |
| { | |
| "epoch": 4.006410256410256, | |
| "grad_norm": 0.47177334327622505, | |
| "learning_rate": 4.650383464645338e-06, | |
| "loss": 0.5117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2771986126899719, | |
| "step": 1250, | |
| "valid_targets_mean": 3190.9, | |
| "valid_targets_min": 1686 | |
| }, | |
| { | |
| "epoch": 4.022435897435898, | |
| "grad_norm": 0.5007827975128015, | |
| "learning_rate": 4.507900763446911e-06, | |
| "loss": 0.5032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2388410121202469, | |
| "step": 1255, | |
| "valid_targets_mean": 2622.6, | |
| "valid_targets_min": 1570 | |
| }, | |
| { | |
| "epoch": 4.038461538461538, | |
| "grad_norm": 0.48832340426080595, | |
| "learning_rate": 4.367357211686072e-06, | |
| "loss": 0.5037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24476832151412964, | |
| "step": 1260, | |
| "valid_targets_mean": 2671.0, | |
| "valid_targets_min": 625 | |
| }, | |
| { | |
| "epoch": 4.05448717948718, | |
| "grad_norm": 0.4933650082162676, | |
| "learning_rate": 4.228770401229824e-06, | |
| "loss": 0.5025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2521268129348755, | |
| "step": 1265, | |
| "valid_targets_mean": 2734.1, | |
| "valid_targets_min": 723 | |
| }, | |
| { | |
| "epoch": 4.07051282051282, | |
| "grad_norm": 0.5079581171636282, | |
| "learning_rate": 4.092157679019442e-06, | |
| "loss": 0.4991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26366621255874634, | |
| "step": 1270, | |
| "valid_targets_mean": 3056.3, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 4.086538461538462, | |
| "grad_norm": 0.49596862412646814, | |
| "learning_rate": 3.957536144899123e-06, | |
| "loss": 0.513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24502941966056824, | |
| "step": 1275, | |
| "valid_targets_mean": 2438.4, | |
| "valid_targets_min": 1514 | |
| }, | |
| { | |
| "epoch": 4.102564102564102, | |
| "grad_norm": 0.5145129088905596, | |
| "learning_rate": 3.8249226494756445e-06, | |
| "loss": 0.4966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25848090648651123, | |
| "step": 1280, | |
| "valid_targets_mean": 2680.9, | |
| "valid_targets_min": 1795 | |
| }, | |
| { | |
| "epoch": 4.118589743589744, | |
| "grad_norm": 0.5037020159257446, | |
| "learning_rate": 3.694333792009115e-06, | |
| "loss": 0.5052, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22823549807071686, | |
| "step": 1285, | |
| "valid_targets_mean": 2669.3, | |
| "valid_targets_min": 1027 | |
| }, | |
| { | |
| "epoch": 4.134615384615385, | |
| "grad_norm": 0.5060562285691546, | |
| "learning_rate": 3.565785918335292e-06, | |
| "loss": 0.5004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24490630626678467, | |
| "step": 1290, | |
| "valid_targets_mean": 2757.9, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 4.1506410256410255, | |
| "grad_norm": 0.4710171907923074, | |
| "learning_rate": 3.43929511881953e-06, | |
| "loss": 0.4886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2338603436946869, | |
| "step": 1295, | |
| "valid_targets_mean": 2913.7, | |
| "valid_targets_min": 751 | |
| }, | |
| { | |
| "epoch": 4.166666666666667, | |
| "grad_norm": 0.555995128304345, | |
| "learning_rate": 3.3148772263427743e-06, | |
| "loss": 0.5163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.275356650352478, | |
| "step": 1300, | |
| "valid_targets_mean": 2518.2, | |
| "valid_targets_min": 1317 | |
| }, | |
| { | |
| "epoch": 4.1826923076923075, | |
| "grad_norm": 0.759342398332849, | |
| "learning_rate": 3.1925478143197418e-06, | |
| "loss": 0.5048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25380346179008484, | |
| "step": 1305, | |
| "valid_targets_mean": 2555.2, | |
| "valid_targets_min": 1653 | |
| }, | |
| { | |
| "epoch": 4.198717948717949, | |
| "grad_norm": 0.5072591530204905, | |
| "learning_rate": 3.0723221947495907e-06, | |
| "loss": 0.5003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21137681603431702, | |
| "step": 1310, | |
| "valid_targets_mean": 2328.9, | |
| "valid_targets_min": 947 | |
| }, | |
| { | |
| "epoch": 4.214743589743589, | |
| "grad_norm": 0.5180541192988526, | |
| "learning_rate": 2.954215416299331e-06, | |
| "loss": 0.5043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24892708659172058, | |
| "step": 1315, | |
| "valid_targets_mean": 2616.9, | |
| "valid_targets_min": 1347 | |
| }, | |
| { | |
| "epoch": 4.230769230769231, | |
| "grad_norm": 0.5132267950452266, | |
| "learning_rate": 2.838242262420148e-06, | |
| "loss": 0.5036, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25727900862693787, | |
| "step": 1320, | |
| "valid_targets_mean": 2797.8, | |
| "valid_targets_min": 1104 | |
| }, | |
| { | |
| "epoch": 4.246794871794872, | |
| "grad_norm": 0.5184018388606338, | |
| "learning_rate": 2.7244172494969978e-06, | |
| "loss": 0.509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24256864190101624, | |
| "step": 1325, | |
| "valid_targets_mean": 2934.1, | |
| "valid_targets_min": 1565 | |
| }, | |
| { | |
| "epoch": 4.262820512820513, | |
| "grad_norm": 0.5252899074313376, | |
| "learning_rate": 2.6127546250315438e-06, | |
| "loss": 0.5098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24076877534389496, | |
| "step": 1330, | |
| "valid_targets_mean": 2428.9, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 4.278846153846154, | |
| "grad_norm": 0.5108531790336086, | |
| "learning_rate": 2.503268365858831e-06, | |
| "loss": 0.5041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2731473445892334, | |
| "step": 1335, | |
| "valid_targets_mean": 2923.9, | |
| "valid_targets_min": 1600 | |
| }, | |
| { | |
| "epoch": 4.294871794871795, | |
| "grad_norm": 0.5321025763349825, | |
| "learning_rate": 2.3959721763977805e-06, | |
| "loss": 0.5123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2777535617351532, | |
| "step": 1340, | |
| "valid_targets_mean": 2700.3, | |
| "valid_targets_min": 1464 | |
| }, | |
| { | |
| "epoch": 4.310897435897436, | |
| "grad_norm": 0.6033522758194312, | |
| "learning_rate": 2.2908794869358044e-06, | |
| "loss": 0.4999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2676275968551636, | |
| "step": 1345, | |
| "valid_targets_mean": 2569.8, | |
| "valid_targets_min": 1370 | |
| }, | |
| { | |
| "epoch": 4.326923076923077, | |
| "grad_norm": 0.5333361636057034, | |
| "learning_rate": 2.188003451947747e-06, | |
| "loss": 0.5061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2521325349807739, | |
| "step": 1350, | |
| "valid_targets_mean": 2529.4, | |
| "valid_targets_min": 1258 | |
| }, | |
| { | |
| "epoch": 4.342948717948718, | |
| "grad_norm": 0.5123708912901537, | |
| "learning_rate": 2.0873569484493305e-06, | |
| "loss": 0.4987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2577575743198395, | |
| "step": 1355, | |
| "valid_targets_mean": 2865.8, | |
| "valid_targets_min": 1345 | |
| }, | |
| { | |
| "epoch": 4.358974358974359, | |
| "grad_norm": 0.5054990888582566, | |
| "learning_rate": 1.9889525743853323e-06, | |
| "loss": 0.4986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2553538978099823, | |
| "step": 1360, | |
| "valid_targets_mean": 2472.9, | |
| "valid_targets_min": 1112 | |
| }, | |
| { | |
| "epoch": 4.375, | |
| "grad_norm": 0.5678085203826202, | |
| "learning_rate": 1.8928026470526917e-06, | |
| "loss": 0.5114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2596340775489807, | |
| "step": 1365, | |
| "valid_targets_mean": 2628.6, | |
| "valid_targets_min": 1276 | |
| }, | |
| { | |
| "epoch": 4.391025641025641, | |
| "grad_norm": 0.4893241712584916, | |
| "learning_rate": 1.7989192015587776e-06, | |
| "loss": 0.4961, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23696500062942505, | |
| "step": 1370, | |
| "valid_targets_mean": 2611.6, | |
| "valid_targets_min": 1075 | |
| }, | |
| { | |
| "epoch": 4.407051282051282, | |
| "grad_norm": 0.5091429153868882, | |
| "learning_rate": 1.7073139893149092e-06, | |
| "loss": 0.4983, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2712137699127197, | |
| "step": 1375, | |
| "valid_targets_mean": 2809.8, | |
| "valid_targets_min": 1100 | |
| }, | |
| { | |
| "epoch": 4.423076923076923, | |
| "grad_norm": 0.572290095970069, | |
| "learning_rate": 1.6179984765654743e-06, | |
| "loss": 0.4996, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23557832837104797, | |
| "step": 1380, | |
| "valid_targets_mean": 2475.9, | |
| "valid_targets_min": 1168 | |
| }, | |
| { | |
| "epoch": 4.439102564102564, | |
| "grad_norm": 0.5831706000170874, | |
| "learning_rate": 1.5309838429526714e-06, | |
| "loss": 0.5145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23172461986541748, | |
| "step": 1385, | |
| "valid_targets_mean": 2455.8, | |
| "valid_targets_min": 1304 | |
| }, | |
| { | |
| "epoch": 4.455128205128205, | |
| "grad_norm": 0.5140434264325106, | |
| "learning_rate": 1.4462809801171428e-06, | |
| "loss": 0.5168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26323020458221436, | |
| "step": 1390, | |
| "valid_targets_mean": 2694.8, | |
| "valid_targets_min": 1232 | |
| }, | |
| { | |
| "epoch": 4.471153846153846, | |
| "grad_norm": 0.5019615556872697, | |
| "learning_rate": 1.3639004903346954e-06, | |
| "loss": 0.5077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24197980761528015, | |
| "step": 1395, | |
| "valid_targets_mean": 2674.9, | |
| "valid_targets_min": 1334 | |
| }, | |
| { | |
| "epoch": 4.487179487179487, | |
| "grad_norm": 0.5101131304847485, | |
| "learning_rate": 1.2838526851891864e-06, | |
| "loss": 0.5002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25778818130493164, | |
| "step": 1400, | |
| "valid_targets_mean": 2975.2, | |
| "valid_targets_min": 1695 | |
| }, | |
| { | |
| "epoch": 4.503205128205128, | |
| "grad_norm": 0.494280031757431, | |
| "learning_rate": 1.2061475842818337e-06, | |
| "loss": 0.5081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28242138028144836, | |
| "step": 1405, | |
| "valid_targets_mean": 3291.9, | |
| "valid_targets_min": 1955 | |
| }, | |
| { | |
| "epoch": 4.519230769230769, | |
| "grad_norm": 0.4859471337024964, | |
| "learning_rate": 1.1307949139770446e-06, | |
| "loss": 0.4874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2636260688304901, | |
| "step": 1410, | |
| "valid_targets_mean": 3025.3, | |
| "valid_targets_min": 1367 | |
| }, | |
| { | |
| "epoch": 4.535256410256411, | |
| "grad_norm": 0.5115875440836855, | |
| "learning_rate": 1.057804106184992e-06, | |
| "loss": 0.5068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23679447174072266, | |
| "step": 1415, | |
| "valid_targets_mean": 2550.2, | |
| "valid_targets_min": 1379 | |
| }, | |
| { | |
| "epoch": 4.551282051282051, | |
| "grad_norm": 0.49992052994826364, | |
| "learning_rate": 9.871842971809853e-07, | |
| "loss": 0.509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28533414006233215, | |
| "step": 1420, | |
| "valid_targets_mean": 2607.3, | |
| "valid_targets_min": 1195 | |
| }, | |
| { | |
| "epoch": 4.5673076923076925, | |
| "grad_norm": 0.4701214147504648, | |
| "learning_rate": 9.189443264619102e-07, | |
| "loss": 0.5015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25728708505630493, | |
| "step": 1425, | |
| "valid_targets_mean": 3328.6, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 4.583333333333333, | |
| "grad_norm": 0.5676405967354015, | |
| "learning_rate": 8.530927356397778e-07, | |
| "loss": 0.5017, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25928235054016113, | |
| "step": 1430, | |
| "valid_targets_mean": 2991.0, | |
| "valid_targets_min": 1104 | |
| }, | |
| { | |
| "epoch": 4.5993589743589745, | |
| "grad_norm": 0.5627440521581173, | |
| "learning_rate": 7.896377673725553e-07, | |
| "loss": 0.4988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22346052527427673, | |
| "step": 1435, | |
| "valid_targets_mean": 2614.4, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 4.615384615384615, | |
| "grad_norm": 0.504256223200527, | |
| "learning_rate": 7.285873643324514e-07, | |
| "loss": 0.5065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2539728283882141, | |
| "step": 1440, | |
| "valid_targets_mean": 2844.8, | |
| "valid_targets_min": 1455 | |
| }, | |
| { | |
| "epoch": 4.631410256410256, | |
| "grad_norm": 0.4925495661414361, | |
| "learning_rate": 6.69949168211721e-07, | |
| "loss": 0.4925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22008481621742249, | |
| "step": 1445, | |
| "valid_targets_mean": 2975.1, | |
| "valid_targets_min": 1312 | |
| }, | |
| { | |
| "epoch": 4.647435897435898, | |
| "grad_norm": 0.5494300003962139, | |
| "learning_rate": 6.137305187661513e-07, | |
| "loss": 0.5076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2878986895084381, | |
| "step": 1450, | |
| "valid_targets_mean": 2772.4, | |
| "valid_targets_min": 826 | |
| }, | |
| { | |
| "epoch": 4.663461538461538, | |
| "grad_norm": 0.5039636136792486, | |
| "learning_rate": 5.599384528963425e-07, | |
| "loss": 0.5015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26579830050468445, | |
| "step": 1455, | |
| "valid_targets_mean": 2956.1, | |
| "valid_targets_min": 1082 | |
| }, | |
| { | |
| "epoch": 4.67948717948718, | |
| "grad_norm": 0.513557568224406, | |
| "learning_rate": 5.085797037669072e-07, | |
| "loss": 0.5119, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26804548501968384, | |
| "step": 1460, | |
| "valid_targets_mean": 2945.6, | |
| "valid_targets_min": 1373 | |
| }, | |
| { | |
| "epoch": 4.69551282051282, | |
| "grad_norm": 0.48755150331371994, | |
| "learning_rate": 4.5966069996365993e-07, | |
| "loss": 0.4999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25493526458740234, | |
| "step": 1465, | |
| "valid_targets_mean": 2965.0, | |
| "valid_targets_min": 989 | |
| }, | |
| { | |
| "epoch": 4.711538461538462, | |
| "grad_norm": 0.48996510478060334, | |
| "learning_rate": 4.1318756468897047e-07, | |
| "loss": 0.4846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26829373836517334, | |
| "step": 1470, | |
| "valid_targets_mean": 3138.4, | |
| "valid_targets_min": 1490 | |
| }, | |
| { | |
| "epoch": 4.727564102564102, | |
| "grad_norm": 0.540400619341247, | |
| "learning_rate": 3.691661149953096e-07, | |
| "loss": 0.5104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25986146926879883, | |
| "step": 1475, | |
| "valid_targets_mean": 2386.2, | |
| "valid_targets_min": 920 | |
| }, | |
| { | |
| "epoch": 4.743589743589744, | |
| "grad_norm": 0.5070305685741632, | |
| "learning_rate": 3.2760186105712964e-07, | |
| "loss": 0.5052, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23506109416484833, | |
| "step": 1480, | |
| "valid_targets_mean": 2560.3, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 4.759615384615385, | |
| "grad_norm": 0.5067194990562207, | |
| "learning_rate": 2.8850000548115155e-07, | |
| "loss": 0.499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23781508207321167, | |
| "step": 1485, | |
| "valid_targets_mean": 2701.9, | |
| "valid_targets_min": 1124 | |
| }, | |
| { | |
| "epoch": 4.7756410256410255, | |
| "grad_norm": 0.5156995328819387, | |
| "learning_rate": 2.518654426551592e-07, | |
| "loss": 0.496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27351832389831543, | |
| "step": 1490, | |
| "valid_targets_mean": 2925.2, | |
| "valid_targets_min": 1015 | |
| }, | |
| { | |
| "epoch": 4.791666666666667, | |
| "grad_norm": 0.5599487612757731, | |
| "learning_rate": 2.1770275813536746e-07, | |
| "loss": 0.497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25763094425201416, | |
| "step": 1495, | |
| "valid_targets_mean": 2683.4, | |
| "valid_targets_min": 1528 | |
| }, | |
| { | |
| "epoch": 4.8076923076923075, | |
| "grad_norm": 0.5385472477141456, | |
| "learning_rate": 1.8601622807244312e-07, | |
| "loss": 0.5143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2503165900707245, | |
| "step": 1500, | |
| "valid_targets_mean": 2487.6, | |
| "valid_targets_min": 1024 | |
| }, | |
| { | |
| "epoch": 4.823717948717949, | |
| "grad_norm": 0.4726991070974343, | |
| "learning_rate": 1.5680981867625566e-07, | |
| "loss": 0.5138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24221260845661163, | |
| "step": 1505, | |
| "valid_targets_mean": 2659.8, | |
| "valid_targets_min": 1354 | |
| }, | |
| { | |
| "epoch": 4.839743589743589, | |
| "grad_norm": 0.5461272895850571, | |
| "learning_rate": 1.3008718571943636e-07, | |
| "loss": 0.5003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2652673125267029, | |
| "step": 1510, | |
| "valid_targets_mean": 2697.6, | |
| "valid_targets_min": 1328 | |
| }, | |
| { | |
| "epoch": 4.855769230769231, | |
| "grad_norm": 0.5253200725248618, | |
| "learning_rate": 1.058516740797777e-07, | |
| "loss": 0.4958, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25738656520843506, | |
| "step": 1515, | |
| "valid_targets_mean": 3106.8, | |
| "valid_targets_min": 1145 | |
| }, | |
| { | |
| "epoch": 4.871794871794872, | |
| "grad_norm": 0.47676822865399054, | |
| "learning_rate": 8.410631732155062e-08, | |
| "loss": 0.5006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26309454441070557, | |
| "step": 1520, | |
| "valid_targets_mean": 3213.8, | |
| "valid_targets_min": 1801 | |
| }, | |
| { | |
| "epoch": 4.887820512820513, | |
| "grad_norm": 0.49678497951381617, | |
| "learning_rate": 6.485383731580142e-08, | |
| "loss": 0.4982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.258725643157959, | |
| "step": 1525, | |
| "valid_targets_mean": 2639.6, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 4.903846153846154, | |
| "grad_norm": 0.5295930413362215, | |
| "learning_rate": 4.809664389964441e-08, | |
| "loss": 0.5071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24629537761211395, | |
| "step": 1530, | |
| "valid_targets_mean": 2534.1, | |
| "valid_targets_min": 1371 | |
| }, | |
| { | |
| "epoch": 4.919871794871795, | |
| "grad_norm": 0.5092910461235371, | |
| "learning_rate": 3.383683457463649e-08, | |
| "loss": 0.4957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28113991022109985, | |
| "step": 1535, | |
| "valid_targets_mean": 2743.7, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 4.935897435897436, | |
| "grad_norm": 0.5052772865334241, | |
| "learning_rate": 2.207619424421381e-08, | |
| "loss": 0.507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22167710959911346, | |
| "step": 1540, | |
| "valid_targets_mean": 2326.5, | |
| "valid_targets_min": 828 | |
| }, | |
| { | |
| "epoch": 4.951923076923077, | |
| "grad_norm": 0.5286114149755486, | |
| "learning_rate": 1.281619499029274e-08, | |
| "loss": 0.5125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25723177194595337, | |
| "step": 1545, | |
| "valid_targets_mean": 2850.4, | |
| "valid_targets_min": 1711 | |
| }, | |
| { | |
| "epoch": 4.967948717948718, | |
| "grad_norm": 0.5176168142039531, | |
| "learning_rate": 6.057995888997248e-09, | |
| "loss": 0.4946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2718939483165741, | |
| "step": 1550, | |
| "valid_targets_mean": 2905.1, | |
| "valid_targets_min": 1289 | |
| }, | |
| { | |
| "epoch": 4.983974358974359, | |
| "grad_norm": 0.5326959721897664, | |
| "learning_rate": 1.8024428655794012e-09, | |
| "loss": 0.5016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3013129234313965, | |
| "step": 1555, | |
| "valid_targets_mean": 3025.9, | |
| "valid_targets_min": 1624 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.5167254136149978, | |
| "learning_rate": 5.00685885418406e-11, | |
| "loss": 0.4965, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21225425601005554, | |
| "step": 1560, | |
| "valid_targets_mean": 2312.1, | |
| "valid_targets_min": 769 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21225425601005554, | |
| "step": 1560, | |
| "total_flos": 6.613684893274604e+17, | |
| "train_loss": 0.5546745132177304, | |
| "train_runtime": 11827.5918, | |
| "train_samples_per_second": 4.22, | |
| "train_steps_per_second": 0.132, | |
| "valid_targets_mean": 2312.1, | |
| "valid_targets_min": 769 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1560, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.613684893274604e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |