{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.5071225071225074,
  "eval_steps": 500,
  "global_step": 440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0056657223796034, "grad_norm": 24.973131796915897, "learning_rate": 1.0000000000000002e-06, "loss": 1.8537, "step": 1},
    {"epoch": 0.0113314447592068, "grad_norm": 32.79573813738381, "learning_rate": 2.0000000000000003e-06, "loss": 2.0212, "step": 2},
    {"epoch": 0.0169971671388102, "grad_norm": 23.800880905805656, "learning_rate": 3e-06, "loss": 2.1456, "step": 3},
    {"epoch": 0.0226628895184136, "grad_norm": 19.091198715081358, "learning_rate": 4.000000000000001e-06, "loss": 1.9808, "step": 4},
    {"epoch": 0.028328611898016998, "grad_norm": 14.124470348172405, "learning_rate": 5e-06, "loss": 2.1825, "step": 5},
    {"epoch": 0.0339943342776204, "grad_norm": 11.461608032959802, "learning_rate": 6e-06, "loss": 1.6353, "step": 6},
    {"epoch": 0.039660056657223795, "grad_norm": 10.354681496346823, "learning_rate": 7e-06, "loss": 1.9076, "step": 7},
    {"epoch": 0.0453257790368272, "grad_norm": 10.167669680172194, "learning_rate": 8.000000000000001e-06, "loss": 1.4754, "step": 8},
    {"epoch": 0.05099150141643059, "grad_norm": 7.5541696713086255, "learning_rate": 9e-06, "loss": 1.6213, "step": 9},
    {"epoch": 0.056657223796033995, "grad_norm": 4.087852973173369, "learning_rate": 1e-05, "loss": 1.5217, "step": 10},
    {"epoch": 0.06232294617563739, "grad_norm": 4.071392878063137, "learning_rate": 9.999948174819623e-06, "loss": 1.6551, "step": 11},
    {"epoch": 0.0679886685552408, "grad_norm": 5.2075402015034, "learning_rate": 9.999792700352826e-06, "loss": 1.4474, "step": 12},
    {"epoch": 0.07365439093484419, "grad_norm": 3.6492933345906637, "learning_rate": 9.999533579822611e-06, "loss": 1.5585, "step": 13},
    {"epoch": 0.07932011331444759, "grad_norm": 6.482920810973195, "learning_rate": 9.999170818600562e-06, "loss": 1.3317, "step": 14},
    {"epoch": 0.08498583569405099, "grad_norm": 4.137365745831386, "learning_rate": 9.998704424206747e-06, "loss": 1.4029, "step": 15},
    {"epoch": 0.0906515580736544, "grad_norm": 4.745717244720069, "learning_rate": 9.998134406309555e-06, "loss": 1.6586, "step": 16},
    {"epoch": 0.09631728045325778, "grad_norm": 5.4377770096801346, "learning_rate": 9.997460776725497e-06, "loss": 1.365, "step": 17},
    {"epoch": 0.10198300283286119, "grad_norm": 3.317130182493388, "learning_rate": 9.996683549418964e-06, "loss": 1.4956, "step": 18},
    {"epoch": 0.10764872521246459, "grad_norm": 1.7845609616841893, "learning_rate": 9.995802740501933e-06, "loss": 1.3472, "step": 19},
    {"epoch": 0.11331444759206799, "grad_norm": 14.387033772755194, "learning_rate": 9.994818368233639e-06, "loss": 1.4116, "step": 20},
    {"epoch": 0.11898016997167139, "grad_norm": 6.920700020611593, "learning_rate": 9.993730453020187e-06, "loss": 1.2776, "step": 21},
    {"epoch": 0.12464589235127478, "grad_norm": 6.05951274644599, "learning_rate": 9.99253901741414e-06, "loss": 1.4433, "step": 22},
    {"epoch": 0.13031161473087818, "grad_norm": 3.0541449788715935, "learning_rate": 9.991244086114046e-06, "loss": 1.3396, "step": 23},
    {"epoch": 0.1359773371104816, "grad_norm": 1.8438099140328046, "learning_rate": 9.989845685963917e-06, "loss": 1.3061, "step": 24},
    {"epoch": 0.141643059490085, "grad_norm": 4.048301070320613, "learning_rate": 9.988343845952697e-06, "loss": 1.2283, "step": 25},
    {"epoch": 0.14730878186968838, "grad_norm": 3.5627296346591457, "learning_rate": 9.986738597213633e-06, "loss": 1.2865, "step": 26},
    {"epoch": 0.1529745042492918, "grad_norm": 2.237494567304501, "learning_rate": 9.98502997302365e-06, "loss": 1.3233, "step": 27},
    {"epoch": 0.15864022662889518, "grad_norm": 3.479719952104877, "learning_rate": 9.983218008802648e-06, "loss": 1.3033, "step": 28},
    {"epoch": 0.1643059490084986, "grad_norm": 2.066121083229141, "learning_rate": 9.98130274211278e-06, "loss": 1.3326, "step": 29},
    {"epoch": 0.16997167138810199, "grad_norm": 4.090684571263736, "learning_rate": 9.979284212657658e-06, "loss": 1.3102, "step": 30},
    {"epoch": 0.17563739376770537, "grad_norm": 2.369637256277251, "learning_rate": 9.977162462281544e-06, "loss": 1.4067, "step": 31},
    {"epoch": 0.1813031161473088, "grad_norm": 1.4378564529803546, "learning_rate": 9.97493753496848e-06, "loss": 1.2409, "step": 32},
    {"epoch": 0.18696883852691218, "grad_norm": 1.810353068849482, "learning_rate": 9.972609476841368e-06, "loss": 1.2659, "step": 33},
    {"epoch": 0.19263456090651557, "grad_norm": 2.954930884156565, "learning_rate": 9.970178336161018e-06, "loss": 1.3727, "step": 34},
    {"epoch": 0.19830028328611898, "grad_norm": 2.053307140265503, "learning_rate": 9.967644163325157e-06, "loss": 1.3463, "step": 35},
    {"epoch": 0.20396600566572237, "grad_norm": 1.8032124432327943, "learning_rate": 9.965007010867366e-06, "loss": 1.1998, "step": 36},
    {"epoch": 0.2096317280453258, "grad_norm": 1.4952983263862012, "learning_rate": 9.962266933456008e-06, "loss": 1.2829, "step": 37},
    {"epoch": 0.21529745042492918, "grad_norm": 1.3649794008291625, "learning_rate": 9.959423987893086e-06, "loss": 1.2056, "step": 38},
    {"epoch": 0.22096317280453256, "grad_norm": 1.4380773398306634, "learning_rate": 9.956478233113066e-06, "loss": 1.29, "step": 39},
    {"epoch": 0.22662889518413598, "grad_norm": 1.6072540934424309, "learning_rate": 9.953429730181653e-06, "loss": 1.2593, "step": 40},
    {"epoch": 0.23229461756373937, "grad_norm": 1.6010739399694889, "learning_rate": 9.95027854229454e-06, "loss": 1.2117, "step": 41},
    {"epoch": 0.23796033994334279, "grad_norm": 1.2474393925785745, "learning_rate": 9.947024734776076e-06, "loss": 1.2022, "step": 42},
    {"epoch": 0.24362606232294617, "grad_norm": 1.4019264249340568, "learning_rate": 9.943668375077926e-06, "loss": 1.2365, "step": 43},
    {"epoch": 0.24929178470254956, "grad_norm": 1.5087040675714003, "learning_rate": 9.940209532777666e-06, "loss": 1.274, "step": 44},
    {"epoch": 0.254957507082153, "grad_norm": 1.1953570915609946, "learning_rate": 9.93664827957735e-06, "loss": 1.2526, "step": 45},
    {"epoch": 0.26062322946175637, "grad_norm": 1.4826450819224886, "learning_rate": 9.932984689302012e-06, "loss": 1.1978, "step": 46},
    {"epoch": 0.26628895184135976, "grad_norm": 1.1937833972167977, "learning_rate": 9.929218837898143e-06, "loss": 1.1816, "step": 47},
    {"epoch": 0.2719546742209632, "grad_norm": 1.1238100782353855, "learning_rate": 9.925350803432112e-06, "loss": 1.1931, "step": 48},
    {"epoch": 0.2776203966005666, "grad_norm": 1.3338900623153498, "learning_rate": 9.921380666088558e-06, "loss": 1.1978, "step": 49},
    {"epoch": 0.28328611898017, "grad_norm": 1.3236848667289738, "learning_rate": 9.917308508168712e-06, "loss": 1.2551, "step": 50},
    {"epoch": 0.28895184135977336, "grad_norm": 1.425578635546673, "learning_rate": 9.913134414088698e-06, "loss": 1.2441, "step": 51},
    {"epoch": 0.29461756373937675, "grad_norm": 1.171581674684746, "learning_rate": 9.908858470377793e-06, "loss": 1.2369, "step": 52},
    {"epoch": 0.3002832861189802, "grad_norm": 1.1564744150302062, "learning_rate": 9.904480765676617e-06, "loss": 1.209, "step": 53},
    {"epoch": 0.3059490084985836, "grad_norm": 1.1357504524893798, "learning_rate": 9.9000013907353e-06, "loss": 1.2152, "step": 54},
    {"epoch": 0.311614730878187, "grad_norm": 1.0498825437855333, "learning_rate": 9.895420438411616e-06, "loss": 1.2043, "step": 55},
    {"epoch": 0.31728045325779036, "grad_norm": 1.6465219316145685, "learning_rate": 9.890738003669029e-06, "loss": 1.2289, "step": 56},
    {"epoch": 0.32294617563739375, "grad_norm": 1.711551232749367, "learning_rate": 9.885954183574753e-06, "loss": 1.1831, "step": 57},
    {"epoch": 0.3286118980169972, "grad_norm": 1.2636664413259953, "learning_rate": 9.881069077297724e-06, "loss": 1.2061, "step": 58},
    {"epoch": 0.3342776203966006, "grad_norm": 1.4260407982081962, "learning_rate": 9.876082786106546e-06, "loss": 1.1998, "step": 59},
    {"epoch": 0.33994334277620397, "grad_norm": 1.95604739866899, "learning_rate": 9.870995413367397e-06, "loss": 1.2215, "step": 60},
    {"epoch": 0.34560906515580736, "grad_norm": 1.2316545141521473, "learning_rate": 9.865807064541878e-06, "loss": 1.1599, "step": 61},
    {"epoch": 0.35127478753541075, "grad_norm": 1.1178440688886253, "learning_rate": 9.860517847184837e-06, "loss": 1.1907, "step": 62},
    {"epoch": 0.35694050991501414, "grad_norm": 1.305376049095191, "learning_rate": 9.855127870942131e-06, "loss": 1.1474, "step": 63},
    {"epoch": 0.3626062322946176, "grad_norm": 1.0495122657744762, "learning_rate": 9.849637247548356e-06, "loss": 1.2424, "step": 64},
    {"epoch": 0.36827195467422097, "grad_norm": 1.141538926125254, "learning_rate": 9.844046090824533e-06, "loss": 1.1689, "step": 65},
    {"epoch": 0.37393767705382436, "grad_norm": 1.26961257521241, "learning_rate": 9.83835451667574e-06, "loss": 1.2106, "step": 66},
    {"epoch": 0.37960339943342775, "grad_norm": 1.081533609255719, "learning_rate": 9.832562643088724e-06, "loss": 1.1834, "step": 67},
    {"epoch": 0.38526912181303113, "grad_norm": 1.443083776392187, "learning_rate": 9.826670590129442e-06, "loss": 1.1505, "step": 68},
    {"epoch": 0.3909348441926346, "grad_norm": 1.135777382976375, "learning_rate": 9.820678479940573e-06, "loss": 1.1489, "step": 69},
    {"epoch": 0.39660056657223797, "grad_norm": 1.8779005247112062, "learning_rate": 9.814586436738998e-06, "loss": 1.1643, "step": 70},
    {"epoch": 0.40226628895184136, "grad_norm": 1.7980060811236744, "learning_rate": 9.808394586813209e-06, "loss": 1.1594, "step": 71},
    {"epoch": 0.40793201133144474, "grad_norm": 2.572405910372765, "learning_rate": 9.802103058520704e-06, "loss": 1.1854, "step": 72},
    {"epoch": 0.41359773371104813, "grad_norm": 2.0253448122778606, "learning_rate": 9.795711982285317e-06, "loss": 1.1826, "step": 73},
    {"epoch": 0.4192634560906516, "grad_norm": 6.483254642683073, "learning_rate": 9.78922149059452e-06, "loss": 1.1646, "step": 74},
    {"epoch": 0.42492917847025496, "grad_norm": 1.2964281102887218, "learning_rate": 9.782631717996675e-06, "loss": 1.2379, "step": 75},
    {"epoch": 0.43059490084985835, "grad_norm": 1.9517402996335103, "learning_rate": 9.775942801098241e-06, "loss": 1.164, "step": 76},
    {"epoch": 0.43626062322946174, "grad_norm": 3.064531007561859, "learning_rate": 9.76915487856095e-06, "loss": 1.1418, "step": 77},
    {"epoch": 0.44192634560906513, "grad_norm": 1.5009905490397355, "learning_rate": 9.762268091098926e-06, "loss": 1.1653, "step": 78},
    {"epoch": 0.4475920679886686, "grad_norm": 1.104518219439204, "learning_rate": 9.755282581475769e-06, "loss": 1.2025, "step": 79},
    {"epoch": 0.45325779036827196, "grad_norm": 7.807500502849419, "learning_rate": 9.748198494501598e-06, "loss": 1.148, "step": 80},
    {"epoch": 0.45892351274787535, "grad_norm": 6.196503908242147, "learning_rate": 9.741015977030046e-06, "loss": 1.1819, "step": 81},
    {"epoch": 0.46458923512747874, "grad_norm": 2.2714978855142736, "learning_rate": 9.733735177955219e-06, "loss": 1.1907, "step": 82},
    {"epoch": 0.4702549575070821, "grad_norm": 1.834743890260826, "learning_rate": 9.72635624820861e-06, "loss": 1.1381, "step": 83},
    {"epoch": 0.47592067988668557, "grad_norm": 1.28470626171519, "learning_rate": 9.71887934075596e-06, "loss": 1.2079, "step": 84},
    {"epoch": 0.48158640226628896, "grad_norm": 6.197048819949928, "learning_rate": 9.711304610594104e-06, "loss": 1.1272, "step": 85},
    {"epoch": 0.48725212464589235, "grad_norm": 3.412508821399008, "learning_rate": 9.703632214747742e-06, "loss": 1.2382, "step": 86},
    {"epoch": 0.49291784702549574, "grad_norm": 1.57336480270559, "learning_rate": 9.695862312266195e-06, "loss": 1.157, "step": 87},
    {"epoch": 0.4985835694050991, "grad_norm": 7.383065472181884, "learning_rate": 9.687995064220102e-06, "loss": 1.1684, "step": 88},
    {"epoch": 0.5042492917847026, "grad_norm": 7.508526165016783, "learning_rate": 9.680030633698083e-06, "loss": 1.155, "step": 89},
    {"epoch": 0.509915014164306, "grad_norm": 9.25317664016253, "learning_rate": 9.671969185803357e-06, "loss": 1.1452, "step": 90},
    {"epoch": 0.5155807365439093, "grad_norm": 2.2525643431971876, "learning_rate": 9.66381088765032e-06, "loss": 1.1505, "step": 91},
    {"epoch": 0.5212464589235127, "grad_norm": 1.4721293586733248, "learning_rate": 9.65555590836108e-06, "loss": 1.1812, "step": 92},
    {"epoch": 0.5269121813031161, "grad_norm": 2.6949100034582103, "learning_rate": 9.647204419061957e-06, "loss": 1.1739, "step": 93},
    {"epoch": 0.5325779036827195, "grad_norm": 2.027029228479332, "learning_rate": 9.638756592879923e-06, "loss": 1.1335, "step": 94},
    {"epoch": 0.5382436260623229, "grad_norm": 1.8382974162119243, "learning_rate": 9.630212604939026e-06, "loss": 1.1298, "step": 95},
    {"epoch": 0.5439093484419264, "grad_norm": 1.2086577711922202, "learning_rate": 9.621572632356754e-06, "loss": 1.167, "step": 96},
    {"epoch": 0.5495750708215298, "grad_norm": 1.2819489966676616, "learning_rate": 9.61283685424036e-06, "loss": 1.1151, "step": 97},
    {"epoch": 0.5552407932011332, "grad_norm": 1.6800709750196126, "learning_rate": 9.604005451683154e-06, "loss": 1.1945, "step": 98},
    {"epoch": 0.5609065155807366, "grad_norm": 1.3375384173734144, "learning_rate": 9.59507860776075e-06, "loss": 1.1621, "step": 99},
    {"epoch": 0.56657223796034, "grad_norm": 2.188062868326175, "learning_rate": 9.586056507527266e-06, "loss": 1.1555, "step": 100},
    {"epoch": 0.5722379603399433, "grad_norm": 1.3814102048227788, "learning_rate": 9.57693933801149e-06, "loss": 1.1733, "step": 101},
    {"epoch": 0.5779036827195467, "grad_norm": 1.8014483071872645, "learning_rate": 9.567727288213005e-06, "loss": 1.1964, "step": 102},
    {"epoch": 0.5835694050991501, "grad_norm": 1.1912746031738484, "learning_rate": 9.558420549098269e-06, "loss": 1.2144, "step": 103},
    {"epoch": 0.5892351274787535, "grad_norm": 3.034007485521762, "learning_rate": 9.549019313596652e-06, "loss": 1.1321, "step": 104},
    {"epoch": 0.5949008498583569, "grad_norm": 1.866729945439932, "learning_rate": 9.539523776596446e-06, "loss": 1.1539, "step": 105},
    {"epoch": 0.6005665722379604, "grad_norm": 1.5773392319922173, "learning_rate": 9.529934134940819e-06, "loss": 1.1373, "step": 106},
    {"epoch": 0.6062322946175638, "grad_norm": 1.6561757646401918, "learning_rate": 9.520250587423733e-06, "loss": 1.1788, "step": 107},
    {"epoch": 0.6118980169971672, "grad_norm": 1.2809743948171723, "learning_rate": 9.510473334785828e-06, "loss": 1.1509, "step": 108},
    {"epoch": 0.6175637393767706, "grad_norm": 3.3019220495325405, "learning_rate": 9.500602579710256e-06, "loss": 1.1879, "step": 109},
    {"epoch": 0.623229461756374, "grad_norm": 1.5241985081276304, "learning_rate": 9.490638526818482e-06, "loss": 1.1114, "step": 110},
    {"epoch": 0.6288951841359773, "grad_norm": 2.053104975498995, "learning_rate": 9.480581382666041e-06, "loss": 1.2417, "step": 111},
    {"epoch": 0.6345609065155807, "grad_norm": 1.450461775862418, "learning_rate": 9.470431355738257e-06, "loss": 1.0761, "step": 112},
    {"epoch": 0.6402266288951841, "grad_norm": 2.831772615909268, "learning_rate": 9.460188656445921e-06, "loss": 1.1684, "step": 113},
    {"epoch": 0.6458923512747875, "grad_norm": 1.5478096558601282, "learning_rate": 9.449853497120928e-06, "loss": 1.1695, "step": 114},
    {"epoch": 0.6515580736543909, "grad_norm": 1.6582616402814803, "learning_rate": 9.439426092011877e-06, "loss": 1.1099, "step": 115},
    {"epoch": 0.6572237960339944, "grad_norm": 1.0617767973732541, "learning_rate": 9.428906657279629e-06, "loss": 1.1584, "step": 116},
    {"epoch": 0.6628895184135978, "grad_norm": 1.6822664727814025, "learning_rate": 9.418295410992821e-06, "loss": 1.1527, "step": 117},
    {"epoch": 0.6685552407932012, "grad_norm": 1.1837357577931802, "learning_rate": 9.407592573123359e-06, "loss": 1.187, "step": 118},
    {"epoch": 0.6742209631728046, "grad_norm": 1.690006148754325, "learning_rate": 9.396798365541841e-06, "loss": 1.1023, "step": 119},
    {"epoch": 0.6798866855524079, "grad_norm": 1.2755747770023382, "learning_rate": 9.385913012012972e-06, "loss": 1.1779, "step": 120},
    {"epoch": 0.6855524079320113, "grad_norm": 1.0625930962823409, "learning_rate": 9.374936738190913e-06, "loss": 1.1586, "step": 121},
    {"epoch": 0.6912181303116147, "grad_norm": 1.4107647400186194, "learning_rate": 9.363869771614615e-06, "loss": 1.1227, "step": 122},
    {"epoch": 0.6968838526912181, "grad_norm": 1.4237393729227041, "learning_rate": 9.35271234170309e-06, "loss": 1.1526, "step": 123},
    {"epoch": 0.7025495750708215, "grad_norm": 1.239081728465614, "learning_rate": 9.341464679750669e-06, "loss": 1.1676, "step": 124},
    {"epoch": 0.7082152974504249, "grad_norm": 1.2250609811941313, "learning_rate": 9.330127018922195e-06, "loss": 1.1549, "step": 125},
    {"epoch": 0.7138810198300283, "grad_norm": 1.0079463118549998, "learning_rate": 9.318699594248192e-06, "loss": 1.0825, "step": 126},
    {"epoch": 0.7195467422096318, "grad_norm": 1.1822482076914111, "learning_rate": 9.307182642620001e-06, "loss": 1.1699, "step": 127},
    {"epoch": 0.7252124645892352, "grad_norm": 1.192585782341377, "learning_rate": 9.295576402784858e-06, "loss": 1.1864, "step": 128},
    {"epoch": 0.7308781869688386, "grad_norm": 1.1793876225801334, "learning_rate": 9.283881115340957e-06, "loss": 1.1592, "step": 129},
    {"epoch": 0.7365439093484419, "grad_norm": 1.4328581990598621, "learning_rate": 9.272097022732444e-06, "loss": 1.1264, "step": 130},
    {"epoch": 0.7422096317280453, "grad_norm": 1.4063460821599099, "learning_rate": 9.260224369244414e-06, "loss": 1.1582, "step": 131},
    {"epoch": 0.7478753541076487, "grad_norm": 1.3928551806399836, "learning_rate": 9.248263400997826e-06, "loss": 1.1036, "step": 132},
    {"epoch": 0.7535410764872521, "grad_norm": 1.0443812793505807, "learning_rate": 9.236214365944418e-06, "loss": 1.1809, "step": 133},
    {"epoch": 0.7592067988668555, "grad_norm": 7.4865021772015234, "learning_rate": 9.224077513861556e-06, "loss": 1.1432, "step": 134},
    {"epoch": 0.7648725212464589, "grad_norm": 4.687727924279942, "learning_rate": 9.211853096347059e-06, "loss": 1.1436, "step": 135},
    {"epoch": 0.7705382436260623, "grad_norm": 1.7813513129483227, "learning_rate": 9.199541366813984e-06, "loss": 1.2003, "step": 136},
    {"epoch": 0.7762039660056658, "grad_norm": 1.1574866856711652, "learning_rate": 9.18714258048537e-06, "loss": 1.0949, "step": 137},
    {"epoch": 0.7818696883852692, "grad_norm": 1.5923532949818175, "learning_rate": 9.174656994388957e-06, "loss": 1.1312, "step": 138},
    {"epoch": 0.7875354107648725, "grad_norm": 1.4090405021331738, "learning_rate": 9.16208486735184e-06, "loss": 1.1371, "step": 139},
    {"epoch": 0.7932011331444759, "grad_norm": 1.1066958591085674, "learning_rate": 9.149426459995127e-06, "loss": 1.1892, "step": 140},
    {"epoch": 0.7988668555240793, "grad_norm": 1.3806489023187403, "learning_rate": 9.136682034728508e-06, "loss": 1.1203, "step": 141},
    {"epoch": 0.8045325779036827, "grad_norm": 1.4492241915768966, "learning_rate": 9.123851855744842e-06, "loss": 1.1606, "step": 142},
    {"epoch": 0.8101983002832861, "grad_norm": 1.2880006738591805, "learning_rate": 9.110936189014668e-06, "loss": 1.1363, "step": 143},
    {"epoch": 0.8158640226628895, "grad_norm": 1.4252322295071467, "learning_rate": 9.097935302280682e-06, "loss": 1.1299, "step": 144},
    {"epoch": 0.8215297450424929, "grad_norm": 1.1051239821774794, "learning_rate": 9.08484946505221e-06, "loss": 1.1855, "step": 145},
    {"epoch": 0.8271954674220963, "grad_norm": 1.1582328438262173, "learning_rate": 9.0716789485996e-06, "loss": 1.1173, "step": 146},
    {"epoch": 0.8328611898016998, "grad_norm": 1.1514645858243073, "learning_rate": 9.058424025948609e-06, "loss": 1.0758, "step": 147},
    {"epoch": 0.8385269121813032, "grad_norm": 1.9099023373890425, "learning_rate": 9.045084971874738e-06, "loss": 1.1502, "step": 148},
    {"epoch": 0.8441926345609065, "grad_norm": 1.4883203974156398, "learning_rate": 9.03166206289754e-06, "loss": 1.1244, "step": 149},
    {"epoch": 0.8498583569405099, "grad_norm": 1.2439793782301596, "learning_rate": 9.018155577274891e-06, "loss": 1.1188, "step": 150},
    {"epoch": 0.8555240793201133, "grad_norm": 0.9842320904106822, "learning_rate": 9.004565794997209e-06, "loss": 1.0915, "step": 151},
    {"epoch": 0.8611898016997167, "grad_norm": 1.1256206443075392, "learning_rate": 8.990892997781661e-06, "loss": 1.1418, "step": 152},
    {"epoch": 0.8668555240793201, "grad_norm": 1.4668868690697237, "learning_rate": 8.977137469066321e-06, "loss": 1.1439, "step": 153},
    {"epoch": 0.8725212464589235, "grad_norm": 1.0357963651071045, "learning_rate": 8.963299494004292e-06, "loss": 1.1489, "step": 154},
    {"epoch": 0.8781869688385269, "grad_norm": 1.2279259538562963, "learning_rate": 8.949379359457795e-06, "loss": 1.148, "step": 155},
    {"epoch": 0.8838526912181303, "grad_norm": 1.279164021341607, "learning_rate": 8.935377353992222e-06, "loss": 1.1291, "step": 156},
    {"epoch": 0.8895184135977338, "grad_norm": 1.0117872914387078, "learning_rate": 8.921293767870157e-06, "loss": 1.1029, "step": 157},
    {"epoch": 0.8951841359773371, "grad_norm": 1.0385739682984056, "learning_rate": 8.907128893045359e-06, "loss": 1.1378, "step": 158},
    {"epoch": 0.9008498583569405, "grad_norm": 0.9862798736503189, "learning_rate": 8.892883023156703e-06, "loss": 1.1247, "step": 159},
    {"epoch": 0.9065155807365439, "grad_norm": 1.0052226052209343, "learning_rate": 8.8785564535221e-06, "loss": 1.1396, "step": 160},
    {"epoch": 0.9121813031161473, "grad_norm": 1.0025191403649947, "learning_rate": 8.86414948113237e-06, "loss": 1.1072, "step": 161},
    {"epoch": 0.9178470254957507, "grad_norm": 1.0190829556170014, "learning_rate": 8.849662404645097e-06, "loss": 1.0692, "step": 162},
    {"epoch": 0.9235127478753541, "grad_norm": 1.065083676666634, "learning_rate": 8.835095524378413e-06, "loss": 1.0839, "step": 163},
    {"epoch": 0.9291784702549575, "grad_norm": 2.75250829153078, "learning_rate": 8.820449142304805e-06, "loss": 1.0976, "step": 164},
    {"epoch": 0.9348441926345609, "grad_norm": 1.11457337735503, "learning_rate": 8.805723562044825e-06, "loss": 1.1383, "step": 165},
    {"epoch": 0.9405099150141643, "grad_norm": 1.223823647150824, "learning_rate": 8.790919088860815e-06, "loss": 1.1331, "step": 166},
    {"epoch": 0.9461756373937678, "grad_norm": 0.9688685956053592, "learning_rate": 8.776036029650573e-06, "loss": 1.1168, "step": 167},
    {"epoch": 0.9518413597733711, "grad_norm": 1.0407006447195224, "learning_rate": 8.76107469294099e-06, "loss": 1.1353, "step": 168},
    {"epoch": 0.9575070821529745, "grad_norm": 1.477166466547593, "learning_rate": 8.746035388881655e-06, "loss": 1.146, "step": 169},
    {"epoch": 0.9631728045325779, "grad_norm": 1.1923873158431406, "learning_rate": 8.730918429238429e-06, "loss": 1.1513, "step": 170},
    {"epoch": 0.9688385269121813, "grad_norm": 1.2104600261128056, "learning_rate": 8.715724127386971e-06, "loss": 1.0846, "step": 171},
    {"epoch": 0.9745042492917847, "grad_norm": 1.026649259168152, "learning_rate": 8.70045279830626e-06, "loss": 1.0987, "step": 172},
    {"epoch": 0.9801699716713881, "grad_norm": 1.1324270741577538, "learning_rate": 8.685104758572047e-06, "loss": 1.1884, "step": 173},
    {"epoch": 0.9858356940509915, "grad_norm": 1.1264630127825281, "learning_rate": 8.669680326350303e-06, "loss": 1.1505, "step": 174},
    {"epoch": 0.9915014164305949, "grad_norm": 1.0463584307162723, "learning_rate": 8.65417982139062e-06, "loss": 1.1194, "step": 175},
    {"epoch": 0.9971671388101983, "grad_norm": 1.1195551791308074, "learning_rate": 8.638603565019588e-06, "loss": 1.1228, "step": 176},
    {"epoch": 1.0113636363636365, "grad_norm": 1.7869848977800533, "learning_rate": 8.622951880134122e-06, "loss": 1.0017, "step": 177},
    {"epoch": 1.0170454545454546, "grad_norm": 1.8967548711721598, "learning_rate": 8.60722509119478e-06, "loss": 1.0646, "step": 178},
    {"epoch": 1.0227272727272727, "grad_norm": 2.7719840532515856, "learning_rate": 8.59142352421903e-06, "loss": 0.9887, "step": 179},
    {"epoch": 1.0284090909090908, "grad_norm": 1.8480101734746917, "learning_rate": 8.575547506774498e-06, "loss": 1.0262, "step": 180},
    {"epoch": 1.0340909090909092, "grad_norm": 1.4999444026158775, "learning_rate": 8.559597367972168e-06, "loss": 0.9829, "step": 181},
    {"epoch": 1.0397727272727273, "grad_norm": 1.38809085421665, "learning_rate": 8.543573438459573e-06, "loss": 1.0144, "step": 182},
    {"epoch": 1.0454545454545454, "grad_norm": 1.2624399470463477, "learning_rate": 8.527476050413922e-06, "loss": 0.9867, "step": 183},
    {"epoch": 1.0511363636363635, "grad_norm": 7.342610894443344, "learning_rate": 8.511305537535238e-06, "loss": 0.9866, "step": 184},
    {"epoch": 1.0568181818181819, "grad_norm": 8.705248219538825, "learning_rate": 8.49506223503941e-06, "loss": 0.9728, "step": 185},
    {"epoch": 1.0625, "grad_norm": 2.0263962989089936, "learning_rate": 8.47874647965128e-06, "loss": 0.9965, "step": 186},
    {"epoch": 1.0681818181818181, "grad_norm": 2.13351438929688, "learning_rate": 8.462358609597629e-06, "loss": 1.0024, "step": 187},
    {"epoch": 1.0738636363636365, "grad_norm": 2.0005753741817736, "learning_rate": 8.445898964600188e-06, "loss": 0.993, "step": 188},
    {"epoch": 1.0795454545454546, "grad_norm": 2.084050032615475, "learning_rate": 8.429367885868582e-06, "loss": 0.9958, "step": 189},
    {"epoch": 1.0852272727272727, "grad_norm": 1.7516330808766072, "learning_rate": 8.412765716093273e-06, "loss": 1.0554, "step": 190},
    {"epoch": 1.0909090909090908, "grad_norm": 1.2861019981619892, "learning_rate": 8.396092799438429e-06, "loss": 1.013, "step": 191},
    {"epoch": 1.0965909090909092, "grad_norm": 1.4381225932886976, "learning_rate": 8.379349481534822e-06, "loss": 0.9797, "step": 192},
    {"epoch": 1.1022727272727273, "grad_norm": 1.8623594079891328, "learning_rate": 8.362536109472637e-06, "loss": 1.0018, "step": 193},
    {"epoch": 1.1079545454545454, "grad_norm": 1.5115381108478676, "learning_rate": 8.345653031794292e-06, "loss": 1.016, "step": 194},
    {"epoch": 1.1136363636363635, "grad_norm": 1.193026650866575, "learning_rate": 8.328700598487203e-06, "loss": 0.9977, "step": 195},
    {"epoch": 1.1193181818181819, "grad_norm": 1.080840404605079, "learning_rate": 8.31167916097654e-06, "loss": 0.9982, "step": 196},
    {"epoch": 1.125, "grad_norm": 1.244418182887263, "learning_rate": 8.294589072117925e-06, "loss": 1.0206, "step": 197},
    {"epoch": 1.1306818181818181, "grad_norm": 1.054116651622593, "learning_rate": 8.277430686190137e-06, "loss": 0.9932, "step": 198},
    {"epoch": 1.1363636363636362, "grad_norm": 1.6708346020909142, "learning_rate": 8.260204358887753e-06, "loss": 0.9867, "step": 199},
    {"epoch": 1.1420454545454546, "grad_norm": 1.764380671950815, "learning_rate": 8.24291044731378e-06, "loss": 1.0255, "step": 200},
    {"epoch": 1.1477272727272727, "grad_norm": 1.4610852940462264, "learning_rate": 8.225549309972256e-06, "loss": 1.0016, "step": 201},
    {"epoch": 1.1534090909090908, "grad_norm": 1.3465974910520928, "learning_rate": 8.208121306760806e-06, "loss": 0.9942, "step": 202},
    {"epoch": 1.1590909090909092, "grad_norm": 3.407109598217383, "learning_rate": 8.190626798963198e-06, "loss": 0.9595, "step": 203},
    {"epoch": 1.1647727272727273, "grad_norm": 3.4569449045424228, "learning_rate": 8.173066149241839e-06, "loss": 0.9679, "step": 204},
    {"epoch": 1.1704545454545454, "grad_norm": 3.5722389574790623, "learning_rate": 8.155439721630265e-06, "loss": 1.0112, "step": 205},
    {"epoch": 1.1761363636363638, "grad_norm": 1.7368368324960894, "learning_rate": 8.137747881525593e-06, "loss": 0.9658, "step": 206},
    {"epoch": 1.1818181818181819, "grad_norm": 3.5425491105943365, "learning_rate": 8.119990995680942e-06, "loss": 1.0097, "step": 207},
    {"epoch": 1.1875, "grad_norm": 4.277519958399436, "learning_rate": 8.102169432197842e-06, "loss": 1.0525, "step": 208},
    {"epoch": 1.1931818181818181, "grad_norm": 1.5253776819790414, "learning_rate": 8.084283560518584e-06, "loss": 1.0257, "step": 209},
    {"epoch": 1.1988636363636362, "grad_norm": 2.393941181872517, "learning_rate": 8.066333751418582e-06, "loss": 0.9519, "step": 210},
    {"epoch": 1.2045454545454546, "grad_norm": 1.8648154402777406, "learning_rate": 8.048320376998675e-06, "loss": 1.0314, "step": 211},
    {"epoch": 1.2102272727272727, "grad_norm": 1.1560926115738988, "learning_rate": 8.030243810677408e-06, "loss": 1.0079, "step": 212},
    {"epoch": 1.2159090909090908, "grad_norm": 1.9861708806007312, "learning_rate": 8.012104427183313e-06, "loss": 0.9712, "step": 213},
    {"epoch": 1.2215909090909092, "grad_norm": 1.6176603802315128, "learning_rate": 7.993902602547113e-06, "loss": 1.0604, "step": 214},
    {"epoch": 1.2272727272727273, "grad_norm": 1.206136483858858, "learning_rate": 7.97563871409395e-06, "loss": 0.9968, "step": 215},
    {"epoch": 1.2329545454545454, "grad_norm": 1.0849650106469113, "learning_rate": 7.957313140435545e-06, "loss": 1.0013, "step": 216},
    {"epoch": 1.2386363636363638, "grad_norm": 1.2530592258144626, "learning_rate": 7.938926261462366e-06, "loss": 1.0392, "step": 217},
    {"epoch": 1.2443181818181819, "grad_norm": 1.4528013728950318, "learning_rate": 7.920478458335738e-06, "loss": 0.945, "step": 218},
    {"epoch": 1.25, "grad_norm": 1.1182010469150763, "learning_rate": 7.901970113479956e-06, "loss": 0.9755, "step": 219},
    {"epoch": 1.2556818181818181, "grad_norm": 1.274158214216111, "learning_rate": 7.883401610574338e-06, "loss": 0.9827, "step": 220},
    {"epoch": 1.2613636363636362, "grad_norm": 1.4460645426911298, "learning_rate": 7.86477333454529e-06, "loss": 1.0233, "step": 221},
    {"epoch": 1.2670454545454546, "grad_norm": 1.004043430975716, "learning_rate": 7.84608567155832e-06, "loss": 0.988, "step": 222},
    {"epoch": 1.2727272727272727, "grad_norm": 1.1277928768546195, "learning_rate": 7.82733900901003e-06, "loss": 1.0092, "step": 223},
    {"epoch": 1.2784090909090908, "grad_norm": 1.30174465678015, "learning_rate": 7.808533735520087e-06, "loss": 1.0023, "step": 224},
    {"epoch": 1.2840909090909092, "grad_norm": 1.155122280361969, "learning_rate": 7.789670240923169e-06, "loss": 0.9938, "step": 225},
    {"epoch": 1.2897727272727273, "grad_norm": 1.1535920929699675, "learning_rate": 7.770748916260875e-06, "loss": 1.0215, "step": 226},
    {"epoch": 1.2954545454545454, "grad_norm": 1.7495637702269113, "learning_rate": 7.751770153773635e-06, "loss": 0.9776, "step": 227},
    {"epoch": 1.3011363636363638, "grad_norm": 1.2776922576240242, "learning_rate": 7.732734346892561e-06, "loss": 0.9716, "step": 228},
    {"epoch": 1.3068181818181819, "grad_norm": 1.3172404492877499, "learning_rate": 7.71364189023131e-06, "loss": 0.9928, "step": 229},
    {"epoch": 1.3125, "grad_norm": 1.0320305867343866, "learning_rate": 7.69449317957788e-06, "loss": 0.9544, "step": 230},
    {"epoch": 1.3181818181818181, "grad_norm": 0.9917633137560159, "learning_rate": 7.675288611886423e-06, "loss": 0.9762, "step": 231},
    {"epoch": 1.3238636363636362, "grad_norm": 0.8750459875550817, "learning_rate": 7.656028585269017e-06, "loss": 0.9649, "step": 232},
    {"epoch": 1.3295454545454546, "grad_norm": 1.0172245413205394, "learning_rate": 7.636713498987405e-06, "loss": 0.9915, "step": 233},
    {"epoch": 1.3352272727272727, "grad_norm": 1.1026610095660114, "learning_rate": 7.617343753444714e-06, "loss": 0.9167, "step": 234},
    {"epoch": 1.3409090909090908, "grad_norm": 0.9838674494365538, "learning_rate": 7.597919750177168e-06, "loss": 0.9978, "step": 235},
    {"epoch": 1.3465909090909092, "grad_norm": 0.9922575875228704, "learning_rate": 7.5784418918457605e-06, "loss": 1.0052, "step": 236},
    {"epoch": 1.3522727272727273, "grad_norm": 0.9776223871792626, "learning_rate": 7.5589105822278944e-06, "loss": 1.0096, "step": 237},
    {"epoch": 1.3579545454545454, "grad_norm": 1.4258305295766374, "learning_rate": 7.539326226209032e-06, "loss": 1.0458, "step": 238},
    {"epoch": 1.3636363636363638, "grad_norm": 1.0015058561164187, "learning_rate": 7.519689229774282e-06, "loss": 1.0248, "step": 239},
    {"epoch": 1.3693181818181819, "grad_norm": 1.0082049852889665, "learning_rate": 7.500000000000001e-06, "loss": 0.9766, "step": 240},
    {"epoch": 1.375, "grad_norm": 0.904307095617801, "learning_rate": 7.4802589450453415e-06, "loss": 1.029, "step": 241},
    {"epoch": 1.3806818181818181, "grad_norm": 0.9709949750288794, "learning_rate": 7.4604664741437975e-06, "loss": 0.9803, "step": 242},
    {"epoch": 1.3863636363636362, "grad_norm": 0.9137049440782995, "learning_rate": 7.440622997594718e-06, "loss": 0.9838, "step": 243},
    {"epoch": 1.3920454545454546, "grad_norm": 0.955522616879317, "learning_rate": 7.420728926754803e-06, "loss": 0.9841, "step": 244},
    {"epoch": 1.3977272727272727, "grad_norm": 0.8924545271105511, "learning_rate": 7.400784674029579e-06, "loss": 0.9747, "step": 245},
    {"epoch": 1.4034090909090908, "grad_norm": 0.9275527221675671, "learning_rate": 7.380790652864842e-06, "loss": 1.0203, "step": 246},
    {"epoch": 1.4090909090909092, "grad_norm": 0.9480980891308645, "learning_rate": 7.360747277738094e-06, "loss": 0.9923, "step": 247},
    {"epoch": 1.4147727272727273, "grad_norm": 0.8427849664059336, "learning_rate": 7.340654964149947e-06, "loss": 0.9806, "step": 248},
    {"epoch": 1.4204545454545454, "grad_norm": 0.9076953250803492, "learning_rate": 7.320514128615511e-06, "loss": 0.9982, "step": 249},
    {"epoch": 1.4261363636363638, "grad_norm": 1.0540250139165377, "learning_rate": 7.300325188655762e-06, "loss": 0.9902, "step": 250},
    {"epoch": 1.4318181818181819, "grad_norm": 0.9954503040475974, "learning_rate": 7.280088562788879e-06, "loss": 0.9809, "step": 251},
    {"epoch": 1.4375, "grad_norm": 0.9967393104089797, "learning_rate": 7.259804670521579e-06, "loss": 1.0, "step": 252},
    {"epoch": 1.4431818181818181, "grad_norm": 0.9891797210154472, "learning_rate": 7.2394739323404105e-06, "loss": 1.0005, "step": 253},
    {"epoch": 1.4488636363636362, "grad_norm": 1.1178308003268749, "learning_rate": 7.219096769703045e-06, "loss": 0.9868, "step": 254},
    {"epoch": 1.4545454545454546, "grad_norm": 1.0000809761609377, "learning_rate": 7.198673605029529e-06, "loss": 0.9648, "step": 255},
    {"epoch": 1.4602272727272727, "grad_norm": 0.9396228245111997, "learning_rate": 7.178204861693546e-06, "loss": 1.0009, "step": 256},
    {"epoch": 1.4659090909090908, "grad_norm": 1.055214770002229, "learning_rate": 7.15769096401362e-06, "loss": 0.9478, "step": 257},
    {"epoch": 1.4715909090909092, "grad_norm": 1.0750160280057304, "learning_rate": 7.137132337244329e-06, "loss": 0.958, "step": 258},
    {"epoch": 1.4772727272727273, "grad_norm": 1.0648150711699151, "learning_rate": 7.116529407567489e-06, "loss": 0.9828, "step": 259},
    {"epoch": 1.4829545454545454, "grad_norm": 1.1192077304577122, "learning_rate": 7.095882602083321e-06, "loss": 0.9707, "step": 260},
    {"epoch": 1.4886363636363638, "grad_norm": 1.1092309283046025, "learning_rate": 7.075192348801591e-06, "loss": 0.9842, "step": 261},
    {"epoch": 1.4943181818181819, "grad_norm": 1.0585087928308756, "learning_rate": 7.054459076632742e-06, "loss": 1.0636, "step": 262},
    {"epoch": 1.5, "grad_norm": 1.041991357364786, "learning_rate": 7.033683215379002e-06, "loss": 0.9753, "step": 263},
    {"epoch": 1.5056818181818183, "grad_norm": 0.9720414152268064, "learning_rate": 7.012865195725473e-06, "loss": 0.9916, "step": 264},
    {"epoch": 1.5113636363636362, "grad_norm": 1.1265716150738212, "learning_rate": 6.9920054492312086e-06, "loss": 1.0678, "step": 265},
    {"epoch": 1.5170454545454546, "grad_norm": 1.0711823881169122, "learning_rate": 6.971104408320253e-06, "loss": 0.9776, "step": 266},
    {"epoch": 1.5227272727272727, "grad_norm": 1.1256078273217827, "learning_rate": 6.950162506272697e-06, "loss": 0.9904, "step": 267},
    {"epoch": 1.5284090909090908, "grad_norm": 0.9811471547098307, "learning_rate": 6.9291801772156775e-06, "loss": 0.987, "step": 268},
    {"epoch": 1.5340909090909092, "grad_norm": 1.205853115403329, "learning_rate": 6.9081578561143924e-06, "loss": 0.9352, "step": 269},
    {"epoch": 1.5397727272727273, "grad_norm": 0.9564252171879485, "learning_rate": 6.887095978763072e-06, "loss": 1.0099, "step": 270},
    {"epoch": 1.5454545454545454, "grad_norm": 0.9739638011221726, "learning_rate": 6.865994981775958e-06, "loss": 0.9186, "step": 271},
    {"epoch": 1.5511363636363638, "grad_norm": 1.3776679228140132, "learning_rate": 6.844855302578236e-06, "loss": 1.0077, "step": 272},
    {"epoch": 1.5568181818181817, "grad_norm": 1.0125445825014543, "learning_rate": 6.823677379396984e-06, "loss": 0.9993, "step": 273},
    {"epoch": 1.5625, "grad_norm": 0.9892499359106408, "learning_rate": 6.802461651252073e-06, "loss": 0.9571, "step": 274},
    {"epoch": 1.5681818181818183, "grad_norm": 1.0831674501266864, "learning_rate": 6.781208557947085e-06, "loss": 1.0061, "step": 275},
    {"epoch": 1.5738636363636362, "grad_norm": 0.9356751500366064, "learning_rate": 6.759918540060173e-06, "loss": 0.979, "step": 276},
    {"epoch": 1.5795454545454546, "grad_norm": 1.0557115003350075, "learning_rate": 6.738592038934946e-06, "loss": 0.9961, "step": 277},
    {"epoch": 1.5852272727272727, "grad_norm": 1.2599637679261655, "learning_rate": 6.717229496671307e-06, "loss": 0.9753, "step": 278},
    {"epoch": 1.5909090909090908, "grad_norm": 1.0507134323091725, "learning_rate": 6.6958313561163046e-06, "loss": 0.9425, "step": 279},
    {"epoch": 1.5965909090909092, "grad_norm": 0.9631905231298211, "learning_rate": 6.674398060854931e-06, "loss": 1.055, "step": 280},
    {"epoch": 1.6022727272727273, "grad_norm": 0.9131560827453628, "learning_rate": 6.652930055200948e-06, "loss": 0.9929, "step": 281},
    {"epoch": 1.6079545454545454, "grad_norm": 0.9138134537225251, "learning_rate": 6.631427784187658e-06, "loss": 0.952, "step": 282},
    {"epoch": 1.6136363636363638, "grad_norm": 0.9436608998471452, "learning_rate": 6.609891693558692e-06, "loss": 1.0371, "step": 283},
    {"epoch": 1.6193181818181817, "grad_norm": 1.077730549555469, "learning_rate": 6.588322229758764e-06, "loss": 1.0231, "step": 284},
    {"epoch": 1.625, "grad_norm": 0.8542525239275349, "learning_rate": 6.566719839924412e-06, "loss": 0.9908, "step": 285},
    {"epoch": 1.6306818181818183, "grad_norm": 0.9390889918397101, "learning_rate": 6.545084971874738e-06, "loss": 0.9965, "step": 286},
    {"epoch": 1.6363636363636362, "grad_norm": 1.3403721698995363, "learning_rate": 6.523418074102117e-06, "loss": 0.9865, "step": 287},
    {"epoch": 1.6420454545454546, "grad_norm": 0.9787534693003979, "learning_rate": 6.501719595762903e-06, "loss": 0.995, "step": 288},
    {"epoch": 1.6477272727272727, "grad_norm": 0.8866152592349634, "learning_rate": 6.479989986668118e-06, "loss": 0.9846, "step": 289},
    {"epoch": 1.6534090909090908, "grad_norm": 0.8915138418235523, "learning_rate": 6.458229697274125e-06, "loss": 1.0373, "step": 290},
    {"epoch": 1.6590909090909092, "grad_norm": 0.9633872591030624, "learning_rate": 6.436439178673296e-06, "loss": 0.9864, "step": 291},
    {"epoch": 1.6647727272727273, "grad_norm": 0.9836814915125117, "learning_rate": 6.41461888258465e-06, "loss": 0.9555, "step": 292},
    {"epoch": 1.6704545454545454, "grad_norm": 0.9708188501717393, "learning_rate": 6.392769261344502e-06, "loss": 0.9448, "step": 293},
    {"epoch": 1.6761363636363638, "grad_norm": 0.8777800692748914, "learning_rate": 6.370890767897078e-06, "loss": 1.0044, "step": 294},
    {"epoch": 1.6818181818181817, "grad_norm": 1.0244121250661828, "learning_rate": 6.348983855785122e-06, "loss": 0.9802, "step": 295},
    {"epoch": 1.6875, "grad_norm": 1.0027302545771752, "learning_rate": 6.3270489791405055e-06, "loss": 0.9562, "step": 296},
    {"epoch": 1.6931818181818183, "grad_norm": 1.7051161806513946, "learning_rate": 6.305086592674802e-06, "loss": 0.9892, "step": 297},
    {"epoch": 1.6988636363636362, "grad_norm": 1.12580729447642, "learning_rate": 6.283097151669869e-06, "loss": 0.9821, "step": 298},
    {"epoch": 1.7045454545454546, "grad_norm": 0.9839470381373491, "learning_rate": 6.261081111968403e-06, "loss": 0.9916, "step": 299},
    {"epoch": 1.7102272727272727, "grad_norm": 1.0613072641616672, "learning_rate": 6.2390389299645e-06, "loss": 0.9783, "step": 300},
    {"epoch": 1.7159090909090908, "grad_norm": 0.9792881716793711, "learning_rate": 6.216971062594179e-06, "loss": 1.0007, "step": 301},
    {"epoch": 1.7215909090909092, "grad_norm": 1.1054016241161089, "learning_rate": 6.1948779673259256e-06, "loss": 1.0079, "step": 302},
    {"epoch": 1.7272727272727273, "grad_norm": 1.2013950643084332, "learning_rate": 6.172760102151195e-06, "loss": 1.0137, "step": 303},
    {"epoch": 1.7329545454545454, "grad_norm": 1.0486842583129228, "learning_rate": 6.1506179255749335e-06, "loss": 0.9611, "step": 304},
    {"epoch": 1.7386363636363638, "grad_norm": 0.9879084512426718, "learning_rate": 6.128451896606054e-06, "loss": 0.987, "step": 305},
    {"epoch": 1.7443181818181817, "grad_norm": 0.8702171126549813, "learning_rate": 6.106262474747939e-06, "loss": 1.0354, "step": 306},
    {"epoch": 1.75, "grad_norm": 0.9479994120475482, "learning_rate": 6.084050119988905e-06, "loss": 0.9687, "step": 307},
    {"epoch": 1.7556818181818183, "grad_norm": 0.841865035975423, "learning_rate": 6.061815292792666e-06, "loss": 0.9692, "step": 308},
    {"epoch": 1.7613636363636362, "grad_norm": 1.1986107322286728, "learning_rate": 6.039558454088796e-06, "loss": 0.9869, "step": 309},
    {"epoch": 1.7670454545454546, "grad_norm": 0.9606223972077408, "learning_rate": 6.0172800652631706e-06, "loss": 1.0164, "step": 310},
    {"epoch": 1.7727272727272727, "grad_norm": 0.8967627253652938, "learning_rate": 5.994980588148391e-06, "loss": 1.043, "step": 311},
    {"epoch": 1.7784090909090908, "grad_norm": 0.7941576266062421, "learning_rate": 5.972660485014231e-06, "loss": 0.9485, "step": 312},
    {"epoch": 1.7840909090909092, "grad_norm": 1.0936763123716517, "learning_rate": 5.950320218558037e-06, "loss": 0.9886, "step": 313},
    {"epoch": 1.7897727272727273, "grad_norm": 1.0795280588915757, "learning_rate": 5.927960251895146e-06, "loss": 1.0174, "step": 314},
    {"epoch": 1.7954545454545454, "grad_norm": 0.8880700856278866, "learning_rate": 5.905581048549279e-06, "loss": 0.9825, "step": 315},
    {"epoch": 1.8011363636363638, "grad_norm": 0.8742464433982793, "learning_rate": 5.883183072442938e-06, "loss": 0.9392, "step": 316},
    {"epoch": 1.8068181818181817, "grad_norm": 0.9015845437433646, "learning_rate": 5.860766787887781e-06, "loss": 0.9507, "step": 317},
    {"epoch": 1.8125, "grad_norm": 0.8777902350206828, "learning_rate": 5.838332659575005e-06, "loss": 1.0214, "step": 318},
    {"epoch": 1.8181818181818183, "grad_norm": 0.9432419707404883, "learning_rate": 5.815881152565712e-06, "loss": 0.9913, "step": 319},
    {"epoch": 1.8238636363636362, "grad_norm": 1.554034736388586, "learning_rate": 5.793412732281258e-06, "loss": 0.9762, "step": 320},
    {"epoch": 1.8295454545454546, "grad_norm": 0.9581038943273897, "learning_rate": 5.7709278644936164e-06, "loss": 0.9848, "step": 321},
    {"epoch": 1.8352272727272727, "grad_norm": 0.8898637306384684, "learning_rate": 5.7484270153157215e-06, "loss": 0.9396, "step": 322},
    {"epoch": 1.8409090909090908, "grad_norm": 1.0203919143753812, "learning_rate": 5.725910651191798e-06, "loss": 1.0037, "step": 323},
    {"epoch": 1.8465909090909092, "grad_norm": 0.8907537657379099, "learning_rate": 5.703379238887703e-06, "loss": 0.9609, "step": 324},
    {"epoch": 1.8522727272727273, "grad_norm": 1.114214216754724, "learning_rate": 5.680833245481234e-06, "loss": 0.9412, "step": 325},
    {"epoch": 1.8579545454545454, "grad_norm": 1.0249614863719094, "learning_rate": 5.6582731383524625e-06, "loss": 1.0452, "step": 326},
    {"epoch": 1.8636363636363638, "grad_norm": 0.9715196988270898, "learning_rate": 5.63569938517404e-06, "loss": 1.0453, "step": 327},
    {"epoch": 1.8693181818181817, "grad_norm": 1.1613903786334339, "learning_rate": 5.613112453901493e-06, "loss": 0.9735, "step": 328},
    {"epoch": 1.875, "grad_norm": 1.059608988677026, "learning_rate": 5.590512812763541e-06, "loss": 0.9618, "step": 329},
    {"epoch": 1.8806818181818183, "grad_norm": 1.0952964220643884, "learning_rate": 5.567900930252375e-06, "loss": 0.9793, "step": 330},
    {"epoch": 1.8863636363636362, "grad_norm": 1.014146750998599, "learning_rate": 5.5452772751139496e-06, "loss": 0.9863, "step": 331},
    {"epoch": 1.8920454545454546, "grad_norm": 0.9663339556094782, "learning_rate": 5.522642316338268e-06, "loss": 1.0089, "step": 332},
    {"epoch": 1.8977272727272727, "grad_norm": 0.9872369642699137, "learning_rate": 5.49999652314966e-06, "loss": 1.0105, "step": 333},
    {"epoch": 1.9034090909090908, "grad_norm": 0.9388637738282897, "learning_rate": 5.477340364997051e-06, "loss": 0.9993, "step": 334},
    {"epoch": 1.9090909090909092, "grad_norm": 1.005111659331097, "learning_rate": 5.454674311544236e-06, "loss": 1.024, "step": 335},
    {"epoch": 1.9147727272727273, "grad_norm": 1.1189249784542552, "learning_rate": 5.431998832660136e-06, "loss": 0.9167, "step": 336},
    {"epoch": 1.9204545454545454, "grad_norm": 0.8754985353482484, "learning_rate": 5.409314398409067e-06, "loss": 0.9509, "step": 337},
    {"epoch": 1.9261363636363638, "grad_norm": 1.0077105144422567, "learning_rate": 5.386621479040985e-06, "loss": 0.9802, "step": 338},
    {"epoch": 1.9318181818181817, "grad_norm": 1.014077284312571, "learning_rate": 5.363920544981749e-06, "loss": 1.0046, "step": 339},
    {"epoch": 1.9375, "grad_norm": 0.8813929725147835, "learning_rate": 5.341212066823356e-06, "loss": 1.006, "step": 340},
    {"epoch": 1.9431818181818183, "grad_norm": 0.9749444900176537, "learning_rate": 5.3184965153142e-06, "loss": 0.987, "step": 341},
    {"epoch": 1.9488636363636362, "grad_norm": 0.9433156213620226, "learning_rate": 5.295774361349299e-06, "loss": 0.9846, "step": 342},
    {"epoch": 1.9545454545454546, "grad_norm": 0.9268456057648533, "learning_rate": 5.27304607596055e-06,
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.9602272727272727, | |
| "grad_norm": 0.8554873129583374, | |
| "learning_rate": 5.250312130306946e-06, | |
| "loss": 0.9835, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.9659090909090908, | |
| "grad_norm": 1.018982780208351, | |
| "learning_rate": 5.227572995664819e-06, | |
| "loss": 0.9825, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.9715909090909092, | |
| "grad_norm": 0.9391997048223797, | |
| "learning_rate": 5.204829143418072e-06, | |
| "loss": 1.0199, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.9772727272727273, | |
| "grad_norm": 1.0146418881124983, | |
| "learning_rate": 5.182081045048404e-06, | |
| "loss": 1.0376, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.9829545454545454, | |
| "grad_norm": 1.0574567491158355, | |
| "learning_rate": 5.159329172125533e-06, | |
| "loss": 0.9434, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.9886363636363638, | |
| "grad_norm": 0.8123284335215641, | |
| "learning_rate": 5.136573996297431e-06, | |
| "loss": 0.9802, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.9943181818181817, | |
| "grad_norm": 0.9618851741092689, | |
| "learning_rate": 5.113815989280528e-06, | |
| "loss": 1.0419, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.8632945643175781, | |
| "learning_rate": 5.091055622849958e-06, | |
| "loss": 0.976, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 2.005698005698006, | |
| "grad_norm": 1.6043377134817856, | |
| "learning_rate": 5.068293368829755e-06, | |
| "loss": 0.8913, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.011396011396011, | |
| "grad_norm": 1.3331364304662667, | |
| "learning_rate": 5.045529699083092e-06, | |
| "loss": 0.8424, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 2.017094017094017, | |
| "grad_norm": 1.100343372994173, | |
| "learning_rate": 5.022765085502478e-06, | |
| "loss": 0.8664, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.022792022792023, | |
| "grad_norm": 1.2647408619538267, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8975, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.0284900284900287, | |
| "grad_norm": 1.3692030374819484, | |
| "learning_rate": 4.977234914497522e-06, | |
| "loss": 0.8659, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.034188034188034, | |
| "grad_norm": 1.04165152843705, | |
| "learning_rate": 4.9544703009169115e-06, | |
| "loss": 0.8465, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 2.03988603988604, | |
| "grad_norm": 1.069447973622135, | |
| "learning_rate": 4.931706631170246e-06, | |
| "loss": 0.8254, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.0455840455840457, | |
| "grad_norm": 1.1882943942044963, | |
| "learning_rate": 4.9089443771500435e-06, | |
| "loss": 0.8759, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 0.9445235142025882, | |
| "learning_rate": 4.886184010719472e-06, | |
| "loss": 0.8761, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.056980056980057, | |
| "grad_norm": 0.9617221724763185, | |
| "learning_rate": 4.863426003702572e-06, | |
| "loss": 0.822, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.0626780626780628, | |
| "grad_norm": 0.9901232814378744, | |
| "learning_rate": 4.840670827874468e-06, | |
| "loss": 0.8423, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.0683760683760686, | |
| "grad_norm": 0.8710776051974528, | |
| "learning_rate": 4.817918954951598e-06, | |
| "loss": 0.8415, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.074074074074074, | |
| "grad_norm": 1.2482792899259578, | |
| "learning_rate": 4.795170856581929e-06, | |
| "loss": 0.8921, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.07977207977208, | |
| "grad_norm": 1.1169049347453446, | |
| "learning_rate": 4.772427004335183e-06, | |
| "loss": 0.8731, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.0854700854700856, | |
| "grad_norm": 1.0557231424552356, | |
| "learning_rate": 4.749687869693056e-06, | |
| "loss": 0.8622, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.091168091168091, | |
| "grad_norm": 0.9181343036612701, | |
| "learning_rate": 4.7269539240394505e-06, | |
| "loss": 0.8653, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.096866096866097, | |
| "grad_norm": 0.9543401797100639, | |
| "learning_rate": 4.7042256386507e-06, | |
| "loss": 0.8419, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.1025641025641026, | |
| "grad_norm": 1.192131842860604, | |
| "learning_rate": 4.681503484685803e-06, | |
| "loss": 0.9153, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.1082621082621085, | |
| "grad_norm": 0.9650701175336839, | |
| "learning_rate": 4.6587879331766465e-06, | |
| "loss": 0.8422, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.113960113960114, | |
| "grad_norm": 0.9343115020962703, | |
| "learning_rate": 4.636079455018253e-06, | |
| "loss": 0.8433, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.1196581196581197, | |
| "grad_norm": 0.9058357605337869, | |
| "learning_rate": 4.613378520959016e-06, | |
| "loss": 0.8587, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.1253561253561255, | |
| "grad_norm": 0.9303289966062062, | |
| "learning_rate": 4.5906856015909365e-06, | |
| "loss": 0.8799, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.131054131054131, | |
| "grad_norm": 0.9993338551104146, | |
| "learning_rate": 4.568001167339866e-06, | |
| "loss": 0.8789, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.1367521367521367, | |
| "grad_norm": 1.003313234824171, | |
| "learning_rate": 4.545325688455766e-06, | |
| "loss": 0.8285, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.1424501424501425, | |
| "grad_norm": 0.9365672809002463, | |
| "learning_rate": 4.52265963500295e-06, | |
| "loss": 0.8561, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.148148148148148, | |
| "grad_norm": 0.8712507036248811, | |
| "learning_rate": 4.500003476850341e-06, | |
| "loss": 0.8262, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 0.9228004881023822, | |
| "learning_rate": 4.477357683661734e-06, | |
| "loss": 0.8766, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.1595441595441596, | |
| "grad_norm": 1.057083805253911, | |
| "learning_rate": 4.454722724886051e-06, | |
| "loss": 0.8653, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.1652421652421654, | |
| "grad_norm": 0.9682059205532203, | |
| "learning_rate": 4.432099069747625e-06, | |
| "loss": 0.8305, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.1709401709401708, | |
| "grad_norm": 0.7938300778290989, | |
| "learning_rate": 4.40948718723646e-06, | |
| "loss": 0.8526, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.1766381766381766, | |
| "grad_norm": 0.992854757801764, | |
| "learning_rate": 4.386887546098509e-06, | |
| "loss": 0.7915, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.1823361823361824, | |
| "grad_norm": 1.1405534353610247, | |
| "learning_rate": 4.364300614825963e-06, | |
| "loss": 0.8756, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.1880341880341883, | |
| "grad_norm": 0.9074206322121355, | |
| "learning_rate": 4.341726861647537e-06, | |
| "loss": 0.8786, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.1937321937321936, | |
| "grad_norm": 0.9106405803513904, | |
| "learning_rate": 4.319166754518768e-06, | |
| "loss": 0.8736, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.1994301994301995, | |
| "grad_norm": 0.9498694178857152, | |
| "learning_rate": 4.296620761112299e-06, | |
| "loss": 0.8382, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.2051282051282053, | |
| "grad_norm": 0.9662171207890898, | |
| "learning_rate": 4.274089348808202e-06, | |
| "loss": 0.846, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.2108262108262107, | |
| "grad_norm": 0.9597347828021979, | |
| "learning_rate": 4.251572984684281e-06, | |
| "loss": 0.8565, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.2165242165242165, | |
| "grad_norm": 1.0199048543960996, | |
| "learning_rate": 4.229072135506384e-06, | |
| "loss": 0.8634, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 0.8699841121610784, | |
| "learning_rate": 4.206587267718743e-06, | |
| "loss": 0.8704, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.2279202279202277, | |
| "grad_norm": 0.9870860597778771, | |
| "learning_rate": 4.18411884743429e-06, | |
| "loss": 0.9155, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.2336182336182335, | |
| "grad_norm": 0.9765675083733482, | |
| "learning_rate": 4.161667340424996e-06, | |
| "loss": 0.9111, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.2393162393162394, | |
| "grad_norm": 1.0450993205368777, | |
| "learning_rate": 4.139233212112221e-06, | |
| "loss": 0.8791, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.245014245014245, | |
| "grad_norm": 1.1146726034384589, | |
| "learning_rate": 4.116816927557063e-06, | |
| "loss": 0.8808, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.2507122507122506, | |
| "grad_norm": 0.9072001670881498, | |
| "learning_rate": 4.094418951450721e-06, | |
| "loss": 0.855, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.2564102564102564, | |
| "grad_norm": 0.928713607803712, | |
| "learning_rate": 4.072039748104856e-06, | |
| "loss": 0.8895, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.262108262108262, | |
| "grad_norm": 0.9633556898613354, | |
| "learning_rate": 4.0496797814419655e-06, | |
| "loss": 0.8809, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.267806267806268, | |
| "grad_norm": 0.8844497867372285, | |
| "learning_rate": 4.0273395149857705e-06, | |
| "loss": 0.841, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.2735042735042734, | |
| "grad_norm": 0.9239145256816056, | |
| "learning_rate": 4.0050194118516095e-06, | |
| "loss": 0.8251, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.2792022792022792, | |
| "grad_norm": 1.1068686883079584, | |
| "learning_rate": 3.982719934736832e-06, | |
| "loss": 0.8515, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.284900284900285, | |
| "grad_norm": 1.178223126387429, | |
| "learning_rate": 3.960441545911205e-06, | |
| "loss": 0.886, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.2905982905982905, | |
| "grad_norm": 0.8243442773624833, | |
| "learning_rate": 3.9381847072073346e-06, | |
| "loss": 0.8073, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.2962962962962963, | |
| "grad_norm": 0.8877251522703663, | |
| "learning_rate": 3.915949880011096e-06, | |
| "loss": 0.8376, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.301994301994302, | |
| "grad_norm": 1.1086289853786166, | |
| "learning_rate": 3.893737525252063e-06, | |
| "loss": 0.835, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 0.9736495968403257, | |
| "learning_rate": 3.871548103393947e-06, | |
| "loss": 0.8366, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.3133903133903133, | |
| "grad_norm": 0.883727910369667, | |
| "learning_rate": 3.849382074425069e-06, | |
| "loss": 0.8788, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.319088319088319, | |
| "grad_norm": 0.9302042209091447, | |
| "learning_rate": 3.827239897848805e-06, | |
| "loss": 0.8105, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.324786324786325, | |
| "grad_norm": 0.9816375724049557, | |
| "learning_rate": 3.805122032674077e-06, | |
| "loss": 0.8801, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.3304843304843303, | |
| "grad_norm": 0.9068093342113286, | |
| "learning_rate": 3.7830289374058214e-06, | |
| "loss": 0.8926, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.336182336182336, | |
| "grad_norm": 0.970100166469761, | |
| "learning_rate": 3.7609610700355014e-06, | |
| "loss": 0.8172, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.341880341880342, | |
| "grad_norm": 0.8283355970207111, | |
| "learning_rate": 3.7389188880315962e-06, | |
| "loss": 0.8541, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.347578347578348, | |
| "grad_norm": 0.836387825954222, | |
| "learning_rate": 3.7169028483301333e-06, | |
| "loss": 0.8566, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.353276353276353, | |
| "grad_norm": 0.9704274187846976, | |
| "learning_rate": 3.6949134073251993e-06, | |
| "loss": 0.856, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.358974358974359, | |
| "grad_norm": 0.8667279540573334, | |
| "learning_rate": 3.6729510208594954e-06, | |
| "loss": 0.896, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.364672364672365, | |
| "grad_norm": 0.9194321407732738, | |
| "learning_rate": 3.6510161442148783e-06, | |
| "loss": 0.8993, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.3703703703703702, | |
| "grad_norm": 0.8956254209520699, | |
| "learning_rate": 3.6291092321029244e-06, | |
| "loss": 0.871, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.376068376068376, | |
| "grad_norm": 0.8944268521885398, | |
| "learning_rate": 3.6072307386554983e-06, | |
| "loss": 0.8958, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.381766381766382, | |
| "grad_norm": 0.8881931841978906, | |
| "learning_rate": 3.58538111741535e-06, | |
| "loss": 0.8718, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.3874643874643873, | |
| "grad_norm": 0.8513595068343849, | |
| "learning_rate": 3.5635608213267063e-06, | |
| "loss": 0.8484, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.393162393162393, | |
| "grad_norm": 0.9552616565495209, | |
| "learning_rate": 3.5417703027258752e-06, | |
| "loss": 0.8576, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.398860398860399, | |
| "grad_norm": 0.884306660742374, | |
| "learning_rate": 3.5200100133318836e-06, | |
| "loss": 0.8623, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.4045584045584047, | |
| "grad_norm": 0.8217549127604973, | |
| "learning_rate": 3.4982804042370977e-06, | |
| "loss": 0.8789, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.41025641025641, | |
| "grad_norm": 0.9177953454550434, | |
| "learning_rate": 3.476581925897885e-06, | |
| "loss": 0.8761, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.415954415954416, | |
| "grad_norm": 0.9191232531329524, | |
| "learning_rate": 3.4549150281252635e-06, | |
| "loss": 0.8381, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.421652421652422, | |
| "grad_norm": 0.8942193186940697, | |
| "learning_rate": 3.4332801600755895e-06, | |
| "loss": 0.9022, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.427350427350427, | |
| "grad_norm": 1.2155759035608542, | |
| "learning_rate": 3.4116777702412374e-06, | |
| "loss": 0.8673, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.433048433048433, | |
| "grad_norm": 0.928253119658496, | |
| "learning_rate": 3.39010830644131e-06, | |
| "loss": 0.8412, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.438746438746439, | |
| "grad_norm": 0.8976369958824371, | |
| "learning_rate": 3.3685722158123435e-06, | |
| "loss": 0.8572, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 0.9342007055562026, | |
| "learning_rate": 3.3470699447990527e-06, | |
| "loss": 0.8389, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.45014245014245, | |
| "grad_norm": 0.9368785720862421, | |
| "learning_rate": 3.3256019391450696e-06, | |
| "loss": 0.8447, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.455840455840456, | |
| "grad_norm": 0.8602147398886509, | |
| "learning_rate": 3.3041686438836984e-06, | |
| "loss": 0.8314, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 0.7971529130684335, | |
| "learning_rate": 3.2827705033286937e-06, | |
| "loss": 0.8075, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.467236467236467, | |
| "grad_norm": 0.9022354930189497, | |
| "learning_rate": 3.261407961065056e-06, | |
| "loss": 0.864, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.472934472934473, | |
| "grad_norm": 0.8412103377280404, | |
| "learning_rate": 3.2400814599398283e-06, | |
| "loss": 0.825, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.4786324786324787, | |
| "grad_norm": 0.963324698161768, | |
| "learning_rate": 3.2187914420529176e-06, | |
| "loss": 0.8245, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.484330484330484, | |
| "grad_norm": 0.8974616882015672, | |
| "learning_rate": 3.197538348747927e-06, | |
| "loss": 0.8574, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.49002849002849, | |
| "grad_norm": 0.8375456208735425, | |
| "learning_rate": 3.176322620603018e-06, | |
| "loss": 0.8567, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.4957264957264957, | |
| "grad_norm": 0.8637885686817552, | |
| "learning_rate": 3.1551446974217643e-06, | |
| "loss": 0.8348, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.5014245014245016, | |
| "grad_norm": 0.8964567431940926, | |
| "learning_rate": 3.1340050182240438e-06, | |
| "loss": 0.8614, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.5071225071225074, | |
| "grad_norm": 1.0153388506539311, | |
| "learning_rate": 3.1129040212369286e-06, | |
| "loss": 0.8288, | |
| "step": 440 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 700, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 88, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.6500642935537664e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
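
A minimal sketch (not part of the checkpoint itself) of how a `trainer_state.json` like the one above can be inspected programmatically. It assumes the file has been saved locally under the hypothetical path `trainer_state.json`; the field names (`log_history`, `global_step`, `max_steps`, `loss`, `step`) match the Hugging Face Trainer state format shown above.

```python
import json

# Load the serialized Trainer state; path is an assumption for this sketch.
with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry here carries epoch, grad_norm, learning_rate, loss, step.
history = state["log_history"]
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]

print(f"logged steps: {len(history)} (global_step={state['global_step']})")
print(f"first/last loss: {losses[0]:.4f} -> {losses[-1]:.4f}")

# Rough progress check against the configured schedule: with max_steps=700
# and save_steps=88, step 440 corresponds to the fifth saved checkpoint.
done = state["global_step"] / state["max_steps"]
print(f"training progress: {done:.1%} of max_steps")
```

Reading the trailer this way makes the run's shape explicit: logging every step (`logging_steps: 1`), checkpointing every 88 steps, and stopping at 700 steps rather than the nominal 4 epochs, with this checkpoint captured at global step 440 (epoch ≈ 2.51).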