{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 563148,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0026635982015384943,
      "grad_norm": 0.7201167941093445,
      "learning_rate": 0.0001996,
      "loss": 9.4233,
      "step": 500
    },
    {
      "epoch": 0.005327196403076989,
      "grad_norm": 0.15531601011753082,
      "learning_rate": 0.0003996,
      "loss": 7.4925,
      "step": 1000
    },
    {
      "epoch": 0.007990794604615483,
      "grad_norm": 0.2483946532011032,
      "learning_rate": 0.0005996,
      "loss": 7.4229,
      "step": 1500
    },
    {
      "epoch": 0.010654392806153977,
      "grad_norm": 0.5883714556694031,
      "learning_rate": 0.0007996,
      "loss": 7.2323,
      "step": 2000
    },
    {
      "epoch": 0.013317991007692471,
      "grad_norm": 0.7867951989173889,
      "learning_rate": 0.0009996,
      "loss": 7.0605,
      "step": 2500
    },
    {
      "epoch": 0.015981589209230967,
      "grad_norm": 0.8444465398788452,
      "learning_rate": 0.0009991117421269675,
      "loss": 6.9306,
      "step": 3000
    },
    {
      "epoch": 0.01864518741076946,
      "grad_norm": 0.6867188215255737,
      "learning_rate": 0.00099821991695324,
      "loss": 6.8518,
      "step": 3500
    },
    {
      "epoch": 0.021308785612307955,
      "grad_norm": 0.5377506017684937,
      "learning_rate": 0.0009973280917795124,
      "loss": 6.7872,
      "step": 4000
    },
    {
      "epoch": 0.02397238381384645,
      "grad_norm": 0.6717762351036072,
      "learning_rate": 0.0009964362666057848,
      "loss": 6.7506,
      "step": 4500
    },
    {
      "epoch": 0.026635982015384942,
      "grad_norm": 1.001440167427063,
      "learning_rate": 0.0009955444414320573,
      "loss": 6.7159,
      "step": 5000
    },
    {
      "epoch": 0.029299580216923436,
      "grad_norm": 0.5917439460754395,
      "learning_rate": 0.0009946526162583297,
      "loss": 6.6817,
      "step": 5500
    },
    {
      "epoch": 0.031963178418461934,
      "grad_norm": 0.6403864026069641,
      "learning_rate": 0.0009937607910846021,
      "loss": 6.6561,
      "step": 6000
    },
    {
      "epoch": 0.034626776620000424,
      "grad_norm": 0.6477270126342773,
      "learning_rate": 0.0009928689659108746,
      "loss": 6.626,
      "step": 6500
    },
    {
      "epoch": 0.03729037482153892,
      "grad_norm": 0.8317912817001343,
      "learning_rate": 0.0009919789243874944,
      "loss": 6.6155,
      "step": 7000
    },
    {
      "epoch": 0.03995397302307741,
      "grad_norm": 0.81658536195755,
      "learning_rate": 0.0009910870992137668,
      "loss": 6.5983,
      "step": 7500
    },
    {
      "epoch": 0.04261757122461591,
      "grad_norm": 0.8080710768699646,
      "learning_rate": 0.0009901952740400395,
      "loss": 6.5712,
      "step": 8000
    },
    {
      "epoch": 0.045281169426154406,
      "grad_norm": 0.7330273985862732,
      "learning_rate": 0.000989303448866312,
      "loss": 6.5671,
      "step": 8500
    },
    {
      "epoch": 0.0479447676276929,
      "grad_norm": 0.5048246383666992,
      "learning_rate": 0.0009884134073429318,
      "loss": 6.5566,
      "step": 9000
    },
    {
      "epoch": 0.050608365829231394,
      "grad_norm": 0.60006183385849,
      "learning_rate": 0.0009875215821692042,
      "loss": 6.5299,
      "step": 9500
    },
    {
      "epoch": 0.053271964030769885,
      "grad_norm": 0.7553561329841614,
      "learning_rate": 0.0009866297569954767,
      "loss": 6.4984,
      "step": 10000
    },
    {
      "epoch": 0.05593556223230838,
      "grad_norm": 0.6969451904296875,
      "learning_rate": 0.000985737931821749,
      "loss": 6.4697,
      "step": 10500
    },
    {
      "epoch": 0.05859916043384687,
      "grad_norm": 0.8137800097465515,
      "learning_rate": 0.0009848461066480215,
      "loss": 6.4535,
      "step": 11000
    },
    {
      "epoch": 0.06126275863538537,
      "grad_norm": 0.6285300850868225,
      "learning_rate": 0.000983954281474294,
      "loss": 6.4259,
      "step": 11500
    },
    {
      "epoch": 0.06392635683692387,
      "grad_norm": 0.6301620006561279,
      "learning_rate": 0.0009830624563005664,
      "loss": 6.4174,
      "step": 12000
    },
    {
      "epoch": 0.06658995503846236,
      "grad_norm": 0.49541255831718445,
      "learning_rate": 0.0009821706311268388,
      "loss": 6.4134,
      "step": 12500
    },
    {
      "epoch": 0.06925355324000085,
      "grad_norm": 0.8492177128791809,
      "learning_rate": 0.000981280589603459,
      "loss": 6.394,
      "step": 13000
    },
    {
      "epoch": 0.07191715144153935,
      "grad_norm": 0.6284229755401611,
      "learning_rate": 0.0009803887644297313,
      "loss": 6.3861,
      "step": 13500
    },
    {
      "epoch": 0.07458074964307784,
      "grad_norm": 0.7854110598564148,
      "learning_rate": 0.0009794969392560038,
      "loss": 6.3795,
      "step": 14000
    },
    {
      "epoch": 0.07724434784461634,
      "grad_norm": 0.6952440738677979,
      "learning_rate": 0.0009786051140822762,
      "loss": 6.3679,
      "step": 14500
    },
    {
      "epoch": 0.07990794604615482,
      "grad_norm": Infinity,
      "learning_rate": 0.0009777132889085486,
      "loss": 6.363,
      "step": 15000
    },
    {
      "epoch": 0.08257154424769332,
      "grad_norm": 0.6554950475692749,
      "learning_rate": 0.0009768232473851685,
      "loss": 6.3597,
      "step": 15500
    },
    {
      "epoch": 0.08523514244923182,
      "grad_norm": 0.6918802261352539,
      "learning_rate": 0.000975931422211441,
      "loss": 6.3536,
      "step": 16000
    },
    {
      "epoch": 0.08789874065077032,
      "grad_norm": 0.749622642993927,
      "learning_rate": 0.0009750395970377135,
      "loss": 6.3438,
      "step": 16500
    },
    {
      "epoch": 0.09056233885230881,
      "grad_norm": 0.7492349743843079,
      "learning_rate": 0.000974147771863986,
      "loss": 6.3332,
      "step": 17000
    },
    {
      "epoch": 0.0932259370538473,
      "grad_norm": 0.6446586847305298,
      "learning_rate": 0.000973257730340606,
      "loss": 6.3241,
      "step": 17500
    },
    {
      "epoch": 0.0958895352553858,
      "grad_norm": 0.8464730978012085,
      "learning_rate": 0.0009723659051668784,
      "loss": 6.3194,
      "step": 18000
    },
    {
      "epoch": 0.09855313345692429,
      "grad_norm": 0.6281186938285828,
      "learning_rate": 0.0009714740799931508,
      "loss": 6.309,
      "step": 18500
    },
    {
      "epoch": 0.10121673165846279,
      "grad_norm": 0.8605656027793884,
      "learning_rate": 0.0009705822548194233,
      "loss": 6.2991,
      "step": 19000
    },
    {
      "epoch": 0.10388032986000127,
      "grad_norm": 0.7788176536560059,
      "learning_rate": 0.0009696922132960431,
      "loss": 6.3005,
      "step": 19500
    },
    {
      "epoch": 0.10654392806153977,
      "grad_norm": 0.6075990200042725,
      "learning_rate": 0.0009688003881223157,
      "loss": 6.2843,
      "step": 20000
    },
    {
      "epoch": 0.10920752626307827,
      "grad_norm": 0.7577124238014221,
      "learning_rate": 0.0009679085629485881,
      "loss": 6.2759,
      "step": 20500
    },
    {
      "epoch": 0.11187112446461676,
      "grad_norm": 0.8228011727333069,
      "learning_rate": 0.0009670167377748605,
      "loss": 6.2599,
      "step": 21000
    },
    {
      "epoch": 0.11453472266615526,
      "grad_norm": 0.7447388172149658,
      "learning_rate": 0.0009661266962514804,
      "loss": 6.2513,
      "step": 21500
    },
    {
      "epoch": 0.11719832086769374,
      "grad_norm": 0.9003899097442627,
      "learning_rate": 0.0009652348710777528,
      "loss": 6.2279,
      "step": 22000
    },
    {
      "epoch": 0.11986191906923224,
      "grad_norm": 1.0574650764465332,
      "learning_rate": 0.0009643430459040254,
      "loss": 6.2027,
      "step": 22500
    },
    {
      "epoch": 0.12252551727077074,
      "grad_norm": 0.9610631465911865,
      "learning_rate": 0.0009634512207302978,
      "loss": 6.1742,
      "step": 23000
    },
    {
      "epoch": 0.12518911547230924,
      "grad_norm": 1.1535989046096802,
      "learning_rate": 0.0009625611792069178,
      "loss": 6.1294,
      "step": 23500
    },
    {
      "epoch": 0.12785271367384773,
      "grad_norm": 1.1773658990859985,
      "learning_rate": 0.0009616711376835376,
      "loss": 6.097,
      "step": 24000
    },
    {
      "epoch": 0.13051631187538623,
      "grad_norm": 1.2815760374069214,
      "learning_rate": 0.0009607793125098101,
      "loss": 6.0634,
      "step": 24500
    },
    {
      "epoch": 0.13317991007692473,
      "grad_norm": 1.4569323062896729,
      "learning_rate": 0.0009598874873360826,
      "loss": 6.0457,
      "step": 25000
    },
    {
      "epoch": 0.1358435082784632,
      "grad_norm": 1.506204605102539,
      "learning_rate": 0.000958995662162355,
      "loss": 6.0186,
      "step": 25500
    },
    {
      "epoch": 0.1385071064800017,
      "grad_norm": 1.3472563028335571,
      "learning_rate": 0.0009581056206389749,
      "loss": 6.0086,
      "step": 26000
    },
    {
      "epoch": 0.1411707046815402,
      "grad_norm": 1.4809520244598389,
      "learning_rate": 0.0009572137954652473,
      "loss": 5.9898,
      "step": 26500
    },
    {
      "epoch": 0.1438343028830787,
      "grad_norm": 1.5233690738677979,
      "learning_rate": 0.0009563219702915198,
      "loss": 5.9781,
      "step": 27000
    },
    {
      "epoch": 0.1464979010846172,
      "grad_norm": 1.5101710557937622,
      "learning_rate": 0.0009554301451177923,
      "loss": 5.9561,
      "step": 27500
    },
    {
      "epoch": 0.14916149928615569,
      "grad_norm": 1.612731695175171,
      "learning_rate": 0.0009545401035944123,
      "loss": 5.9526,
      "step": 28000
    },
    {
      "epoch": 0.15182509748769418,
      "grad_norm": 1.7018260955810547,
      "learning_rate": 0.0009536482784206847,
      "loss": 5.9338,
      "step": 28500
    },
    {
      "epoch": 0.15448869568923268,
      "grad_norm": 1.7604913711547852,
      "learning_rate": 0.0009527564532469571,
      "loss": 5.9321,
      "step": 29000
    },
    {
      "epoch": 0.15715229389077118,
      "grad_norm": 1.721969485282898,
      "learning_rate": 0.0009518646280732296,
      "loss": 5.9175,
      "step": 29500
    },
    {
      "epoch": 0.15981589209230965,
      "grad_norm": 1.5823644399642944,
      "learning_rate": 0.0009509745865498494,
      "loss": 5.9153,
      "step": 30000
    },
    {
      "epoch": 0.16247949029384814,
      "grad_norm": 1.7854641675949097,
      "learning_rate": 0.000950082761376122,
      "loss": 5.9072,
      "step": 30500
    },
    {
      "epoch": 0.16514308849538664,
      "grad_norm": 1.7369080781936646,
      "learning_rate": 0.0009491909362023944,
      "loss": 5.9029,
      "step": 31000
    },
    {
      "epoch": 0.16780668669692514,
      "grad_norm": 1.674492597579956,
      "learning_rate": 0.0009482991110286668,
      "loss": 5.8841,
      "step": 31500
    },
    {
      "epoch": 0.17047028489846364,
      "grad_norm": 1.7058457136154175,
      "learning_rate": 0.0009474072858549393,
      "loss": 5.8883,
      "step": 32000
    },
    {
      "epoch": 0.17313388310000213,
      "grad_norm": 1.5853819847106934,
      "learning_rate": 0.0009465172443315591,
      "loss": 5.8775,
      "step": 32500
    },
    {
      "epoch": 0.17579748130154063,
      "grad_norm": 1.7525198459625244,
      "learning_rate": 0.0009456254191578317,
      "loss": 5.8717,
      "step": 33000
    },
    {
      "epoch": 0.17846107950307913,
      "grad_norm": 1.9233468770980835,
      "learning_rate": 0.0009447335939841041,
      "loss": 5.8608,
      "step": 33500
    },
    {
      "epoch": 0.18112467770461763,
      "grad_norm": 1.637522578239441,
      "learning_rate": 0.0009438417688103765,
      "loss": 5.8658,
      "step": 34000
    },
    {
      "epoch": 0.1837882759061561,
      "grad_norm": 1.8892813920974731,
      "learning_rate": 0.000942949943636649,
      "loss": 5.8523,
      "step": 34500
    },
    {
      "epoch": 0.1864518741076946,
      "grad_norm": 1.9510762691497803,
      "learning_rate": 0.0009420599021132689,
      "loss": 5.8404,
      "step": 35000
    },
    {
      "epoch": 0.1891154723092331,
      "grad_norm": 1.7907196283340454,
      "learning_rate": 0.0009411680769395415,
      "loss": 5.8396,
      "step": 35500
    },
    {
      "epoch": 0.1917790705107716,
      "grad_norm": 1.8805279731750488,
      "learning_rate": 0.0009402762517658139,
      "loss": 5.8293,
      "step": 36000
    },
    {
      "epoch": 0.19444266871231008,
      "grad_norm": 1.7272233963012695,
      "learning_rate": 0.0009393844265920863,
      "loss": 5.8268,
      "step": 36500
    },
    {
      "epoch": 0.19710626691384858,
      "grad_norm": 2.035203695297241,
      "learning_rate": 0.0009384926014183588,
      "loss": 5.8209,
      "step": 37000
    },
    {
      "epoch": 0.19976986511538708,
      "grad_norm": 1.8728936910629272,
      "learning_rate": 0.0009376007762446312,
      "loss": 5.8165,
      "step": 37500
    },
    {
      "epoch": 0.20243346331692558,
      "grad_norm": 1.9231390953063965,
      "learning_rate": 0.0009367089510709037,
      "loss": 5.8149,
      "step": 38000
    },
    {
      "epoch": 0.20509706151846407,
      "grad_norm": 1.7793642282485962,
      "learning_rate": 0.0009358171258971762,
      "loss": 5.8132,
      "step": 38500
    },
    {
      "epoch": 0.20776065972000254,
      "grad_norm": 1.7759062051773071,
      "learning_rate": 0.000934927084373796,
      "loss": 5.8065,
      "step": 39000
    },
    {
      "epoch": 0.21042425792154104,
      "grad_norm": 1.7528033256530762,
      "learning_rate": 0.0009340352592000685,
      "loss": 5.8023,
      "step": 39500
    },
    {
      "epoch": 0.21308785612307954,
      "grad_norm": 1.8702290058135986,
      "learning_rate": 0.0009331434340263409,
      "loss": 5.7909,
      "step": 40000
    },
    {
      "epoch": 0.21575145432461804,
      "grad_norm": 1.9332852363586426,
      "learning_rate": 0.000932253392502961,
      "loss": 5.7937,
      "step": 40500
    },
    {
      "epoch": 0.21841505252615653,
      "grad_norm": 1.8513240814208984,
      "learning_rate": 0.0009313615673292334,
      "loss": 5.7865,
      "step": 41000
    },
    {
      "epoch": 0.22107865072769503,
      "grad_norm": 1.8357592821121216,
      "learning_rate": 0.0009304697421555058,
      "loss": 5.7859,
      "step": 41500
    },
    {
      "epoch": 0.22374224892923353,
      "grad_norm": 1.7558057308197021,
      "learning_rate": 0.0009295779169817783,
      "loss": 5.7781,
      "step": 42000
    },
    {
      "epoch": 0.22640584713077203,
      "grad_norm": 1.7014683485031128,
      "learning_rate": 0.0009286860918080507,
      "loss": 5.7703,
      "step": 42500
    },
    {
      "epoch": 0.22906944533231052,
      "grad_norm": 1.8377306461334229,
      "learning_rate": 0.0009277942666343233,
      "loss": 5.7775,
      "step": 43000
    },
    {
      "epoch": 0.231733043533849,
      "grad_norm": 1.7670570611953735,
      "learning_rate": 0.0009269024414605957,
      "loss": 5.7606,
      "step": 43500
    },
    {
      "epoch": 0.2343966417353875,
      "grad_norm": 1.907322883605957,
      "learning_rate": 0.0009260106162868681,
      "loss": 5.7595,
      "step": 44000
    },
    {
      "epoch": 0.237060239936926,
      "grad_norm": 1.9192357063293457,
      "learning_rate": 0.000925120574763488,
      "loss": 5.7574,
      "step": 44500
    },
    {
      "epoch": 0.23972383813846448,
      "grad_norm": 1.801256775856018,
      "learning_rate": 0.0009242287495897604,
      "loss": 5.7623,
      "step": 45000
    },
    {
      "epoch": 0.24238743634000298,
      "grad_norm": 1.7864599227905273,
      "learning_rate": 0.000923336924416033,
      "loss": 5.7464,
      "step": 45500
    },
    {
      "epoch": 0.24505103454154148,
      "grad_norm": 2.0881760120391846,
      "learning_rate": 0.0009224450992423054,
      "loss": 5.7492,
      "step": 46000
    },
    {
      "epoch": 0.24771463274307998,
      "grad_norm": 2.0729496479034424,
      "learning_rate": 0.0009215550577189252,
      "loss": 5.7464,
      "step": 46500
    },
    {
      "epoch": 0.2503782309446185,
      "grad_norm": 1.807739496231079,
      "learning_rate": 0.0009206632325451977,
      "loss": 5.7391,
      "step": 47000
    },
    {
      "epoch": 0.25304182914615697,
      "grad_norm": 1.7898356914520264,
      "learning_rate": 0.0009197731910218176,
      "loss": 5.7399,
      "step": 47500
    },
    {
      "epoch": 0.25570542734769547,
      "grad_norm": 1.6668163537979126,
      "learning_rate": 0.0009188813658480901,
      "loss": 5.7316,
      "step": 48000
    },
    {
      "epoch": 0.25836902554923397,
      "grad_norm": 1.743788242340088,
      "learning_rate": 0.0009179895406743626,
      "loss": 5.7251,
      "step": 48500
    },
    {
      "epoch": 0.26103262375077246,
      "grad_norm": 1.7427009344100952,
      "learning_rate": 0.000917097715500635,
      "loss": 5.7231,
      "step": 49000
    },
    {
      "epoch": 0.26369622195231096,
      "grad_norm": 1.8911422491073608,
      "learning_rate": 0.0009162058903269075,
      "loss": 5.7272,
      "step": 49500
    },
    {
      "epoch": 0.26635982015384946,
      "grad_norm": 1.7783831357955933,
      "learning_rate": 0.0009153140651531799,
      "loss": 5.7193,
      "step": 50000
    },
    {
      "epoch": 0.2690234183553879,
      "grad_norm": 1.75882089138031,
      "learning_rate": 0.0009144222399794523,
      "loss": 5.7233,
      "step": 50500
    },
    {
      "epoch": 0.2716870165569264,
      "grad_norm": 1.8454984426498413,
      "learning_rate": 0.0009135304148057249,
      "loss": 5.7163,
      "step": 51000
    },
    {
      "epoch": 0.2743506147584649,
      "grad_norm": 1.8908592462539673,
      "learning_rate": 0.0009126403732823447,
      "loss": 5.7175,
      "step": 51500
    },
    {
      "epoch": 0.2770142129600034,
      "grad_norm": 1.6938859224319458,
      "learning_rate": 0.0009117485481086172,
      "loss": 5.7113,
      "step": 52000
    },
    {
      "epoch": 0.2796778111615419,
      "grad_norm": 1.8087745904922485,
      "learning_rate": 0.0009108567229348896,
      "loss": 5.7104,
      "step": 52500
    },
    {
      "epoch": 0.2823414093630804,
      "grad_norm": 1.9441509246826172,
      "learning_rate": 0.000909964897761162,
      "loss": 5.7006,
      "step": 53000
    },
    {
      "epoch": 0.2850050075646189,
      "grad_norm": 2.016289710998535,
      "learning_rate": 0.000909074856237782,
      "loss": 5.7084,
      "step": 53500
    },
    {
      "epoch": 0.2876686057661574,
      "grad_norm": 1.7924542427062988,
      "learning_rate": 0.0009081830310640544,
      "loss": 5.6967,
      "step": 54000
    },
    {
      "epoch": 0.2903322039676959,
      "grad_norm": 1.8578925132751465,
      "learning_rate": 0.0009072912058903269,
      "loss": 5.7058,
      "step": 54500
    },
    {
      "epoch": 0.2929958021692344,
      "grad_norm": 1.8592642545700073,
      "learning_rate": 0.0009063993807165993,
      "loss": 5.699,
      "step": 55000
    },
    {
      "epoch": 0.2956594003707729,
      "grad_norm": 1.726891040802002,
      "learning_rate": 0.0009055075555428717,
      "loss": 5.6873,
      "step": 55500
    },
    {
      "epoch": 0.29832299857231137,
      "grad_norm": 1.8885732889175415,
      "learning_rate": 0.0009046175140194918,
      "loss": 5.6859,
      "step": 56000
    },
    {
      "epoch": 0.30098659677384987,
      "grad_norm": 1.6777235269546509,
      "learning_rate": 0.0009037256888457643,
      "loss": 5.6843,
      "step": 56500
    },
    {
      "epoch": 0.30365019497538837,
      "grad_norm": 1.824777364730835,
      "learning_rate": 0.0009028338636720367,
      "loss": 5.6865,
      "step": 57000
    },
    {
      "epoch": 0.30631379317692686,
      "grad_norm": 1.6151602268218994,
      "learning_rate": 0.0009019420384983091,
      "loss": 5.6864,
      "step": 57500
    },
    {
      "epoch": 0.30897739137846536,
      "grad_norm": 1.7518750429153442,
      "learning_rate": 0.0009010502133245816,
      "loss": 5.6835,
      "step": 58000
    },
    {
      "epoch": 0.31164098958000386,
      "grad_norm": 1.9652341604232788,
      "learning_rate": 0.0009001583881508541,
      "loss": 5.6778,
      "step": 58500
    },
    {
      "epoch": 0.31430458778154235,
      "grad_norm": 1.8396164178848267,
      "learning_rate": 0.0008992665629771265,
      "loss": 5.6805,
      "step": 59000
    },
    {
      "epoch": 0.3169681859830808,
      "grad_norm": 1.7397726774215698,
      "learning_rate": 0.000898374737803399,
      "loss": 5.6809,
      "step": 59500
    },
    {
      "epoch": 0.3196317841846193,
      "grad_norm": 1.6550874710083008,
      "learning_rate": 0.0008974846962800188,
      "loss": 5.6713,
      "step": 60000
    },
    {
      "epoch": 0.3222953823861578,
      "grad_norm": 1.7428010702133179,
      "learning_rate": 0.0008965928711062913,
      "loss": 5.6777,
      "step": 60500
    },
    {
      "epoch": 0.3249589805876963,
      "grad_norm": 1.7465174198150635,
      "learning_rate": 0.0008957028295829112,
      "loss": 5.6668,
      "step": 61000
    },
    {
      "epoch": 0.3276225787892348,
      "grad_norm": 1.719190239906311,
      "learning_rate": 0.0008948110044091838,
      "loss": 5.6736,
      "step": 61500
    },
    {
      "epoch": 0.3302861769907733,
      "grad_norm": 1.6879175901412964,
      "learning_rate": 0.0008939191792354562,
      "loss": 5.6585,
      "step": 62000
    },
    {
      "epoch": 0.3329497751923118,
      "grad_norm": 1.6741931438446045,
      "learning_rate": 0.0008930273540617286,
      "loss": 5.6584,
      "step": 62500
    },
    {
      "epoch": 0.3356133733938503,
      "grad_norm": 1.8733186721801758,
      "learning_rate": 0.0008921355288880011,
      "loss": 5.6655,
      "step": 63000
    },
    {
      "epoch": 0.3382769715953888,
      "grad_norm": 1.8366929292678833,
      "learning_rate": 0.0008912454873646209,
      "loss": 5.6551,
      "step": 63500
    },
    {
      "epoch": 0.3409405697969273,
      "grad_norm": 1.7783548831939697,
      "learning_rate": 0.0008903536621908935,
      "loss": 5.6598,
      "step": 64000
    },
    {
      "epoch": 0.34360416799846577,
      "grad_norm": 1.739394187927246,
      "learning_rate": 0.0008894618370171659,
      "loss": 5.6568,
      "step": 64500
    },
    {
      "epoch": 0.34626776620000427,
      "grad_norm": 1.706986427307129,
      "learning_rate": 0.0008885700118434383,
      "loss": 5.6577,
      "step": 65000
    },
    {
      "epoch": 0.34893136440154277,
      "grad_norm": 1.7595592737197876,
      "learning_rate": 0.0008876781866697108,
      "loss": 5.6504,
      "step": 65500
    },
    {
      "epoch": 0.35159496260308126,
      "grad_norm": 1.7445604801177979,
      "learning_rate": 0.0008867863614959832,
      "loss": 5.6457,
      "step": 66000
    },
    {
      "epoch": 0.35425856080461976,
      "grad_norm": 1.7039164304733276,
      "learning_rate": 0.0008858945363222557,
      "loss": 5.652,
      "step": 66500
    },
    {
      "epoch": 0.35692215900615826,
      "grad_norm": 1.7117230892181396,
      "learning_rate": 0.0008850027111485282,
      "loss": 5.6456,
      "step": 67000
    },
    {
      "epoch": 0.35958575720769675,
      "grad_norm": 1.8759076595306396,
      "learning_rate": 0.000884112669625148,
      "loss": 5.6504,
      "step": 67500
    },
    {
      "epoch": 0.36224935540923525,
      "grad_norm": 1.5524253845214844,
      "learning_rate": 0.0008832208444514205,
      "loss": 5.6426,
      "step": 68000
    },
    {
      "epoch": 0.36491295361077375,
      "grad_norm": 1.648575782775879,
      "learning_rate": 0.0008823290192776929,
      "loss": 5.6401,
      "step": 68500
    },
    {
      "epoch": 0.3675765518123122,
      "grad_norm": 1.6062759160995483,
      "learning_rate": 0.0008814371941039654,
      "loss": 5.6466,
      "step": 69000
    },
    {
      "epoch": 0.3702401500138507,
      "grad_norm": 1.5237386226654053,
      "learning_rate": 0.0008805471525805854,
      "loss": 5.6381,
      "step": 69500
    },
    {
      "epoch": 0.3729037482153892,
      "grad_norm": 1.7291427850723267,
      "learning_rate": 0.0008796553274068578,
      "loss": 5.6337,
      "step": 70000
    },
    {
      "epoch": 0.3755673464169277,
      "grad_norm": 1.875213623046875,
      "learning_rate": 0.0008787635022331303,
      "loss": 5.6356,
      "step": 70500
    },
    {
      "epoch": 0.3782309446184662,
      "grad_norm": 1.8453514575958252,
      "learning_rate": 0.0008778716770594027,
      "loss": 5.6348,
      "step": 71000
    },
    {
      "epoch": 0.3808945428200047,
      "grad_norm": 1.725234866142273,
      "learning_rate": 0.0008769816355360227,
      "loss": 5.6318,
      "step": 71500
    },
    {
      "epoch": 0.3835581410215432,
      "grad_norm": 1.7739455699920654,
      "learning_rate": 0.0008760898103622951,
      "loss": 5.6296,
      "step": 72000
    },
    {
      "epoch": 0.3862217392230817,
      "grad_norm": 1.683827519416809,
      "learning_rate": 0.0008751979851885675,
      "loss": 5.6357,
      "step": 72500
    },
    {
      "epoch": 0.38888533742462017,
      "grad_norm": 1.5576590299606323,
      "learning_rate": 0.00087430616001484,
      "loss": 5.63,
      "step": 73000
    },
    {
      "epoch": 0.39154893562615867,
      "grad_norm": 1.666030764579773,
      "learning_rate": 0.0008734161184914598,
      "loss": 5.6178,
      "step": 73500
    },
    {
      "epoch": 0.39421253382769716,
      "grad_norm": 1.618916392326355,
      "learning_rate": 0.0008725242933177324,
      "loss": 5.6273,
      "step": 74000
    },
    {
      "epoch": 0.39687613202923566,
      "grad_norm": 1.69428551197052,
      "learning_rate": 0.0008716324681440048,
      "loss": 5.6188,
      "step": 74500
    },
    {
      "epoch": 0.39953973023077416,
      "grad_norm": 1.8516380786895752,
      "learning_rate": 0.0008707406429702772,
      "loss": 5.6235,
      "step": 75000
    },
    {
      "epoch": 0.40220332843231266,
      "grad_norm": 1.505953311920166,
      "learning_rate": 0.0008698506014468972,
      "loss": 5.6175,
      "step": 75500
    },
    {
      "epoch": 0.40486692663385115,
      "grad_norm": 1.5639010667800903,
      "learning_rate": 0.0008689587762731696,
      "loss": 5.6213,
      "step": 76000
    },
    {
      "epoch": 0.40753052483538965,
      "grad_norm": 1.7431727647781372,
      "learning_rate": 0.0008680669510994421,
      "loss": 5.6198,
      "step": 76500
    },
    {
      "epoch": 0.41019412303692815,
      "grad_norm": 1.676757574081421,
      "learning_rate": 0.0008671751259257146,
      "loss": 5.6252,
      "step": 77000
    },
    {
      "epoch": 0.41285772123846665,
      "grad_norm": 1.6216061115264893,
      "learning_rate": 0.0008662850844023345,
      "loss": 5.6211,
      "step": 77500
    },
    {
      "epoch": 0.4155213194400051,
      "grad_norm": 1.6766453981399536,
      "learning_rate": 0.0008653932592286069,
      "loss": 5.62,
      "step": 78000
    },
    {
      "epoch": 0.4181849176415436,
      "grad_norm": 1.6790215969085693,
      "learning_rate": 0.0008645014340548793,
      "loss": 5.6093,
      "step": 78500
    },
    {
      "epoch": 0.4208485158430821,
      "grad_norm": 1.8037434816360474,
      "learning_rate": 0.0008636096088811518,
      "loss": 5.6085,
      "step": 79000
    },
    {
      "epoch": 0.4235121140446206,
      "grad_norm": 1.6324502229690552,
      "learning_rate": 0.0008627195673577717,
      "loss": 5.6031,
      "step": 79500
    },
    {
      "epoch": 0.4261757122461591,
      "grad_norm": 1.6987981796264648,
      "learning_rate": 0.0008618277421840443,
      "loss": 5.6116,
      "step": 80000
    },
    {
      "epoch": 0.4288393104476976,
      "grad_norm": 1.6692321300506592,
      "learning_rate": 0.0008609359170103167,
      "loss": 5.6062,
      "step": 80500
    },
    {
      "epoch": 0.43150290864923607,
      "grad_norm": 1.6387773752212524,
      "learning_rate": 0.0008600440918365891,
      "loss": 5.6087,
      "step": 81000
    },
    {
      "epoch": 0.43416650685077457,
      "grad_norm": 1.792861819267273,
      "learning_rate": 0.000859154050313209,
      "loss": 5.608,
      "step": 81500
    },
    {
      "epoch": 0.43683010505231307,
      "grad_norm": 1.676076889038086,
      "learning_rate": 0.0008582622251394815,
      "loss": 5.6056,
      "step": 82000
    },
    {
      "epoch": 0.43949370325385156,
      "grad_norm": 1.772159218788147,
      "learning_rate": 0.000857370399965754,
      "loss": 5.6015,
      "step": 82500
    },
    {
      "epoch": 0.44215730145539006,
      "grad_norm": 1.7022145986557007,
      "learning_rate": 0.0008564785747920264,
      "loss": 5.6056,
      "step": 83000
    },
    {
      "epoch": 0.44482089965692856,
      "grad_norm": 1.6428086757659912,
      "learning_rate": 0.0008555885332686463,
      "loss": 5.596,
      "step": 83500
    },
    {
      "epoch": 0.44748449785846706,
      "grad_norm": 1.6144286394119263,
      "learning_rate": 0.0008546967080949187,
      "loss": 5.5974,
      "step": 84000
    },
    {
      "epoch": 0.45014809606000555,
      "grad_norm": 1.5918573141098022,
      "learning_rate": 0.0008538048829211912,
      "loss": 5.604,
      "step": 84500
    },
    {
      "epoch": 0.45281169426154405,
      "grad_norm": 1.7871578931808472,
      "learning_rate": 0.0008529130577474637,
      "loss": 5.5951,
      "step": 85000
    },
    {
      "epoch": 0.45547529246308255,
      "grad_norm": 1.6631501913070679,
      "learning_rate": 0.0008520230162240836,
      "loss": 5.6014,
      "step": 85500
    },
    {
      "epoch": 0.45813889066462105,
      "grad_norm": 1.6243520975112915,
      "learning_rate": 0.0008511311910503561,
      "loss": 5.5942,
      "step": 86000
    },
    {
      "epoch": 0.46080248886615954,
      "grad_norm": 1.5686520338058472,
      "learning_rate": 0.0008502393658766285,
      "loss": 5.5981,
      "step": 86500
    },
    {
      "epoch": 0.463466087067698,
      "grad_norm": 1.7691351175308228,
      "learning_rate": 0.0008493475407029009,
      "loss": 5.5984,
      "step": 87000
    },
    {
      "epoch": 0.4661296852692365,
      "grad_norm": 1.6885465383529663,
      "learning_rate": 0.0008484574991795209,
      "loss": 5.5851,
      "step": 87500
    },
    {
      "epoch": 0.468793283470775,
      "grad_norm": 1.6488664150238037,
      "learning_rate": 0.0008475656740057933,
      "loss": 5.5831,
      "step": 88000
    },
    {
      "epoch": 0.4714568816723135,
      "grad_norm": 1.5736653804779053,
      "learning_rate": 0.0008466738488320658,
      "loss": 5.582,
      "step": 88500
    },
    {
      "epoch": 0.474120479873852,
      "grad_norm": 1.7857962846755981,
      "learning_rate": 0.0008457820236583382,
      "loss": 5.5901,
      "step": 89000
    },
    {
      "epoch": 0.47678407807539047,
      "grad_norm": 1.7936720848083496,
      "learning_rate": 0.0008448919821349581,
      "loss": 5.5822,
      "step": 89500
    },
    {
      "epoch": 0.47944767627692897,
      "grad_norm": 1.546919345855713,
      "learning_rate": 0.0008440001569612306,
      "loss": 5.581,
      "step": 90000
    },
    {
      "epoch": 0.48211127447846747,
      "grad_norm": 1.778827428817749,
      "learning_rate": 0.000843108331787503,
      "loss": 5.5922,
      "step": 90500
    },
    {
      "epoch": 0.48477487268000596,
      "grad_norm": 1.495205044746399,
      "learning_rate": 0.0008422165066137755,
      "loss": 5.5821,
      "step": 91000
    },
    {
      "epoch": 0.48743847088154446,
      "grad_norm": 1.6151823997497559,
      "learning_rate": 0.0008413264650903954,
      "loss": 5.5801,
      "step": 91500
    },
    {
      "epoch": 0.49010206908308296,
      "grad_norm": 1.7652384042739868,
      "learning_rate": 0.0008404346399166679,
      "loss": 5.5785,
      "step": 92000
    },
    {
      "epoch": 0.49276566728462146,
      "grad_norm": 1.7062280178070068,
      "learning_rate": 0.0008395428147429404,
      "loss": 5.5784,
      "step": 92500
    },
    {
      "epoch": 0.49542926548615995,
      "grad_norm": 1.5986762046813965,
      "learning_rate": 0.0008386509895692128,
      "loss": 5.5814,
      "step": 93000
    },
    {
      "epoch": 0.49809286368769845,
      "grad_norm": 1.672861933708191,
      "learning_rate": 0.0008377609480458327,
      "loss": 5.5743,
      "step": 93500
    },
    {
      "epoch": 0.500756461889237,
      "grad_norm": 1.8104331493377686,
      "learning_rate": 0.0008368691228721051,
      "loss": 5.5709,
      "step": 94000
    },
    {
      "epoch": 0.5034200600907754,
      "grad_norm": 1.8253047466278076,
      "learning_rate": 0.0008359772976983776,
      "loss": 5.5642,
      "step": 94500
    },
    {
      "epoch": 0.5060836582923139,
      "grad_norm": 1.604465126991272,
      "learning_rate": 0.0008350854725246501,
      "loss": 5.5691,
      "step": 95000
    },
    {
      "epoch": 0.5087472564938524,
      "grad_norm": 1.7985742092132568,
      "learning_rate": 0.00083419543100127,
      "loss": 5.5611,
      "step": 95500
    },
    {
      "epoch": 0.5114108546953909,
      "grad_norm": 1.652733325958252,
      "learning_rate": 0.0008333036058275424,
      "loss": 5.5577,
      "step": 96000
    },
    {
      "epoch": 0.5140744528969294,
      "grad_norm": 1.8247016668319702,
      "learning_rate": 0.0008324117806538148,
      "loss": 5.5557,
      "step": 96500
    },
    {
      "epoch": 0.5167380510984679,
      "grad_norm": 1.784303069114685,
      "learning_rate": 0.0008315199554800873,
      "loss": 5.5554,
      "step": 97000
    },
    {
      "epoch": 0.5194016493000064,
      "grad_norm": 1.705725073814392,
      "learning_rate": 0.0008306299139567072,
      "loss": 5.5545,
      "step": 97500
    },
    {
      "epoch": 0.5220652475015449,
      "grad_norm": 1.8760724067687988,
      "learning_rate": 0.0008297380887829798,
      "loss": 5.5512,
      "step": 98000
    },
    {
      "epoch": 0.5247288457030834,
      "grad_norm": 1.7412986755371094,
      "learning_rate": 0.0008288462636092522,
      "loss": 5.5522,
      "step": 98500
    },
    {
      "epoch": 0.5273924439046219,
      "grad_norm": 2.0051610469818115,
      "learning_rate": 0.0008279544384355246,
      "loss": 5.5403,
      "step": 99000
    },
    {
      "epoch": 0.5300560421061604,
      "grad_norm": 1.6867221593856812,
      "learning_rate": 0.0008270643969121445,
      "loss": 5.544,
      "step": 99500
    },
    {
      "epoch": 0.5327196403076989,
      "grad_norm": 1.838189721107483,
      "learning_rate": 0.0008261725717384169,
      "loss": 5.5396,
      "step": 100000
    },
    {
      "epoch": 0.5353832385092374,
      "grad_norm": 1.655271291732788,
      "learning_rate": 0.0008252807465646895,
      "loss": 5.5358,
      "step": 100500
    },
    {
      "epoch": 0.5380468367107758,
      "grad_norm": 1.8378669023513794,
      "learning_rate": 0.0008243889213909619,
      "loss": 5.5419,
      "step": 101000
    },
    {
      "epoch": 0.5407104349123143,
      "grad_norm": 1.7509022951126099,
      "learning_rate": 0.0008234988798675818,
      "loss": 5.523,
      "step": 101500
    },
    {
      "epoch": 0.5433740331138528,
      "grad_norm": 1.9558390378952026,
      "learning_rate": 0.0008226070546938542,
      "loss": 5.5322,
      "step": 102000
    },
    {
      "epoch": 0.5460376313153913,
      "grad_norm": 2.0113561153411865,
      "learning_rate": 0.0008217152295201266,
      "loss": 5.5303,
      "step": 102500
    },
    {
      "epoch": 0.5487012295169298,
      "grad_norm": 1.989725112915039,
      "learning_rate": 0.0008208234043463993,
      "loss": 5.5257,
      "step": 103000
    },
    {
      "epoch": 0.5513648277184683,
      "grad_norm": 1.702812671661377,
      "learning_rate": 0.0008199315791726717,
      "loss": 5.5327,
      "step": 103500
    },
    {
      "epoch": 0.5540284259200068,
      "grad_norm": 1.8519411087036133,
      "learning_rate": 0.0008190397539989441,
      "loss": 5.5272,
      "step": 104000
    },
    {
      "epoch": 0.5566920241215453,
      "grad_norm": 1.856350064277649,
      "learning_rate": 0.0008181479288252166,
      "loss": 5.5211,
      "step": 104500
    },
    {
      "epoch": 0.5593556223230838,
      "grad_norm": 1.7010074853897095,
      "learning_rate": 0.000817256103651489,
      "loss": 5.5287,
      "step": 105000
    },
    {
      "epoch": 0.5620192205246223,
      "grad_norm": 1.6479413509368896,
      "learning_rate": 0.000816366062128109,
      "loss": 5.5279,
      "step": 105500
    },
    {
      "epoch": 0.5646828187261608,
      "grad_norm": 1.9108966588974,
      "learning_rate": 0.0008154742369543814,
      "loss": 5.5203,
      "step": 106000
    },
    {
      "epoch": 0.5673464169276993,
      "grad_norm": 1.9142667055130005,
      "learning_rate": 0.0008145824117806538,
      "loss": 5.5189,
      "step": 106500
    },
    {
      "epoch": 0.5700100151292378,
      "grad_norm": 1.8495519161224365,
      "learning_rate": 0.0008136905866069263,
      "loss": 5.5196,
      "step": 107000
    },
    {
      "epoch": 0.5726736133307763,
      "grad_norm": 2.063087224960327,
      "learning_rate": 0.0008128005450835461,
      "loss": 5.5132,
      "step": 107500
    },
    {
      "epoch": 0.5753372115323148,
      "grad_norm": 2.0009357929229736,
      "learning_rate": 0.0008119087199098186,
      "loss": 5.5177,
      "step": 108000
    },
    {
      "epoch": 0.5780008097338533,
      "grad_norm": 2.0125739574432373,
      "learning_rate": 0.0008110168947360911,
      "loss": 5.5112,
      "step": 108500
    },
    {
      "epoch": 0.5806644079353918,
      "grad_norm": 1.8415509462356567,
      "learning_rate": 0.0008101250695623635,
      "loss": 5.509,
      "step": 109000
    },
    {
      "epoch": 0.5833280061369303,
      "grad_norm": 1.7688753604888916,
      "learning_rate": 0.0008092350280389835,
      "loss": 5.5032,
      "step": 109500
    },
    {
      "epoch": 0.5859916043384688,
      "grad_norm": 1.8354215621948242,
      "learning_rate": 0.000808343202865256,
      "loss": 5.5129,
      "step": 110000
    },
    {
      "epoch": 0.5886552025400072,
      "grad_norm": 2.036357879638672,
      "learning_rate": 0.0008074513776915284,
      "loss": 5.5043,
      "step": 110500
    },
    {
      "epoch": 0.5913188007415457,
      "grad_norm": 1.8382165431976318,
      "learning_rate": 0.0008065595525178009,
      "loss": 5.5065,
      "step": 111000
    },
    {
      "epoch": 0.5939823989430842,
      "grad_norm": 2.001885175704956,
      "learning_rate": 0.0008056695109944208,
      "loss": 5.507,
      "step": 111500
    },
    {
      "epoch": 0.5966459971446227,
      "grad_norm": 1.872819423675537,
      "learning_rate": 0.0008047776858206932,
      "loss": 5.5081,
      "step": 112000
    },
    {
      "epoch": 0.5993095953461612,
      "grad_norm": 1.8629109859466553,
      "learning_rate": 0.0008038858606469656,
      "loss": 5.5078,
      "step": 112500
    },
    {
      "epoch": 0.6019731935476997,
      "grad_norm": 2.0044994354248047,
      "learning_rate": 0.0008029940354732381,
      "loss": 5.498,
      "step": 113000
    },
    {
      "epoch": 0.6046367917492382,
      "grad_norm": 1.9607182741165161,
      "learning_rate": 0.000802103993949858,
      "loss": 5.5092,
      "step": 113500
    },
    {
      "epoch": 0.6073003899507767,
      "grad_norm": 1.9605486392974854,
      "learning_rate": 0.0008012121687761305,
      "loss": 5.5013,
      "step": 114000
    },
    {
      "epoch": 0.6099639881523152,
      "grad_norm": 1.999872088432312,
      "learning_rate": 0.0008003203436024029,
      "loss": 5.497,
      "step": 114500
    },
    {
      "epoch": 0.6126275863538537,
      "grad_norm": 1.7834984064102173,
      "learning_rate": 0.0007994285184286753,
      "loss": 5.5001,
      "step": 115000
    },
    {
      "epoch": 0.6152911845553922,
      "grad_norm": 1.9666252136230469,
      "learning_rate": 0.0007985384769052953,
      "loss": 5.5004,
      "step": 115500
    },
    {
      "epoch": 0.6179547827569307,
      "grad_norm": 1.810936450958252,
      "learning_rate": 0.0007976484353819152,
      "loss": 5.4934,
      "step": 116000
    },
    {
      "epoch": 0.6206183809584692,
      "grad_norm": 1.8183609247207642,
      "learning_rate": 0.0007967566102081877,
      "loss": 5.4999,
      "step": 116500
    },
    {
      "epoch": 0.6232819791600077,
      "grad_norm": 2.1452646255493164,
      "learning_rate": 0.0007958647850344601,
      "loss": 5.4937,
      "step": 117000
    },
    {
      "epoch": 0.6259455773615462,
      "grad_norm": 1.984305739402771,
      "learning_rate": 0.0007949729598607326,
      "loss": 5.494,
      "step": 117500
    },
    {
      "epoch": 0.6286091755630847,
      "grad_norm": 2.1507790088653564,
      "learning_rate": 0.000794081134687005,
      "loss": 5.4915,
      "step": 118000
    },
    {
      "epoch": 0.6312727737646232,
      "grad_norm": 1.821390151977539,
      "learning_rate": 0.0007931910931636249,
      "loss": 5.4948,
      "step": 118500
    },
    {
      "epoch": 0.6339363719661616,
      "grad_norm": 1.901696801185608,
      "learning_rate": 0.0007922992679898974,
      "loss": 5.4944,
      "step": 119000
    },
    {
      "epoch": 0.6365999701677001,
      "grad_norm": 2.214447259902954,
      "learning_rate": 0.0007914074428161698,
      "loss": 5.4901,
      "step": 119500
    },
    {
      "epoch": 0.6392635683692386,
      "grad_norm": 1.8764078617095947,
      "learning_rate": 0.0007905156176424423,
      "loss": 5.4837,
      "step": 120000
    },
    {
      "epoch": 0.6419271665707771,
      "grad_norm": 1.9411547183990479,
      "learning_rate": 0.0007896237924687147,
      "loss": 5.4889,
      "step": 120500
    },
    {
      "epoch": 0.6445907647723156,
      "grad_norm": 1.8323979377746582,
      "learning_rate": 0.0007887319672949871,
      "loss": 5.49,
      "step": 121000
    },
    {
      "epoch": 0.6472543629738541,
      "grad_norm": 1.8666421175003052,
      "learning_rate": 0.0007878401421212597,
      "loss": 5.4911,
      "step": 121500
    },
    {
      "epoch": 0.6499179611753926,
      "grad_norm": 2.0501484870910645,
      "learning_rate": 0.0007869483169475321,
      "loss": 5.4894,
      "step": 122000
    },
    {
      "epoch": 0.6525815593769311,
      "grad_norm": 1.8784074783325195,
      "learning_rate": 0.0007860600590744995,
      "loss": 5.4911,
      "step": 122500
    },
    {
      "epoch": 0.6552451575784696,
      "grad_norm": 1.9021259546279907,
      "learning_rate": 0.000785168233900772,
      "loss": 5.4844,
      "step": 123000
    },
    {
      "epoch": 0.6579087557800081,
      "grad_norm": 2.053755283355713,
      "learning_rate": 0.0007842764087270444,
      "loss": 5.4884,
      "step": 123500
    },
    {
      "epoch": 0.6605723539815466,
      "grad_norm": 1.9320204257965088,
      "learning_rate": 0.0007833845835533169,
      "loss": 5.4822,
      "step": 124000
    },
    {
      "epoch": 0.6632359521830851,
      "grad_norm": 1.793219804763794,
      "learning_rate": 0.0007824945420299368,
      "loss": 5.4834,
      "step": 124500
    },
    {
      "epoch": 0.6658995503846236,
      "grad_norm": 2.0100185871124268,
      "learning_rate": 0.0007816027168562092,
      "loss": 5.4872,
      "step": 125000
    },
    {
      "epoch": 0.6685631485861621,
      "grad_norm": 2.0543274879455566,
      "learning_rate": 0.0007807108916824816,
      "loss": 5.4826,
      "step": 125500
    },
    {
      "epoch": 0.6712267467877006,
      "grad_norm": 1.9622262716293335,
      "learning_rate": 0.0007798190665087542,
      "loss": 5.4809,
      "step": 126000
    },
    {
      "epoch": 0.673890344989239,
      "grad_norm": 1.918966293334961,
      "learning_rate": 0.0007789272413350267,
      "loss": 5.4823,
      "step": 126500
    },
    {
      "epoch": 0.6765539431907776,
      "grad_norm": 1.8516751527786255,
      "learning_rate": 0.0007780354161612992,
      "loss": 5.4786,
      "step": 127000
    },
    {
      "epoch": 0.679217541392316,
      "grad_norm": 1.8985280990600586,
      "learning_rate": 0.000777145374637919,
      "loss": 5.4762,
      "step": 127500
    },
    {
      "epoch": 0.6818811395938545,
      "grad_norm": 2.030210018157959,
      "learning_rate": 0.0007762535494641915,
      "loss": 5.4786,
      "step": 128000
    },
    {
      "epoch": 0.684544737795393,
      "grad_norm": 1.9270013570785522,
      "learning_rate": 0.0007753617242904639,
      "loss": 5.4801,
      "step": 128500
    },
    {
      "epoch": 0.6872083359969315,
      "grad_norm": 1.7799612283706665,
      "learning_rate": 0.0007744698991167364,
      "loss": 5.4715,
      "step": 129000
    },
    {
      "epoch": 0.68987193419847,
      "grad_norm": 2.1841835975646973,
      "learning_rate": 0.0007735780739430089,
      "loss": 5.4726,
      "step": 129500
    },
    {
      "epoch": 0.6925355324000085,
      "grad_norm": 1.970680594444275,
      "learning_rate": 0.0007726862487692813,
      "loss": 5.4751,
      "step": 130000
    },
    {
      "epoch": 0.695199130601547,
      "grad_norm": 2.1457014083862305,
      "learning_rate": 0.0007717944235955537,
      "loss": 5.4754,
      "step": 130500
    },
    {
      "epoch": 0.6978627288030855,
      "grad_norm": 1.8095160722732544,
      "learning_rate": 0.0007709025984218262,
      "loss": 5.4723,
      "step": 131000
    },
    {
      "epoch": 0.700526327004624,
      "grad_norm": 1.8374313116073608,
      "learning_rate": 0.000770012556898446,
      "loss": 5.4774,
      "step": 131500
    },
    {
      "epoch": 0.7031899252061625,
      "grad_norm": 1.8603581190109253,
      "learning_rate": 0.0007691207317247186,
      "loss": 5.477,
      "step": 132000
    },
    {
      "epoch": 0.705853523407701,
      "grad_norm": 1.9838221073150635,
      "learning_rate": 0.0007682306902013385,
      "loss": 5.4732,
      "step": 132500
    },
    {
      "epoch": 0.7085171216092395,
      "grad_norm": 1.9500114917755127,
      "learning_rate": 0.000767338865027611,
      "loss": 5.4742,
      "step": 133000
    },
    {
      "epoch": 0.711180719810778,
      "grad_norm": 1.9748975038528442,
      "learning_rate": 0.0007664470398538834,
      "loss": 5.4675,
      "step": 133500
    },
    {
      "epoch": 0.7138443180123165,
      "grad_norm": 1.7860807180404663,
      "learning_rate": 0.0007655552146801558,
      "loss": 5.4711,
      "step": 134000
    },
    {
      "epoch": 0.716507916213855,
      "grad_norm": 2.076504945755005,
      "learning_rate": 0.0007646633895064284,
      "loss": 5.4691,
      "step": 134500
    },
    {
      "epoch": 0.7191715144153935,
      "grad_norm": 2.1392953395843506,
      "learning_rate": 0.0007637715643327008,
      "loss": 5.4763,
      "step": 135000
    },
    {
      "epoch": 0.721835112616932,
      "grad_norm": 1.7750567197799683,
      "learning_rate": 0.0007628797391589732,
      "loss": 5.4624,
      "step": 135500
    },
    {
      "epoch": 0.7244987108184705,
      "grad_norm": 2.1746318340301514,
      "learning_rate": 0.0007619879139852457,
      "loss": 5.4632,
      "step": 136000
    },
    {
      "epoch": 0.727162309020009,
      "grad_norm": 1.9568692445755005,
      "learning_rate": 0.0007610978724618655,
      "loss": 5.4702,
      "step": 136500
    },
    {
      "epoch": 0.7298259072215475,
      "grad_norm": 1.940618634223938,
      "learning_rate": 0.0007602060472881381,
      "loss": 5.4682,
      "step": 137000
    },
    {
      "epoch": 0.7324895054230859,
      "grad_norm": 2.0432674884796143,
      "learning_rate": 0.0007593142221144105,
      "loss": 5.4661,
      "step": 137500
    },
    {
      "epoch": 0.7351531036246244,
      "grad_norm": 1.989637017250061,
      "learning_rate": 0.0007584223969406829,
      "loss": 5.4643,
      "step": 138000
    },
    {
      "epoch": 0.7378167018261629,
      "grad_norm": 1.7842735052108765,
      "learning_rate": 0.0007575305717669554,
      "loss": 5.4633,
      "step": 138500
    },
    {
      "epoch": 0.7404803000277014,
      "grad_norm": 2.000488519668579,
      "learning_rate": 0.0007566405302435752,
      "loss": 5.4645,
      "step": 139000
    },
    {
      "epoch": 0.7431438982292399,
      "grad_norm": 1.9219857454299927,
      "learning_rate": 0.0007557487050698478,
      "loss": 5.4587,
      "step": 139500
    },
    {
      "epoch": 0.7458074964307784,
      "grad_norm": 1.8964563608169556,
      "learning_rate": 0.0007548568798961202,
      "loss": 5.4594,
      "step": 140000
    },
    {
      "epoch": 0.7484710946323169,
      "grad_norm": 2.0744431018829346,
      "learning_rate": 0.0007539650547223926,
      "loss": 5.4677,
      "step": 140500
    },
    {
      "epoch": 0.7511346928338554,
      "grad_norm": 2.0807344913482666,
      "learning_rate": 0.0007530732295486651,
      "loss": 5.4594,
      "step": 141000
    },
    {
      "epoch": 0.7537982910353939,
      "grad_norm": 1.9063740968704224,
      "learning_rate": 0.0007521814043749375,
      "loss": 5.4614,
      "step": 141500
    },
    {
      "epoch": 0.7564618892369324,
      "grad_norm": 1.8823788166046143,
      "learning_rate": 0.0007512913628515576,
      "loss": 5.4612,
      "step": 142000
    },
    {
      "epoch": 0.7591254874384709,
      "grad_norm": 2.027939558029175,
      "learning_rate": 0.00075039953767783,
      "loss": 5.457,
      "step": 142500
    },
    {
      "epoch": 0.7617890856400094,
      "grad_norm": 1.956814169883728,
      "learning_rate": 0.0007495077125041024,
      "loss": 5.4561,
      "step": 143000
    },
    {
      "epoch": 0.7644526838415479,
      "grad_norm": 1.8203577995300293,
      "learning_rate": 0.0007486158873303749,
      "loss": 5.4612,
      "step": 143500
    },
    {
      "epoch": 0.7671162820430864,
      "grad_norm": 2.0049407482147217,
      "learning_rate": 0.0007477240621566473,
      "loss": 5.4572,
      "step": 144000
    },
    {
      "epoch": 0.7697798802446248,
      "grad_norm": 2.0092926025390625,
      "learning_rate": 0.0007468322369829198,
      "loss": 5.4566,
      "step": 144500
    },
    {
      "epoch": 0.7724434784461633,
      "grad_norm": 1.9448853731155396,
      "learning_rate": 0.0007459421954595397,
      "loss": 5.4567,
      "step": 145000
    },
    {
      "epoch": 0.7751070766477018,
      "grad_norm": 1.9080660343170166,
      "learning_rate": 0.0007450503702858121,
      "loss": 5.4529,
      "step": 145500
    },
    {
      "epoch": 0.7777706748492403,
      "grad_norm": 2.0922887325286865,
      "learning_rate": 0.0007441585451120846,
      "loss": 5.4594,
      "step": 146000
    },
    {
      "epoch": 0.7804342730507788,
      "grad_norm": 2.102870464324951,
      "learning_rate": 0.000743266719938357,
      "loss": 5.4533,
      "step": 146500
    },
    {
      "epoch": 0.7830978712523173,
      "grad_norm": 1.8905880451202393,
      "learning_rate": 0.0007423748947646295,
      "loss": 5.4512,
      "step": 147000
    },
    {
      "epoch": 0.7857614694538558,
      "grad_norm": 1.937587857246399,
      "learning_rate": 0.000741483069590902,
      "loss": 5.4577,
      "step": 147500
    },
    {
      "epoch": 0.7884250676553943,
      "grad_norm": 2.2599427700042725,
      "learning_rate": 0.0007405912444171744,
      "loss": 5.4545,
      "step": 148000
    },
    {
      "epoch": 0.7910886658569328,
      "grad_norm": 2.1247055530548096,
      "learning_rate": 0.0007396994192434468,
      "loss": 5.4552,
      "step": 148500
    },
    {
      "epoch": 0.7937522640584713,
      "grad_norm": 1.8920656442642212,
      "learning_rate": 0.0007388093777200668,
      "loss": 5.4551,
      "step": 149000
    },
    {
      "epoch": 0.7964158622600098,
      "grad_norm": 2.05411696434021,
      "learning_rate": 0.0007379175525463393,
      "loss": 5.4581,
      "step": 149500
    },
    {
      "epoch": 0.7990794604615483,
      "grad_norm": 2.1096110343933105,
      "learning_rate": 0.0007370257273726118,
      "loss": 5.4553,
      "step": 150000
    },
    {
      "epoch": 0.8017430586630868,
      "grad_norm": 2.060760736465454,
      "learning_rate": 0.0007361339021988842,
      "loss": 5.4557,
      "step": 150500
    },
    {
      "epoch": 0.8044066568646253,
      "grad_norm": 1.7533081769943237,
      "learning_rate": 0.0007352438606755041,
      "loss": 5.4596,
      "step": 151000
    },
    {
      "epoch": 0.8070702550661638,
      "grad_norm": 1.948110580444336,
      "learning_rate": 0.0007343520355017765,
      "loss": 5.4581,
      "step": 151500
    },
    {
      "epoch": 0.8097338532677023,
      "grad_norm": 2.0876693725585938,
      "learning_rate": 0.000733460210328049,
      "loss": 5.4517,
      "step": 152000
    },
    {
      "epoch": 0.8123974514692408,
      "grad_norm": 1.8972123861312866,
      "learning_rate": 0.0007325701688046689,
      "loss": 5.4529,
      "step": 152500
    },
    {
      "epoch": 0.8150610496707793,
      "grad_norm": 2.0049657821655273,
      "learning_rate": 0.0007316783436309413,
      "loss": 5.4506,
      "step": 153000
    },
    {
      "epoch": 0.8177246478723178,
      "grad_norm": 1.9599244594573975,
      "learning_rate": 0.0007307865184572138,
      "loss": 5.4503,
      "step": 153500
    },
    {
      "epoch": 0.8203882460738563,
      "grad_norm": 2.090162992477417,
      "learning_rate": 0.0007298946932834862,
      "loss": 5.4487,
      "step": 154000
    },
    {
      "epoch": 0.8230518442753948,
      "grad_norm": 1.9685425758361816,
      "learning_rate": 0.0007290028681097586,
      "loss": 5.4459,
      "step": 154500
    },
    {
      "epoch": 0.8257154424769333,
      "grad_norm": 2.0231292247772217,
      "learning_rate": 0.0007281110429360312,
      "loss": 5.4519,
      "step": 155000
    },
    {
      "epoch": 0.8283790406784717,
      "grad_norm": 1.824242353439331,
      "learning_rate": 0.0007272192177623036,
      "loss": 5.4495,
      "step": 155500
    },
    {
      "epoch": 0.8310426388800102,
      "grad_norm": 1.8740367889404297,
      "learning_rate": 0.000726327392588576,
      "loss": 5.4514,
      "step": 156000
    },
    {
      "epoch": 0.8337062370815487,
      "grad_norm": 1.898790955543518,
      "learning_rate": 0.000725437351065196,
      "loss": 5.4442,
      "step": 156500
    },
    {
      "epoch": 0.8363698352830872,
      "grad_norm": 1.9713107347488403,
      "learning_rate": 0.0007245455258914684,
      "loss": 5.4481,
      "step": 157000
    },
    {
      "epoch": 0.8390334334846257,
      "grad_norm": 1.892471432685852,
      "learning_rate": 0.000723653700717741,
      "loss": 5.4514,
      "step": 157500
    },
    {
      "epoch": 0.8416970316861642,
      "grad_norm": 2.0477683544158936,
      "learning_rate": 0.0007227618755440134,
      "loss": 5.4402,
      "step": 158000
    },
    {
      "epoch": 0.8443606298877027,
      "grad_norm": 1.9651503562927246,
      "learning_rate": 0.0007218736176709807,
      "loss": 5.439,
      "step": 158500
    },
    {
      "epoch": 0.8470242280892412,
      "grad_norm": 1.9664440155029297,
      "learning_rate": 0.0007209817924972531,
      "loss": 5.4512,
      "step": 159000
    },
    {
      "epoch": 0.8496878262907797,
      "grad_norm": 1.9268772602081299,
      "learning_rate": 0.0007200899673235256,
      "loss": 5.4445,
      "step": 159500
    },
    {
      "epoch": 0.8523514244923182,
      "grad_norm": 2.0761542320251465,
      "learning_rate": 0.0007191981421497981,
      "loss": 5.4476,
      "step": 160000
    },
    {
      "epoch": 0.8550150226938567,
      "grad_norm": 2.080336570739746,
      "learning_rate": 0.0007183063169760705,
      "loss": 5.4472,
      "step": 160500
    },
    {
      "epoch": 0.8576786208953951,
      "grad_norm": 1.8157365322113037,
      "learning_rate": 0.000717414491802343,
      "loss": 5.4471,
      "step": 161000
    },
    {
      "epoch": 0.8603422190969336,
      "grad_norm": 1.7620859146118164,
      "learning_rate": 0.0007165226666286154,
      "loss": 5.4486,
      "step": 161500
    },
    {
      "epoch": 0.8630058172984721,
      "grad_norm": 1.8530540466308594,
      "learning_rate": 0.0007156326251052354,
      "loss": 5.4403,
      "step": 162000
    },
    {
      "epoch": 0.8656694155000106,
      "grad_norm": 1.91478431224823,
      "learning_rate": 0.0007147407999315079,
      "loss": 5.4453,
      "step": 162500
    },
    {
      "epoch": 0.8683330137015491,
      "grad_norm": 1.944806456565857,
      "learning_rate": 0.0007138489747577804,
      "loss": 5.4438,
      "step": 163000
    },
    {
      "epoch": 0.8709966119030876,
      "grad_norm": 1.941565752029419,
      "learning_rate": 0.0007129571495840528,
      "loss": 5.4403,
      "step": 163500
    },
    {
      "epoch": 0.8736602101046261,
      "grad_norm": 1.8101640939712524,
      "learning_rate": 0.0007120653244103252,
      "loss": 5.4352,
      "step": 164000
    },
    {
      "epoch": 0.8763238083061646,
      "grad_norm": 2.391594171524048,
      "learning_rate": 0.0007111752828869451,
      "loss": 5.4379,
      "step": 164500
    },
    {
      "epoch": 0.8789874065077031,
      "grad_norm": 1.946295142173767,
      "learning_rate": 0.0007102834577132175,
      "loss": 5.4385,
      "step": 165000
    },
    {
      "epoch": 0.8816510047092416,
      "grad_norm": 2.1615066528320312,
      "learning_rate": 0.00070939163253949,
      "loss": 5.4439,
      "step": 165500
    },
    {
      "epoch": 0.8843146029107801,
      "grad_norm": 2.0320687294006348,
      "learning_rate": 0.0007084998073657625,
      "loss": 5.4434,
      "step": 166000
    },
    {
      "epoch": 0.8869782011123186,
      "grad_norm": 1.8692481517791748,
      "learning_rate": 0.0007076079821920349,
      "loss": 5.437,
      "step": 166500
    },
    {
      "epoch": 0.8896417993138571,
      "grad_norm": 2.007511854171753,
      "learning_rate": 0.0007067161570183073,
      "loss": 5.4327,
      "step": 167000
    },
    {
      "epoch": 0.8923053975153956,
      "grad_norm": 2.02004337310791,
      "learning_rate": 0.0007058243318445799,
      "loss": 5.4393,
      "step": 167500
    },
    {
      "epoch": 0.8949689957169341,
      "grad_norm": 1.7644096612930298,
      "learning_rate": 0.0007049325066708523,
      "loss": 5.4304,
      "step": 168000
    },
    {
      "epoch": 0.8976325939184726,
| "grad_norm": 2.0698578357696533, | |
| "learning_rate": 0.0007040424651474723, | |
| "loss": 5.4301, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 0.9002961921200111, | |
| "grad_norm": 1.881465196609497, | |
| "learning_rate": 0.0007031506399737447, | |
| "loss": 5.4399, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 0.9029597903215496, | |
| "grad_norm": 2.0607750415802, | |
| "learning_rate": 0.0007022588148000172, | |
| "loss": 5.4311, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 0.9056233885230881, | |
| "grad_norm": 2.1066737174987793, | |
| "learning_rate": 0.0007013669896262897, | |
| "loss": 5.4348, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.9082869867246266, | |
| "grad_norm": 2.0234835147857666, | |
| "learning_rate": 0.0007004769481029096, | |
| "loss": 5.4337, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 0.9109505849261651, | |
| "grad_norm": 1.8877592086791992, | |
| "learning_rate": 0.000699585122929182, | |
| "loss": 5.4389, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 0.9136141831277036, | |
| "grad_norm": 2.117302417755127, | |
| "learning_rate": 0.0006986932977554544, | |
| "loss": 5.4333, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 0.9162777813292421, | |
| "grad_norm": 2.073172092437744, | |
| "learning_rate": 0.0006978014725817269, | |
| "loss": 5.4318, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 0.9189413795307806, | |
| "grad_norm": 2.064408540725708, | |
| "learning_rate": 0.0006969114310583467, | |
| "loss": 5.431, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 0.9216049777323191, | |
| "grad_norm": 1.9481194019317627, | |
| "learning_rate": 0.0006960196058846193, | |
| "loss": 5.4321, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 0.9242685759338576, | |
| "grad_norm": 2.010923147201538, | |
| "learning_rate": 0.0006951277807108917, | |
| "loss": 5.4342, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 0.926932174135396, | |
| "grad_norm": 1.9323519468307495, | |
| "learning_rate": 0.0006942359555371641, | |
| "loss": 5.4303, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 0.9295957723369345, | |
| "grad_norm": 2.2859385013580322, | |
| "learning_rate": 0.0006933459140137841, | |
| "loss": 5.4352, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 0.932259370538473, | |
| "grad_norm": 2.055107593536377, | |
| "learning_rate": 0.000692455872490404, | |
| "loss": 5.4352, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.9349229687400115, | |
| "grad_norm": 1.9875715970993042, | |
| "learning_rate": 0.0006915640473166765, | |
| "loss": 5.4392, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 0.93758656694155, | |
| "grad_norm": 2.097477912902832, | |
| "learning_rate": 0.0006906722221429489, | |
| "loss": 5.4291, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.9402501651430885, | |
| "grad_norm": 1.8664289712905884, | |
| "learning_rate": 0.0006897803969692214, | |
| "loss": 5.423, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 0.942913763344627, | |
| "grad_norm": 2.0907797813415527, | |
| "learning_rate": 0.0006888885717954938, | |
| "loss": 5.4322, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 0.9455773615461655, | |
| "grad_norm": 1.9234920740127563, | |
| "learning_rate": 0.0006879967466217662, | |
| "loss": 5.4303, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 0.948240959747704, | |
| "grad_norm": 2.0696797370910645, | |
| "learning_rate": 0.0006871049214480388, | |
| "loss": 5.4251, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 0.9509045579492424, | |
| "grad_norm": 2.0838043689727783, | |
| "learning_rate": 0.0006862130962743112, | |
| "loss": 5.4244, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 0.9535681561507809, | |
| "grad_norm": 2.1029279232025146, | |
| "learning_rate": 0.0006853230547509311, | |
| "loss": 5.4323, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 0.9562317543523194, | |
| "grad_norm": 2.1586649417877197, | |
| "learning_rate": 0.000684433013227551, | |
| "loss": 5.4329, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 0.9588953525538579, | |
| "grad_norm": 1.8636375665664673, | |
| "learning_rate": 0.0006835411880538235, | |
| "loss": 5.43, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.9615589507553964, | |
| "grad_norm": 1.9289181232452393, | |
| "learning_rate": 0.0006826493628800959, | |
| "loss": 5.4193, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 0.9642225489569349, | |
| "grad_norm": 1.9578914642333984, | |
| "learning_rate": 0.0006817575377063684, | |
| "loss": 5.4298, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 0.9668861471584734, | |
| "grad_norm": 2.0745270252227783, | |
| "learning_rate": 0.0006808657125326409, | |
| "loss": 5.4315, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 0.9695497453600119, | |
| "grad_norm": 1.9545907974243164, | |
| "learning_rate": 0.0006799738873589133, | |
| "loss": 5.425, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 0.9722133435615504, | |
| "grad_norm": 1.9709100723266602, | |
| "learning_rate": 0.0006790820621851857, | |
| "loss": 5.425, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 0.9748769417630889, | |
| "grad_norm": 1.8214976787567139, | |
| "learning_rate": 0.0006781902370114582, | |
| "loss": 5.4307, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 0.9775405399646274, | |
| "grad_norm": 1.8456212282180786, | |
| "learning_rate": 0.0006773001954880781, | |
| "loss": 5.4277, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 0.9802041381661659, | |
| "grad_norm": 2.0278677940368652, | |
| "learning_rate": 0.0006764083703143506, | |
| "loss": 5.425, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 0.9828677363677044, | |
| "grad_norm": 1.8401942253112793, | |
| "learning_rate": 0.000675516545140623, | |
| "loss": 5.4228, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 0.9855313345692429, | |
| "grad_norm": 2.0018155574798584, | |
| "learning_rate": 0.0006746247199668954, | |
| "loss": 5.4272, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.9881949327707814, | |
| "grad_norm": 1.9544193744659424, | |
| "learning_rate": 0.0006737346784435153, | |
| "loss": 5.4297, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 0.9908585309723199, | |
| "grad_norm": 1.8701244592666626, | |
| "learning_rate": 0.0006728428532697878, | |
| "loss": 5.4305, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 0.9935221291738584, | |
| "grad_norm": 1.9702414274215698, | |
| "learning_rate": 0.0006719510280960603, | |
| "loss": 5.4272, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 0.9961857273753969, | |
| "grad_norm": 2.005018472671509, | |
| "learning_rate": 0.0006710592029223327, | |
| "loss": 5.4259, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.9988493255769354, | |
| "grad_norm": 1.9745688438415527, | |
| "learning_rate": 0.0006701691613989527, | |
| "loss": 5.4255, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 1.001512923778474, | |
| "grad_norm": 2.119936466217041, | |
| "learning_rate": 0.0006692773362252251, | |
| "loss": 5.4282, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 1.0041765219800123, | |
| "grad_norm": 1.8192147016525269, | |
| "learning_rate": 0.0006683855110514976, | |
| "loss": 5.4272, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 1.006840120181551, | |
| "grad_norm": 2.0825536251068115, | |
| "learning_rate": 0.0006674936858777701, | |
| "loss": 5.4191, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 1.0095037183830893, | |
| "grad_norm": 2.034301519393921, | |
| "learning_rate": 0.0006666036443543899, | |
| "loss": 5.4212, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 1.0121673165846279, | |
| "grad_norm": 2.013160467147827, | |
| "learning_rate": 0.0006657118191806624, | |
| "loss": 5.4216, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 1.0148309147861663, | |
| "grad_norm": 1.9328818321228027, | |
| "learning_rate": 0.0006648199940069348, | |
| "loss": 5.4286, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 1.0174945129877049, | |
| "grad_norm": 2.011674642562866, | |
| "learning_rate": 0.0006639281688332073, | |
| "loss": 5.426, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 1.0201581111892433, | |
| "grad_norm": 2.1039912700653076, | |
| "learning_rate": 0.0006630381273098273, | |
| "loss": 5.4261, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 1.0228217093907819, | |
| "grad_norm": 1.8038475513458252, | |
| "learning_rate": 0.0006621480857864472, | |
| "loss": 5.4201, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 1.0254853075923203, | |
| "grad_norm": 1.8866719007492065, | |
| "learning_rate": 0.0006612562606127196, | |
| "loss": 5.4156, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 1.0281489057938589, | |
| "grad_norm": 1.9180611371994019, | |
| "learning_rate": 0.000660364435438992, | |
| "loss": 5.4219, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 1.0308125039953973, | |
| "grad_norm": 1.83159339427948, | |
| "learning_rate": 0.0006594726102652645, | |
| "loss": 5.4158, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 1.0334761021969359, | |
| "grad_norm": 1.8638277053833008, | |
| "learning_rate": 0.000658580785091537, | |
| "loss": 5.4196, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 1.0361397003984743, | |
| "grad_norm": 1.8679394721984863, | |
| "learning_rate": 0.0006576889599178094, | |
| "loss": 5.4221, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 1.0388032986000129, | |
| "grad_norm": 1.8080953359603882, | |
| "learning_rate": 0.0006567971347440819, | |
| "loss": 5.4168, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 1.0414668968015512, | |
| "grad_norm": 2.044064521789551, | |
| "learning_rate": 0.0006559053095703543, | |
| "loss": 5.4152, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 1.0441304950030899, | |
| "grad_norm": 2.067416191101074, | |
| "learning_rate": 0.0006550152680469742, | |
| "loss": 5.4197, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 1.0467940932046282, | |
| "grad_norm": 1.8547744750976562, | |
| "learning_rate": 0.0006541234428732467, | |
| "loss": 5.416, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 1.0494576914061668, | |
| "grad_norm": 2.1002390384674072, | |
| "learning_rate": 0.0006532316176995191, | |
| "loss": 5.414, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 1.0521212896077052, | |
| "grad_norm": 1.8542534112930298, | |
| "learning_rate": 0.0006523397925257916, | |
| "loss": 5.4176, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 1.0547848878092438, | |
| "grad_norm": 1.8873697519302368, | |
| "learning_rate": 0.000651447967352064, | |
| "loss": 5.4155, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 1.0574484860107822, | |
| "grad_norm": 2.0172159671783447, | |
| "learning_rate": 0.0006505561421783364, | |
| "loss": 5.4234, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 1.0601120842123208, | |
| "grad_norm": 1.9374735355377197, | |
| "learning_rate": 0.000649664317004609, | |
| "loss": 5.4131, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 1.0627756824138592, | |
| "grad_norm": 2.141655921936035, | |
| "learning_rate": 0.0006487724918308814, | |
| "loss": 5.4134, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 1.0654392806153978, | |
| "grad_norm": 1.9056235551834106, | |
| "learning_rate": 0.0006478824503075014, | |
| "loss": 5.4173, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 1.0681028788169362, | |
| "grad_norm": 2.3003177642822266, | |
| "learning_rate": 0.0006469906251337738, | |
| "loss": 5.4049, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 1.0707664770184748, | |
| "grad_norm": 2.1843066215515137, | |
| "learning_rate": 0.0006460987999600462, | |
| "loss": 5.411, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 1.0734300752200132, | |
| "grad_norm": 2.0827953815460205, | |
| "learning_rate": 0.0006452069747863188, | |
| "loss": 5.4175, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 1.0760936734215516, | |
| "grad_norm": 2.02587890625, | |
| "learning_rate": 0.0006443169332629386, | |
| "loss": 5.4183, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 1.0787572716230902, | |
| "grad_norm": 1.8049343824386597, | |
| "learning_rate": 0.0006434251080892111, | |
| "loss": 5.4142, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 1.0814208698246286, | |
| "grad_norm": 2.1238086223602295, | |
| "learning_rate": 0.0006425332829154835, | |
| "loss": 5.4155, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 1.0840844680261672, | |
| "grad_norm": 1.9311139583587646, | |
| "learning_rate": 0.0006416414577417559, | |
| "loss": 5.4132, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 1.0867480662277056, | |
| "grad_norm": 1.970428228378296, | |
| "learning_rate": 0.0006407514162183758, | |
| "loss": 5.4073, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 1.0894116644292442, | |
| "grad_norm": 1.7967313528060913, | |
| "learning_rate": 0.0006398595910446483, | |
| "loss": 5.4113, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 1.0920752626307826, | |
| "grad_norm": 1.7493606805801392, | |
| "learning_rate": 0.0006389677658709208, | |
| "loss": 5.4106, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 1.0947388608323212, | |
| "grad_norm": 1.868148922920227, | |
| "learning_rate": 0.0006380777243475407, | |
| "loss": 5.4125, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 1.0974024590338596, | |
| "grad_norm": 2.0261473655700684, | |
| "learning_rate": 0.0006371858991738132, | |
| "loss": 5.4119, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 1.1000660572353982, | |
| "grad_norm": 1.8863203525543213, | |
| "learning_rate": 0.0006362940740000856, | |
| "loss": 5.4085, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 1.1027296554369366, | |
| "grad_norm": 1.97225821018219, | |
| "learning_rate": 0.0006354022488263581, | |
| "loss": 5.4106, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 1.1053932536384752, | |
| "grad_norm": 2.2650508880615234, | |
| "learning_rate": 0.0006345104236526306, | |
| "loss": 5.4128, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 1.1080568518400136, | |
| "grad_norm": 1.9305511713027954, | |
| "learning_rate": 0.000633618598478903, | |
| "loss": 5.4084, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 1.1107204500415522, | |
| "grad_norm": 2.110548973083496, | |
| "learning_rate": 0.0006327285569555229, | |
| "loss": 5.4078, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 1.1133840482430906, | |
| "grad_norm": 2.0234880447387695, | |
| "learning_rate": 0.0006318367317817953, | |
| "loss": 5.4125, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 1.1160476464446292, | |
| "grad_norm": 1.8949861526489258, | |
| "learning_rate": 0.0006309449066080678, | |
| "loss": 5.4077, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 1.1187112446461676, | |
| "grad_norm": 1.9646226167678833, | |
| "learning_rate": 0.0006300530814343403, | |
| "loss": 5.4112, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 1.1213748428477062, | |
| "grad_norm": 1.9960238933563232, | |
| "learning_rate": 0.0006291612562606127, | |
| "loss": 5.4062, | |
| "step": 210500 | |
| }, | |
| { | |
| "epoch": 1.1240384410492446, | |
| "grad_norm": 2.0510716438293457, | |
| "learning_rate": 0.0006282694310868851, | |
| "loss": 5.4094, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 1.1267020392507832, | |
| "grad_norm": 1.969011664390564, | |
| "learning_rate": 0.0006273776059131576, | |
| "loss": 5.4123, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 1.1293656374523215, | |
| "grad_norm": 2.0459535121917725, | |
| "learning_rate": 0.0006264857807394301, | |
| "loss": 5.4077, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 1.1320292356538602, | |
| "grad_norm": 2.093336343765259, | |
| "learning_rate": 0.0006255957392160501, | |
| "loss": 5.4107, | |
| "step": 212500 | |
| }, | |
| { | |
| "epoch": 1.1346928338553985, | |
| "grad_norm": 1.8615410327911377, | |
| "learning_rate": 0.0006247056976926699, | |
| "loss": 5.4078, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 1.1373564320569371, | |
| "grad_norm": 1.9422777891159058, | |
| "learning_rate": 0.0006238138725189424, | |
| "loss": 5.4115, | |
| "step": 213500 | |
| }, | |
| { | |
| "epoch": 1.1400200302584755, | |
| "grad_norm": 1.9412380456924438, | |
| "learning_rate": 0.0006229220473452148, | |
| "loss": 5.4013, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 1.1426836284600141, | |
| "grad_norm": 2.2532691955566406, | |
| "learning_rate": 0.0006220302221714873, | |
| "loss": 5.4061, | |
| "step": 214500 | |
| }, | |
| { | |
| "epoch": 1.1453472266615525, | |
| "grad_norm": 1.7372703552246094, | |
| "learning_rate": 0.0006211383969977598, | |
| "loss": 5.41, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 1.1480108248630911, | |
| "grad_norm": 1.9771249294281006, | |
| "learning_rate": 0.0006202465718240322, | |
| "loss": 5.4032, | |
| "step": 215500 | |
| }, | |
| { | |
| "epoch": 1.1506744230646295, | |
| "grad_norm": 1.802037000656128, | |
| "learning_rate": 0.0006193547466503046, | |
| "loss": 5.4026, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 1.1533380212661681, | |
| "grad_norm": 1.958177924156189, | |
| "learning_rate": 0.0006184629214765771, | |
| "loss": 5.4043, | |
| "step": 216500 | |
| }, | |
| { | |
| "epoch": 1.1560016194677065, | |
| "grad_norm": 1.9318652153015137, | |
| "learning_rate": 0.000617572879953197, | |
| "loss": 5.4044, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 1.158665217669245, | |
| "grad_norm": 1.917920470237732, | |
| "learning_rate": 0.0006166810547794695, | |
| "loss": 5.4051, | |
| "step": 217500 | |
| }, | |
| { | |
| "epoch": 1.1613288158707835, | |
| "grad_norm": 1.9815441370010376, | |
| "learning_rate": 0.0006157892296057419, | |
| "loss": 5.4036, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 1.1639924140723221, | |
| "grad_norm": 2.0141518115997314, | |
| "learning_rate": 0.0006148974044320143, | |
| "loss": 5.4093, | |
| "step": 218500 | |
| }, | |
| { | |
| "epoch": 1.1666560122738605, | |
| "grad_norm": 2.0144686698913574, | |
| "learning_rate": 0.0006140073629086343, | |
| "loss": 5.3992, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 1.169319610475399, | |
| "grad_norm": 1.848953127861023, | |
| "learning_rate": 0.0006131155377349069, | |
| "loss": 5.4069, | |
| "step": 219500 | |
| }, | |
| { | |
| "epoch": 1.1719832086769375, | |
| "grad_norm": 1.8711676597595215, | |
| "learning_rate": 0.0006122237125611793, | |
| "loss": 5.4058, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.1746468068784761, | |
| "grad_norm": 2.1549181938171387, | |
| "learning_rate": 0.0006113318873874517, | |
| "loss": 5.4057, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 1.1773104050800145, | |
| "grad_norm": 2.136955738067627, | |
| "learning_rate": 0.0006104418458640716, | |
| "loss": 5.4047, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 1.1799740032815529, | |
| "grad_norm": 1.984183430671692, | |
| "learning_rate": 0.000609550020690344, | |
| "loss": 5.397, | |
| "step": 221500 | |
| }, | |
| { | |
| "epoch": 1.1826376014830915, | |
| "grad_norm": 2.173187732696533, | |
| "learning_rate": 0.0006086581955166164, | |
| "loss": 5.3996, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 1.1853011996846299, | |
| "grad_norm": 2.0700299739837646, | |
| "learning_rate": 0.000607766370342889, | |
| "loss": 5.3976, | |
| "step": 222500 | |
| }, | |
| { | |
| "epoch": 1.1879647978861685, | |
| "grad_norm": 2.1351547241210938, | |
| "learning_rate": 0.0006068763288195088, | |
| "loss": 5.4113, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 1.1906283960877069, | |
| "grad_norm": 1.9995781183242798, | |
| "learning_rate": 0.0006059845036457813, | |
| "loss": 5.4012, | |
| "step": 223500 | |
| }, | |
| { | |
| "epoch": 1.1932919942892455, | |
| "grad_norm": 2.2745988368988037, | |
| "learning_rate": 0.0006050926784720537, | |
| "loss": 5.4093, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 1.1959555924907839, | |
| "grad_norm": 2.5383615493774414, | |
| "learning_rate": 0.0006042026369486737, | |
| "loss": 5.3934, | |
| "step": 224500 | |
| }, | |
| { | |
| "epoch": 1.1986191906923225, | |
| "grad_norm": 2.132570266723633, | |
| "learning_rate": 0.0006033108117749462, | |
| "loss": 5.4143, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 1.2012827888938609, | |
| "grad_norm": 1.9985568523406982, | |
| "learning_rate": 0.0006024189866012187, | |
| "loss": 5.3987, | |
| "step": 225500 | |
| }, | |
| { | |
| "epoch": 1.2039463870953995, | |
| "grad_norm": 1.9169471263885498, | |
| "learning_rate": 0.0006015271614274911, | |
| "loss": 5.4005, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 1.2066099852969379, | |
| "grad_norm": 1.9423543214797974, | |
| "learning_rate": 0.0006006353362537635, | |
| "loss": 5.4016, | |
| "step": 226500 | |
| }, | |
| { | |
| "epoch": 1.2092735834984765, | |
| "grad_norm": 2.0575485229492188, | |
| "learning_rate": 0.000599743511080036, | |
| "loss": 5.393, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 1.2119371817000149, | |
| "grad_norm": 2.034454584121704, | |
| "learning_rate": 0.0005988516859063085, | |
| "loss": 5.3946, | |
| "step": 227500 | |
| }, | |
| { | |
| "epoch": 1.2146007799015535, | |
| "grad_norm": 1.9063221216201782, | |
| "learning_rate": 0.0005979598607325809, | |
| "loss": 5.4005, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 1.2172643781030918, | |
| "grad_norm": 2.094717025756836, | |
| "learning_rate": 0.0005970698192092008, | |
| "loss": 5.3943, | |
| "step": 228500 | |
| }, | |
| { | |
| "epoch": 1.2199279763046305, | |
| "grad_norm": 1.9740791320800781, | |
| "learning_rate": 0.0005961779940354732, | |
| "loss": 5.399, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 1.2225915745061688, | |
| "grad_norm": 1.95699143409729, | |
| "learning_rate": 0.0005952861688617457, | |
| "loss": 5.3971, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 1.2252551727077075, | |
| "grad_norm": 1.9305535554885864, | |
| "learning_rate": 0.0005943943436880182, | |
| "loss": 5.399, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.2279187709092458, | |
| "grad_norm": 1.8926870822906494, | |
| "learning_rate": 0.000593504302164638, | |
| "loss": 5.3967, | |
| "step": 230500 | |
| }, | |
| { | |
| "epoch": 1.2305823691107844, | |
| "grad_norm": 1.91937255859375, | |
| "learning_rate": 0.0005926124769909105, | |
| "loss": 5.3966, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 1.2332459673123228, | |
| "grad_norm": 1.9494017362594604, | |
| "learning_rate": 0.0005917224354675305, | |
| "loss": 5.3988, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 1.2359095655138614, | |
| "grad_norm": 1.7676622867584229, | |
| "learning_rate": 0.0005908306102938029, | |
| "loss": 5.3954, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 1.2385731637153998, | |
| "grad_norm": 1.9707027673721313, | |
| "learning_rate": 0.0005899387851200753, | |
| "loss": 5.3987, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 1.2412367619169384, | |
| "grad_norm": 1.8651105165481567, | |
| "learning_rate": 0.0005890469599463479, | |
| "loss": 5.3913, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 1.2439003601184768, | |
| "grad_norm": 2.2256948947906494, | |
| "learning_rate": 0.0005881551347726203, | |
| "loss": 5.4022, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 1.2465639583200154, | |
| "grad_norm": 2.0236611366271973, | |
| "learning_rate": 0.0005872633095988927, | |
| "loss": 5.3928, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 1.2492275565215538, | |
| "grad_norm": 2.07328724861145, | |
| "learning_rate": 0.0005863714844251652, | |
| "loss": 5.3964, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 1.2518911547230922, | |
| "grad_norm": 2.011497974395752, | |
| "learning_rate": 0.000585481442901785, | |
| "loss": 5.4, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 1.2545547529246308, | |
| "grad_norm": 1.891579270362854, | |
| "learning_rate": 0.0005845896177280576, | |
| "loss": 5.3931, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 1.2572183511261694, | |
| "grad_norm": 1.8369475603103638, | |
| "learning_rate": 0.00058369779255433, | |
| "loss": 5.388, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 1.2598819493277078, | |
| "grad_norm": 2.316582441329956, | |
| "learning_rate": 0.0005828059673806024, | |
| "loss": 5.3878, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 1.2625455475292462, | |
| "grad_norm": 1.8466497659683228, | |
| "learning_rate": 0.0005819141422068749, | |
| "loss": 5.3942, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 1.2652091457307848, | |
| "grad_norm": 1.9420734643936157, | |
| "learning_rate": 0.0005810223170331473, | |
| "loss": 5.3907, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 1.2678727439323234, | |
| "grad_norm": 1.9229456186294556, | |
| "learning_rate": 0.0005801304918594198, | |
| "loss": 5.394, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 1.2705363421338618, | |
| "grad_norm": 2.126213788986206, | |
| "learning_rate": 0.0005792386666856923, | |
| "loss": 5.3875, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 1.2731999403354002, | |
| "grad_norm": 1.9714566469192505, | |
| "learning_rate": 0.0005783486251623122, | |
| "loss": 5.3938, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 1.2758635385369388, | |
| "grad_norm": 2.244844436645508, | |
| "learning_rate": 0.0005774567999885847, | |
| "loss": 5.3974, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 1.2785271367384774, | |
| "grad_norm": 2.083517551422119, | |
| "learning_rate": 0.0005765649748148571, | |
| "loss": 5.3827, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.2811907349400158, | |
| "grad_norm": 2.1155362129211426, | |
| "learning_rate": 0.0005756749332914771, | |
| "loss": 5.3908, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 1.2838543331415542, | |
| "grad_norm": 2.0415351390838623, | |
| "learning_rate": 0.0005747831081177495, | |
| "loss": 5.3904, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 1.2865179313430928, | |
| "grad_norm": 2.4744224548339844, | |
| "learning_rate": 0.0005738912829440219, | |
| "loss": 5.3825, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 1.2891815295446314, | |
| "grad_norm": 1.9680261611938477, | |
| "learning_rate": 0.0005729994577702944, | |
| "loss": 5.3915, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 1.2918451277461698, | |
| "grad_norm": 2.4636471271514893, | |
| "learning_rate": 0.0005721076325965668, | |
| "loss": 5.3946, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 1.2945087259477082, | |
| "grad_norm": 1.8884419202804565, | |
| "learning_rate": 0.0005712158074228393, | |
| "loss": 5.3905, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 1.2971723241492468, | |
| "grad_norm": 2.192204236984253, | |
| "learning_rate": 0.0005703257658994592, | |
| "loss": 5.3891, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 1.2998359223507852, | |
| "grad_norm": 1.963740587234497, | |
| "learning_rate": 0.0005694339407257316, | |
| "loss": 5.389, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 1.3024995205523238, | |
| "grad_norm": 2.2511630058288574, | |
| "learning_rate": 0.0005685421155520041, | |
| "loss": 5.3988, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 1.3051631187538622, | |
| "grad_norm": 1.8933221101760864, | |
| "learning_rate": 0.0005676502903782765, | |
| "loss": 5.39, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 1.3078267169554008, | |
| "grad_norm": 1.813040852546692, | |
| "learning_rate": 0.000566758465204549, | |
| "loss": 5.3884, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 1.3104903151569391, | |
| "grad_norm": 2.3987181186676025, | |
| "learning_rate": 0.0005658666400308215, | |
| "loss": 5.3888, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 1.3131539133584778, | |
| "grad_norm": 2.0762851238250732, | |
| "learning_rate": 0.0005649748148570939, | |
| "loss": 5.3881, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 1.3158175115600161, | |
| "grad_norm": 2.3197662830352783, | |
| "learning_rate": 0.0005640829896833663, | |
| "loss": 5.3876, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 1.3184811097615547, | |
| "grad_norm": 1.9953910112380981, | |
| "learning_rate": 0.0005631929481599863, | |
| "loss": 5.3892, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 1.3211447079630931, | |
| "grad_norm": 2.20346999168396, | |
| "learning_rate": 0.0005623011229862588, | |
| "loss": 5.3844, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 1.3238083061646317, | |
| "grad_norm": 1.9688447713851929, | |
| "learning_rate": 0.0005614092978125313, | |
| "loss": 5.3924, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 1.3264719043661701, | |
| "grad_norm": 1.950621485710144, | |
| "learning_rate": 0.0005605174726388037, | |
| "loss": 5.382, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 1.3291355025677087, | |
| "grad_norm": 2.0261106491088867, | |
| "learning_rate": 0.0005596274311154236, | |
| "loss": 5.3889, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 1.3317991007692471, | |
| "grad_norm": 1.819598913192749, | |
| "learning_rate": 0.000558735605941696, | |
| "loss": 5.3879, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.3344626989707857, | |
| "grad_norm": 2.092658042907715, | |
| "learning_rate": 0.0005578437807679685, | |
| "loss": 5.3897, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 1.3371262971723241, | |
| "grad_norm": 1.8927563428878784, | |
| "learning_rate": 0.000556951955594241, | |
| "loss": 5.3888, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 1.3397898953738627, | |
| "grad_norm": 1.91410493850708, | |
| "learning_rate": 0.0005560619140708608, | |
| "loss": 5.3865, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 1.3424534935754011, | |
| "grad_norm": 1.923710584640503, | |
| "learning_rate": 0.0005551700888971333, | |
| "loss": 5.3831, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 1.3451170917769395, | |
| "grad_norm": 2.011301279067993, | |
| "learning_rate": 0.0005542782637234058, | |
| "loss": 5.3832, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 1.347780689978478, | |
| "grad_norm": 1.8271079063415527, | |
| "learning_rate": 0.0005533864385496783, | |
| "loss": 5.3843, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 1.3504442881800167, | |
| "grad_norm": 2.0028188228607178, | |
| "learning_rate": 0.0005524963970262982, | |
| "loss": 5.383, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 1.353107886381555, | |
| "grad_norm": 1.8386844396591187, | |
| "learning_rate": 0.0005516045718525706, | |
| "loss": 5.3873, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 1.3557714845830935, | |
| "grad_norm": 1.8750890493392944, | |
| "learning_rate": 0.0005507127466788431, | |
| "loss": 5.3794, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 1.358435082784632, | |
| "grad_norm": 1.9305578470230103, | |
| "learning_rate": 0.0005498209215051155, | |
| "loss": 5.3863, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 1.3610986809861707, | |
| "grad_norm": 2.1922383308410645, | |
| "learning_rate": 0.0005489308799817354, | |
| "loss": 5.3889, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 1.363762279187709, | |
| "grad_norm": 2.006162405014038, | |
| "learning_rate": 0.0005480390548080079, | |
| "loss": 5.3793, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 1.3664258773892475, | |
| "grad_norm": 2.1891300678253174, | |
| "learning_rate": 0.0005471472296342803, | |
| "loss": 5.3805, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 1.369089475590786, | |
| "grad_norm": 2.036553144454956, | |
| "learning_rate": 0.0005462554044605528, | |
| "loss": 5.3809, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 1.3717530737923247, | |
| "grad_norm": 1.9189977645874023, | |
| "learning_rate": 0.0005453653629371727, | |
| "loss": 5.3766, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 1.374416671993863, | |
| "grad_norm": 1.98636794090271, | |
| "learning_rate": 0.0005444735377634452, | |
| "loss": 5.39, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 1.3770802701954015, | |
| "grad_norm": 1.897522211074829, | |
| "learning_rate": 0.0005435834962400651, | |
| "loss": 5.3839, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 1.37974386839694, | |
| "grad_norm": 2.0826635360717773, | |
| "learning_rate": 0.0005426916710663376, | |
| "loss": 5.383, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 1.3824074665984787, | |
| "grad_norm": 1.8267229795455933, | |
| "learning_rate": 0.00054179984589261, | |
| "loss": 5.3866, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 1.385071064800017, | |
| "grad_norm": 2.1117184162139893, | |
| "learning_rate": 0.0005409080207188824, | |
| "loss": 5.3787, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.3877346630015555, | |
| "grad_norm": 1.9132159948349, | |
| "learning_rate": 0.0005400161955451549, | |
| "loss": 5.3812, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 1.390398261203094, | |
| "grad_norm": 1.9600298404693604, | |
| "learning_rate": 0.0005391243703714274, | |
| "loss": 5.381, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 1.3930618594046325, | |
| "grad_norm": 2.000422716140747, | |
| "learning_rate": 0.0005382325451976998, | |
| "loss": 5.3823, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 1.395725457606171, | |
| "grad_norm": 2.2225003242492676, | |
| "learning_rate": 0.0005373407200239723, | |
| "loss": 5.3776, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 1.3983890558077094, | |
| "grad_norm": 2.084779977798462, | |
| "learning_rate": 0.0005364506785005921, | |
| "loss": 5.3781, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 1.401052654009248, | |
| "grad_norm": 2.126775026321411, | |
| "learning_rate": 0.0005355588533268646, | |
| "loss": 5.3832, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 1.4037162522107864, | |
| "grad_norm": 1.9713746309280396, | |
| "learning_rate": 0.0005346670281531371, | |
| "loss": 5.3792, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 1.406379850412325, | |
| "grad_norm": 2.0785419940948486, | |
| "learning_rate": 0.0005337752029794095, | |
| "loss": 5.3825, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 1.4090434486138634, | |
| "grad_norm": 2.3811593055725098, | |
| "learning_rate": 0.0005328851614560295, | |
| "loss": 5.3826, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 1.411707046815402, | |
| "grad_norm": 2.1196324825286865, | |
| "learning_rate": 0.0005319933362823019, | |
| "loss": 5.3785, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.4143706450169404, | |
| "grad_norm": 2.06736421585083, | |
| "learning_rate": 0.0005311015111085744, | |
| "loss": 5.3796, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 1.417034243218479, | |
| "grad_norm": 2.1438751220703125, | |
| "learning_rate": 0.0005302096859348468, | |
| "loss": 5.3747, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 1.4196978414200174, | |
| "grad_norm": 2.0328142642974854, | |
| "learning_rate": 0.0005293196444114668, | |
| "loss": 5.3726, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 1.422361439621556, | |
| "grad_norm": 1.9709652662277222, | |
| "learning_rate": 0.0005284278192377392, | |
| "loss": 5.3835, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 1.4250250378230944, | |
| "grad_norm": 2.0982072353363037, | |
| "learning_rate": 0.0005275359940640116, | |
| "loss": 5.3719, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 1.427688636024633, | |
| "grad_norm": 2.3335447311401367, | |
| "learning_rate": 0.0005266441688902841, | |
| "loss": 5.3824, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.4303522342261714, | |
| "grad_norm": 1.9240329265594482, | |
| "learning_rate": 0.0005257541273669039, | |
| "loss": 5.3754, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 1.43301583242771, | |
| "grad_norm": 2.0762813091278076, | |
| "learning_rate": 0.0005248623021931765, | |
| "loss": 5.3754, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.4356794306292484, | |
| "grad_norm": 1.9223084449768066, | |
| "learning_rate": 0.0005239704770194489, | |
| "loss": 5.3751, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 1.4383430288307868, | |
| "grad_norm": 1.9600517749786377, | |
| "learning_rate": 0.0005230786518457213, | |
| "loss": 5.3726, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.4410066270323254, | |
| "grad_norm": 2.0275826454162598, | |
| "learning_rate": 0.0005221886103223413, | |
| "loss": 5.3755, | |
| "step": 270500 | |
| }, | |
| { | |
| "epoch": 1.443670225233864, | |
| "grad_norm": 2.0879909992218018, | |
| "learning_rate": 0.0005212967851486137, | |
| "loss": 5.371, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 1.4463338234354024, | |
| "grad_norm": 2.2107584476470947, | |
| "learning_rate": 0.0005204049599748863, | |
| "loss": 5.3775, | |
| "step": 271500 | |
| }, | |
| { | |
| "epoch": 1.4489974216369408, | |
| "grad_norm": 1.9889525175094604, | |
| "learning_rate": 0.0005195131348011587, | |
| "loss": 5.369, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 1.4516610198384794, | |
| "grad_norm": 1.8878706693649292, | |
| "learning_rate": 0.0005186230932777786, | |
| "loss": 5.3762, | |
| "step": 272500 | |
| }, | |
| { | |
| "epoch": 1.454324618040018, | |
| "grad_norm": 2.0804665088653564, | |
| "learning_rate": 0.000517731268104051, | |
| "loss": 5.3731, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 1.4569882162415564, | |
| "grad_norm": 2.3155815601348877, | |
| "learning_rate": 0.0005168394429303234, | |
| "loss": 5.3696, | |
| "step": 273500 | |
| }, | |
| { | |
| "epoch": 1.4596518144430948, | |
| "grad_norm": 2.2707676887512207, | |
| "learning_rate": 0.000515947617756596, | |
| "loss": 5.3763, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 1.4623154126446334, | |
| "grad_norm": 1.947204828262329, | |
| "learning_rate": 0.0005150575762332158, | |
| "loss": 5.3689, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 1.464979010846172, | |
| "grad_norm": 1.9428602457046509, | |
| "learning_rate": 0.0005141657510594883, | |
| "loss": 5.3797, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.4676426090477104, | |
| "grad_norm": 2.4003546237945557, | |
| "learning_rate": 0.0005132739258857608, | |
| "loss": 5.3672, | |
| "step": 275500 | |
| }, | |
| { | |
| "epoch": 1.4703062072492488, | |
| "grad_norm": 2.047048330307007, | |
| "learning_rate": 0.0005123821007120333, | |
| "loss": 5.3761, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 1.4729698054507874, | |
| "grad_norm": 2.0965404510498047, | |
| "learning_rate": 0.0005114920591886531, | |
| "loss": 5.3645, | |
| "step": 276500 | |
| }, | |
| { | |
| "epoch": 1.475633403652326, | |
| "grad_norm": 1.9648233652114868, | |
| "learning_rate": 0.0005106002340149257, | |
| "loss": 5.37, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 1.4782970018538644, | |
| "grad_norm": 1.8992446660995483, | |
| "learning_rate": 0.0005097084088411981, | |
| "loss": 5.3679, | |
| "step": 277500 | |
| }, | |
| { | |
| "epoch": 1.4809606000554028, | |
| "grad_norm": 2.125126838684082, | |
| "learning_rate": 0.0005088165836674705, | |
| "loss": 5.3702, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 1.4836241982569414, | |
| "grad_norm": 2.030409574508667, | |
| "learning_rate": 0.0005079265421440904, | |
| "loss": 5.3691, | |
| "step": 278500 | |
| }, | |
| { | |
| "epoch": 1.4862877964584797, | |
| "grad_norm": 1.9816679954528809, | |
| "learning_rate": 0.0005070347169703628, | |
| "loss": 5.3723, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 1.4889513946600184, | |
| "grad_norm": 2.032564401626587, | |
| "learning_rate": 0.0005061428917966354, | |
| "loss": 5.3695, | |
| "step": 279500 | |
| }, | |
| { | |
| "epoch": 1.4916149928615567, | |
| "grad_norm": 2.0342843532562256, | |
| "learning_rate": 0.0005052510666229078, | |
| "loss": 5.3681, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.4942785910630954, | |
| "grad_norm": 1.9113322496414185, | |
| "learning_rate": 0.0005043610250995278, | |
| "loss": 5.3713, | |
| "step": 280500 | |
| }, | |
| { | |
| "epoch": 1.4969421892646337, | |
| "grad_norm": 2.1201562881469727, | |
| "learning_rate": 0.0005034691999258002, | |
| "loss": 5.375, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 1.4996057874661723, | |
| "grad_norm": 2.1695244312286377, | |
| "learning_rate": 0.0005025773747520726, | |
| "loss": 5.3666, | |
| "step": 281500 | |
| }, | |
| { | |
| "epoch": 1.5022693856677107, | |
| "grad_norm": 2.2736222743988037, | |
| "learning_rate": 0.0005016873332286925, | |
| "loss": 5.3728, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 1.5049329838692493, | |
| "grad_norm": 1.9306550025939941, | |
| "learning_rate": 0.000500795508054965, | |
| "loss": 5.3607, | |
| "step": 282500 | |
| }, | |
| { | |
| "epoch": 1.507596582070788, | |
| "grad_norm": 1.970550537109375, | |
| "learning_rate": 0.0004999036828812375, | |
| "loss": 5.372, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 1.5102601802723261, | |
| "grad_norm": 1.7387876510620117, | |
| "learning_rate": 0.0004990118577075099, | |
| "loss": 5.3728, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 1.5129237784738647, | |
| "grad_norm": 2.364816188812256, | |
| "learning_rate": 0.0004981200325337823, | |
| "loss": 5.3667, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 1.5155873766754033, | |
| "grad_norm": 1.959367036819458, | |
| "learning_rate": 0.0004972282073600549, | |
| "loss": 5.3672, | |
| "step": 284500 | |
| }, | |
| { | |
| "epoch": 1.5182509748769417, | |
| "grad_norm": 2.4462456703186035, | |
| "learning_rate": 0.0004963363821863273, | |
| "loss": 5.3669, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 1.52091457307848, | |
| "grad_norm": 1.949645757675171, | |
| "learning_rate": 0.0004954445570125997, | |
| "loss": 5.3669, | |
| "step": 285500 | |
| }, | |
| { | |
| "epoch": 1.5235781712800187, | |
| "grad_norm": 2.0255677700042725, | |
| "learning_rate": 0.0004945545154892197, | |
| "loss": 5.3689, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 1.5262417694815573, | |
| "grad_norm": 2.0761642456054688, | |
| "learning_rate": 0.0004936644739658396, | |
| "loss": 5.3633, | |
| "step": 286500 | |
| }, | |
| { | |
| "epoch": 1.5289053676830957, | |
| "grad_norm": 2.1219048500061035, | |
| "learning_rate": 0.000492772648792112, | |
| "loss": 5.3617, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 1.531568965884634, | |
| "grad_norm": 1.83650803565979, | |
| "learning_rate": 0.0004918808236183844, | |
| "loss": 5.3735, | |
| "step": 287500 | |
| }, | |
| { | |
| "epoch": 1.5342325640861727, | |
| "grad_norm": 2.0275492668151855, | |
| "learning_rate": 0.0004909889984446568, | |
| "loss": 5.3636, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 1.5368961622877113, | |
| "grad_norm": 1.9854780435562134, | |
| "learning_rate": 0.0004900971732709294, | |
| "loss": 5.3595, | |
| "step": 288500 | |
| }, | |
| { | |
| "epoch": 1.5395597604892497, | |
| "grad_norm": 2.282017707824707, | |
| "learning_rate": 0.0004892053480972018, | |
| "loss": 5.3673, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 1.542223358690788, | |
| "grad_norm": 2.0435492992401123, | |
| "learning_rate": 0.0004883135229234743, | |
| "loss": 5.3771, | |
| "step": 289500 | |
| }, | |
| { | |
| "epoch": 1.5448869568923267, | |
| "grad_norm": 2.4702582359313965, | |
| "learning_rate": 0.0004874216977497467, | |
| "loss": 5.3592, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.5475505550938653, | |
| "grad_norm": 2.032315731048584, | |
| "learning_rate": 0.00048653165622636666, | |
| "loss": 5.3688, | |
| "step": 290500 | |
| }, | |
| { | |
| "epoch": 1.5502141532954037, | |
| "grad_norm": 2.13460636138916, | |
| "learning_rate": 0.0004856398310526391, | |
| "loss": 5.3624, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 1.552877751496942, | |
| "grad_norm": 1.9628610610961914, | |
| "learning_rate": 0.0004847480058789115, | |
| "loss": 5.3647, | |
| "step": 291500 | |
| }, | |
| { | |
| "epoch": 1.5555413496984807, | |
| "grad_norm": 1.8896455764770508, | |
| "learning_rate": 0.000483856180705184, | |
| "loss": 5.3693, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 1.5582049479000193, | |
| "grad_norm": 1.92352294921875, | |
| "learning_rate": 0.0004829661391818039, | |
| "loss": 5.3551, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 1.5608685461015577, | |
| "grad_norm": 2.061492919921875, | |
| "learning_rate": 0.0004820743140080764, | |
| "loss": 5.3618, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 1.563532144303096, | |
| "grad_norm": 2.0767364501953125, | |
| "learning_rate": 0.0004811842724846963, | |
| "loss": 5.3596, | |
| "step": 293500 | |
| }, | |
| { | |
| "epoch": 1.5661957425046347, | |
| "grad_norm": 2.103719472885132, | |
| "learning_rate": 0.00048029244731096876, | |
| "loss": 5.3547, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 1.5688593407061733, | |
| "grad_norm": 2.096832275390625, | |
| "learning_rate": 0.00047940062213724124, | |
| "loss": 5.3635, | |
| "step": 294500 | |
| }, | |
| { | |
| "epoch": 1.5715229389077117, | |
| "grad_norm": 2.053567409515381, | |
| "learning_rate": 0.0004785087969635137, | |
| "loss": 5.3683, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 1.57418653710925, | |
| "grad_norm": 2.040846586227417, | |
| "learning_rate": 0.00047761697178978616, | |
| "loss": 5.3623, | |
| "step": 295500 | |
| }, | |
| { | |
| "epoch": 1.5768501353107887, | |
| "grad_norm": 2.0361154079437256, | |
| "learning_rate": 0.0004767251466160586, | |
| "loss": 5.3572, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 1.5795137335123273, | |
| "grad_norm": 2.006989002227783, | |
| "learning_rate": 0.00047583332144233103, | |
| "loss": 5.3702, | |
| "step": 296500 | |
| }, | |
| { | |
| "epoch": 1.5821773317138657, | |
| "grad_norm": 2.0891811847686768, | |
| "learning_rate": 0.0004749414962686035, | |
| "loss": 5.3664, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 1.584840929915404, | |
| "grad_norm": 2.023730754852295, | |
| "learning_rate": 0.0004740514547452234, | |
| "loss": 5.3668, | |
| "step": 297500 | |
| }, | |
| { | |
| "epoch": 1.5875045281169426, | |
| "grad_norm": 1.8560234308242798, | |
| "learning_rate": 0.0004731596295714958, | |
| "loss": 5.3688, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 1.5901681263184813, | |
| "grad_norm": 1.84561288356781, | |
| "learning_rate": 0.0004722678043977683, | |
| "loss": 5.3595, | |
| "step": 298500 | |
| }, | |
| { | |
| "epoch": 1.5928317245200196, | |
| "grad_norm": 2.0453810691833496, | |
| "learning_rate": 0.0004713759792240407, | |
| "loss": 5.3612, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 1.595495322721558, | |
| "grad_norm": 2.03952956199646, | |
| "learning_rate": 0.0004704859377006607, | |
| "loss": 5.3595, | |
| "step": 299500 | |
| }, | |
| { | |
| "epoch": 1.5981589209230966, | |
| "grad_norm": 2.175218343734741, | |
| "learning_rate": 0.00046959411252693313, | |
| "loss": 5.3599, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.6008225191246352, | |
| "grad_norm": 1.9432867765426636, | |
| "learning_rate": 0.00046870228735320556, | |
| "loss": 5.3579, | |
| "step": 300500 | |
| }, | |
| { | |
| "epoch": 1.6034861173261736, | |
| "grad_norm": 2.0046420097351074, | |
| "learning_rate": 0.00046781046217947805, | |
| "loss": 5.3506, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 1.606149715527712, | |
| "grad_norm": 1.9781187772750854, | |
| "learning_rate": 0.00046692042065609796, | |
| "loss": 5.3585, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 1.6088133137292506, | |
| "grad_norm": 2.0884523391723633, | |
| "learning_rate": 0.0004660285954823704, | |
| "loss": 5.36, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 1.611476911930789, | |
| "grad_norm": 2.0299806594848633, | |
| "learning_rate": 0.0004651367703086429, | |
| "loss": 5.3609, | |
| "step": 302500 | |
| }, | |
| { | |
| "epoch": 1.6141405101323274, | |
| "grad_norm": 2.0034475326538086, | |
| "learning_rate": 0.0004642449451349153, | |
| "loss": 5.3621, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 1.616804108333866, | |
| "grad_norm": 2.027804136276245, | |
| "learning_rate": 0.00046335490361153523, | |
| "loss": 5.3617, | |
| "step": 303500 | |
| }, | |
| { | |
| "epoch": 1.6194677065354046, | |
| "grad_norm": 2.2879958152770996, | |
| "learning_rate": 0.0004624630784378077, | |
| "loss": 5.3597, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 1.622131304736943, | |
| "grad_norm": 2.0821385383605957, | |
| "learning_rate": 0.00046157125326408015, | |
| "loss": 5.3539, | |
| "step": 304500 | |
| }, | |
| { | |
| "epoch": 1.6247949029384814, | |
| "grad_norm": 2.0150811672210693, | |
| "learning_rate": 0.00046067942809035263, | |
| "loss": 5.3568, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 1.62745850114002, | |
| "grad_norm": 1.944470763206482, | |
| "learning_rate": 0.0004597893865669725, | |
| "loss": 5.3618, | |
| "step": 305500 | |
| }, | |
| { | |
| "epoch": 1.6301220993415586, | |
| "grad_norm": 1.8767342567443848, | |
| "learning_rate": 0.000458897561393245, | |
| "loss": 5.3572, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 1.632785697543097, | |
| "grad_norm": 2.100074291229248, | |
| "learning_rate": 0.0004580057362195174, | |
| "loss": 5.3557, | |
| "step": 306500 | |
| }, | |
| { | |
| "epoch": 1.6354492957446354, | |
| "grad_norm": 1.8953720331192017, | |
| "learning_rate": 0.00045711569469613733, | |
| "loss": 5.3603, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 1.638112893946174, | |
| "grad_norm": 2.099968433380127, | |
| "learning_rate": 0.0004562238695224098, | |
| "loss": 5.3459, | |
| "step": 307500 | |
| }, | |
| { | |
| "epoch": 1.6407764921477126, | |
| "grad_norm": 2.21608567237854, | |
| "learning_rate": 0.00045533204434868225, | |
| "loss": 5.3602, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 1.643440090349251, | |
| "grad_norm": 2.0884177684783936, | |
| "learning_rate": 0.0004544402191749547, | |
| "loss": 5.3538, | |
| "step": 308500 | |
| }, | |
| { | |
| "epoch": 1.6461036885507894, | |
| "grad_norm": 2.0560896396636963, | |
| "learning_rate": 0.00045354839400122717, | |
| "loss": 5.3618, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 1.648767286752328, | |
| "grad_norm": 2.3166544437408447, | |
| "learning_rate": 0.0004526565688274996, | |
| "loss": 5.3446, | |
| "step": 309500 | |
| }, | |
| { | |
| "epoch": 1.6514308849538666, | |
| "grad_norm": 1.9376626014709473, | |
| "learning_rate": 0.0004517647436537721, | |
| "loss": 5.3565, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.654094483155405, | |
| "grad_norm": 1.8356984853744507, | |
| "learning_rate": 0.0004508729184800445, | |
| "loss": 5.3585, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 1.6567580813569434, | |
| "grad_norm": 2.0316951274871826, | |
| "learning_rate": 0.00044998287695666443, | |
| "loss": 5.3615, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 1.659421679558482, | |
| "grad_norm": 2.1165359020233154, | |
| "learning_rate": 0.00044909283543328435, | |
| "loss": 5.357, | |
| "step": 311500 | |
| }, | |
| { | |
| "epoch": 1.6620852777600206, | |
| "grad_norm": 2.1769607067108154, | |
| "learning_rate": 0.0004482010102595568, | |
| "loss": 5.3567, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 1.664748875961559, | |
| "grad_norm": 2.0454256534576416, | |
| "learning_rate": 0.0004473091850858292, | |
| "loss": 5.3573, | |
| "step": 312500 | |
| }, | |
| { | |
| "epoch": 1.6674124741630973, | |
| "grad_norm": 2.1431968212127686, | |
| "learning_rate": 0.0004464173599121017, | |
| "loss": 5.3509, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 1.670076072364636, | |
| "grad_norm": 2.0397841930389404, | |
| "learning_rate": 0.00044552553473837413, | |
| "loss": 5.3532, | |
| "step": 313500 | |
| }, | |
| { | |
| "epoch": 1.6727396705661746, | |
| "grad_norm": 2.080476999282837, | |
| "learning_rate": 0.0004446337095646467, | |
| "loss": 5.3558, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 1.675403268767713, | |
| "grad_norm": 1.9653671979904175, | |
| "learning_rate": 0.0004437418843909191, | |
| "loss": 5.3481, | |
| "step": 314500 | |
| }, | |
| { | |
| "epoch": 1.6780668669692513, | |
| "grad_norm": 2.2119712829589844, | |
| "learning_rate": 0.0004428500592171916, | |
| "loss": 5.3555, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 1.68073046517079, | |
| "grad_norm": 1.990404486656189, | |
| "learning_rate": 0.00044196001769381145, | |
| "loss": 5.3567, | |
| "step": 315500 | |
| }, | |
| { | |
| "epoch": 1.6833940633723286, | |
| "grad_norm": 2.0500054359436035, | |
| "learning_rate": 0.0004410681925200839, | |
| "loss": 5.3503, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 1.686057661573867, | |
| "grad_norm": 2.205277919769287, | |
| "learning_rate": 0.00044017636734635637, | |
| "loss": 5.3553, | |
| "step": 316500 | |
| }, | |
| { | |
| "epoch": 1.6887212597754053, | |
| "grad_norm": 1.9659850597381592, | |
| "learning_rate": 0.0004392845421726288, | |
| "loss": 5.3456, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 1.691384857976944, | |
| "grad_norm": 2.029604196548462, | |
| "learning_rate": 0.0004383927169989013, | |
| "loss": 5.3554, | |
| "step": 317500 | |
| }, | |
| { | |
| "epoch": 1.6940484561784825, | |
| "grad_norm": 2.041193723678589, | |
| "learning_rate": 0.0004375008918251737, | |
| "loss": 5.3534, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 1.696712054380021, | |
| "grad_norm": 2.068268299102783, | |
| "learning_rate": 0.00043661085030179364, | |
| "loss": 5.3564, | |
| "step": 318500 | |
| }, | |
| { | |
| "epoch": 1.6993756525815593, | |
| "grad_norm": 2.0078883171081543, | |
| "learning_rate": 0.0004357190251280661, | |
| "loss": 5.3518, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 1.702039250783098, | |
| "grad_norm": 1.9186288118362427, | |
| "learning_rate": 0.00043482719995433856, | |
| "loss": 5.3471, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 1.7047028489846365, | |
| "grad_norm": 2.0289323329925537, | |
| "learning_rate": 0.000433935374780611, | |
| "loss": 5.3513, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 1.7073664471861747, | |
| "grad_norm": 1.69050133228302, | |
| "learning_rate": 0.0004330435496068835, | |
| "loss": 5.3513, | |
| "step": 320500 | |
| }, | |
| { | |
| "epoch": 1.7100300453877133, | |
| "grad_norm": 2.0047898292541504, | |
| "learning_rate": 0.0004321517244331559, | |
| "loss": 5.3531, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 1.712693643589252, | |
| "grad_norm": 2.1100831031799316, | |
| "learning_rate": 0.0004312616829097759, | |
| "loss": 5.3494, | |
| "step": 321500 | |
| }, | |
| { | |
| "epoch": 1.7153572417907903, | |
| "grad_norm": 2.053802013397217, | |
| "learning_rate": 0.0004303698577360483, | |
| "loss": 5.3573, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 1.7180208399923287, | |
| "grad_norm": 1.9370436668395996, | |
| "learning_rate": 0.00042947803256232074, | |
| "loss": 5.3457, | |
| "step": 322500 | |
| }, | |
| { | |
| "epoch": 1.7206844381938673, | |
| "grad_norm": 2.062244176864624, | |
| "learning_rate": 0.00042858620738859323, | |
| "loss": 5.3532, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 1.723348036395406, | |
| "grad_norm": 2.129863739013672, | |
| "learning_rate": 0.00042769438221486566, | |
| "loss": 5.3469, | |
| "step": 323500 | |
| }, | |
| { | |
| "epoch": 1.7260116345969443, | |
| "grad_norm": 2.1496474742889404, | |
| "learning_rate": 0.0004268043406914855, | |
| "loss": 5.3494, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 1.7286752327984827, | |
| "grad_norm": 2.0887863636016846, | |
| "learning_rate": 0.00042591251551775806, | |
| "loss": 5.3483, | |
| "step": 324500 | |
| }, | |
| { | |
| "epoch": 1.7313388310000213, | |
| "grad_norm": 2.4094293117523193, | |
| "learning_rate": 0.0004250206903440305, | |
| "loss": 5.3485, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 1.73400242920156, | |
| "grad_norm": 2.046931266784668, | |
| "learning_rate": 0.000424128865170303, | |
| "loss": 5.345, | |
| "step": 325500 | |
| }, | |
| { | |
| "epoch": 1.7366660274030983, | |
| "grad_norm": 2.1520516872406006, | |
| "learning_rate": 0.0004232370399965754, | |
| "loss": 5.351, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 1.7393296256046367, | |
| "grad_norm": 2.006589651107788, | |
| "learning_rate": 0.0004223469984731953, | |
| "loss": 5.3511, | |
| "step": 326500 | |
| }, | |
| { | |
| "epoch": 1.7419932238061753, | |
| "grad_norm": 1.9035310745239258, | |
| "learning_rate": 0.00042145517329946776, | |
| "loss": 5.3457, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 1.7446568220077139, | |
| "grad_norm": 2.0777719020843506, | |
| "learning_rate": 0.0004205633481257402, | |
| "loss": 5.3519, | |
| "step": 327500 | |
| }, | |
| { | |
| "epoch": 1.7473204202092523, | |
| "grad_norm": 2.2958412170410156, | |
| "learning_rate": 0.0004196715229520127, | |
| "loss": 5.3455, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 1.7499840184107907, | |
| "grad_norm": 2.3482723236083984, | |
| "learning_rate": 0.0004187796977782851, | |
| "loss": 5.3513, | |
| "step": 328500 | |
| }, | |
| { | |
| "epoch": 1.7526476166123293, | |
| "grad_norm": 2.4552931785583496, | |
| "learning_rate": 0.00041788787260455755, | |
| "loss": 5.3496, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 1.7553112148138679, | |
| "grad_norm": 2.0816726684570312, | |
| "learning_rate": 0.00041699604743083003, | |
| "loss": 5.3434, | |
| "step": 329500 | |
| }, | |
| { | |
| "epoch": 1.7579748130154063, | |
| "grad_norm": 1.869194746017456, | |
| "learning_rate": 0.00041610600590744995, | |
| "loss": 5.349, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 1.7606384112169446, | |
| "grad_norm": 2.020172595977783, | |
| "learning_rate": 0.00041521418073372243, | |
| "loss": 5.3489, | |
| "step": 330500 | |
| }, | |
| { | |
| "epoch": 1.7633020094184833, | |
| "grad_norm": 2.1260483264923096, | |
| "learning_rate": 0.00041432235555999487, | |
| "loss": 5.3523, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 1.7659656076200219, | |
| "grad_norm": 2.1546857357025146, | |
| "learning_rate": 0.0004134305303862673, | |
| "loss": 5.3414, | |
| "step": 331500 | |
| }, | |
| { | |
| "epoch": 1.7686292058215602, | |
| "grad_norm": 2.2955052852630615, | |
| "learning_rate": 0.0004125387052125398, | |
| "loss": 5.3489, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 1.7712928040230986, | |
| "grad_norm": 2.0505149364471436, | |
| "learning_rate": 0.0004116468800388122, | |
| "loss": 5.3543, | |
| "step": 332500 | |
| }, | |
| { | |
| "epoch": 1.7739564022246372, | |
| "grad_norm": 1.9976879358291626, | |
| "learning_rate": 0.0004107550548650847, | |
| "loss": 5.3455, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 1.7766200004261758, | |
| "grad_norm": 2.1872785091400146, | |
| "learning_rate": 0.00040986322969135714, | |
| "loss": 5.345, | |
| "step": 333500 | |
| }, | |
| { | |
| "epoch": 1.7792835986277142, | |
| "grad_norm": 2.025681257247925, | |
| "learning_rate": 0.00040897318816797705, | |
| "loss": 5.3559, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 1.7819471968292526, | |
| "grad_norm": 2.051701307296753, | |
| "learning_rate": 0.00040808136299424954, | |
| "loss": 5.3424, | |
| "step": 334500 | |
| }, | |
| { | |
| "epoch": 1.7846107950307912, | |
| "grad_norm": 2.161292314529419, | |
| "learning_rate": 0.00040718953782052197, | |
| "loss": 5.3418, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 1.7872743932323298, | |
| "grad_norm": 2.1306283473968506, | |
| "learning_rate": 0.00040629771264679446, | |
| "loss": 5.352, | |
| "step": 335500 | |
| }, | |
| { | |
| "epoch": 1.7899379914338682, | |
| "grad_norm": 2.1994986534118652, | |
| "learning_rate": 0.00040540767112341437, | |
| "loss": 5.348, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 1.7926015896354066, | |
| "grad_norm": 2.3227968215942383, | |
| "learning_rate": 0.00040451762960003423, | |
| "loss": 5.3444, | |
| "step": 336500 | |
| }, | |
| { | |
| "epoch": 1.7952651878369452, | |
| "grad_norm": 2.1397862434387207, | |
| "learning_rate": 0.0004036258044263067, | |
| "loss": 5.3556, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 1.7979287860384838, | |
| "grad_norm": 2.0676870346069336, | |
| "learning_rate": 0.00040273397925257915, | |
| "loss": 5.3471, | |
| "step": 337500 | |
| }, | |
| { | |
| "epoch": 1.8005923842400222, | |
| "grad_norm": 2.2523062229156494, | |
| "learning_rate": 0.0004018421540788516, | |
| "loss": 5.3431, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 1.8032559824415606, | |
| "grad_norm": 2.1115000247955322, | |
| "learning_rate": 0.00040095211255547155, | |
| "loss": 5.3467, | |
| "step": 338500 | |
| }, | |
| { | |
| "epoch": 1.8059195806430992, | |
| "grad_norm": 2.0157132148742676, | |
| "learning_rate": 0.000400060287381744, | |
| "loss": 5.3462, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 1.8085831788446376, | |
| "grad_norm": 2.1384365558624268, | |
| "learning_rate": 0.0003991684622080165, | |
| "loss": 5.3381, | |
| "step": 339500 | |
| }, | |
| { | |
| "epoch": 1.811246777046176, | |
| "grad_norm": 2.016707420349121, | |
| "learning_rate": 0.0003982766370342889, | |
| "loss": 5.3424, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 1.8139103752477146, | |
| "grad_norm": 1.9890104532241821, | |
| "learning_rate": 0.00039738481186056134, | |
| "loss": 5.3459, | |
| "step": 340500 | |
| }, | |
| { | |
| "epoch": 1.8165739734492532, | |
| "grad_norm": 1.997981309890747, | |
| "learning_rate": 0.0003964947703371813, | |
| "loss": 5.3415, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 1.8192375716507916, | |
| "grad_norm": 2.077340602874756, | |
| "learning_rate": 0.00039560294516345374, | |
| "loss": 5.3401, | |
| "step": 341500 | |
| }, | |
| { | |
| "epoch": 1.82190116985233, | |
| "grad_norm": 1.9495571851730347, | |
| "learning_rate": 0.00039471111998972617, | |
| "loss": 5.3461, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 1.8245647680538686, | |
| "grad_norm": 2.086167097091675, | |
| "learning_rate": 0.00039381929481599866, | |
| "loss": 5.3457, | |
| "step": 342500 | |
| }, | |
| { | |
| "epoch": 1.8272283662554072, | |
| "grad_norm": 1.9157156944274902, | |
| "learning_rate": 0.0003929274696422711, | |
| "loss": 5.3374, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 1.8298919644569456, | |
| "grad_norm": 2.2283830642700195, | |
| "learning_rate": 0.0003920356444685436, | |
| "loss": 5.3403, | |
| "step": 343500 | |
| }, | |
| { | |
| "epoch": 1.832555562658484, | |
| "grad_norm": 2.155780553817749, | |
| "learning_rate": 0.00039114560294516344, | |
| "loss": 5.3403, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 1.8352191608600226, | |
| "grad_norm": 2.0122015476226807, | |
| "learning_rate": 0.00039025377777143587, | |
| "loss": 5.3485, | |
| "step": 344500 | |
| }, | |
| { | |
| "epoch": 1.8378827590615612, | |
| "grad_norm": 2.1252944469451904, | |
| "learning_rate": 0.00038936195259770836, | |
| "loss": 5.3534, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 1.8405463572630996, | |
| "grad_norm": 2.16573166847229, | |
| "learning_rate": 0.00038847012742398084, | |
| "loss": 5.3407, | |
| "step": 345500 | |
| }, | |
| { | |
| "epoch": 1.843209955464638, | |
| "grad_norm": 2.043785810470581, | |
| "learning_rate": 0.0003875800859006007, | |
| "loss": 5.3441, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 1.8458735536661766, | |
| "grad_norm": 2.0578818321228027, | |
| "learning_rate": 0.0003866882607268732, | |
| "loss": 5.344, | |
| "step": 346500 | |
| }, | |
| { | |
| "epoch": 1.8485371518677152, | |
| "grad_norm": 2.344649076461792, | |
| "learning_rate": 0.0003857964355531456, | |
| "loss": 5.3401, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 1.8512007500692536, | |
| "grad_norm": 2.2246205806732178, | |
| "learning_rate": 0.0003849046103794181, | |
| "loss": 5.3474, | |
| "step": 347500 | |
| }, | |
| { | |
| "epoch": 1.853864348270792, | |
| "grad_norm": 2.3041775226593018, | |
| "learning_rate": 0.00038401278520569054, | |
| "loss": 5.3403, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 1.8565279464723305, | |
| "grad_norm": 2.0579144954681396, | |
| "learning_rate": 0.00038312096003196303, | |
| "loss": 5.3388, | |
| "step": 348500 | |
| }, | |
| { | |
| "epoch": 1.8591915446738692, | |
| "grad_norm": 2.1944098472595215, | |
| "learning_rate": 0.00038223091850858294, | |
| "loss": 5.3412, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 1.8618551428754075, | |
| "grad_norm": 2.0834217071533203, | |
| "learning_rate": 0.0003813390933348554, | |
| "loss": 5.3465, | |
| "step": 349500 | |
| }, | |
| { | |
| "epoch": 1.864518741076946, | |
| "grad_norm": 1.9777040481567383, | |
| "learning_rate": 0.00038044726816112786, | |
| "loss": 5.3394, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 1.8671823392784845, | |
| "grad_norm": 2.341625690460205, | |
| "learning_rate": 0.0003795554429874003, | |
| "loss": 5.3414, | |
| "step": 350500 | |
| }, | |
| { | |
| "epoch": 1.8698459374800231, | |
| "grad_norm": 1.9645224809646606, | |
| "learning_rate": 0.0003786636178136728, | |
| "loss": 5.3429, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 1.8725095356815615, | |
| "grad_norm": 2.217845916748047, | |
| "learning_rate": 0.0003777717926399452, | |
| "loss": 5.3485, | |
| "step": 351500 | |
| }, | |
| { | |
| "epoch": 1.8751731338831, | |
| "grad_norm": 2.2836930751800537, | |
| "learning_rate": 0.00037687996746621765, | |
| "loss": 5.3369, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 1.8778367320846385, | |
| "grad_norm": 2.1809890270233154, | |
| "learning_rate": 0.00037598814229249013, | |
| "loss": 5.3375, | |
| "step": 352500 | |
| }, | |
| { | |
| "epoch": 1.8805003302861771, | |
| "grad_norm": 2.4111125469207764, | |
| "learning_rate": 0.00037509810076911005, | |
| "loss": 5.3453, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 1.8831639284877155, | |
| "grad_norm": 2.264157295227051, | |
| "learning_rate": 0.0003742062755953825, | |
| "loss": 5.3412, | |
| "step": 353500 | |
| }, | |
| { | |
| "epoch": 1.885827526689254, | |
| "grad_norm": 2.232529878616333, | |
| "learning_rate": 0.00037331445042165497, | |
| "loss": 5.3481, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 1.8884911248907925, | |
| "grad_norm": 2.0301549434661865, | |
| "learning_rate": 0.00037242440889827483, | |
| "loss": 5.3351, | |
| "step": 354500 | |
| }, | |
| { | |
| "epoch": 1.8911547230923311, | |
| "grad_norm": 2.040621757507324, | |
| "learning_rate": 0.0003715325837245473, | |
| "loss": 5.3442, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 1.8938183212938695, | |
| "grad_norm": 2.085535764694214, | |
| "learning_rate": 0.0003706407585508198, | |
| "loss": 5.3302, | |
| "step": 355500 | |
| }, | |
| { | |
| "epoch": 1.896481919495408, | |
| "grad_norm": 2.1077394485473633, | |
| "learning_rate": 0.00036974893337709223, | |
| "loss": 5.3383, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 1.8991455176969465, | |
| "grad_norm": 2.242241621017456, | |
| "learning_rate": 0.0003688571082033647, | |
| "loss": 5.3315, | |
| "step": 356500 | |
| }, | |
| { | |
| "epoch": 1.901809115898485, | |
| "grad_norm": 2.2890877723693848, | |
| "learning_rate": 0.00036796528302963715, | |
| "loss": 5.3378, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 1.9044727141000233, | |
| "grad_norm": 2.3517234325408936, | |
| "learning_rate": 0.000367075241506257, | |
| "loss": 5.3369, | |
| "step": 357500 | |
| }, | |
| { | |
| "epoch": 1.9071363123015619, | |
| "grad_norm": 2.3767483234405518, | |
| "learning_rate": 0.0003661834163325295, | |
| "loss": 5.3365, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 1.9097999105031005, | |
| "grad_norm": 2.2238335609436035, | |
| "learning_rate": 0.00036529159115880193, | |
| "loss": 5.3353, | |
| "step": 358500 | |
| }, | |
| { | |
| "epoch": 1.9124635087046389, | |
| "grad_norm": 2.0594356060028076, | |
| "learning_rate": 0.0003643997659850744, | |
| "loss": 5.3346, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 1.9151271069061773, | |
| "grad_norm": 2.1106550693511963, | |
| "learning_rate": 0.00036350794081134685, | |
| "loss": 5.3317, | |
| "step": 359500 | |
| }, | |
| { | |
| "epoch": 1.9177907051077159, | |
| "grad_norm": 2.0819623470306396, | |
| "learning_rate": 0.00036261611563761934, | |
| "loss": 5.332, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 1.9204543033092545, | |
| "grad_norm": 1.9421486854553223, | |
| "learning_rate": 0.00036172607411423925, | |
| "loss": 5.3425, | |
| "step": 360500 | |
| }, | |
| { | |
| "epoch": 1.9231179015107929, | |
| "grad_norm": 2.304370641708374, | |
| "learning_rate": 0.0003608342489405117, | |
| "loss": 5.3278, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 1.9257814997123313, | |
| "grad_norm": 1.9409058094024658, | |
| "learning_rate": 0.00035994242376678417, | |
| "loss": 5.3364, | |
| "step": 361500 | |
| }, | |
| { | |
| "epoch": 1.9284450979138699, | |
| "grad_norm": 2.199068307876587, | |
| "learning_rate": 0.0003590505985930566, | |
| "loss": 5.3375, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 1.9311086961154085, | |
| "grad_norm": 2.4809699058532715, | |
| "learning_rate": 0.0003581587734193291, | |
| "loss": 5.3304, | |
| "step": 362500 | |
| }, | |
| { | |
| "epoch": 1.9337722943169469, | |
| "grad_norm": 1.8762375116348267, | |
| "learning_rate": 0.000357268731895949, | |
| "loss": 5.3396, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 1.9364358925184852, | |
| "grad_norm": 2.14876651763916, | |
| "learning_rate": 0.00035637690672222144, | |
| "loss": 5.3295, | |
| "step": 363500 | |
| }, | |
| { | |
| "epoch": 1.9390994907200239, | |
| "grad_norm": 2.0710737705230713, | |
| "learning_rate": 0.0003554850815484939, | |
| "loss": 5.3319, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 1.9417630889215625, | |
| "grad_norm": 2.1879022121429443, | |
| "learning_rate": 0.00035459325637476636, | |
| "loss": 5.3353, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 1.9444266871231008, | |
| "grad_norm": 2.2101471424102783, | |
| "learning_rate": 0.0003537014312010388, | |
| "loss": 5.3365, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 1.9470902853246392, | |
| "grad_norm": 2.1538619995117188, | |
| "learning_rate": 0.0003528113896776587, | |
| "loss": 5.3345, | |
| "step": 365500 | |
| }, | |
| { | |
| "epoch": 1.9497538835261778, | |
| "grad_norm": 2.3958141803741455, | |
| "learning_rate": 0.0003519195645039312, | |
| "loss": 5.3298, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 1.9524174817277165, | |
| "grad_norm": 2.2059667110443115, | |
| "learning_rate": 0.0003510277393302037, | |
| "loss": 5.3228, | |
| "step": 366500 | |
| }, | |
| { | |
| "epoch": 1.9550810799292548, | |
| "grad_norm": 2.0048577785491943, | |
| "learning_rate": 0.0003501359141564761, | |
| "loss": 5.3336, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 1.9577446781307932, | |
| "grad_norm": 2.0165789127349854, | |
| "learning_rate": 0.00034924408898274854, | |
| "loss": 5.3342, | |
| "step": 367500 | |
| }, | |
| { | |
| "epoch": 1.9604082763323318, | |
| "grad_norm": 2.2053885459899902, | |
| "learning_rate": 0.00034835226380902103, | |
| "loss": 5.3359, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 1.9630718745338704, | |
| "grad_norm": 2.316288948059082, | |
| "learning_rate": 0.0003474622222856409, | |
| "loss": 5.3344, | |
| "step": 368500 | |
| }, | |
| { | |
| "epoch": 1.9657354727354088, | |
| "grad_norm": 2.385871410369873, | |
| "learning_rate": 0.0003465703971119133, | |
| "loss": 5.3364, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 1.9683990709369472, | |
| "grad_norm": 2.3206396102905273, | |
| "learning_rate": 0.0003456785719381858, | |
| "loss": 5.3309, | |
| "step": 369500 | |
| }, | |
| { | |
| "epoch": 1.9710626691384858, | |
| "grad_norm": 2.172229766845703, | |
| "learning_rate": 0.00034478674676445824, | |
| "loss": 5.3338, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 1.9737262673400244, | |
| "grad_norm": 2.3812954425811768, | |
| "learning_rate": 0.0003438967052410782, | |
| "loss": 5.3306, | |
| "step": 370500 | |
| }, | |
| { | |
| "epoch": 1.9763898655415628, | |
| "grad_norm": 2.1423757076263428, | |
| "learning_rate": 0.00034300488006735064, | |
| "loss": 5.3406, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 1.9790534637431012, | |
| "grad_norm": 2.2044973373413086, | |
| "learning_rate": 0.0003421130548936231, | |
| "loss": 5.3371, | |
| "step": 371500 | |
| }, | |
| { | |
| "epoch": 1.9817170619446398, | |
| "grad_norm": 1.944014549255371, | |
| "learning_rate": 0.00034122122971989556, | |
| "loss": 5.3348, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 1.9843806601461784, | |
| "grad_norm": 2.3091371059417725, | |
| "learning_rate": 0.000340329404546168, | |
| "loss": 5.3283, | |
| "step": 372500 | |
| }, | |
| { | |
| "epoch": 1.9870442583477168, | |
| "grad_norm": 2.600417137145996, | |
| "learning_rate": 0.0003394375793724405, | |
| "loss": 5.3292, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 1.9897078565492552, | |
| "grad_norm": 2.0236728191375732, | |
| "learning_rate": 0.0003385457541987129, | |
| "loss": 5.3353, | |
| "step": 373500 | |
| }, | |
| { | |
| "epoch": 1.9923714547507938, | |
| "grad_norm": 2.298342227935791, | |
| "learning_rate": 0.00033765392902498535, | |
| "loss": 5.3355, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 1.9950350529523324, | |
| "grad_norm": 1.945620059967041, | |
| "learning_rate": 0.0003367638875016053, | |
| "loss": 5.3302, | |
| "step": 374500 | |
| }, | |
| { | |
| "epoch": 1.9976986511538706, | |
| "grad_norm": 2.1642651557922363, | |
| "learning_rate": 0.0003358738459782252, | |
| "loss": 5.3259, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 2.000362249355409, | |
| "grad_norm": 2.149771213531494, | |
| "learning_rate": 0.0003349820208044976, | |
| "loss": 5.3347, | |
| "step": 375500 | |
| }, | |
| { | |
| "epoch": 2.003025847556948, | |
| "grad_norm": 2.2164316177368164, | |
| "learning_rate": 0.0003340901956307701, | |
| "loss": 5.3308, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 2.0056894457584864, | |
| "grad_norm": 2.2055323123931885, | |
| "learning_rate": 0.0003331983704570426, | |
| "loss": 5.332, | |
| "step": 376500 | |
| }, | |
| { | |
| "epoch": 2.0083530439600246, | |
| "grad_norm": 2.1814560890197754, | |
| "learning_rate": 0.00033230654528331507, | |
| "loss": 5.3239, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 2.011016642161563, | |
| "grad_norm": 2.1237363815307617, | |
| "learning_rate": 0.0003314147201095875, | |
| "loss": 5.3364, | |
| "step": 377500 | |
| }, | |
| { | |
| "epoch": 2.013680240363102, | |
| "grad_norm": 2.1073851585388184, | |
| "learning_rate": 0.00033052467858620736, | |
| "loss": 5.3209, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 2.0163438385646404, | |
| "grad_norm": 1.9759477376937866, | |
| "learning_rate": 0.00032963285341247985, | |
| "loss": 5.3272, | |
| "step": 378500 | |
| }, | |
| { | |
| "epoch": 2.0190074367661786, | |
| "grad_norm": 2.100966691970825, | |
| "learning_rate": 0.0003287410282387523, | |
| "loss": 5.3226, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 2.021671034967717, | |
| "grad_norm": 2.141537666320801, | |
| "learning_rate": 0.00032784920306502477, | |
| "loss": 5.3305, | |
| "step": 379500 | |
| }, | |
| { | |
| "epoch": 2.0243346331692558, | |
| "grad_norm": 2.2714550495147705, | |
| "learning_rate": 0.0003269573778912972, | |
| "loss": 5.3335, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 2.0269982313707944, | |
| "grad_norm": 2.1945018768310547, | |
| "learning_rate": 0.0003260673363679171, | |
| "loss": 5.3267, | |
| "step": 380500 | |
| }, | |
| { | |
| "epoch": 2.0296618295723325, | |
| "grad_norm": 2.269015312194824, | |
| "learning_rate": 0.0003251755111941896, | |
| "loss": 5.3346, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 2.032325427773871, | |
| "grad_norm": 2.194460391998291, | |
| "learning_rate": 0.00032428368602046203, | |
| "loss": 5.3216, | |
| "step": 381500 | |
| }, | |
| { | |
| "epoch": 2.0349890259754098, | |
| "grad_norm": 2.1248984336853027, | |
| "learning_rate": 0.0003233918608467345, | |
| "loss": 5.3294, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 2.0376526241769484, | |
| "grad_norm": 2.213801622390747, | |
| "learning_rate": 0.00032250003567300695, | |
| "loss": 5.3282, | |
| "step": 382500 | |
| }, | |
| { | |
| "epoch": 2.0403162223784865, | |
| "grad_norm": 2.0801334381103516, | |
| "learning_rate": 0.0003216082104992794, | |
| "loss": 5.3293, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 2.042979820580025, | |
| "grad_norm": 2.191882371902466, | |
| "learning_rate": 0.00032071816897589935, | |
| "loss": 5.3297, | |
| "step": 383500 | |
| }, | |
| { | |
| "epoch": 2.0456434187815637, | |
| "grad_norm": 2.238471031188965, | |
| "learning_rate": 0.0003198263438021718, | |
| "loss": 5.3274, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 2.0483070169831024, | |
| "grad_norm": 2.0454585552215576, | |
| "learning_rate": 0.0003189345186284443, | |
| "loss": 5.3335, | |
| "step": 384500 | |
| }, | |
| { | |
| "epoch": 2.0509706151846405, | |
| "grad_norm": 2.449857473373413, | |
| "learning_rate": 0.0003180426934547167, | |
| "loss": 5.3243, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 2.053634213386179, | |
| "grad_norm": 2.182969331741333, | |
| "learning_rate": 0.00031715265193133657, | |
| "loss": 5.3239, | |
| "step": 385500 | |
| }, | |
| { | |
| "epoch": 2.0562978115877177, | |
| "grad_norm": 2.3800108432769775, | |
| "learning_rate": 0.00031626082675760905, | |
| "loss": 5.3263, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 2.058961409789256, | |
| "grad_norm": 2.4917428493499756, | |
| "learning_rate": 0.0003153690015838815, | |
| "loss": 5.3252, | |
| "step": 386500 | |
| }, | |
| { | |
| "epoch": 2.0616250079907945, | |
| "grad_norm": 2.25253963470459, | |
| "learning_rate": 0.00031447717641015397, | |
| "loss": 5.3323, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 2.064288606192333, | |
| "grad_norm": 2.1959807872772217, | |
| "learning_rate": 0.00031358535123642646, | |
| "loss": 5.3257, | |
| "step": 387500 | |
| }, | |
| { | |
| "epoch": 2.0669522043938717, | |
| "grad_norm": 2.202449321746826, | |
| "learning_rate": 0.0003126935260626989, | |
| "loss": 5.3256, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 2.06961580259541, | |
| "grad_norm": 2.093303918838501, | |
| "learning_rate": 0.0003118017008889714, | |
| "loss": 5.3302, | |
| "step": 388500 | |
| }, | |
| { | |
| "epoch": 2.0722794007969485, | |
| "grad_norm": 2.139282464981079, | |
| "learning_rate": 0.0003109098757152438, | |
| "loss": 5.3298, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 2.074942998998487, | |
| "grad_norm": 2.004852533340454, | |
| "learning_rate": 0.00031001983419186367, | |
| "loss": 5.3329, | |
| "step": 389500 | |
| }, | |
| { | |
| "epoch": 2.0776065972000257, | |
| "grad_norm": 2.385274648666382, | |
| "learning_rate": 0.00030912800901813616, | |
| "loss": 5.3266, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 2.080270195401564, | |
| "grad_norm": 2.218735456466675, | |
| "learning_rate": 0.0003082361838444086, | |
| "loss": 5.329, | |
| "step": 390500 | |
| }, | |
| { | |
| "epoch": 2.0829337936031025, | |
| "grad_norm": 2.271380662918091, | |
| "learning_rate": 0.0003073443586706811, | |
| "loss": 5.3239, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 2.085597391804641, | |
| "grad_norm": 2.526583433151245, | |
| "learning_rate": 0.000306454317147301, | |
| "loss": 5.3287, | |
| "step": 391500 | |
| }, | |
| { | |
| "epoch": 2.0882609900061797, | |
| "grad_norm": 2.1075544357299805, | |
| "learning_rate": 0.0003055624919735734, | |
| "loss": 5.3264, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 2.090924588207718, | |
| "grad_norm": 2.0297112464904785, | |
| "learning_rate": 0.0003046706667998459, | |
| "loss": 5.3279, | |
| "step": 392500 | |
| }, | |
| { | |
| "epoch": 2.0935881864092565, | |
| "grad_norm": 2.0166475772857666, | |
| "learning_rate": 0.00030377884162611834, | |
| "loss": 5.3279, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 2.096251784610795, | |
| "grad_norm": 2.398573398590088, | |
| "learning_rate": 0.00030288880010273826, | |
| "loss": 5.325, | |
| "step": 393500 | |
| }, | |
| { | |
| "epoch": 2.0989153828123337, | |
| "grad_norm": 2.2096564769744873, | |
| "learning_rate": 0.00030199697492901075, | |
| "loss": 5.3241, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 2.101578981013872, | |
| "grad_norm": 2.2474560737609863, | |
| "learning_rate": 0.0003011051497552832, | |
| "loss": 5.3232, | |
| "step": 394500 | |
| }, | |
| { | |
| "epoch": 2.1042425792154105, | |
| "grad_norm": 2.2487635612487793, | |
| "learning_rate": 0.00030021332458155566, | |
| "loss": 5.3191, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 2.106906177416949, | |
| "grad_norm": 2.094921112060547, | |
| "learning_rate": 0.0002993214994078281, | |
| "loss": 5.3354, | |
| "step": 395500 | |
| }, | |
| { | |
| "epoch": 2.1095697756184877, | |
| "grad_norm": 2.2288858890533447, | |
| "learning_rate": 0.00029843145788444796, | |
| "loss": 5.3254, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 2.112233373820026, | |
| "grad_norm": 2.166731595993042, | |
| "learning_rate": 0.00029753963271072044, | |
| "loss": 5.3239, | |
| "step": 396500 | |
| }, | |
| { | |
| "epoch": 2.1148969720215645, | |
| "grad_norm": 2.05653715133667, | |
| "learning_rate": 0.00029664780753699293, | |
| "loss": 5.3305, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 2.117560570223103, | |
| "grad_norm": 2.08963942527771, | |
| "learning_rate": 0.0002957559823632654, | |
| "loss": 5.3255, | |
| "step": 397500 | |
| }, | |
| { | |
| "epoch": 2.1202241684246417, | |
| "grad_norm": 2.268559217453003, | |
| "learning_rate": 0.0002948659408398853, | |
| "loss": 5.3238, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 2.12288776662618, | |
| "grad_norm": 2.9195141792297363, | |
| "learning_rate": 0.0002939741156661577, | |
| "loss": 5.3211, | |
| "step": 398500 | |
| }, | |
| { | |
| "epoch": 2.1255513648277184, | |
| "grad_norm": 2.2552900314331055, | |
| "learning_rate": 0.0002930822904924302, | |
| "loss": 5.3251, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 2.128214963029257, | |
| "grad_norm": 2.294832706451416, | |
| "learning_rate": 0.00029219046531870263, | |
| "loss": 5.32, | |
| "step": 399500 | |
| }, | |
| { | |
| "epoch": 2.1308785612307957, | |
| "grad_norm": 2.3486320972442627, | |
| "learning_rate": 0.0002912986401449751, | |
| "loss": 5.3197, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 2.133542159432334, | |
| "grad_norm": 2.497387647628784, | |
| "learning_rate": 0.00029040681497124755, | |
| "loss": 5.3235, | |
| "step": 400500 | |
| }, | |
| { | |
| "epoch": 2.1362057576338724, | |
| "grad_norm": 2.3829433917999268, | |
| "learning_rate": 0.00028951498979752, | |
| "loss": 5.3145, | |
| "step": 401000 | |
| }, | |
| { | |
| "epoch": 2.138869355835411, | |
| "grad_norm": 2.064811944961548, | |
| "learning_rate": 0.00028862316462379247, | |
| "loss": 5.3168, | |
| "step": 401500 | |
| }, | |
| { | |
| "epoch": 2.1415329540369497, | |
| "grad_norm": 2.194028377532959, | |
| "learning_rate": 0.0002877331231004124, | |
| "loss": 5.3221, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 2.144196552238488, | |
| "grad_norm": 2.1182937622070312, | |
| "learning_rate": 0.0002868412979266848, | |
| "loss": 5.321, | |
| "step": 402500 | |
| }, | |
| { | |
| "epoch": 2.1468601504400264, | |
| "grad_norm": 2.3992223739624023, | |
| "learning_rate": 0.0002859494727529573, | |
| "loss": 5.3237, | |
| "step": 403000 | |
| }, | |
| { | |
| "epoch": 2.149523748641565, | |
| "grad_norm": 2.256955623626709, | |
| "learning_rate": 0.00028505764757922973, | |
| "loss": 5.3144, | |
| "step": 403500 | |
| }, | |
| { | |
| "epoch": 2.152187346843103, | |
| "grad_norm": 2.3727059364318848, | |
| "learning_rate": 0.0002841658224055022, | |
| "loss": 5.3238, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 2.154850945044642, | |
| "grad_norm": 2.1184160709381104, | |
| "learning_rate": 0.00028327399723177465, | |
| "loss": 5.3196, | |
| "step": 404500 | |
| }, | |
| { | |
| "epoch": 2.1575145432461804, | |
| "grad_norm": 2.1502108573913574, | |
| "learning_rate": 0.00028238217205804714, | |
| "loss": 5.3141, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 2.160178141447719, | |
| "grad_norm": 2.176964521408081, | |
| "learning_rate": 0.00028149034688431957, | |
| "loss": 5.3187, | |
| "step": 405500 | |
| }, | |
| { | |
| "epoch": 2.162841739649257, | |
| "grad_norm": 2.144890069961548, | |
| "learning_rate": 0.0002806003053609395, | |
| "loss": 5.3199, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 2.165505337850796, | |
| "grad_norm": 2.17976975440979, | |
| "learning_rate": 0.000279708480187212, | |
| "loss": 5.318, | |
| "step": 406500 | |
| }, | |
| { | |
| "epoch": 2.1681689360523344, | |
| "grad_norm": 2.181568145751953, | |
| "learning_rate": 0.0002788166550134844, | |
| "loss": 5.3214, | |
| "step": 407000 | |
| }, | |
| { | |
| "epoch": 2.170832534253873, | |
| "grad_norm": 2.299090623855591, | |
| "learning_rate": 0.0002779248298397569, | |
| "loss": 5.3225, | |
| "step": 407500 | |
| }, | |
| { | |
| "epoch": 2.173496132455411, | |
| "grad_norm": 2.189419746398926, | |
| "learning_rate": 0.0002770347883163768, | |
| "loss": 5.3193, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 2.17615973065695, | |
| "grad_norm": 2.274648904800415, | |
| "learning_rate": 0.00027614296314264924, | |
| "loss": 5.3218, | |
| "step": 408500 | |
| }, | |
| { | |
| "epoch": 2.1788233288584884, | |
| "grad_norm": 2.1534972190856934, | |
| "learning_rate": 0.0002752511379689217, | |
| "loss": 5.3173, | |
| "step": 409000 | |
| }, | |
| { | |
| "epoch": 2.181486927060027, | |
| "grad_norm": 2.3284084796905518, | |
| "learning_rate": 0.00027435931279519416, | |
| "loss": 5.3126, | |
| "step": 409500 | |
| }, | |
| { | |
| "epoch": 2.184150525261565, | |
| "grad_norm": 2.286384344100952, | |
| "learning_rate": 0.0002734674876214666, | |
| "loss": 5.3232, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 2.1868141234631038, | |
| "grad_norm": 2.111091375350952, | |
| "learning_rate": 0.0002725774460980865, | |
| "loss": 5.3163, | |
| "step": 410500 | |
| }, | |
| { | |
| "epoch": 2.1894777216646424, | |
| "grad_norm": 2.361741304397583, | |
| "learning_rate": 0.00027168562092435894, | |
| "loss": 5.3212, | |
| "step": 411000 | |
| }, | |
| { | |
| "epoch": 2.192141319866181, | |
| "grad_norm": 2.497840642929077, | |
| "learning_rate": 0.0002707937957506314, | |
| "loss": 5.3238, | |
| "step": 411500 | |
| }, | |
| { | |
| "epoch": 2.194804918067719, | |
| "grad_norm": 2.227203607559204, | |
| "learning_rate": 0.00026990197057690386, | |
| "loss": 5.323, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 2.1974685162692578, | |
| "grad_norm": 2.2768001556396484, | |
| "learning_rate": 0.00026901192905352377, | |
| "loss": 5.3182, | |
| "step": 412500 | |
| }, | |
| { | |
| "epoch": 2.2001321144707964, | |
| "grad_norm": 2.157787799835205, | |
| "learning_rate": 0.00026812010387979626, | |
| "loss": 5.3246, | |
| "step": 413000 | |
| }, | |
| { | |
| "epoch": 2.202795712672335, | |
| "grad_norm": 2.3759965896606445, | |
| "learning_rate": 0.0002672282787060687, | |
| "loss": 5.3207, | |
| "step": 413500 | |
| }, | |
| { | |
| "epoch": 2.205459310873873, | |
| "grad_norm": 2.210963487625122, | |
| "learning_rate": 0.0002663364535323411, | |
| "loss": 5.3155, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 2.2081229090754118, | |
| "grad_norm": 2.265197277069092, | |
| "learning_rate": 0.0002654446283586136, | |
| "loss": 5.3194, | |
| "step": 414500 | |
| }, | |
| { | |
| "epoch": 2.2107865072769504, | |
| "grad_norm": 2.110173225402832, | |
| "learning_rate": 0.0002645545868352335, | |
| "loss": 5.3144, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 2.213450105478489, | |
| "grad_norm": 2.235196590423584, | |
| "learning_rate": 0.000263662761661506, | |
| "loss": 5.323, | |
| "step": 415500 | |
| }, | |
| { | |
| "epoch": 2.216113703680027, | |
| "grad_norm": 2.305601119995117, | |
| "learning_rate": 0.00026277093648777844, | |
| "loss": 5.3187, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 2.2187773018815657, | |
| "grad_norm": 2.401959180831909, | |
| "learning_rate": 0.0002618791113140509, | |
| "loss": 5.3175, | |
| "step": 416500 | |
| }, | |
| { | |
| "epoch": 2.2214409000831044, | |
| "grad_norm": 2.163121223449707, | |
| "learning_rate": 0.0002609890697906708, | |
| "loss": 5.3169, | |
| "step": 417000 | |
| }, | |
| { | |
| "epoch": 2.224104498284643, | |
| "grad_norm": 2.265998363494873, | |
| "learning_rate": 0.0002600972446169432, | |
| "loss": 5.3173, | |
| "step": 417500 | |
| }, | |
| { | |
| "epoch": 2.226768096486181, | |
| "grad_norm": 2.236154317855835, | |
| "learning_rate": 0.00025920541944321577, | |
| "loss": 5.3167, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 2.2294316946877197, | |
| "grad_norm": 2.1707651615142822, | |
| "learning_rate": 0.0002583135942694882, | |
| "loss": 5.3184, | |
| "step": 418500 | |
| }, | |
| { | |
| "epoch": 2.2320952928892583, | |
| "grad_norm": 2.121073007583618, | |
| "learning_rate": 0.00025742355274610806, | |
| "loss": 5.3171, | |
| "step": 419000 | |
| }, | |
| { | |
| "epoch": 2.234758891090797, | |
| "grad_norm": 2.2292840480804443, | |
| "learning_rate": 0.00025653172757238055, | |
| "loss": 5.3185, | |
| "step": 419500 | |
| }, | |
| { | |
| "epoch": 2.237422489292335, | |
| "grad_norm": 2.2376914024353027, | |
| "learning_rate": 0.000255639902398653, | |
| "loss": 5.3143, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 2.2400860874938737, | |
| "grad_norm": 2.2844974994659424, | |
| "learning_rate": 0.0002547480772249254, | |
| "loss": 5.3039, | |
| "step": 420500 | |
| }, | |
| { | |
| "epoch": 2.2427496856954123, | |
| "grad_norm": 2.278136968612671, | |
| "learning_rate": 0.0002538562520511979, | |
| "loss": 5.3159, | |
| "step": 421000 | |
| }, | |
| { | |
| "epoch": 2.2454132838969505, | |
| "grad_norm": 2.3182220458984375, | |
| "learning_rate": 0.00025296442687747033, | |
| "loss": 5.319, | |
| "step": 421500 | |
| }, | |
| { | |
| "epoch": 2.248076882098489, | |
| "grad_norm": 2.5095927715301514, | |
| "learning_rate": 0.0002520743853540903, | |
| "loss": 5.3174, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 2.2507404803000277, | |
| "grad_norm": 2.3167264461517334, | |
| "learning_rate": 0.00025118256018036273, | |
| "loss": 5.3131, | |
| "step": 422500 | |
| }, | |
| { | |
| "epoch": 2.2534040785015663, | |
| "grad_norm": 2.211766481399536, | |
| "learning_rate": 0.00025029073500663516, | |
| "loss": 5.325, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 2.256067676703105, | |
| "grad_norm": 2.1502010822296143, | |
| "learning_rate": 0.00024939890983290765, | |
| "loss": 5.3139, | |
| "step": 423500 | |
| }, | |
| { | |
| "epoch": 2.258731274904643, | |
| "grad_norm": 2.1429567337036133, | |
| "learning_rate": 0.00024850886830952756, | |
| "loss": 5.3147, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 2.2613948731061817, | |
| "grad_norm": 2.272367238998413, | |
| "learning_rate": 0.0002476170431358, | |
| "loss": 5.3128, | |
| "step": 424500 | |
| }, | |
| { | |
| "epoch": 2.2640584713077203, | |
| "grad_norm": 2.6372079849243164, | |
| "learning_rate": 0.00024672521796207243, | |
| "loss": 5.3134, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 2.2667220695092585, | |
| "grad_norm": 2.4213263988494873, | |
| "learning_rate": 0.0002458333927883449, | |
| "loss": 5.3142, | |
| "step": 425500 | |
| }, | |
| { | |
| "epoch": 2.269385667710797, | |
| "grad_norm": 2.2919113636016846, | |
| "learning_rate": 0.0002449415676146174, | |
| "loss": 5.3199, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 2.2720492659123357, | |
| "grad_norm": 2.1887030601501465, | |
| "learning_rate": 0.0002440515260912373, | |
| "loss": 5.3168, | |
| "step": 426500 | |
| }, | |
| { | |
| "epoch": 2.2747128641138743, | |
| "grad_norm": 2.2401158809661865, | |
| "learning_rate": 0.00024315970091750975, | |
| "loss": 5.3142, | |
| "step": 427000 | |
| }, | |
| { | |
| "epoch": 2.2773764623154125, | |
| "grad_norm": 2.264155864715576, | |
| "learning_rate": 0.0002422678757437822, | |
| "loss": 5.3063, | |
| "step": 427500 | |
| }, | |
| { | |
| "epoch": 2.280040060516951, | |
| "grad_norm": 2.372823476791382, | |
| "learning_rate": 0.00024137605057005467, | |
| "loss": 5.3146, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 2.2827036587184897, | |
| "grad_norm": 2.5441572666168213, | |
| "learning_rate": 0.00024048600904667456, | |
| "loss": 5.3129, | |
| "step": 428500 | |
| }, | |
| { | |
| "epoch": 2.2853672569200283, | |
| "grad_norm": 2.107741594314575, | |
| "learning_rate": 0.00023959418387294702, | |
| "loss": 5.3112, | |
| "step": 429000 | |
| }, | |
| { | |
| "epoch": 2.2880308551215665, | |
| "grad_norm": 2.1812095642089844, | |
| "learning_rate": 0.00023870235869921948, | |
| "loss": 5.3144, | |
| "step": 429500 | |
| }, | |
| { | |
| "epoch": 2.290694453323105, | |
| "grad_norm": 2.3959500789642334, | |
| "learning_rate": 0.00023781053352549194, | |
| "loss": 5.3108, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 2.2933580515246437, | |
| "grad_norm": 2.3315865993499756, | |
| "learning_rate": 0.00023691870835176437, | |
| "loss": 5.3093, | |
| "step": 430500 | |
| }, | |
| { | |
| "epoch": 2.2960216497261823, | |
| "grad_norm": 2.0199296474456787, | |
| "learning_rate": 0.00023602688317803685, | |
| "loss": 5.3209, | |
| "step": 431000 | |
| }, | |
| { | |
| "epoch": 2.2986852479277204, | |
| "grad_norm": 2.2393200397491455, | |
| "learning_rate": 0.00023513684165465677, | |
| "loss": 5.3074, | |
| "step": 431500 | |
| }, | |
| { | |
| "epoch": 2.301348846129259, | |
| "grad_norm": 2.4474637508392334, | |
| "learning_rate": 0.00023424501648092923, | |
| "loss": 5.3158, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 2.3040124443307977, | |
| "grad_norm": 2.3248863220214844, | |
| "learning_rate": 0.00023335319130720166, | |
| "loss": 5.3157, | |
| "step": 432500 | |
| }, | |
| { | |
| "epoch": 2.3066760425323363, | |
| "grad_norm": 2.4158935546875, | |
| "learning_rate": 0.00023246136613347412, | |
| "loss": 5.3092, | |
| "step": 433000 | |
| }, | |
| { | |
| "epoch": 2.3093396407338744, | |
| "grad_norm": 2.084850549697876, | |
| "learning_rate": 0.00023156954095974658, | |
| "loss": 5.3178, | |
| "step": 433500 | |
| }, | |
| { | |
| "epoch": 2.312003238935413, | |
| "grad_norm": 2.319776773452759, | |
| "learning_rate": 0.0002306794994363665, | |
| "loss": 5.3074, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 2.3146668371369516, | |
| "grad_norm": 2.2137837409973145, | |
| "learning_rate": 0.00022978767426263893, | |
| "loss": 5.3073, | |
| "step": 434500 | |
| }, | |
| { | |
| "epoch": 2.31733043533849, | |
| "grad_norm": 2.4062960147857666, | |
| "learning_rate": 0.0002288958490889114, | |
| "loss": 5.3112, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 2.3199940335400284, | |
| "grad_norm": 2.27229380607605, | |
| "learning_rate": 0.00022800402391518385, | |
| "loss": 5.3114, | |
| "step": 435500 | |
| }, | |
| { | |
| "epoch": 2.322657631741567, | |
| "grad_norm": 2.499032974243164, | |
| "learning_rate": 0.00022711219874145633, | |
| "loss": 5.3132, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 2.3253212299431056, | |
| "grad_norm": 2.071829080581665, | |
| "learning_rate": 0.00022622215721807625, | |
| "loss": 5.3181, | |
| "step": 436500 | |
| }, | |
| { | |
| "epoch": 2.3279848281446442, | |
| "grad_norm": 2.4178686141967773, | |
| "learning_rate": 0.00022533033204434868, | |
| "loss": 5.3079, | |
| "step": 437000 | |
| }, | |
| { | |
| "epoch": 2.3306484263461824, | |
| "grad_norm": 2.431913375854492, | |
| "learning_rate": 0.00022443850687062114, | |
| "loss": 5.311, | |
| "step": 437500 | |
| }, | |
| { | |
| "epoch": 2.333312024547721, | |
| "grad_norm": 2.3519508838653564, | |
| "learning_rate": 0.0002235466816968936, | |
| "loss": 5.3149, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 2.3359756227492596, | |
| "grad_norm": 2.286878824234009, | |
| "learning_rate": 0.00022265485652316606, | |
| "loss": 5.312, | |
| "step": 438500 | |
| }, | |
| { | |
| "epoch": 2.338639220950798, | |
| "grad_norm": 2.3200433254241943, | |
| "learning_rate": 0.00022176481499978595, | |
| "loss": 5.2989, | |
| "step": 439000 | |
| }, | |
| { | |
| "epoch": 2.3413028191523364, | |
| "grad_norm": 2.165735960006714, | |
| "learning_rate": 0.0002208729898260584, | |
| "loss": 5.3169, | |
| "step": 439500 | |
| }, | |
| { | |
| "epoch": 2.343966417353875, | |
| "grad_norm": 2.0269339084625244, | |
| "learning_rate": 0.00021998116465233087, | |
| "loss": 5.3088, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 2.3466300155554136, | |
| "grad_norm": 2.2074029445648193, | |
| "learning_rate": 0.00021908933947860333, | |
| "loss": 5.3096, | |
| "step": 440500 | |
| }, | |
| { | |
| "epoch": 2.3492936137569522, | |
| "grad_norm": 2.7109835147857666, | |
| "learning_rate": 0.0002181975143048758, | |
| "loss": 5.3089, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 2.3519572119584904, | |
| "grad_norm": 2.2240071296691895, | |
| "learning_rate": 0.0002173074727814957, | |
| "loss": 5.3148, | |
| "step": 441500 | |
| }, | |
| { | |
| "epoch": 2.354620810160029, | |
| "grad_norm": 2.26788330078125, | |
| "learning_rate": 0.00021641564760776816, | |
| "loss": 5.3113, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 2.3572844083615676, | |
| "grad_norm": 2.389122486114502, | |
| "learning_rate": 0.00021552382243404062, | |
| "loss": 5.3133, | |
| "step": 442500 | |
| }, | |
| { | |
| "epoch": 2.3599480065631058, | |
| "grad_norm": 2.382267475128174, | |
| "learning_rate": 0.00021463199726031308, | |
| "loss": 5.3129, | |
| "step": 443000 | |
| }, | |
| { | |
| "epoch": 2.3626116047646444, | |
| "grad_norm": 2.411574363708496, | |
| "learning_rate": 0.00021374195573693297, | |
| "loss": 5.3022, | |
| "step": 443500 | |
| }, | |
| { | |
| "epoch": 2.365275202966183, | |
| "grad_norm": 2.348522424697876, | |
| "learning_rate": 0.00021285013056320543, | |
| "loss": 5.3137, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 2.3679388011677216, | |
| "grad_norm": 2.3230319023132324, | |
| "learning_rate": 0.00021195830538947789, | |
| "loss": 5.3059, | |
| "step": 444500 | |
| }, | |
| { | |
| "epoch": 2.3706023993692598, | |
| "grad_norm": 2.2816174030303955, | |
| "learning_rate": 0.00021106648021575035, | |
| "loss": 5.3117, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 2.3732659975707984, | |
| "grad_norm": 2.400097370147705, | |
| "learning_rate": 0.0002101746550420228, | |
| "loss": 5.3095, | |
| "step": 445500 | |
| }, | |
| { | |
| "epoch": 2.375929595772337, | |
| "grad_norm": 2.470815896987915, | |
| "learning_rate": 0.00020928461351864272, | |
| "loss": 5.3027, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 2.3785931939738756, | |
| "grad_norm": 2.1947262287139893, | |
| "learning_rate": 0.00020839278834491518, | |
| "loss": 5.3031, | |
| "step": 446500 | |
| }, | |
| { | |
| "epoch": 2.3812567921754138, | |
| "grad_norm": 2.3549935817718506, | |
| "learning_rate": 0.00020750096317118764, | |
| "loss": 5.3083, | |
| "step": 447000 | |
| }, | |
| { | |
| "epoch": 2.3839203903769524, | |
| "grad_norm": 2.457932949066162, | |
| "learning_rate": 0.0002066091379974601, | |
| "loss": 5.3052, | |
| "step": 447500 | |
| }, | |
| { | |
| "epoch": 2.386583988578491, | |
| "grad_norm": 2.2867889404296875, | |
| "learning_rate": 0.00020571909647407999, | |
| "loss": 5.3155, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 2.3892475867800296, | |
| "grad_norm": 2.061497688293457, | |
| "learning_rate": 0.00020482727130035245, | |
| "loss": 5.3087, | |
| "step": 448500 | |
| }, | |
| { | |
| "epoch": 2.3919111849815677, | |
| "grad_norm": 2.2757697105407715, | |
| "learning_rate": 0.0002039354461266249, | |
| "loss": 5.3095, | |
| "step": 449000 | |
| }, | |
| { | |
| "epoch": 2.3945747831831063, | |
| "grad_norm": 2.4835853576660156, | |
| "learning_rate": 0.00020304362095289736, | |
| "loss": 5.3091, | |
| "step": 449500 | |
| }, | |
| { | |
| "epoch": 2.397238381384645, | |
| "grad_norm": 2.2896037101745605, | |
| "learning_rate": 0.00020215357942951728, | |
| "loss": 5.3124, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 2.3999019795861836, | |
| "grad_norm": 2.31545090675354, | |
| "learning_rate": 0.00020126175425578974, | |
| "loss": 5.31, | |
| "step": 450500 | |
| }, | |
| { | |
| "epoch": 2.4025655777877217, | |
| "grad_norm": 2.296827554702759, | |
| "learning_rate": 0.0002003699290820622, | |
| "loss": 5.3027, | |
| "step": 451000 | |
| }, | |
| { | |
| "epoch": 2.4052291759892603, | |
| "grad_norm": 2.60396671295166, | |
| "learning_rate": 0.00019947810390833466, | |
| "loss": 5.312, | |
| "step": 451500 | |
| }, | |
| { | |
| "epoch": 2.407892774190799, | |
| "grad_norm": 2.500142812728882, | |
| "learning_rate": 0.00019858627873460712, | |
| "loss": 5.2995, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 2.4105563723923376, | |
| "grad_norm": 2.179241180419922, | |
| "learning_rate": 0.000197696237211227, | |
| "loss": 5.3034, | |
| "step": 452500 | |
| }, | |
| { | |
| "epoch": 2.4132199705938757, | |
| "grad_norm": 2.5400588512420654, | |
| "learning_rate": 0.00019680441203749947, | |
| "loss": 5.3074, | |
| "step": 453000 | |
| }, | |
| { | |
| "epoch": 2.4158835687954143, | |
| "grad_norm": 2.4482738971710205, | |
| "learning_rate": 0.00019591258686377192, | |
| "loss": 5.301, | |
| "step": 453500 | |
| }, | |
| { | |
| "epoch": 2.418547166996953, | |
| "grad_norm": 2.3452165126800537, | |
| "learning_rate": 0.00019502076169004438, | |
| "loss": 5.311, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 2.4212107651984915, | |
| "grad_norm": 2.1771457195281982, | |
| "learning_rate": 0.0001941307201666643, | |
| "loss": 5.3035, | |
| "step": 454500 | |
| }, | |
| { | |
| "epoch": 2.4238743634000297, | |
| "grad_norm": 2.195034980773926, | |
| "learning_rate": 0.00019323889499293676, | |
| "loss": 5.3069, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 2.4265379616015683, | |
| "grad_norm": 2.3099453449249268, | |
| "learning_rate": 0.00019234706981920922, | |
| "loss": 5.3075, | |
| "step": 455500 | |
| }, | |
| { | |
| "epoch": 2.429201559803107, | |
| "grad_norm": 2.5112428665161133, | |
| "learning_rate": 0.00019145524464548168, | |
| "loss": 5.3093, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 2.431865158004645, | |
| "grad_norm": 2.470879316329956, | |
| "learning_rate": 0.00019056520312210157, | |
| "loss": 5.3021, | |
| "step": 456500 | |
| }, | |
| { | |
| "epoch": 2.4345287562061837, | |
| "grad_norm": 2.381201982498169, | |
| "learning_rate": 0.00018967337794837403, | |
| "loss": 5.304, | |
| "step": 457000 | |
| }, | |
| { | |
| "epoch": 2.4371923544077223, | |
| "grad_norm": 2.30584454536438, | |
| "learning_rate": 0.00018878155277464648, | |
| "loss": 5.3063, | |
| "step": 457500 | |
| }, | |
| { | |
| "epoch": 2.439855952609261, | |
| "grad_norm": 2.1264095306396484, | |
| "learning_rate": 0.00018788972760091894, | |
| "loss": 5.303, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 2.4425195508107995, | |
| "grad_norm": 2.5097908973693848, | |
| "learning_rate": 0.0001869979024271914, | |
| "loss": 5.3028, | |
| "step": 458500 | |
| }, | |
| { | |
| "epoch": 2.4451831490123377, | |
| "grad_norm": 2.1753334999084473, | |
| "learning_rate": 0.00018610786090381132, | |
| "loss": 5.303, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 2.4478467472138763, | |
| "grad_norm": 2.393508195877075, | |
| "learning_rate": 0.00018521603573008378, | |
| "loss": 5.3065, | |
| "step": 459500 | |
| }, | |
| { | |
| "epoch": 2.450510345415415, | |
| "grad_norm": 2.4845023155212402, | |
| "learning_rate": 0.00018432421055635624, | |
| "loss": 5.3055, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 2.453173943616953, | |
| "grad_norm": 2.286433458328247, | |
| "learning_rate": 0.0001834323853826287, | |
| "loss": 5.3093, | |
| "step": 460500 | |
| }, | |
| { | |
| "epoch": 2.4558375418184917, | |
| "grad_norm": 2.3205184936523438, | |
| "learning_rate": 0.00018254056020890113, | |
| "loss": 5.3046, | |
| "step": 461000 | |
| }, | |
| { | |
| "epoch": 2.4585011400200303, | |
| "grad_norm": 2.2458608150482178, | |
| "learning_rate": 0.00018165051868552104, | |
| "loss": 5.3034, | |
| "step": 461500 | |
| }, | |
| { | |
| "epoch": 2.461164738221569, | |
| "grad_norm": 2.4838719367980957, | |
| "learning_rate": 0.0001807586935117935, | |
| "loss": 5.3067, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 2.463828336423107, | |
| "grad_norm": 2.363417148590088, | |
| "learning_rate": 0.00017986686833806596, | |
| "loss": 5.3075, | |
| "step": 462500 | |
| }, | |
| { | |
| "epoch": 2.4664919346246457, | |
| "grad_norm": 2.1464176177978516, | |
| "learning_rate": 0.0001789750431643384, | |
| "loss": 5.2936, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 2.4691555328261843, | |
| "grad_norm": 2.1444778442382812, | |
| "learning_rate": 0.00017808321799061086, | |
| "loss": 5.3012, | |
| "step": 463500 | |
| }, | |
| { | |
| "epoch": 2.471819131027723, | |
| "grad_norm": 2.1136202812194824, | |
| "learning_rate": 0.0001771931764672308, | |
| "loss": 5.2991, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 2.474482729229261, | |
| "grad_norm": 2.325840950012207, | |
| "learning_rate": 0.00017630135129350326, | |
| "loss": 5.3005, | |
| "step": 464500 | |
| }, | |
| { | |
| "epoch": 2.4771463274307997, | |
| "grad_norm": 2.1854569911956787, | |
| "learning_rate": 0.00017540952611977572, | |
| "loss": 5.3041, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 2.4798099256323383, | |
| "grad_norm": 2.247187614440918, | |
| "learning_rate": 0.00017451770094604815, | |
| "loss": 5.3038, | |
| "step": 465500 | |
| }, | |
| { | |
| "epoch": 2.482473523833877, | |
| "grad_norm": 2.3324661254882812, | |
| "learning_rate": 0.0001736258757723206, | |
| "loss": 5.2999, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 2.485137122035415, | |
| "grad_norm": 2.3304693698883057, | |
| "learning_rate": 0.00017273583424894052, | |
| "loss": 5.3022, | |
| "step": 466500 | |
| }, | |
| { | |
| "epoch": 2.4878007202369536, | |
| "grad_norm": 2.5459063053131104, | |
| "learning_rate": 0.00017184400907521298, | |
| "loss": 5.3082, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 2.4904643184384923, | |
| "grad_norm": 2.280992031097412, | |
| "learning_rate": 0.00017095218390148542, | |
| "loss": 5.3027, | |
| "step": 467500 | |
| }, | |
| { | |
| "epoch": 2.493127916640031, | |
| "grad_norm": 2.204409599304199, | |
| "learning_rate": 0.00017006035872775787, | |
| "loss": 5.3056, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 2.495791514841569, | |
| "grad_norm": 2.7257113456726074, | |
| "learning_rate": 0.0001691703172043778, | |
| "loss": 5.3043, | |
| "step": 468500 | |
| }, | |
| { | |
| "epoch": 2.4984551130431076, | |
| "grad_norm": 2.262225866317749, | |
| "learning_rate": 0.00016827849203065025, | |
| "loss": 5.3022, | |
| "step": 469000 | |
| }, | |
| { | |
| "epoch": 2.5011187112446462, | |
| "grad_norm": 2.167947769165039, | |
| "learning_rate": 0.0001673866668569227, | |
| "loss": 5.2977, | |
| "step": 469500 | |
| }, | |
| { | |
| "epoch": 2.5037823094461844, | |
| "grad_norm": 2.434269428253174, | |
| "learning_rate": 0.00016649484168319517, | |
| "loss": 5.3003, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 2.506445907647723, | |
| "grad_norm": 2.2088136672973633, | |
| "learning_rate": 0.00016560480015981508, | |
| "loss": 5.3048, | |
| "step": 470500 | |
| }, | |
| { | |
| "epoch": 2.5091095058492616, | |
| "grad_norm": 2.268261194229126, | |
| "learning_rate": 0.00016471297498608754, | |
| "loss": 5.3048, | |
| "step": 471000 | |
| }, | |
| { | |
| "epoch": 2.5117731040508002, | |
| "grad_norm": 2.462432384490967, | |
| "learning_rate": 0.00016382114981235998, | |
| "loss": 5.305, | |
| "step": 471500 | |
| }, | |
| { | |
| "epoch": 2.514436702252339, | |
| "grad_norm": 2.6072680950164795, | |
| "learning_rate": 0.00016292932463863243, | |
| "loss": 5.2986, | |
| "step": 472000 | |
| }, | |
| { | |
| "epoch": 2.517100300453877, | |
| "grad_norm": 2.600860118865967, | |
| "learning_rate": 0.0001620374994649049, | |
| "loss": 5.299, | |
| "step": 472500 | |
| }, | |
| { | |
| "epoch": 2.5197638986554156, | |
| "grad_norm": 2.3521888256073, | |
| "learning_rate": 0.00016114567429117735, | |
| "loss": 5.2936, | |
| "step": 473000 | |
| }, | |
| { | |
| "epoch": 2.522427496856954, | |
| "grad_norm": 2.712414026260376, | |
| "learning_rate": 0.00016025563276779724, | |
| "loss": 5.303, | |
| "step": 473500 | |
| }, | |
| { | |
| "epoch": 2.5250910950584924, | |
| "grad_norm": 2.267749071121216, | |
| "learning_rate": 0.0001593638075940697, | |
| "loss": 5.3026, | |
| "step": 474000 | |
| }, | |
| { | |
| "epoch": 2.527754693260031, | |
| "grad_norm": 2.206207275390625, | |
| "learning_rate": 0.0001584719824203422, | |
| "loss": 5.2967, | |
| "step": 474500 | |
| }, | |
| { | |
| "epoch": 2.5304182914615696, | |
| "grad_norm": 2.3536181449890137, | |
| "learning_rate": 0.00015758015724661465, | |
| "loss": 5.2971, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 2.533081889663108, | |
| "grad_norm": 2.229966163635254, | |
| "learning_rate": 0.00015669011572323456, | |
| "loss": 5.3002, | |
| "step": 475500 | |
| }, | |
| { | |
| "epoch": 2.535745487864647, | |
| "grad_norm": 2.391902208328247, | |
| "learning_rate": 0.000155798290549507, | |
| "loss": 5.3046, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 2.538409086066185, | |
| "grad_norm": 2.367274522781372, | |
| "learning_rate": 0.00015490646537577945, | |
| "loss": 5.3014, | |
| "step": 476500 | |
| }, | |
| { | |
| "epoch": 2.5410726842677236, | |
| "grad_norm": 2.398796319961548, | |
| "learning_rate": 0.00015401464020205191, | |
| "loss": 5.3012, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 2.543736282469262, | |
| "grad_norm": 2.2506918907165527, | |
| "learning_rate": 0.00015312281502832437, | |
| "loss": 5.3034, | |
| "step": 477500 | |
| }, | |
| { | |
| "epoch": 2.5463998806708004, | |
| "grad_norm": 2.4038991928100586, | |
| "learning_rate": 0.00015223277350494426, | |
| "loss": 5.298, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 2.549063478872339, | |
| "grad_norm": 2.2355668544769287, | |
| "learning_rate": 0.00015134094833121672, | |
| "loss": 5.2999, | |
| "step": 478500 | |
| }, | |
| { | |
| "epoch": 2.5517270770738776, | |
| "grad_norm": 2.312537908554077, | |
| "learning_rate": 0.00015044912315748918, | |
| "loss": 5.2987, | |
| "step": 479000 | |
| }, | |
| { | |
| "epoch": 2.554390675275416, | |
| "grad_norm": 2.4338889122009277, | |
| "learning_rate": 0.00014955729798376164, | |
| "loss": 5.2936, | |
| "step": 479500 | |
| }, | |
| { | |
| "epoch": 2.557054273476955, | |
| "grad_norm": 2.303349018096924, | |
| "learning_rate": 0.00014866725646038155, | |
| "loss": 5.2941, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 2.559717871678493, | |
| "grad_norm": 2.27744197845459, | |
| "learning_rate": 0.00014777543128665401, | |
| "loss": 5.2961, | |
| "step": 480500 | |
| }, | |
| { | |
| "epoch": 2.5623814698800316, | |
| "grad_norm": 2.364135265350342, | |
| "learning_rate": 0.00014688360611292647, | |
| "loss": 5.2982, | |
| "step": 481000 | |
| }, | |
| { | |
| "epoch": 2.56504506808157, | |
| "grad_norm": 2.652825355529785, | |
| "learning_rate": 0.00014599178093919893, | |
| "loss": 5.3072, | |
| "step": 481500 | |
| }, | |
| { | |
| "epoch": 2.5677086662831083, | |
| "grad_norm": 2.2864181995391846, | |
| "learning_rate": 0.00014510173941581882, | |
| "loss": 5.3027, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 2.570372264484647, | |
| "grad_norm": 2.1780378818511963, | |
| "learning_rate": 0.00014420991424209128, | |
| "loss": 5.2988, | |
| "step": 482500 | |
| }, | |
| { | |
| "epoch": 2.5730358626861856, | |
| "grad_norm": 2.4762122631073, | |
| "learning_rate": 0.00014331808906836374, | |
| "loss": 5.2963, | |
| "step": 483000 | |
| }, | |
| { | |
| "epoch": 2.575699460887724, | |
| "grad_norm": 2.3064920902252197, | |
| "learning_rate": 0.0001424262638946362, | |
| "loss": 5.304, | |
| "step": 483500 | |
| }, | |
| { | |
| "epoch": 2.5783630590892628, | |
| "grad_norm": 2.17753529548645, | |
| "learning_rate": 0.00014153443872090866, | |
| "loss": 5.2909, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 2.581026657290801, | |
| "grad_norm": 2.442643404006958, | |
| "learning_rate": 0.00014064439719752857, | |
| "loss": 5.3033, | |
| "step": 484500 | |
| }, | |
| { | |
| "epoch": 2.5836902554923395, | |
| "grad_norm": 2.5781943798065186, | |
| "learning_rate": 0.00013975257202380103, | |
| "loss": 5.2969, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 2.586353853693878, | |
| "grad_norm": 2.1409718990325928, | |
| "learning_rate": 0.0001388607468500735, | |
| "loss": 5.2987, | |
| "step": 485500 | |
| }, | |
| { | |
| "epoch": 2.5890174518954163, | |
| "grad_norm": 2.23543381690979, | |
| "learning_rate": 0.00013796892167634595, | |
| "loss": 5.2989, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 2.591681050096955, | |
| "grad_norm": 2.418957233428955, | |
| "learning_rate": 0.00013707888015296584, | |
| "loss": 5.2972, | |
| "step": 486500 | |
| }, | |
| { | |
| "epoch": 2.5943446482984935, | |
| "grad_norm": 2.292370080947876, | |
| "learning_rate": 0.0001361870549792383, | |
| "loss": 5.3005, | |
| "step": 487000 | |
| }, | |
| { | |
| "epoch": 2.5970082465000317, | |
| "grad_norm": 2.360339403152466, | |
| "learning_rate": 0.00013529522980551076, | |
| "loss": 5.2974, | |
| "step": 487500 | |
| }, | |
| { | |
| "epoch": 2.5996718447015703, | |
| "grad_norm": 2.2026000022888184, | |
| "learning_rate": 0.00013440340463178322, | |
| "loss": 5.3012, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 2.602335442903109, | |
| "grad_norm": 2.273235559463501, | |
| "learning_rate": 0.00013351336310840313, | |
| "loss": 5.2955, | |
| "step": 488500 | |
| }, | |
| { | |
| "epoch": 2.6049990411046475, | |
| "grad_norm": 2.349081516265869, | |
| "learning_rate": 0.0001326215379346756, | |
| "loss": 5.3026, | |
| "step": 489000 | |
| }, | |
| { | |
| "epoch": 2.607662639306186, | |
| "grad_norm": 2.4691007137298584, | |
| "learning_rate": 0.00013172971276094805, | |
| "loss": 5.2999, | |
| "step": 489500 | |
| }, | |
| { | |
| "epoch": 2.6103262375077243, | |
| "grad_norm": 2.3375978469848633, | |
| "learning_rate": 0.0001308378875872205, | |
| "loss": 5.2849, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 2.612989835709263, | |
| "grad_norm": 2.3784444332122803, | |
| "learning_rate": 0.0001299478460638404, | |
| "loss": 5.2937, | |
| "step": 490500 | |
| }, | |
| { | |
| "epoch": 2.6156534339108015, | |
| "grad_norm": 2.4842257499694824, | |
| "learning_rate": 0.00012905602089011286, | |
| "loss": 5.2919, | |
| "step": 491000 | |
| }, | |
| { | |
| "epoch": 2.6183170321123397, | |
| "grad_norm": 2.2826011180877686, | |
| "learning_rate": 0.00012816419571638532, | |
| "loss": 5.2902, | |
| "step": 491500 | |
| }, | |
| { | |
| "epoch": 2.6209806303138783, | |
| "grad_norm": 2.300616979598999, | |
| "learning_rate": 0.00012727237054265778, | |
| "loss": 5.3024, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 2.623644228515417, | |
| "grad_norm": 2.4524025917053223, | |
| "learning_rate": 0.00012638232901927772, | |
| "loss": 5.2908, | |
| "step": 492500 | |
| }, | |
| { | |
| "epoch": 2.6263078267169555, | |
| "grad_norm": 2.3518335819244385, | |
| "learning_rate": 0.00012549050384555015, | |
| "loss": 5.2977, | |
| "step": 493000 | |
| }, | |
| { | |
| "epoch": 2.628971424918494, | |
| "grad_norm": 2.5559749603271484, | |
| "learning_rate": 0.0001245986786718226, | |
| "loss": 5.2933, | |
| "step": 493500 | |
| }, | |
| { | |
| "epoch": 2.6316350231200323, | |
| "grad_norm": 2.32487416267395, | |
| "learning_rate": 0.00012370685349809507, | |
| "loss": 5.2941, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 2.634298621321571, | |
| "grad_norm": 2.384162187576294, | |
| "learning_rate": 0.000122816811974715, | |
| "loss": 5.2978, | |
| "step": 494500 | |
| }, | |
| { | |
| "epoch": 2.6369622195231095, | |
| "grad_norm": 2.7350683212280273, | |
| "learning_rate": 0.00012192498680098743, | |
| "loss": 5.3015, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 2.6396258177246477, | |
| "grad_norm": 2.5397427082061768, | |
| "learning_rate": 0.00012103316162725988, | |
| "loss": 5.2924, | |
| "step": 495500 | |
| }, | |
| { | |
| "epoch": 2.6422894159261863, | |
| "grad_norm": 2.4719595909118652, | |
| "learning_rate": 0.00012014133645353234, | |
| "loss": 5.2982, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 2.644953014127725, | |
| "grad_norm": 2.7110893726348877, | |
| "learning_rate": 0.0001192495112798048, | |
| "loss": 5.2908, | |
| "step": 496500 | |
| }, | |
| { | |
| "epoch": 2.6476166123292635, | |
| "grad_norm": 2.5090041160583496, | |
| "learning_rate": 0.00011835946975642471, | |
| "loss": 5.2939, | |
| "step": 497000 | |
| }, | |
| { | |
| "epoch": 2.650280210530802, | |
| "grad_norm": 2.5113580226898193, | |
| "learning_rate": 0.00011746764458269717, | |
| "loss": 5.2935, | |
| "step": 497500 | |
| }, | |
| { | |
| "epoch": 2.6529438087323403, | |
| "grad_norm": 2.4266409873962402, | |
| "learning_rate": 0.00011657581940896962, | |
| "loss": 5.2931, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 2.655607406933879, | |
| "grad_norm": 2.4426701068878174, | |
| "learning_rate": 0.00011568399423524208, | |
| "loss": 5.2909, | |
| "step": 498500 | |
| }, | |
| { | |
| "epoch": 2.6582710051354175, | |
| "grad_norm": 2.5790412425994873, | |
| "learning_rate": 0.00011479216906151452, | |
| "loss": 5.2919, | |
| "step": 499000 | |
| }, | |
| { | |
| "epoch": 2.6609346033369556, | |
| "grad_norm": 2.309144973754883, | |
| "learning_rate": 0.00011390212753813445, | |
| "loss": 5.2967, | |
| "step": 499500 | |
| }, | |
| { | |
| "epoch": 2.6635982015384942, | |
| "grad_norm": 2.297360420227051, | |
| "learning_rate": 0.0001130103023644069, | |
| "loss": 5.2918, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 2.666261799740033, | |
| "grad_norm": 2.539792776107788, | |
| "learning_rate": 0.00011211847719067936, | |
| "loss": 5.2914, | |
| "step": 500500 | |
| }, | |
| { | |
| "epoch": 2.6689253979415715, | |
| "grad_norm": 2.246025800704956, | |
| "learning_rate": 0.00011122665201695182, | |
| "loss": 5.2968, | |
| "step": 501000 | |
| }, | |
| { | |
| "epoch": 2.67158899614311, | |
| "grad_norm": 2.34342885017395, | |
| "learning_rate": 0.00011033661049357173, | |
| "loss": 5.2885, | |
| "step": 501500 | |
| }, | |
| { | |
| "epoch": 2.6742525943446482, | |
| "grad_norm": 2.4776382446289062, | |
| "learning_rate": 0.00010944478531984418, | |
| "loss": 5.2944, | |
| "step": 502000 | |
| }, | |
| { | |
| "epoch": 2.676916192546187, | |
| "grad_norm": 2.583674907684326, | |
| "learning_rate": 0.00010855296014611664, | |
| "loss": 5.2907, | |
| "step": 502500 | |
| }, | |
| { | |
| "epoch": 2.6795797907477255, | |
| "grad_norm": 2.3661584854125977, | |
| "learning_rate": 0.0001076611349723891, | |
| "loss": 5.2969, | |
| "step": 503000 | |
| }, | |
| { | |
| "epoch": 2.6822433889492636, | |
| "grad_norm": 2.3716771602630615, | |
| "learning_rate": 0.00010677109344900901, | |
| "loss": 5.2993, | |
| "step": 503500 | |
| }, | |
| { | |
| "epoch": 2.6849069871508022, | |
| "grad_norm": 2.3315460681915283, | |
| "learning_rate": 0.00010587926827528146, | |
| "loss": 5.2914, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 2.687570585352341, | |
| "grad_norm": 2.2361655235290527, | |
| "learning_rate": 0.00010498744310155392, | |
| "loss": 5.288, | |
| "step": 504500 | |
| }, | |
| { | |
| "epoch": 2.690234183553879, | |
| "grad_norm": 2.3718972206115723, | |
| "learning_rate": 0.00010409561792782638, | |
| "loss": 5.2933, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 2.6928977817554176, | |
| "grad_norm": 2.414783477783203, | |
| "learning_rate": 0.0001032055764044463, | |
| "loss": 5.2905, | |
| "step": 505500 | |
| }, | |
| { | |
| "epoch": 2.695561379956956, | |
| "grad_norm": 2.5909764766693115, | |
| "learning_rate": 0.00010231375123071875, | |
| "loss": 5.2889, | |
| "step": 506000 | |
| }, | |
| { | |
| "epoch": 2.698224978158495, | |
| "grad_norm": 2.2361748218536377, | |
| "learning_rate": 0.0001014219260569912, | |
| "loss": 5.2884, | |
| "step": 506500 | |
| }, | |
| { | |
| "epoch": 2.7008885763600334, | |
| "grad_norm": 2.3554787635803223, | |
| "learning_rate": 0.00010053010088326366, | |
| "loss": 5.283, | |
| "step": 507000 | |
| }, | |
| { | |
| "epoch": 2.7035521745615716, | |
| "grad_norm": 2.4235968589782715, | |
| "learning_rate": 9.96382757095361e-05, | |
| "loss": 5.2991, | |
| "step": 507500 | |
| }, | |
| { | |
| "epoch": 2.70621577276311, | |
| "grad_norm": 2.334272861480713, | |
| "learning_rate": 9.874645053580856e-05, | |
| "loss": 5.2921, | |
| "step": 508000 | |
| }, | |
| { | |
| "epoch": 2.708879370964649, | |
| "grad_norm": 2.443535566329956, | |
| "learning_rate": 9.785640901242848e-05, | |
| "loss": 5.2934, | |
| "step": 508500 | |
| }, | |
| { | |
| "epoch": 2.711542969166187, | |
| "grad_norm": 2.4466655254364014, | |
| "learning_rate": 9.696458383870094e-05, | |
| "loss": 5.2915, | |
| "step": 509000 | |
| }, | |
| { | |
| "epoch": 2.7142065673677256, | |
| "grad_norm": 2.1013219356536865, | |
| "learning_rate": 9.60727586649734e-05, | |
| "loss": 5.2942, | |
| "step": 509500 | |
| }, | |
| { | |
| "epoch": 2.716870165569264, | |
| "grad_norm": 2.486953020095825, | |
| "learning_rate": 9.518093349124584e-05, | |
| "loss": 5.2948, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 2.719533763770803, | |
| "grad_norm": 2.246967077255249, | |
| "learning_rate": 9.429089196786576e-05, | |
| "loss": 5.288, | |
| "step": 510500 | |
| }, | |
| { | |
| "epoch": 2.7221973619723414, | |
| "grad_norm": 2.308177947998047, | |
| "learning_rate": 9.339906679413822e-05, | |
| "loss": 5.2925, | |
| "step": 511000 | |
| }, | |
| { | |
| "epoch": 2.7248609601738796, | |
| "grad_norm": 2.3832600116729736, | |
| "learning_rate": 9.250724162041068e-05, | |
| "loss": 5.2925, | |
| "step": 511500 | |
| }, | |
| { | |
| "epoch": 2.727524558375418, | |
| "grad_norm": 2.2219245433807373, | |
| "learning_rate": 9.161541644668312e-05, | |
| "loss": 5.294, | |
| "step": 512000 | |
| }, | |
| { | |
| "epoch": 2.730188156576957, | |
| "grad_norm": 2.4265191555023193, | |
| "learning_rate": 9.072537492330303e-05, | |
| "loss": 5.2875, | |
| "step": 512500 | |
| }, | |
| { | |
| "epoch": 2.732851754778495, | |
| "grad_norm": 2.553427219390869, | |
| "learning_rate": 8.98335497495755e-05, | |
| "loss": 5.2984, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 2.7355153529800336, | |
| "grad_norm": 2.3475024700164795, | |
| "learning_rate": 8.894172457584796e-05, | |
| "loss": 5.2827, | |
| "step": 513500 | |
| }, | |
| { | |
| "epoch": 2.738178951181572, | |
| "grad_norm": 2.5305187702178955, | |
| "learning_rate": 8.80498994021204e-05, | |
| "loss": 5.2937, | |
| "step": 514000 | |
| }, | |
| { | |
| "epoch": 2.740842549383111, | |
| "grad_norm": 2.4398436546325684, | |
| "learning_rate": 8.71598578787403e-05, | |
| "loss": 5.2948, | |
| "step": 514500 | |
| }, | |
| { | |
| "epoch": 2.7435061475846494, | |
| "grad_norm": 2.4077444076538086, | |
| "learning_rate": 8.626803270501276e-05, | |
| "loss": 5.2882, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 2.7461697457861876, | |
| "grad_norm": 2.346778392791748, | |
| "learning_rate": 8.537620753128524e-05, | |
| "loss": 5.2875, | |
| "step": 515500 | |
| }, | |
| { | |
| "epoch": 2.748833343987726, | |
| "grad_norm": 2.4900453090667725, | |
| "learning_rate": 8.448438235755768e-05, | |
| "loss": 5.2835, | |
| "step": 516000 | |
| }, | |
| { | |
| "epoch": 2.7514969421892648, | |
| "grad_norm": 2.4355154037475586, | |
| "learning_rate": 8.359255718383014e-05, | |
| "loss": 5.29, | |
| "step": 516500 | |
| }, | |
| { | |
| "epoch": 2.754160540390803, | |
| "grad_norm": 2.18061900138855, | |
| "learning_rate": 8.270251566045004e-05, | |
| "loss": 5.288, | |
| "step": 517000 | |
| }, | |
| { | |
| "epoch": 2.7568241385923415, | |
| "grad_norm": 2.3646693229675293, | |
| "learning_rate": 8.18106904867225e-05, | |
| "loss": 5.2789, | |
| "step": 517500 | |
| }, | |
| { | |
| "epoch": 2.75948773679388, | |
| "grad_norm": 2.369717836380005, | |
| "learning_rate": 8.091886531299498e-05, | |
| "loss": 5.2901, | |
| "step": 518000 | |
| }, | |
| { | |
| "epoch": 2.7621513349954188, | |
| "grad_norm": 2.4666647911071777, | |
| "learning_rate": 8.002704013926742e-05, | |
| "loss": 5.2858, | |
| "step": 518500 | |
| }, | |
| { | |
| "epoch": 2.7648149331969574, | |
| "grad_norm": 2.3375349044799805, | |
| "learning_rate": 7.913699861588732e-05, | |
| "loss": 5.2854, | |
| "step": 519000 | |
| }, | |
| { | |
| "epoch": 2.7674785313984955, | |
| "grad_norm": 2.2538347244262695, | |
| "learning_rate": 7.824517344215978e-05, | |
| "loss": 5.2899, | |
| "step": 519500 | |
| }, | |
| { | |
| "epoch": 2.770142129600034, | |
| "grad_norm": 2.5232772827148438, | |
| "learning_rate": 7.735334826843224e-05, | |
| "loss": 5.2948, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 2.7728057278015728, | |
| "grad_norm": 2.3963685035705566, | |
| "learning_rate": 7.646152309470469e-05, | |
| "loss": 5.2919, | |
| "step": 520500 | |
| }, | |
| { | |
| "epoch": 2.775469326003111, | |
| "grad_norm": 2.0667736530303955, | |
| "learning_rate": 7.55714815713246e-05, | |
| "loss": 5.2825, | |
| "step": 521000 | |
| }, | |
| { | |
| "epoch": 2.7781329242046495, | |
| "grad_norm": 2.421602725982666, | |
| "learning_rate": 7.467965639759706e-05, | |
| "loss": 5.2949, | |
| "step": 521500 | |
| }, | |
| { | |
| "epoch": 2.780796522406188, | |
| "grad_norm": 2.3447656631469727, | |
| "learning_rate": 7.378783122386952e-05, | |
| "loss": 5.2871, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 2.7834601206077263, | |
| "grad_norm": 2.1411802768707275, | |
| "learning_rate": 7.289600605014197e-05, | |
| "loss": 5.2861, | |
| "step": 522500 | |
| }, | |
| { | |
| "epoch": 2.786123718809265, | |
| "grad_norm": 2.5163323879241943, | |
| "learning_rate": 7.200418087641443e-05, | |
| "loss": 5.286, | |
| "step": 523000 | |
| }, | |
| { | |
| "epoch": 2.7887873170108035, | |
| "grad_norm": 2.482067108154297, | |
| "learning_rate": 7.111413935303434e-05, | |
| "loss": 5.2863, | |
| "step": 523500 | |
| }, | |
| { | |
| "epoch": 2.791450915212342, | |
| "grad_norm": 2.3614418506622314, | |
| "learning_rate": 7.02223141793068e-05, | |
| "loss": 5.2799, | |
| "step": 524000 | |
| }, | |
| { | |
| "epoch": 2.7941145134138807, | |
| "grad_norm": 2.333521842956543, | |
| "learning_rate": 6.933048900557925e-05, | |
| "loss": 5.2873, | |
| "step": 524500 | |
| }, | |
| { | |
| "epoch": 2.796778111615419, | |
| "grad_norm": 2.2536137104034424, | |
| "learning_rate": 6.843866383185171e-05, | |
| "loss": 5.2909, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 2.7994417098169575, | |
| "grad_norm": 2.516286849975586, | |
| "learning_rate": 6.754862230847162e-05, | |
| "loss": 5.2944, | |
| "step": 525500 | |
| }, | |
| { | |
| "epoch": 2.802105308018496, | |
| "grad_norm": 2.361598253250122, | |
| "learning_rate": 6.665679713474408e-05, | |
| "loss": 5.2872, | |
| "step": 526000 | |
| }, | |
| { | |
| "epoch": 2.8047689062200343, | |
| "grad_norm": 2.387085199356079, | |
| "learning_rate": 6.576497196101654e-05, | |
| "loss": 5.291, | |
| "step": 526500 | |
| }, | |
| { | |
| "epoch": 2.807432504421573, | |
| "grad_norm": 2.2874443531036377, | |
| "learning_rate": 6.487314678728899e-05, | |
| "loss": 5.29, | |
| "step": 527000 | |
| }, | |
| { | |
| "epoch": 2.8100961026231115, | |
| "grad_norm": 2.4107890129089355, | |
| "learning_rate": 6.39831052639089e-05, | |
| "loss": 5.2781, | |
| "step": 527500 | |
| }, | |
| { | |
| "epoch": 2.81275970082465, | |
| "grad_norm": 2.3214197158813477, | |
| "learning_rate": 6.309128009018136e-05, | |
| "loss": 5.2851, | |
| "step": 528000 | |
| }, | |
| { | |
| "epoch": 2.8154232990261887, | |
| "grad_norm": 2.3806910514831543, | |
| "learning_rate": 6.219945491645382e-05, | |
| "loss": 5.2824, | |
| "step": 528500 | |
| }, | |
| { | |
| "epoch": 2.818086897227727, | |
| "grad_norm": 2.4679012298583984, | |
| "learning_rate": 6.130762974272627e-05, | |
| "loss": 5.291, | |
| "step": 529000 | |
| }, | |
| { | |
| "epoch": 2.8207504954292655, | |
| "grad_norm": 2.30574631690979, | |
| "learning_rate": 6.041758821934619e-05, | |
| "loss": 5.2901, | |
| "step": 529500 | |
| }, | |
| { | |
| "epoch": 2.823414093630804, | |
| "grad_norm": 2.309056043624878, | |
| "learning_rate": 5.9525763045618644e-05, | |
| "loss": 5.2778, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 2.8260776918323423, | |
| "grad_norm": 2.378755569458008, | |
| "learning_rate": 5.8633937871891097e-05, | |
| "loss": 5.2815, | |
| "step": 530500 | |
| }, | |
| { | |
| "epoch": 2.828741290033881, | |
| "grad_norm": 2.6057322025299072, | |
| "learning_rate": 5.7742112698163556e-05, | |
| "loss": 5.2866, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 2.8314048882354195, | |
| "grad_norm": 2.3079919815063477, | |
| "learning_rate": 5.685028752443601e-05, | |
| "loss": 5.2791, | |
| "step": 531500 | |
| }, | |
| { | |
| "epoch": 2.834068486436958, | |
| "grad_norm": 2.2242472171783447, | |
| "learning_rate": 5.5960246001055924e-05, | |
| "loss": 5.2865, | |
| "step": 532000 | |
| }, | |
| { | |
| "epoch": 2.8367320846384967, | |
| "grad_norm": 2.3489010334014893, | |
| "learning_rate": 5.5068420827328383e-05, | |
| "loss": 5.2872, | |
| "step": 532500 | |
| }, | |
| { | |
| "epoch": 2.839395682840035, | |
| "grad_norm": 2.9294140338897705, | |
| "learning_rate": 5.4176595653600836e-05, | |
| "loss": 5.2796, | |
| "step": 533000 | |
| }, | |
| { | |
| "epoch": 2.8420592810415735, | |
| "grad_norm": 2.325824499130249, | |
| "learning_rate": 5.328477047987329e-05, | |
| "loss": 5.2878, | |
| "step": 533500 | |
| }, | |
| { | |
| "epoch": 2.844722879243112, | |
| "grad_norm": 2.3206863403320312, | |
| "learning_rate": 5.23947289564932e-05, | |
| "loss": 5.2827, | |
| "step": 534000 | |
| }, | |
| { | |
| "epoch": 2.8473864774446502, | |
| "grad_norm": 2.241338014602661, | |
| "learning_rate": 5.150290378276566e-05, | |
| "loss": 5.2862, | |
| "step": 534500 | |
| }, | |
| { | |
| "epoch": 2.850050075646189, | |
| "grad_norm": 2.3662049770355225, | |
| "learning_rate": 5.0611078609038116e-05, | |
| "loss": 5.2868, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 2.8527136738477274, | |
| "grad_norm": 2.0729544162750244, | |
| "learning_rate": 4.971925343531057e-05, | |
| "loss": 5.2851, | |
| "step": 535500 | |
| }, | |
| { | |
| "epoch": 2.855377272049266, | |
| "grad_norm": 2.1059601306915283, | |
| "learning_rate": 4.8829211911930484e-05, | |
| "loss": 5.2809, | |
| "step": 536000 | |
| }, | |
| { | |
| "epoch": 2.8580408702508047, | |
| "grad_norm": 2.70766282081604, | |
| "learning_rate": 4.793738673820294e-05, | |
| "loss": 5.2896, | |
| "step": 536500 | |
| }, | |
| { | |
| "epoch": 2.860704468452343, | |
| "grad_norm": 2.526292562484741, | |
| "learning_rate": 4.704556156447539e-05, | |
| "loss": 5.2828, | |
| "step": 537000 | |
| }, | |
| { | |
| "epoch": 2.8633680666538814, | |
| "grad_norm": 2.246443510055542, | |
| "learning_rate": 4.6153736390747856e-05, | |
| "loss": 5.2847, | |
| "step": 537500 | |
| }, | |
| { | |
| "epoch": 2.86603166485542, | |
| "grad_norm": 2.5226643085479736, | |
| "learning_rate": 4.5263694867367764e-05, | |
| "loss": 5.2871, | |
| "step": 538000 | |
| }, | |
| { | |
| "epoch": 2.868695263056958, | |
| "grad_norm": 2.416816473007202, | |
| "learning_rate": 4.437186969364022e-05, | |
| "loss": 5.2825, | |
| "step": 538500 | |
| }, | |
| { | |
| "epoch": 2.871358861258497, | |
| "grad_norm": 2.5631511211395264, | |
| "learning_rate": 4.348004451991267e-05, | |
| "loss": 5.2815, | |
| "step": 539000 | |
| }, | |
| { | |
| "epoch": 2.8740224594600354, | |
| "grad_norm": 2.2883377075195312, | |
| "learning_rate": 4.258821934618513e-05, | |
| "loss": 5.2824, | |
| "step": 539500 | |
| }, | |
| { | |
| "epoch": 2.8766860576615736, | |
| "grad_norm": 2.4545071125030518, | |
| "learning_rate": 4.1698177822805044e-05, | |
| "loss": 5.278, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 2.879349655863112, | |
| "grad_norm": 2.2015092372894287, | |
| "learning_rate": 4.08063526490775e-05, | |
| "loss": 5.2806, | |
| "step": 540500 | |
| }, | |
| { | |
| "epoch": 2.882013254064651, | |
| "grad_norm": 2.7558255195617676, | |
| "learning_rate": 3.9914527475349956e-05, | |
| "loss": 5.2857, | |
| "step": 541000 | |
| }, | |
| { | |
| "epoch": 2.8846768522661894, | |
| "grad_norm": 2.376549005508423, | |
| "learning_rate": 3.902270230162241e-05, | |
| "loss": 5.2792, | |
| "step": 541500 | |
| }, | |
| { | |
| "epoch": 2.887340450467728, | |
| "grad_norm": 2.3727259635925293, | |
| "learning_rate": 3.813266077824232e-05, | |
| "loss": 5.2843, | |
| "step": 542000 | |
| }, | |
| { | |
| "epoch": 2.890004048669266, | |
| "grad_norm": 2.3833839893341064, | |
| "learning_rate": 3.724083560451478e-05, | |
| "loss": 5.2785, | |
| "step": 542500 | |
| }, | |
| { | |
| "epoch": 2.892667646870805, | |
| "grad_norm": 2.4702396392822266, | |
| "learning_rate": 3.6349010430787236e-05, | |
| "loss": 5.2785, | |
| "step": 543000 | |
| }, | |
| { | |
| "epoch": 2.8953312450723434, | |
| "grad_norm": 2.54264497756958, | |
| "learning_rate": 3.545718525705969e-05, | |
| "loss": 5.2813, | |
| "step": 543500 | |
| }, | |
| { | |
| "epoch": 2.8979948432738816, | |
| "grad_norm": 2.356501579284668, | |
| "learning_rate": 3.456536008333214e-05, | |
| "loss": 5.2886, | |
| "step": 544000 | |
| }, | |
| { | |
| "epoch": 2.90065844147542, | |
| "grad_norm": 2.546325445175171, | |
| "learning_rate": 3.367531855995206e-05, | |
| "loss": 5.2778, | |
| "step": 544500 | |
| }, | |
| { | |
| "epoch": 2.903322039676959, | |
| "grad_norm": 2.3812687397003174, | |
| "learning_rate": 3.2783493386224516e-05, | |
| "loss": 5.284, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 2.9059856378784974, | |
| "grad_norm": 2.3538711071014404, | |
| "learning_rate": 3.189166821249697e-05, | |
| "loss": 5.2755, | |
| "step": 545500 | |
| }, | |
| { | |
| "epoch": 2.908649236080036, | |
| "grad_norm": 2.2477262020111084, | |
| "learning_rate": 3.099984303876943e-05, | |
| "loss": 5.2876, | |
| "step": 546000 | |
| }, | |
| { | |
| "epoch": 2.911312834281574, | |
| "grad_norm": 2.2652475833892822, | |
| "learning_rate": 3.0109801515389333e-05, | |
| "loss": 5.2777, | |
| "step": 546500 | |
| }, | |
| { | |
| "epoch": 2.9139764324831128, | |
| "grad_norm": 2.468841791152954, | |
| "learning_rate": 2.9217976341661793e-05, | |
| "loss": 5.2779, | |
| "step": 547000 | |
| }, | |
| { | |
| "epoch": 2.9166400306846514, | |
| "grad_norm": 2.151130437850952, | |
| "learning_rate": 2.832615116793425e-05, | |
| "loss": 5.2883, | |
| "step": 547500 | |
| }, | |
| { | |
| "epoch": 2.9193036288861895, | |
| "grad_norm": 2.464799404144287, | |
| "learning_rate": 2.74343259942067e-05, | |
| "loss": 5.2843, | |
| "step": 548000 | |
| }, | |
| { | |
| "epoch": 2.921967227087728, | |
| "grad_norm": 2.6122734546661377, | |
| "learning_rate": 2.6544284470826617e-05, | |
| "loss": 5.2854, | |
| "step": 548500 | |
| }, | |
| { | |
| "epoch": 2.9246308252892668, | |
| "grad_norm": 2.257554769515991, | |
| "learning_rate": 2.565245929709907e-05, | |
| "loss": 5.277, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 2.9272944234908054, | |
| "grad_norm": 2.2422280311584473, | |
| "learning_rate": 2.476063412337153e-05, | |
| "loss": 5.2804, | |
| "step": 549500 | |
| }, | |
| { | |
| "epoch": 2.929958021692344, | |
| "grad_norm": 2.4912326335906982, | |
| "learning_rate": 2.3868808949643985e-05, | |
| "loss": 5.2758, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 2.932621619893882, | |
| "grad_norm": 2.305392265319824, | |
| "learning_rate": 2.2978767426263897e-05, | |
| "loss": 5.2831, | |
| "step": 550500 | |
| }, | |
| { | |
| "epoch": 2.9352852180954208, | |
| "grad_norm": 2.699528217315674, | |
| "learning_rate": 2.2086942252536353e-05, | |
| "loss": 5.2841, | |
| "step": 551000 | |
| }, | |
| { | |
| "epoch": 2.9379488162969594, | |
| "grad_norm": 2.3196749687194824, | |
| "learning_rate": 2.1195117078808806e-05, | |
| "loss": 5.2792, | |
| "step": 551500 | |
| }, | |
| { | |
| "epoch": 2.9406124144984975, | |
| "grad_norm": 2.134294033050537, | |
| "learning_rate": 2.0303291905081265e-05, | |
| "loss": 5.2845, | |
| "step": 552000 | |
| }, | |
| { | |
| "epoch": 2.943276012700036, | |
| "grad_norm": 2.25675892829895, | |
| "learning_rate": 1.941146673135372e-05, | |
| "loss": 5.2778, | |
| "step": 552500 | |
| }, | |
| { | |
| "epoch": 2.9459396109015747, | |
| "grad_norm": 2.141127824783325, | |
| "learning_rate": 1.852142520797363e-05, | |
| "loss": 5.2738, | |
| "step": 553000 | |
| }, | |
| { | |
| "epoch": 2.9486032091031134, | |
| "grad_norm": 2.3503618240356445, | |
| "learning_rate": 1.762960003424609e-05, | |
| "loss": 5.277, | |
| "step": 553500 | |
| }, | |
| { | |
| "epoch": 2.951266807304652, | |
| "grad_norm": 2.2987284660339355, | |
| "learning_rate": 1.673777486051854e-05, | |
| "loss": 5.2864, | |
| "step": 554000 | |
| }, | |
| { | |
| "epoch": 2.95393040550619, | |
| "grad_norm": 2.384070873260498, | |
| "learning_rate": 1.5845949686791e-05, | |
| "loss": 5.2798, | |
| "step": 554500 | |
| }, | |
| { | |
| "epoch": 2.9565940037077287, | |
| "grad_norm": 2.272744655609131, | |
| "learning_rate": 1.4954124513063455e-05, | |
| "loss": 5.2806, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 2.9592576019092673, | |
| "grad_norm": 2.2945611476898193, | |
| "learning_rate": 1.4064082989683367e-05, | |
| "loss": 5.2852, | |
| "step": 555500 | |
| }, | |
| { | |
| "epoch": 2.9619212001108055, | |
| "grad_norm": 2.5340495109558105, | |
| "learning_rate": 1.3172257815955822e-05, | |
| "loss": 5.2764, | |
| "step": 556000 | |
| }, | |
| { | |
| "epoch": 2.964584798312344, | |
| "grad_norm": 2.3637685775756836, | |
| "learning_rate": 1.228043264222828e-05, | |
| "loss": 5.28, | |
| "step": 556500 | |
| }, | |
| { | |
| "epoch": 2.9672483965138827, | |
| "grad_norm": 2.401252031326294, | |
| "learning_rate": 1.1388607468500735e-05, | |
| "loss": 5.2809, | |
| "step": 557000 | |
| }, | |
| { | |
| "epoch": 2.9699119947154213, | |
| "grad_norm": 2.256577253341675, | |
| "learning_rate": 1.0498565945120647e-05, | |
| "loss": 5.2798, | |
| "step": 557500 | |
| }, | |
| { | |
| "epoch": 2.9725755929169595, | |
| "grad_norm": 2.1444365978240967, | |
| "learning_rate": 9.606740771393103e-06, | |
| "loss": 5.2761, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 2.975239191118498, | |
| "grad_norm": 2.325979471206665, | |
| "learning_rate": 8.714915597665558e-06, | |
| "loss": 5.2804, | |
| "step": 558500 | |
| }, | |
| { | |
| "epoch": 2.9779027893200367, | |
| "grad_norm": 2.1250107288360596, | |
| "learning_rate": 7.823090423938014e-06, | |
| "loss": 5.2767, | |
| "step": 559000 | |
| }, | |
| { | |
| "epoch": 2.9805663875215753, | |
| "grad_norm": 2.4525716304779053, | |
| "learning_rate": 6.933048900557926e-06, | |
| "loss": 5.2805, | |
| "step": 559500 | |
| }, | |
| { | |
| "epoch": 2.9832299857231135, | |
| "grad_norm": 2.176084041595459, | |
| "learning_rate": 6.0412237268303826e-06, | |
| "loss": 5.278, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 2.985893583924652, | |
| "grad_norm": 2.607921600341797, | |
| "learning_rate": 5.149398553102839e-06, | |
| "loss": 5.2778, | |
| "step": 560500 | |
| }, | |
| { | |
| "epoch": 2.9885571821261907, | |
| "grad_norm": 2.287775993347168, | |
| "learning_rate": 4.257573379375294e-06, | |
| "loss": 5.2721, | |
| "step": 561000 | |
| }, | |
| { | |
| "epoch": 2.991220780327729, | |
| "grad_norm": 2.258080005645752, | |
| "learning_rate": 3.3657482056477507e-06, | |
| "loss": 5.2754, | |
| "step": 561500 | |
| }, | |
| { | |
| "epoch": 2.9938843785292675, | |
| "grad_norm": 2.214787244796753, | |
| "learning_rate": 2.475706682267662e-06, | |
| "loss": 5.2853, | |
| "step": 562000 | |
| }, | |
| { | |
| "epoch": 2.996547976730806, | |
| "grad_norm": 2.4470176696777344, | |
| "learning_rate": 1.583881508540118e-06, | |
| "loss": 5.2732, | |
| "step": 562500 | |
| }, | |
| { | |
| "epoch": 2.9992115749323447, | |
| "grad_norm": 2.2027597427368164, | |
| "learning_rate": 6.920563348125741e-07, | |
| "loss": 5.2723, | |
| "step": 563000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 563148, | |
| "total_flos": 1.7947279651188326e+17, | |
| "train_loss": 5.458770358526144, | |
| "train_runtime": 36904.8634, | |
| "train_samples_per_second": 976.604, | |
| "train_steps_per_second": 15.259 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 563148, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.7947279651188326e+17, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |