| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 1565, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 7.2823718978969225, | |
| "learning_rate": 1.0191082802547772e-06, | |
| "loss": 0.9132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44200366735458374, | |
| "step": 5, | |
| "valid_targets_mean": 2732.6, | |
| "valid_targets_min": 1358 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 6.257095873405941, | |
| "learning_rate": 2.2929936305732485e-06, | |
| "loss": 0.9252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46359115839004517, | |
| "step": 10, | |
| "valid_targets_mean": 3118.3, | |
| "valid_targets_min": 698 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 4.411856550169914, | |
| "learning_rate": 3.56687898089172e-06, | |
| "loss": 0.8594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.478965699672699, | |
| "step": 15, | |
| "valid_targets_mean": 3442.2, | |
| "valid_targets_min": 1957 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 2.5700848407864276, | |
| "learning_rate": 4.840764331210192e-06, | |
| "loss": 0.8379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3646432161331177, | |
| "step": 20, | |
| "valid_targets_mean": 2799.1, | |
| "valid_targets_min": 979 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.5191408406019358, | |
| "learning_rate": 6.114649681528663e-06, | |
| "loss": 0.7846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3868666887283325, | |
| "step": 25, | |
| "valid_targets_mean": 3145.7, | |
| "valid_targets_min": 1079 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 1.0745656000903365, | |
| "learning_rate": 7.388535031847134e-06, | |
| "loss": 0.7665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3939126133918762, | |
| "step": 30, | |
| "valid_targets_mean": 2892.2, | |
| "valid_targets_min": 1503 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.8892773246940794, | |
| "learning_rate": 8.662420382165606e-06, | |
| "loss": 0.7494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37320828437805176, | |
| "step": 35, | |
| "valid_targets_mean": 3300.9, | |
| "valid_targets_min": 1886 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.7556196584442465, | |
| "learning_rate": 9.936305732484078e-06, | |
| "loss": 0.7265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.355090469121933, | |
| "step": 40, | |
| "valid_targets_mean": 3008.4, | |
| "valid_targets_min": 958 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.585319744877401, | |
| "learning_rate": 1.1210191082802548e-05, | |
| "loss": 0.7227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33015894889831543, | |
| "step": 45, | |
| "valid_targets_mean": 3461.2, | |
| "valid_targets_min": 1283 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.60549778657053, | |
| "learning_rate": 1.248407643312102e-05, | |
| "loss": 0.6643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3755006790161133, | |
| "step": 50, | |
| "valid_targets_mean": 3302.6, | |
| "valid_targets_min": 1783 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.5913196880977777, | |
| "learning_rate": 1.375796178343949e-05, | |
| "loss": 0.6731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3550243377685547, | |
| "step": 55, | |
| "valid_targets_mean": 2947.4, | |
| "valid_targets_min": 1163 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.48189240583131276, | |
| "learning_rate": 1.5031847133757964e-05, | |
| "loss": 0.6445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24084284901618958, | |
| "step": 60, | |
| "valid_targets_mean": 3061.4, | |
| "valid_targets_min": 904 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.5115869977478013, | |
| "learning_rate": 1.6305732484076436e-05, | |
| "loss": 0.6592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34680789709091187, | |
| "step": 65, | |
| "valid_targets_mean": 3287.8, | |
| "valid_targets_min": 2333 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.49192101799682225, | |
| "learning_rate": 1.7579617834394907e-05, | |
| "loss": 0.6546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3077095150947571, | |
| "step": 70, | |
| "valid_targets_mean": 2793.4, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.43541915794559505, | |
| "learning_rate": 1.8853503184713376e-05, | |
| "loss": 0.6365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29219257831573486, | |
| "step": 75, | |
| "valid_targets_mean": 3412.2, | |
| "valid_targets_min": 1599 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.489452472152694, | |
| "learning_rate": 2.0127388535031848e-05, | |
| "loss": 0.6261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3591298460960388, | |
| "step": 80, | |
| "valid_targets_mean": 3012.2, | |
| "valid_targets_min": 1222 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.5019386951535992, | |
| "learning_rate": 2.140127388535032e-05, | |
| "loss": 0.6193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3157939612865448, | |
| "step": 85, | |
| "valid_targets_mean": 2860.2, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.445620357117832, | |
| "learning_rate": 2.267515923566879e-05, | |
| "loss": 0.6056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2639375329017639, | |
| "step": 90, | |
| "valid_targets_mean": 3043.7, | |
| "valid_targets_min": 1624 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.4702040139646772, | |
| "learning_rate": 2.3949044585987263e-05, | |
| "loss": 0.593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3165432810783386, | |
| "step": 95, | |
| "valid_targets_mean": 3198.7, | |
| "valid_targets_min": 1785 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.5070089852236436, | |
| "learning_rate": 2.5222929936305732e-05, | |
| "loss": 0.6156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2870636582374573, | |
| "step": 100, | |
| "valid_targets_mean": 2585.8, | |
| "valid_targets_min": 828 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.4954031096812358, | |
| "learning_rate": 2.6496815286624204e-05, | |
| "loss": 0.606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30892717838287354, | |
| "step": 105, | |
| "valid_targets_mean": 2920.3, | |
| "valid_targets_min": 1672 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.501722029021901, | |
| "learning_rate": 2.7770700636942676e-05, | |
| "loss": 0.5943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3055849075317383, | |
| "step": 110, | |
| "valid_targets_mean": 3229.6, | |
| "valid_targets_min": 1538 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.4739878082204572, | |
| "learning_rate": 2.9044585987261148e-05, | |
| "loss": 0.5894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24536016583442688, | |
| "step": 115, | |
| "valid_targets_mean": 2898.1, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.5550911619314564, | |
| "learning_rate": 3.0318471337579623e-05, | |
| "loss": 0.6026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2870926856994629, | |
| "step": 120, | |
| "valid_targets_mean": 2879.2, | |
| "valid_targets_min": 1154 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.4893227434987029, | |
| "learning_rate": 3.1592356687898095e-05, | |
| "loss": 0.585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26536059379577637, | |
| "step": 125, | |
| "valid_targets_mean": 2657.9, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.4851334401577497, | |
| "learning_rate": 3.286624203821656e-05, | |
| "loss": 0.5725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26549556851387024, | |
| "step": 130, | |
| "valid_targets_mean": 2905.2, | |
| "valid_targets_min": 1474 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.46210757965881916, | |
| "learning_rate": 3.414012738853504e-05, | |
| "loss": 0.5531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25114014744758606, | |
| "step": 135, | |
| "valid_targets_mean": 2985.4, | |
| "valid_targets_min": 408 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.4967800770084451, | |
| "learning_rate": 3.541401273885351e-05, | |
| "loss": 0.5611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2707679569721222, | |
| "step": 140, | |
| "valid_targets_mean": 3059.8, | |
| "valid_targets_min": 1204 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 0.5351660385430722, | |
| "learning_rate": 3.6687898089171976e-05, | |
| "loss": 0.5799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2705070972442627, | |
| "step": 145, | |
| "valid_targets_mean": 2721.8, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.4785944656217512, | |
| "learning_rate": 3.796178343949045e-05, | |
| "loss": 0.5591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26655346155166626, | |
| "step": 150, | |
| "valid_targets_mean": 2875.8, | |
| "valid_targets_min": 1844 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.4805531707541487, | |
| "learning_rate": 3.923566878980892e-05, | |
| "loss": 0.5505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2606361508369446, | |
| "step": 155, | |
| "valid_targets_mean": 3232.9, | |
| "valid_targets_min": 2114 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.4682743359582423, | |
| "learning_rate": 3.999980086219931e-05, | |
| "loss": 0.569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3145064413547516, | |
| "step": 160, | |
| "valid_targets_mean": 3246.5, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 0.49393836582356365, | |
| "learning_rate": 3.9997560607483595e-05, | |
| "loss": 0.5474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2361571043729782, | |
| "step": 165, | |
| "valid_targets_mean": 2631.5, | |
| "valid_targets_min": 775 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.5146422635244974, | |
| "learning_rate": 3.999283145555291e-05, | |
| "loss": 0.5428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2787424921989441, | |
| "step": 170, | |
| "valid_targets_mean": 3305.2, | |
| "valid_targets_min": 1540 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.4912681156590772, | |
| "learning_rate": 3.998561399499772e-05, | |
| "loss": 0.5593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24612675607204437, | |
| "step": 175, | |
| "valid_targets_mean": 3071.1, | |
| "valid_targets_min": 1315 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.4698358070989407, | |
| "learning_rate": 3.997590912410345e-05, | |
| "loss": 0.5554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2255033552646637, | |
| "step": 180, | |
| "valid_targets_mean": 3202.2, | |
| "valid_targets_min": 1394 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.532009785657136, | |
| "learning_rate": 3.996371805073874e-05, | |
| "loss": 0.5552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3014117181301117, | |
| "step": 185, | |
| "valid_targets_mean": 3159.7, | |
| "valid_targets_min": 1578 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.552397732560268, | |
| "learning_rate": 3.994904229220507e-05, | |
| "loss": 0.5629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3167943060398102, | |
| "step": 190, | |
| "valid_targets_mean": 3193.0, | |
| "valid_targets_min": 1288 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 0.5695077158852391, | |
| "learning_rate": 3.9931883675047966e-05, | |
| "loss": 0.5757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3574255406856537, | |
| "step": 195, | |
| "valid_targets_mean": 3208.6, | |
| "valid_targets_min": 1117 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.5250356068856997, | |
| "learning_rate": 3.991224433482961e-05, | |
| "loss": 0.5672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27982521057128906, | |
| "step": 200, | |
| "valid_targets_mean": 2803.5, | |
| "valid_targets_min": 1333 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 0.5191123720236969, | |
| "learning_rate": 3.98901267158631e-05, | |
| "loss": 0.5657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3286457657814026, | |
| "step": 205, | |
| "valid_targets_mean": 3622.6, | |
| "valid_targets_min": 1755 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.46894421435932004, | |
| "learning_rate": 3.98655335709082e-05, | |
| "loss": 0.5471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2534596920013428, | |
| "step": 210, | |
| "valid_targets_mean": 3157.9, | |
| "valid_targets_min": 1070 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 0.5511548565391978, | |
| "learning_rate": 3.9838467960828745e-05, | |
| "loss": 0.5581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24683982133865356, | |
| "step": 215, | |
| "valid_targets_mean": 2883.2, | |
| "valid_targets_min": 1104 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.4361229714007437, | |
| "learning_rate": 3.9808933254211665e-05, | |
| "loss": 0.5488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2557350993156433, | |
| "step": 220, | |
| "valid_targets_mean": 3469.2, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.49040635805208715, | |
| "learning_rate": 3.977693312694778e-05, | |
| "loss": 0.559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2932126820087433, | |
| "step": 225, | |
| "valid_targets_mean": 3349.6, | |
| "valid_targets_min": 1923 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.5358635858571923, | |
| "learning_rate": 3.974247156177423e-05, | |
| "loss": 0.5719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2997221350669861, | |
| "step": 230, | |
| "valid_targets_mean": 2815.5, | |
| "valid_targets_min": 1484 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 0.4643809623394787, | |
| "learning_rate": 3.970555284777883e-05, | |
| "loss": 0.5625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2824307382106781, | |
| "step": 235, | |
| "valid_targets_mean": 3399.6, | |
| "valid_targets_min": 1375 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.46528415620681496, | |
| "learning_rate": 3.9666181579866244e-05, | |
| "loss": 0.5291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24767976999282837, | |
| "step": 240, | |
| "valid_targets_mean": 3110.1, | |
| "valid_targets_min": 1705 | |
| }, | |
| { | |
| "epoch": 0.784, | |
| "grad_norm": 0.9577061650888293, | |
| "learning_rate": 3.962436265818611e-05, | |
| "loss": 0.55, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27534106373786926, | |
| "step": 245, | |
| "valid_targets_mean": 3277.2, | |
| "valid_targets_min": 2021 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.5583793522983518, | |
| "learning_rate": 3.9580101287523105e-05, | |
| "loss": 0.5752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29335466027259827, | |
| "step": 250, | |
| "valid_targets_mean": 2528.5, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 0.4663328652814683, | |
| "learning_rate": 3.953340297664928e-05, | |
| "loss": 0.5627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26815780997276306, | |
| "step": 255, | |
| "valid_targets_mean": 3442.6, | |
| "valid_targets_min": 927 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.4508138830740144, | |
| "learning_rate": 3.948427353763829e-05, | |
| "loss": 0.5436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2782423794269562, | |
| "step": 260, | |
| "valid_targets_mean": 3546.3, | |
| "valid_targets_min": 992 | |
| }, | |
| { | |
| "epoch": 0.848, | |
| "grad_norm": 0.46589746239536295, | |
| "learning_rate": 3.943271908514216e-05, | |
| "loss": 0.537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28761547803878784, | |
| "step": 265, | |
| "valid_targets_mean": 3277.4, | |
| "valid_targets_min": 1505 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.4690148588224579, | |
| "learning_rate": 3.937874603563015e-05, | |
| "loss": 0.5646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2736252546310425, | |
| "step": 270, | |
| "valid_targets_mean": 3230.6, | |
| "valid_targets_min": 1585 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.46883250413065714, | |
| "learning_rate": 3.932236110659023e-05, | |
| "loss": 0.5411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.267468124628067, | |
| "step": 275, | |
| "valid_targets_mean": 3123.0, | |
| "valid_targets_min": 1295 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.4359333014470195, | |
| "learning_rate": 3.9263571315692976e-05, | |
| "loss": 0.5289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2379305064678192, | |
| "step": 280, | |
| "valid_targets_mean": 3212.1, | |
| "valid_targets_min": 1729 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 0.46436272764415976, | |
| "learning_rate": 3.920238397991818e-05, | |
| "loss": 0.5435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.275387704372406, | |
| "step": 285, | |
| "valid_targets_mean": 3243.3, | |
| "valid_targets_min": 1626 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.448182212151269, | |
| "learning_rate": 3.913880671464418e-05, | |
| "loss": 0.5399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24673345685005188, | |
| "step": 290, | |
| "valid_targets_mean": 3588.0, | |
| "valid_targets_min": 1478 | |
| }, | |
| { | |
| "epoch": 0.944, | |
| "grad_norm": 0.4982702796643091, | |
| "learning_rate": 3.907284743270001e-05, | |
| "loss": 0.5478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.260049045085907, | |
| "step": 295, | |
| "valid_targets_mean": 2819.7, | |
| "valid_targets_min": 1732 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.5212191794351084, | |
| "learning_rate": 3.900451434338062e-05, | |
| "loss": 0.5497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2794175446033478, | |
| "step": 300, | |
| "valid_targets_mean": 3049.4, | |
| "valid_targets_min": 1476 | |
| }, | |
| { | |
| "epoch": 0.976, | |
| "grad_norm": 0.4494652664686231, | |
| "learning_rate": 3.893381595142511e-05, | |
| "loss": 0.5379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26482099294662476, | |
| "step": 305, | |
| "valid_targets_mean": 3424.4, | |
| "valid_targets_min": 1810 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.4473537869984785, | |
| "learning_rate": 3.886076105595825e-05, | |
| "loss": 0.537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24100354313850403, | |
| "step": 310, | |
| "valid_targets_mean": 2968.4, | |
| "valid_targets_min": 1734 | |
| }, | |
| { | |
| "epoch": 1.0064, | |
| "grad_norm": 0.49277910872631514, | |
| "learning_rate": 3.878535874939532e-05, | |
| "loss": 0.5475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2600662410259247, | |
| "step": 315, | |
| "valid_targets_mean": 2880.3, | |
| "valid_targets_min": 1373 | |
| }, | |
| { | |
| "epoch": 1.0224, | |
| "grad_norm": 0.4962924059392822, | |
| "learning_rate": 3.870761841631051e-05, | |
| "loss": 0.5325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2709118127822876, | |
| "step": 320, | |
| "valid_targets_mean": 3084.9, | |
| "valid_targets_min": 1101 | |
| }, | |
| { | |
| "epoch": 1.0384, | |
| "grad_norm": 0.4765894000972697, | |
| "learning_rate": 3.862754973226887e-05, | |
| "loss": 0.5105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2291407436132431, | |
| "step": 325, | |
| "valid_targets_mean": 2385.7, | |
| "valid_targets_min": 1193 | |
| }, | |
| { | |
| "epoch": 1.0544, | |
| "grad_norm": 0.44554031986434506, | |
| "learning_rate": 3.85451626626221e-05, | |
| "loss": 0.5333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25396525859832764, | |
| "step": 330, | |
| "valid_targets_mean": 3322.8, | |
| "valid_targets_min": 1730 | |
| }, | |
| { | |
| "epoch": 1.0704, | |
| "grad_norm": 0.4203337288517843, | |
| "learning_rate": 3.846046746126827e-05, | |
| "loss": 0.512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2684890627861023, | |
| "step": 335, | |
| "valid_targets_mean": 3894.3, | |
| "valid_targets_min": 1782 | |
| }, | |
| { | |
| "epoch": 1.0864, | |
| "grad_norm": 0.475098411041102, | |
| "learning_rate": 3.837347466937562e-05, | |
| "loss": 0.4937, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24573341012001038, | |
| "step": 340, | |
| "valid_targets_mean": 3154.6, | |
| "valid_targets_min": 1349 | |
| }, | |
| { | |
| "epoch": 1.1024, | |
| "grad_norm": 0.5827683965056836, | |
| "learning_rate": 3.828419511407062e-05, | |
| "loss": 0.5493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2953265607357025, | |
| "step": 345, | |
| "valid_targets_mean": 2745.6, | |
| "valid_targets_min": 1101 | |
| }, | |
| { | |
| "epoch": 1.1184, | |
| "grad_norm": 0.5124538925155401, | |
| "learning_rate": 3.819263990709037e-05, | |
| "loss": 0.511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27043575048446655, | |
| "step": 350, | |
| "valid_targets_mean": 3445.8, | |
| "valid_targets_min": 702 | |
| }, | |
| { | |
| "epoch": 1.1344, | |
| "grad_norm": 0.47607071622443325, | |
| "learning_rate": 3.809882044339971e-05, | |
| "loss": 0.5367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24829918146133423, | |
| "step": 355, | |
| "valid_targets_mean": 2760.6, | |
| "valid_targets_min": 1474 | |
| }, | |
| { | |
| "epoch": 1.1504, | |
| "grad_norm": 0.5891156574165097, | |
| "learning_rate": 3.800274839977293e-05, | |
| "loss": 0.5211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24416720867156982, | |
| "step": 360, | |
| "valid_targets_mean": 2987.4, | |
| "valid_targets_min": 1684 | |
| }, | |
| { | |
| "epoch": 1.1663999999999999, | |
| "grad_norm": 0.4383115726025833, | |
| "learning_rate": 3.790443573334055e-05, | |
| "loss": 0.5167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25110000371932983, | |
| "step": 365, | |
| "valid_targets_mean": 2802.4, | |
| "valid_targets_min": 1491 | |
| }, | |
| { | |
| "epoch": 1.1824, | |
| "grad_norm": 0.4659780007043588, | |
| "learning_rate": 3.780389468010106e-05, | |
| "loss": 0.5442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2537109851837158, | |
| "step": 370, | |
| "valid_targets_mean": 3175.0, | |
| "valid_targets_min": 1648 | |
| }, | |
| { | |
| "epoch": 1.1984, | |
| "grad_norm": 0.41971726808284665, | |
| "learning_rate": 3.7701137753398075e-05, | |
| "loss": 0.5322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2624451816082001, | |
| "step": 375, | |
| "valid_targets_mean": 4199.4, | |
| "valid_targets_min": 1896 | |
| }, | |
| { | |
| "epoch": 1.2144, | |
| "grad_norm": 0.45669985830428217, | |
| "learning_rate": 3.759617774236292e-05, | |
| "loss": 0.5159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25252872705459595, | |
| "step": 380, | |
| "valid_targets_mean": 3041.3, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 1.2304, | |
| "grad_norm": 0.5219353962349923, | |
| "learning_rate": 3.748902771032288e-05, | |
| "loss": 0.5244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2761985659599304, | |
| "step": 385, | |
| "valid_targets_mean": 3167.9, | |
| "valid_targets_min": 1403 | |
| }, | |
| { | |
| "epoch": 1.2464, | |
| "grad_norm": 0.4599125403809234, | |
| "learning_rate": 3.737970099317535e-05, | |
| "loss": 0.5291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25756001472473145, | |
| "step": 390, | |
| "valid_targets_mean": 3301.4, | |
| "valid_targets_min": 1168 | |
| }, | |
| { | |
| "epoch": 1.2624, | |
| "grad_norm": 0.5264391888700657, | |
| "learning_rate": 3.726821119772803e-05, | |
| "loss": 0.5153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25718802213668823, | |
| "step": 395, | |
| "valid_targets_mean": 2716.5, | |
| "valid_targets_min": 1755 | |
| }, | |
| { | |
| "epoch": 1.2784, | |
| "grad_norm": 0.5265175609805931, | |
| "learning_rate": 3.7154572200005446e-05, | |
| "loss": 0.513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2895033359527588, | |
| "step": 400, | |
| "valid_targets_mean": 2866.0, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 1.2944, | |
| "grad_norm": 0.519454299106432, | |
| "learning_rate": 3.703879814352193e-05, | |
| "loss": 0.5057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2757456600666046, | |
| "step": 405, | |
| "valid_targets_mean": 2979.6, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 1.3104, | |
| "grad_norm": 0.4815016307644221, | |
| "learning_rate": 3.6920903437521305e-05, | |
| "loss": 0.5227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2794150412082672, | |
| "step": 410, | |
| "valid_targets_mean": 3101.7, | |
| "valid_targets_min": 1352 | |
| }, | |
| { | |
| "epoch": 1.3264, | |
| "grad_norm": 0.5023423296160917, | |
| "learning_rate": 3.680090275518352e-05, | |
| "loss": 0.5242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29537904262542725, | |
| "step": 415, | |
| "valid_targets_mean": 2789.9, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 1.3424, | |
| "grad_norm": 0.4281900261017907, | |
| "learning_rate": 3.667881103179844e-05, | |
| "loss": 0.5068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23545397818088531, | |
| "step": 420, | |
| "valid_targets_mean": 3231.1, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 1.3584, | |
| "grad_norm": 0.43510100417753445, | |
| "learning_rate": 3.655464346290697e-05, | |
| "loss": 0.5173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26157039403915405, | |
| "step": 425, | |
| "valid_targets_mean": 3339.4, | |
| "valid_targets_min": 1406 | |
| }, | |
| { | |
| "epoch": 1.3744, | |
| "grad_norm": 0.4450583053213791, | |
| "learning_rate": 3.642841550240983e-05, | |
| "loss": 0.4996, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2654106020927429, | |
| "step": 430, | |
| "valid_targets_mean": 3275.6, | |
| "valid_targets_min": 1338 | |
| }, | |
| { | |
| "epoch": 1.3904, | |
| "grad_norm": 0.49405570189398096, | |
| "learning_rate": 3.630014286064419e-05, | |
| "loss": 0.5212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25802505016326904, | |
| "step": 435, | |
| "valid_targets_mean": 3087.0, | |
| "valid_targets_min": 1392 | |
| }, | |
| { | |
| "epoch": 1.4064, | |
| "grad_norm": 0.520492861665326, | |
| "learning_rate": 3.6169841502428285e-05, | |
| "loss": 0.5057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2416737824678421, | |
| "step": 440, | |
| "valid_targets_mean": 2791.6, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 1.4224, | |
| "grad_norm": 0.4559395788149373, | |
| "learning_rate": 3.603752764507454e-05, | |
| "loss": 0.5261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2236054390668869, | |
| "step": 445, | |
| "valid_targets_mean": 3045.8, | |
| "valid_targets_min": 975 | |
| }, | |
| { | |
| "epoch": 1.4384000000000001, | |
| "grad_norm": 0.487958859838727, | |
| "learning_rate": 3.5903217756371066e-05, | |
| "loss": 0.5189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.256151407957077, | |
| "step": 450, | |
| "valid_targets_mean": 3211.8, | |
| "valid_targets_min": 1425 | |
| }, | |
| { | |
| "epoch": 1.4544000000000001, | |
| "grad_norm": 6.026861812432653, | |
| "learning_rate": 3.576692855253213e-05, | |
| "loss": 0.5059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.233452707529068, | |
| "step": 455, | |
| "valid_targets_mean": 3006.8, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 1.4704, | |
| "grad_norm": 0.44925860522978966, | |
| "learning_rate": 3.562867699611764e-05, | |
| "loss": 0.5316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26866137981414795, | |
| "step": 460, | |
| "valid_targets_mean": 3145.0, | |
| "valid_targets_min": 1974 | |
| }, | |
| { | |
| "epoch": 1.4864, | |
| "grad_norm": 0.43748519005117936, | |
| "learning_rate": 3.5488480293922e-05, | |
| "loss": 0.5127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2456567883491516, | |
| "step": 465, | |
| "valid_targets_mean": 3107.4, | |
| "valid_targets_min": 1723 | |
| }, | |
| { | |
| "epoch": 1.5024, | |
| "grad_norm": 0.5485416488660548, | |
| "learning_rate": 3.5346355894832515e-05, | |
| "loss": 0.5263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2427314817905426, | |
| "step": 470, | |
| "valid_targets_mean": 2881.4, | |
| "valid_targets_min": 1071 | |
| }, | |
| { | |
| "epoch": 1.5184, | |
| "grad_norm": 0.5492193241718348, | |
| "learning_rate": 3.520232148765774e-05, | |
| "loss": 0.5185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24047909677028656, | |
| "step": 475, | |
| "valid_targets_mean": 3304.2, | |
| "valid_targets_min": 628 | |
| }, | |
| { | |
| "epoch": 1.5344, | |
| "grad_norm": 0.509030601412652, | |
| "learning_rate": 3.505639499892591e-05, | |
| "loss": 0.5235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2926231026649475, | |
| "step": 480, | |
| "valid_targets_mean": 3097.2, | |
| "valid_targets_min": 1785 | |
| }, | |
| { | |
| "epoch": 1.5504, | |
| "grad_norm": 0.5556023004390762, | |
| "learning_rate": 3.490859459065382e-05, | |
| "loss": 0.4994, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24574291706085205, | |
| "step": 485, | |
| "valid_targets_mean": 2863.5, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 1.5664, | |
| "grad_norm": 0.4355933250890639, | |
| "learning_rate": 3.475893865808633e-05, | |
| "loss": 0.5007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21669138967990875, | |
| "step": 490, | |
| "valid_targets_mean": 2690.6, | |
| "valid_targets_min": 880 | |
| }, | |
| { | |
| "epoch": 1.5824, | |
| "grad_norm": 0.5435140058892103, | |
| "learning_rate": 3.4607445827406984e-05, | |
| "loss": 0.5201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2567306160926819, | |
| "step": 495, | |
| "valid_targets_mean": 2844.2, | |
| "valid_targets_min": 1578 | |
| }, | |
| { | |
| "epoch": 1.5984, | |
| "grad_norm": 0.5099231583497758, | |
| "learning_rate": 3.445413495341971e-05, | |
| "loss": 0.514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.259105920791626, | |
| "step": 500, | |
| "valid_targets_mean": 3156.5, | |
| "valid_targets_min": 1873 | |
| }, | |
| { | |
| "epoch": 1.6143999999999998, | |
| "grad_norm": 0.4737224278917, | |
| "learning_rate": 3.429902511720216e-05, | |
| "loss": 0.5176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27495771646499634, | |
| "step": 505, | |
| "valid_targets_mean": 2925.5, | |
| "valid_targets_min": 1313 | |
| }, | |
| { | |
| "epoch": 1.6303999999999998, | |
| "grad_norm": 0.4904495643158072, | |
| "learning_rate": 3.4142135623730954e-05, | |
| "loss": 0.5115, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2774983048439026, | |
| "step": 510, | |
| "valid_targets_mean": 2726.0, | |
| "valid_targets_min": 909 | |
| }, | |
| { | |
| "epoch": 1.6463999999999999, | |
| "grad_norm": 0.42906224936035436, | |
| "learning_rate": 3.398348599947888e-05, | |
| "loss": 0.4951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2206537127494812, | |
| "step": 515, | |
| "valid_targets_mean": 3334.7, | |
| "valid_targets_min": 1466 | |
| }, | |
| { | |
| "epoch": 1.6623999999999999, | |
| "grad_norm": 0.504508105224736, | |
| "learning_rate": 3.3823095989984697e-05, | |
| "loss": 0.53, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3056814670562744, | |
| "step": 520, | |
| "valid_targets_mean": 2991.4, | |
| "valid_targets_min": 1933 | |
| }, | |
| { | |
| "epoch": 1.6784, | |
| "grad_norm": 0.5351202359792724, | |
| "learning_rate": 3.366098555739557e-05, | |
| "loss": 0.5188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26875633001327515, | |
| "step": 525, | |
| "valid_targets_mean": 3094.2, | |
| "valid_targets_min": 1153 | |
| }, | |
| { | |
| "epoch": 1.6944, | |
| "grad_norm": 0.4573333991642385, | |
| "learning_rate": 3.349717487798261e-05, | |
| "loss": 0.5158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22667232155799866, | |
| "step": 530, | |
| "valid_targets_mean": 2724.5, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 1.7104, | |
| "grad_norm": 0.4661590895939061, | |
| "learning_rate": 3.3331684339629706e-05, | |
| "loss": 0.5164, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2817540466785431, | |
| "step": 535, | |
| "valid_targets_mean": 3025.5, | |
| "valid_targets_min": 1911 | |
| }, | |
| { | |
| "epoch": 1.7264, | |
| "grad_norm": 0.5316604683020004, | |
| "learning_rate": 3.3164534539296056e-05, | |
| "loss": 0.4904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24950800836086273, | |
| "step": 540, | |
| "valid_targets_mean": 2558.2, | |
| "valid_targets_min": 1381 | |
| }, | |
| { | |
| "epoch": 1.7424, | |
| "grad_norm": 0.49226701109131127, | |
| "learning_rate": 3.299574628045269e-05, | |
| "loss": 0.5237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2845689654350281, | |
| "step": 545, | |
| "valid_targets_mean": 3183.1, | |
| "valid_targets_min": 1391 | |
| }, | |
| { | |
| "epoch": 1.7584, | |
| "grad_norm": 0.4935962099638135, | |
| "learning_rate": 3.282534057049322e-05, | |
| "loss": 0.5111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27177274227142334, | |
| "step": 550, | |
| "valid_targets_mean": 2734.2, | |
| "valid_targets_min": 1313 | |
| }, | |
| { | |
| "epoch": 1.7744, | |
| "grad_norm": 0.5019233751841419, | |
| "learning_rate": 3.265333861811933e-05, | |
| "loss": 0.5237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29612475633621216, | |
| "step": 555, | |
| "valid_targets_mean": 3097.1, | |
| "valid_targets_min": 2044 | |
| }, | |
| { | |
| "epoch": 1.7904, | |
| "grad_norm": 0.4577395132839231, | |
| "learning_rate": 3.2479761830701075e-05, | |
| "loss": 0.5109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2313130497932434, | |
| "step": 560, | |
| "valid_targets_mean": 2743.2, | |
| "valid_targets_min": 1329 | |
| }, | |
| { | |
| "epoch": 1.8064, | |
| "grad_norm": 0.46288866982137783, | |
| "learning_rate": 3.230463181161254e-05, | |
| "loss": 0.5006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2379888892173767, | |
| "step": 565, | |
| "valid_targets_mean": 3225.7, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 1.8224, | |
| "grad_norm": 0.5980963250881742, | |
| "learning_rate": 3.212797035754311e-05, | |
| "loss": 0.5095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23481547832489014, | |
| "step": 570, | |
| "valid_targets_mean": 2868.9, | |
| "valid_targets_min": 667 | |
| }, | |
| { | |
| "epoch": 1.8384, | |
| "grad_norm": 0.45869991131611926, | |
| "learning_rate": 3.194979945578461e-05, | |
| "loss": 0.5157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22095608711242676, | |
| "step": 575, | |
| "valid_targets_mean": 2974.4, | |
| "valid_targets_min": 884 | |
| }, | |
| { | |
| "epoch": 1.8544, | |
| "grad_norm": 0.4781367450330346, | |
| "learning_rate": 3.177014128149479e-05, | |
| "loss": 0.5269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2355835884809494, | |
| "step": 580, | |
| "valid_targets_mean": 2904.4, | |
| "valid_targets_min": 1593 | |
| }, | |
| { | |
| "epoch": 1.8704, | |
| "grad_norm": 0.5218205842284931, | |
| "learning_rate": 3.158901819493742e-05, | |
| "loss": 0.5335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30380451679229736, | |
| "step": 585, | |
| "valid_targets_mean": 2723.4, | |
| "valid_targets_min": 1488 | |
| }, | |
| { | |
| "epoch": 1.8864, | |
| "grad_norm": 0.49357175630318795, | |
| "learning_rate": 3.1406452738699284e-05, | |
| "loss": 0.5215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26988309621810913, | |
| "step": 590, | |
| "valid_targets_mean": 2617.8, | |
| "valid_targets_min": 1034 | |
| }, | |
| { | |
| "epoch": 1.9024, | |
| "grad_norm": 0.44827271739585833, | |
| "learning_rate": 3.122246763488457e-05, | |
| "loss": 0.4997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22802028059959412, | |
| "step": 595, | |
| "valid_targets_mean": 2838.8, | |
| "valid_targets_min": 1163 | |
| }, | |
| { | |
| "epoch": 1.9184, | |
| "grad_norm": 0.4632124014095409, | |
| "learning_rate": 3.103708578228686e-05, | |
| "loss": 0.4991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24168995022773743, | |
| "step": 600, | |
| "valid_targets_mean": 2863.8, | |
| "valid_targets_min": 1428 | |
| }, | |
| { | |
| "epoch": 1.9344000000000001, | |
| "grad_norm": 0.7371275123922867, | |
| "learning_rate": 3.085033025353915e-05, | |
| "loss": 0.5124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2760087251663208, | |
| "step": 605, | |
| "valid_targets_mean": 3405.6, | |
| "valid_targets_min": 2083 | |
| }, | |
| { | |
| "epoch": 1.9504000000000001, | |
| "grad_norm": 0.4756614197293939, | |
| "learning_rate": 3.066222429224221e-05, | |
| "loss": 0.5145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24378544092178345, | |
| "step": 610, | |
| "valid_targets_mean": 2794.1, | |
| "valid_targets_min": 1296 | |
| }, | |
| { | |
| "epoch": 1.9664000000000001, | |
| "grad_norm": 0.42778554593881357, | |
| "learning_rate": 3.047279131007173e-05, | |
| "loss": 0.5025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2605063021183014, | |
| "step": 615, | |
| "valid_targets_mean": 3072.7, | |
| "valid_targets_min": 1641 | |
| }, | |
| { | |
| "epoch": 1.9824000000000002, | |
| "grad_norm": 0.48736282270910075, | |
| "learning_rate": 3.0282054883864434e-05, | |
| "loss": 0.4881, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23605525493621826, | |
| "step": 620, | |
| "valid_targets_mean": 3135.9, | |
| "valid_targets_min": 1347 | |
| }, | |
| { | |
| "epoch": 1.9984, | |
| "grad_norm": 0.4800464610927692, | |
| "learning_rate": 3.009003875268379e-05, | |
| "loss": 0.4943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2850508391857147, | |
| "step": 625, | |
| "valid_targets_mean": 3301.7, | |
| "valid_targets_min": 1878 | |
| }, | |
| { | |
| "epoch": 2.0128, | |
| "grad_norm": 0.5007940198017256, | |
| "learning_rate": 2.9896766814865355e-05, | |
| "loss": 0.4868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23529338836669922, | |
| "step": 630, | |
| "valid_targets_mean": 2643.2, | |
| "valid_targets_min": 1284 | |
| }, | |
| { | |
| "epoch": 2.0288, | |
| "grad_norm": 0.5157074650090006, | |
| "learning_rate": 2.970226312504246e-05, | |
| "loss": 0.4948, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27678927779197693, | |
| "step": 635, | |
| "valid_targets_mean": 2965.7, | |
| "valid_targets_min": 1415 | |
| }, | |
| { | |
| "epoch": 2.0448, | |
| "grad_norm": 0.5101562609902904, | |
| "learning_rate": 2.9506551891152334e-05, | |
| "loss": 0.4968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25116124749183655, | |
| "step": 640, | |
| "valid_targets_mean": 2771.2, | |
| "valid_targets_min": 689 | |
| }, | |
| { | |
| "epoch": 2.0608, | |
| "grad_norm": 0.4529563499288533, | |
| "learning_rate": 2.930965747142319e-05, | |
| "loss": 0.4813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20527660846710205, | |
| "step": 645, | |
| "valid_targets_mean": 2966.9, | |
| "valid_targets_min": 1239 | |
| }, | |
| { | |
| "epoch": 2.0768, | |
| "grad_norm": 0.45826327058683025, | |
| "learning_rate": 2.9111604371342593e-05, | |
| "loss": 0.4964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22757063806056976, | |
| "step": 650, | |
| "valid_targets_mean": 2937.9, | |
| "valid_targets_min": 1403 | |
| }, | |
| { | |
| "epoch": 2.0928, | |
| "grad_norm": 0.4572314921940725, | |
| "learning_rate": 2.891241724060752e-05, | |
| "loss": 0.4649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24460506439208984, | |
| "step": 655, | |
| "valid_targets_mean": 3235.1, | |
| "valid_targets_min": 1409 | |
| }, | |
| { | |
| "epoch": 2.1088, | |
| "grad_norm": 0.5057949551121218, | |
| "learning_rate": 2.8712120870056455e-05, | |
| "loss": 0.5008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27103084325790405, | |
| "step": 660, | |
| "valid_targets_mean": 2827.2, | |
| "valid_targets_min": 1838 | |
| }, | |
| { | |
| "epoch": 2.1248, | |
| "grad_norm": 0.434456854498136, | |
| "learning_rate": 2.851074018858389e-05, | |
| "loss": 0.4846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23950263857841492, | |
| "step": 665, | |
| "valid_targets_mean": 3572.2, | |
| "valid_targets_min": 1233 | |
| }, | |
| { | |
| "epoch": 2.1408, | |
| "grad_norm": 0.48614035866392213, | |
| "learning_rate": 2.8308300260037734e-05, | |
| "loss": 0.5032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23101502656936646, | |
| "step": 670, | |
| "valid_targets_mean": 2830.4, | |
| "valid_targets_min": 1506 | |
| }, | |
| { | |
| "epoch": 2.1568, | |
| "grad_norm": 0.44626305596576143, | |
| "learning_rate": 2.8104826280099796e-05, | |
| "loss": 0.4932, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25110024213790894, | |
| "step": 675, | |
| "valid_targets_mean": 3202.5, | |
| "valid_targets_min": 1848 | |
| }, | |
| { | |
| "epoch": 2.1728, | |
| "grad_norm": 0.4472844696645451, | |
| "learning_rate": 2.7900343573150003e-05, | |
| "loss": 0.486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24017497897148132, | |
| "step": 680, | |
| "valid_targets_mean": 3204.7, | |
| "valid_targets_min": 1108 | |
| }, | |
| { | |
| "epoch": 2.1888, | |
| "grad_norm": 0.5343225728081022, | |
| "learning_rate": 2.7694877589114442e-05, | |
| "loss": 0.4923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24907417595386505, | |
| "step": 685, | |
| "valid_targets_mean": 2942.4, | |
| "valid_targets_min": 1725 | |
| }, | |
| { | |
| "epoch": 2.2048, | |
| "grad_norm": 0.47468755794982304, | |
| "learning_rate": 2.748845390029794e-05, | |
| "loss": 0.4857, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22810614109039307, | |
| "step": 690, | |
| "valid_targets_mean": 2807.8, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 2.2208, | |
| "grad_norm": 0.4826995803470774, | |
| "learning_rate": 2.728109819820129e-05, | |
| "loss": 0.5068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2800004780292511, | |
| "step": 695, | |
| "valid_targets_mean": 3410.8, | |
| "valid_targets_min": 1240 | |
| }, | |
| { | |
| "epoch": 2.2368, | |
| "grad_norm": 0.4592512476126212, | |
| "learning_rate": 2.7072836290323698e-05, | |
| "loss": 0.4836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2372513711452484, | |
| "step": 700, | |
| "valid_targets_mean": 3405.4, | |
| "valid_targets_min": 1309 | |
| }, | |
| { | |
| "epoch": 2.2528, | |
| "grad_norm": 0.4123298917676354, | |
| "learning_rate": 2.6863694096950763e-05, | |
| "loss": 0.4604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2027531862258911, | |
| "step": 705, | |
| "valid_targets_mean": 3237.2, | |
| "valid_targets_min": 1600 | |
| }, | |
| { | |
| "epoch": 2.2688, | |
| "grad_norm": 0.4543061236567951, | |
| "learning_rate": 2.6653697647928485e-05, | |
| "loss": 0.4926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2540218234062195, | |
| "step": 710, | |
| "valid_targets_mean": 2959.7, | |
| "valid_targets_min": 1461 | |
| }, | |
| { | |
| "epoch": 2.2848, | |
| "grad_norm": 0.5076008394558799, | |
| "learning_rate": 2.644287307942352e-05, | |
| "loss": 0.4793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24240398406982422, | |
| "step": 715, | |
| "valid_targets_mean": 2681.1, | |
| "valid_targets_min": 1099 | |
| }, | |
| { | |
| "epoch": 2.3008, | |
| "grad_norm": 0.4089060269005043, | |
| "learning_rate": 2.623124663067034e-05, | |
| "loss": 0.4856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23559370636940002, | |
| "step": 720, | |
| "valid_targets_mean": 3506.2, | |
| "valid_targets_min": 1550 | |
| }, | |
| { | |
| "epoch": 2.3168, | |
| "grad_norm": 0.4551212609304956, | |
| "learning_rate": 2.6018844640705448e-05, | |
| "loss": 0.4827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23695677518844604, | |
| "step": 725, | |
| "valid_targets_mean": 3120.4, | |
| "valid_targets_min": 1065 | |
| }, | |
| { | |
| "epoch": 2.3327999999999998, | |
| "grad_norm": 0.4841751536620542, | |
| "learning_rate": 2.580569354508925e-05, | |
| "loss": 0.4825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20382042229175568, | |
| "step": 730, | |
| "valid_targets_mean": 2650.9, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 2.3487999999999998, | |
| "grad_norm": 0.4581740603290819, | |
| "learning_rate": 2.5591819872615856e-05, | |
| "loss": 0.4635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23988109827041626, | |
| "step": 735, | |
| "valid_targets_mean": 2943.5, | |
| "valid_targets_min": 975 | |
| }, | |
| { | |
| "epoch": 2.3648, | |
| "grad_norm": 0.4378179137522697, | |
| "learning_rate": 2.5377250242011338e-05, | |
| "loss": 0.488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2582421898841858, | |
| "step": 740, | |
| "valid_targets_mean": 3500.6, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 2.3808, | |
| "grad_norm": 0.47224573930300273, | |
| "learning_rate": 2.516201135862073e-05, | |
| "loss": 0.4754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22435212135314941, | |
| "step": 745, | |
| "valid_targets_mean": 2937.9, | |
| "valid_targets_min": 1579 | |
| }, | |
| { | |
| "epoch": 2.3968, | |
| "grad_norm": 0.49031185792544935, | |
| "learning_rate": 2.494613001108431e-05, | |
| "loss": 0.4928, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25457414984703064, | |
| "step": 750, | |
| "valid_targets_mean": 3221.4, | |
| "valid_targets_min": 1247 | |
| }, | |
| { | |
| "epoch": 2.4128, | |
| "grad_norm": 0.5242476263518132, | |
| "learning_rate": 2.4729633068003466e-05, | |
| "loss": 0.4789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24022498726844788, | |
| "step": 755, | |
| "valid_targets_mean": 2896.2, | |
| "valid_targets_min": 1320 | |
| }, | |
| { | |
| "epoch": 2.4288, | |
| "grad_norm": 0.4866222176129818, | |
| "learning_rate": 2.4512547474596624e-05, | |
| "loss": 0.5092, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2546798288822174, | |
| "step": 760, | |
| "valid_targets_mean": 3466.6, | |
| "valid_targets_min": 1631 | |
| }, | |
| { | |
| "epoch": 2.4448, | |
| "grad_norm": 0.5137377843083281, | |
| "learning_rate": 2.429490024934566e-05, | |
| "loss": 0.4808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2947126030921936, | |
| "step": 765, | |
| "valid_targets_mean": 2776.8, | |
| "valid_targets_min": 947 | |
| }, | |
| { | |
| "epoch": 2.4608, | |
| "grad_norm": 0.4869533055861981, | |
| "learning_rate": 2.4076718480633178e-05, | |
| "loss": 0.4933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20823511481285095, | |
| "step": 770, | |
| "valid_targets_mean": 2971.9, | |
| "valid_targets_min": 1278 | |
| }, | |
| { | |
| "epoch": 2.4768, | |
| "grad_norm": 0.45539327422923814, | |
| "learning_rate": 2.3858029323371067e-05, | |
| "loss": 0.5015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20980717241764069, | |
| "step": 775, | |
| "valid_targets_mean": 2973.5, | |
| "valid_targets_min": 1445 | |
| }, | |
| { | |
| "epoch": 2.4928, | |
| "grad_norm": 0.5287411648228943, | |
| "learning_rate": 2.363885999562084e-05, | |
| "loss": 0.4865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23313599824905396, | |
| "step": 780, | |
| "valid_targets_mean": 3004.2, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 2.5088, | |
| "grad_norm": 0.4543483221665911, | |
| "learning_rate": 2.3419237775206026e-05, | |
| "loss": 0.5005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28633490204811096, | |
| "step": 785, | |
| "valid_targets_mean": 3227.5, | |
| "valid_targets_min": 985 | |
| }, | |
| { | |
| "epoch": 2.5248, | |
| "grad_norm": 0.4651724232555382, | |
| "learning_rate": 2.3199189996317205e-05, | |
| "loss": 0.487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24572448432445526, | |
| "step": 790, | |
| "valid_targets_mean": 2907.9, | |
| "valid_targets_min": 1056 | |
| }, | |
| { | |
| "epoch": 2.5408, | |
| "grad_norm": 0.49088060050512494, | |
| "learning_rate": 2.297874404610998e-05, | |
| "loss": 0.4848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24726298451423645, | |
| "step": 795, | |
| "valid_targets_mean": 2909.0, | |
| "valid_targets_min": 1397 | |
| }, | |
| { | |
| "epoch": 2.5568, | |
| "grad_norm": 0.4412421616860558, | |
| "learning_rate": 2.2757927361296376e-05, | |
| "loss": 0.5024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2554225027561188, | |
| "step": 800, | |
| "valid_targets_mean": 3597.6, | |
| "valid_targets_min": 1393 | |
| }, | |
| { | |
| "epoch": 2.5728, | |
| "grad_norm": 0.4511584722794911, | |
| "learning_rate": 2.2536767424730052e-05, | |
| "loss": 0.4783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25293684005737305, | |
| "step": 805, | |
| "valid_targets_mean": 3084.6, | |
| "valid_targets_min": 1588 | |
| }, | |
| { | |
| "epoch": 2.5888, | |
| "grad_norm": 0.504484886615669, | |
| "learning_rate": 2.2315291761985803e-05, | |
| "loss": 0.4975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23480279743671417, | |
| "step": 810, | |
| "valid_targets_mean": 3016.9, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 2.6048, | |
| "grad_norm": 0.49526946333824134, | |
| "learning_rate": 2.2093527937933716e-05, | |
| "loss": 0.4726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2735830545425415, | |
| "step": 815, | |
| "valid_targets_mean": 3600.4, | |
| "valid_targets_min": 1429 | |
| }, | |
| { | |
| "epoch": 2.6208, | |
| "grad_norm": 0.49005069499970527, | |
| "learning_rate": 2.1871503553308447e-05, | |
| "loss": 0.5052, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24496214091777802, | |
| "step": 820, | |
| "valid_targets_mean": 2981.4, | |
| "valid_targets_min": 1634 | |
| }, | |
| { | |
| "epoch": 2.6368, | |
| "grad_norm": 0.4596594761364903, | |
| "learning_rate": 2.164924624127403e-05, | |
| "loss": 0.5024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25404804944992065, | |
| "step": 825, | |
| "valid_targets_mean": 3347.8, | |
| "valid_targets_min": 1674 | |
| }, | |
| { | |
| "epoch": 2.6528, | |
| "grad_norm": 0.46699363461367677, | |
| "learning_rate": 2.1426783663984648e-05, | |
| "loss": 0.4922, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24780279397964478, | |
| "step": 830, | |
| "valid_targets_mean": 3289.5, | |
| "valid_targets_min": 1097 | |
| }, | |
| { | |
| "epoch": 2.6688, | |
| "grad_norm": 0.4530913646330165, | |
| "learning_rate": 2.1204143509141818e-05, | |
| "loss": 0.4895, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23123887181282043, | |
| "step": 835, | |
| "valid_targets_mean": 3102.1, | |
| "valid_targets_min": 1058 | |
| }, | |
| { | |
| "epoch": 2.6848, | |
| "grad_norm": 0.48488083478979, | |
| "learning_rate": 2.0981353486548363e-05, | |
| "loss": 0.5005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29003238677978516, | |
| "step": 840, | |
| "valid_targets_mean": 3341.6, | |
| "valid_targets_min": 2051 | |
| }, | |
| { | |
| "epoch": 2.7008, | |
| "grad_norm": 0.50927260798858, | |
| "learning_rate": 2.075844132465964e-05, | |
| "loss": 0.4862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.262538343667984, | |
| "step": 845, | |
| "valid_targets_mean": 3244.6, | |
| "valid_targets_min": 1111 | |
| }, | |
| { | |
| "epoch": 2.7168, | |
| "grad_norm": 0.483799594883633, | |
| "learning_rate": 2.0535434767132495e-05, | |
| "loss": 0.4741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24056734144687653, | |
| "step": 850, | |
| "valid_targets_mean": 2681.6, | |
| "valid_targets_min": 1340 | |
| }, | |
| { | |
| "epoch": 2.7328, | |
| "grad_norm": 0.4614773306975341, | |
| "learning_rate": 2.0312361569372215e-05, | |
| "loss": 0.4723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24297058582305908, | |
| "step": 855, | |
| "valid_targets_mean": 3447.4, | |
| "valid_targets_min": 2402 | |
| }, | |
| { | |
| "epoch": 2.7488, | |
| "grad_norm": 0.47344331595599315, | |
| "learning_rate": 2.0089249495078186e-05, | |
| "loss": 0.4977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2512054443359375, | |
| "step": 860, | |
| "valid_targets_mean": 2892.4, | |
| "valid_targets_min": 1023 | |
| }, | |
| { | |
| "epoch": 2.7648, | |
| "grad_norm": 0.4312360053420849, | |
| "learning_rate": 1.9866126312788333e-05, | |
| "loss": 0.4805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2499227225780487, | |
| "step": 865, | |
| "valid_targets_mean": 3342.8, | |
| "valid_targets_min": 667 | |
| }, | |
| { | |
| "epoch": 2.7808, | |
| "grad_norm": 0.4804917083313915, | |
| "learning_rate": 1.964301979242308e-05, | |
| "loss": 0.488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23908847570419312, | |
| "step": 870, | |
| "valid_targets_mean": 2947.2, | |
| "valid_targets_min": 1752 | |
| }, | |
| { | |
| "epoch": 2.7968, | |
| "grad_norm": 0.8762633926530743, | |
| "learning_rate": 1.9419957701829138e-05, | |
| "loss": 0.485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2324860394001007, | |
| "step": 875, | |
| "valid_targets_mean": 3017.0, | |
| "valid_targets_min": 965 | |
| }, | |
| { | |
| "epoch": 2.8128, | |
| "grad_norm": 0.4556434340662145, | |
| "learning_rate": 1.9196967803323464e-05, | |
| "loss": 0.4974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23595423996448517, | |
| "step": 880, | |
| "valid_targets_mean": 3213.3, | |
| "valid_targets_min": 884 | |
| }, | |
| { | |
| "epoch": 2.8288, | |
| "grad_norm": 0.4393572955542803, | |
| "learning_rate": 1.8974077850237983e-05, | |
| "loss": 0.4754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2087182253599167, | |
| "step": 885, | |
| "valid_targets_mean": 2638.4, | |
| "valid_targets_min": 955 | |
| }, | |
| { | |
| "epoch": 2.8448, | |
| "grad_norm": 0.46271408791167346, | |
| "learning_rate": 1.875131558346542e-05, | |
| "loss": 0.485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23028402030467987, | |
| "step": 890, | |
| "valid_targets_mean": 3273.6, | |
| "valid_targets_min": 1544 | |
| }, | |
| { | |
| "epoch": 2.8608000000000002, | |
| "grad_norm": 0.4513553889559316, | |
| "learning_rate": 1.8528708728006654e-05, | |
| "loss": 0.4693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21357670426368713, | |
| "step": 895, | |
| "valid_targets_mean": 2945.9, | |
| "valid_targets_min": 985 | |
| }, | |
| { | |
| "epoch": 2.8768000000000002, | |
| "grad_norm": 0.4397598056676539, | |
| "learning_rate": 1.8306284989520055e-05, | |
| "loss": 0.4913, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2715904116630554, | |
| "step": 900, | |
| "valid_targets_mean": 3446.7, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 2.8928000000000003, | |
| "grad_norm": 0.4842128353083839, | |
| "learning_rate": 1.8084072050873265e-05, | |
| "loss": 0.4821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22894605994224548, | |
| "step": 905, | |
| "valid_targets_mean": 2792.0, | |
| "valid_targets_min": 1913 | |
| }, | |
| { | |
| "epoch": 2.9088000000000003, | |
| "grad_norm": 0.46238383721407217, | |
| "learning_rate": 1.786209756869775e-05, | |
| "loss": 0.4701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2544752359390259, | |
| "step": 910, | |
| "valid_targets_mean": 3167.1, | |
| "valid_targets_min": 2189 | |
| }, | |
| { | |
| "epoch": 2.9248, | |
| "grad_norm": 0.47372661840165786, | |
| "learning_rate": 1.764038916994669e-05, | |
| "loss": 0.4818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2640783488750458, | |
| "step": 915, | |
| "valid_targets_mean": 3044.8, | |
| "valid_targets_min": 1011 | |
| }, | |
| { | |
| "epoch": 2.9408, | |
| "grad_norm": 0.6128265221061115, | |
| "learning_rate": 1.741897444845649e-05, | |
| "loss": 0.4837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27274250984191895, | |
| "step": 920, | |
| "valid_targets_mean": 3361.2, | |
| "valid_targets_min": 1932 | |
| }, | |
| { | |
| "epoch": 2.9568, | |
| "grad_norm": 0.4542712376697493, | |
| "learning_rate": 1.7197880961512498e-05, | |
| "loss": 0.4857, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25496718287467957, | |
| "step": 925, | |
| "valid_targets_mean": 2903.7, | |
| "valid_targets_min": 749 | |
| }, | |
| { | |
| "epoch": 2.9728, | |
| "grad_norm": 0.4615603875036676, | |
| "learning_rate": 1.6977136226419187e-05, | |
| "loss": 0.4896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2545563578605652, | |
| "step": 930, | |
| "valid_targets_mean": 3328.4, | |
| "valid_targets_min": 1912 | |
| }, | |
| { | |
| "epoch": 2.9888, | |
| "grad_norm": 0.4329963191374008, | |
| "learning_rate": 1.6756767717075354e-05, | |
| "loss": 0.4827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2186550498008728, | |
| "step": 935, | |
| "valid_targets_mean": 3280.2, | |
| "valid_targets_min": 1515 | |
| }, | |
| { | |
| "epoch": 3.0032, | |
| "grad_norm": 0.4275403372883427, | |
| "learning_rate": 1.6536802860554723e-05, | |
| "loss": 0.4587, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2611745595932007, | |
| "step": 940, | |
| "valid_targets_mean": 3092.6, | |
| "valid_targets_min": 1401 | |
| }, | |
| { | |
| "epoch": 3.0192, | |
| "grad_norm": 0.450471108392735, | |
| "learning_rate": 1.631726903369238e-05, | |
| "loss": 0.4665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22990155220031738, | |
| "step": 945, | |
| "valid_targets_mean": 2913.1, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 3.0352, | |
| "grad_norm": 0.5160073223042104, | |
| "learning_rate": 1.609819355967744e-05, | |
| "loss": 0.4673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2828558087348938, | |
| "step": 950, | |
| "valid_targets_mean": 3116.0, | |
| "valid_targets_min": 1473 | |
| }, | |
| { | |
| "epoch": 3.0512, | |
| "grad_norm": 0.49174113573385936, | |
| "learning_rate": 1.587960370465239e-05, | |
| "loss": 0.469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2625318467617035, | |
| "step": 955, | |
| "valid_targets_mean": 2854.3, | |
| "valid_targets_min": 1621 | |
| }, | |
| { | |
| "epoch": 3.0672, | |
| "grad_norm": 0.45141979426038364, | |
| "learning_rate": 1.5661526674319582e-05, | |
| "loss": 0.46, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2091599404811859, | |
| "step": 960, | |
| "valid_targets_mean": 3043.4, | |
| "valid_targets_min": 1375 | |
| }, | |
| { | |
| "epoch": 3.0832, | |
| "grad_norm": 0.43282987334993606, | |
| "learning_rate": 1.544398961055516e-05, | |
| "loss": 0.4575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22479215264320374, | |
| "step": 965, | |
| "valid_targets_mean": 3451.6, | |
| "valid_targets_min": 1395 | |
| }, | |
| { | |
| "epoch": 3.0992, | |
| "grad_norm": 0.4509947466200453, | |
| "learning_rate": 1.5227019588031035e-05, | |
| "loss": 0.4436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19393005967140198, | |
| "step": 970, | |
| "valid_targets_mean": 3079.7, | |
| "valid_targets_min": 1488 | |
| }, | |
| { | |
| "epoch": 3.1152, | |
| "grad_norm": 0.5160194179383659, | |
| "learning_rate": 1.501064361084511e-05, | |
| "loss": 0.4778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2354956418275833, | |
| "step": 975, | |
| "valid_targets_mean": 2606.4, | |
| "valid_targets_min": 640 | |
| }, | |
| { | |
| "epoch": 3.1312, | |
| "grad_norm": 0.49691820011535437, | |
| "learning_rate": 1.47948886091604e-05, | |
| "loss": 0.4802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24869810044765472, | |
| "step": 980, | |
| "valid_targets_mean": 2802.9, | |
| "valid_targets_min": 1420 | |
| }, | |
| { | |
| "epoch": 3.1471999999999998, | |
| "grad_norm": 0.5229876254819794, | |
| "learning_rate": 1.4579781435853289e-05, | |
| "loss": 0.4973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2780328392982483, | |
| "step": 985, | |
| "valid_targets_mean": 3310.3, | |
| "valid_targets_min": 1977 | |
| }, | |
| { | |
| "epoch": 3.1632, | |
| "grad_norm": 0.4757032753580724, | |
| "learning_rate": 1.4365348863171406e-05, | |
| "loss": 0.4749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23341715335845947, | |
| "step": 990, | |
| "valid_targets_mean": 2850.2, | |
| "valid_targets_min": 1371 | |
| }, | |
| { | |
| "epoch": 3.1792, | |
| "grad_norm": 0.49397855207266117, | |
| "learning_rate": 1.4151617579401551e-05, | |
| "loss": 0.4664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2610412538051605, | |
| "step": 995, | |
| "valid_targets_mean": 3235.2, | |
| "valid_targets_min": 1271 | |
| }, | |
| { | |
| "epoch": 3.1952, | |
| "grad_norm": 0.5634169766980279, | |
| "learning_rate": 1.3938614185548094e-05, | |
| "loss": 0.4764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24418114125728607, | |
| "step": 1000, | |
| "valid_targets_mean": 2622.4, | |
| "valid_targets_min": 1061 | |
| }, | |
| { | |
| "epoch": 3.2112, | |
| "grad_norm": 0.43284546857851314, | |
| "learning_rate": 1.3726365192022173e-05, | |
| "loss": 0.4714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20849572122097015, | |
| "step": 1005, | |
| "valid_targets_mean": 2964.6, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 3.2272, | |
| "grad_norm": 0.4425874996893573, | |
| "learning_rate": 1.3514897015342257e-05, | |
| "loss": 0.4613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21642732620239258, | |
| "step": 1010, | |
| "valid_targets_mean": 2798.9, | |
| "valid_targets_min": 1047 | |
| }, | |
| { | |
| "epoch": 3.2432, | |
| "grad_norm": 0.520474503449209, | |
| "learning_rate": 1.3304235974846295e-05, | |
| "loss": 0.4632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.258159875869751, | |
| "step": 1015, | |
| "valid_targets_mean": 2868.6, | |
| "valid_targets_min": 1428 | |
| }, | |
| { | |
| "epoch": 3.2592, | |
| "grad_norm": 0.5144656747497975, | |
| "learning_rate": 1.3094408289416052e-05, | |
| "loss": 0.4683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2635853886604309, | |
| "step": 1020, | |
| "valid_targets_mean": 3025.1, | |
| "valid_targets_min": 1064 | |
| }, | |
| { | |
| "epoch": 3.2752, | |
| "grad_norm": 0.45505799007822667, | |
| "learning_rate": 1.2885440074213877e-05, | |
| "loss": 0.4732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22156450152397156, | |
| "step": 1025, | |
| "valid_targets_mean": 3054.5, | |
| "valid_targets_min": 918 | |
| }, | |
| { | |
| "epoch": 3.2912, | |
| "grad_norm": 0.49329116427210845, | |
| "learning_rate": 1.267735733743242e-05, | |
| "loss": 0.4789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2545117437839508, | |
| "step": 1030, | |
| "valid_targets_mean": 2909.3, | |
| "valid_targets_min": 1099 | |
| }, | |
| { | |
| "epoch": 3.3072, | |
| "grad_norm": 0.46073569463876607, | |
| "learning_rate": 1.2470185977057643e-05, | |
| "loss": 0.4565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23767399787902832, | |
| "step": 1035, | |
| "valid_targets_mean": 3542.4, | |
| "valid_targets_min": 1528 | |
| }, | |
| { | |
| "epoch": 3.3232, | |
| "grad_norm": 1.1380968918785033, | |
| "learning_rate": 1.2263951777645588e-05, | |
| "loss": 0.4662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22722697257995605, | |
| "step": 1040, | |
| "valid_targets_mean": 2743.5, | |
| "valid_targets_min": 1074 | |
| }, | |
| { | |
| "epoch": 3.3392, | |
| "grad_norm": 0.5152322146587375, | |
| "learning_rate": 1.2058680407113176e-05, | |
| "loss": 0.4919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26898497343063354, | |
| "step": 1045, | |
| "valid_targets_mean": 3478.0, | |
| "valid_targets_min": 2344 | |
| }, | |
| { | |
| "epoch": 3.3552, | |
| "grad_norm": 0.4689141874042101, | |
| "learning_rate": 1.1854397413543626e-05, | |
| "loss": 0.4404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19132088124752045, | |
| "step": 1050, | |
| "valid_targets_mean": 2550.5, | |
| "valid_targets_min": 641 | |
| }, | |
| { | |
| "epoch": 3.3712, | |
| "grad_norm": 0.4569028553905679, | |
| "learning_rate": 1.1651128222006713e-05, | |
| "loss": 0.4689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20203140377998352, | |
| "step": 1055, | |
| "valid_targets_mean": 3417.3, | |
| "valid_targets_min": 1001 | |
| }, | |
| { | |
| "epoch": 3.3872, | |
| "grad_norm": 0.4386665491317407, | |
| "learning_rate": 1.1448898131394364e-05, | |
| "loss": 0.4599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2558937072753906, | |
| "step": 1060, | |
| "valid_targets_mean": 3618.4, | |
| "valid_targets_min": 1132 | |
| }, | |
| { | |
| "epoch": 3.4032, | |
| "grad_norm": 0.4857080547943075, | |
| "learning_rate": 1.124773231127196e-05, | |
| "loss": 0.4735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22912126779556274, | |
| "step": 1065, | |
| "valid_targets_mean": 2826.2, | |
| "valid_targets_min": 1249 | |
| }, | |
| { | |
| "epoch": 3.4192, | |
| "grad_norm": 0.7075154349707183, | |
| "learning_rate": 1.1047655798745752e-05, | |
| "loss": 0.4616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23784030973911285, | |
| "step": 1070, | |
| "valid_targets_mean": 3468.4, | |
| "valid_targets_min": 1545 | |
| }, | |
| { | |
| "epoch": 3.4352, | |
| "grad_norm": 0.4741688779214942, | |
| "learning_rate": 1.084869349534671e-05, | |
| "loss": 0.464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2626760005950928, | |
| "step": 1075, | |
| "valid_targets_mean": 3524.6, | |
| "valid_targets_min": 1213 | |
| }, | |
| { | |
| "epoch": 3.4512, | |
| "grad_norm": 0.4874858260927396, | |
| "learning_rate": 1.0650870163931275e-05, | |
| "loss": 0.4724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20847836136817932, | |
| "step": 1080, | |
| "valid_targets_mean": 2429.3, | |
| "valid_targets_min": 1342 | |
| }, | |
| { | |
| "epoch": 3.4672, | |
| "grad_norm": 0.4876869164388468, | |
| "learning_rate": 1.0454210425599426e-05, | |
| "loss": 0.4674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24478471279144287, | |
| "step": 1085, | |
| "valid_targets_mean": 2773.8, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 3.4832, | |
| "grad_norm": 0.4752134862059384, | |
| "learning_rate": 1.0258738756630255e-05, | |
| "loss": 0.4595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23344089090824127, | |
| "step": 1090, | |
| "valid_targets_mean": 2999.8, | |
| "valid_targets_min": 1357 | |
| }, | |
| { | |
| "epoch": 3.4992, | |
| "grad_norm": 0.44670646778927636, | |
| "learning_rate": 1.0064479485435737e-05, | |
| "loss": 0.4726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19428589940071106, | |
| "step": 1095, | |
| "valid_targets_mean": 2776.6, | |
| "valid_targets_min": 1594 | |
| }, | |
| { | |
| "epoch": 3.5152, | |
| "grad_norm": 0.4732489224718485, | |
| "learning_rate": 9.871456789532736e-06, | |
| "loss": 0.4733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22874388098716736, | |
| "step": 1100, | |
| "valid_targets_mean": 2985.3, | |
| "valid_targets_min": 1850 | |
| }, | |
| { | |
| "epoch": 3.5312, | |
| "grad_norm": 0.4507617362751026, | |
| "learning_rate": 9.679694692533909e-06, | |
| "loss": 0.4557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24319292604923248, | |
| "step": 1105, | |
| "valid_targets_mean": 3500.3, | |
| "valid_targets_min": 1069 | |
| }, | |
| { | |
| "epoch": 3.5472, | |
| "grad_norm": 0.47845324805606443, | |
| "learning_rate": 9.489217061157744e-06, | |
| "loss": 0.4747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22654272615909576, | |
| "step": 1110, | |
| "valid_targets_mean": 3058.6, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 3.5632, | |
| "grad_norm": 0.44930562581242833, | |
| "learning_rate": 9.30004760225806e-06, | |
| "loss": 0.4635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21433737874031067, | |
| "step": 1115, | |
| "valid_targets_mean": 2892.0, | |
| "valid_targets_min": 1652 | |
| }, | |
| { | |
| "epoch": 3.5792, | |
| "grad_norm": 0.46051845797382324, | |
| "learning_rate": 9.112209859873479e-06, | |
| "loss": 0.4767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23646137118339539, | |
| "step": 1120, | |
| "valid_targets_mean": 3047.9, | |
| "valid_targets_min": 1204 | |
| }, | |
| { | |
| "epoch": 3.5952, | |
| "grad_norm": 0.518040425573914, | |
| "learning_rate": 8.925727212297154e-06, | |
| "loss": 0.471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24463368952274323, | |
| "step": 1125, | |
| "valid_targets_mean": 2889.5, | |
| "valid_targets_min": 1435 | |
| }, | |
| { | |
| "epoch": 3.6112, | |
| "grad_norm": 0.5282336939379751, | |
| "learning_rate": 8.74062286916705e-06, | |
| "loss": 0.4557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23317721486091614, | |
| "step": 1130, | |
| "valid_targets_mean": 2804.1, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 3.6272, | |
| "grad_norm": 0.42473984542421317, | |
| "learning_rate": 8.55691986857733e-06, | |
| "loss": 0.457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23406881093978882, | |
| "step": 1135, | |
| "valid_targets_mean": 3556.3, | |
| "valid_targets_min": 1793 | |
| }, | |
| { | |
| "epoch": 3.6432, | |
| "grad_norm": 0.4763111625783207, | |
| "learning_rate": 8.374641074210979e-06, | |
| "loss": 0.475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2574063539505005, | |
| "step": 1140, | |
| "valid_targets_mean": 2914.0, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 3.6592000000000002, | |
| "grad_norm": 0.4627805208631961, | |
| "learning_rate": 8.193809172494249e-06, | |
| "loss": 0.4642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21742022037506104, | |
| "step": 1145, | |
| "valid_targets_mean": 3212.3, | |
| "valid_targets_min": 1433 | |
| }, | |
| { | |
| "epoch": 3.6752000000000002, | |
| "grad_norm": 0.4583153261416385, | |
| "learning_rate": 8.014446669773061e-06, | |
| "loss": 0.4541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22402873635292053, | |
| "step": 1150, | |
| "valid_targets_mean": 3207.1, | |
| "valid_targets_min": 2009 | |
| }, | |
| { | |
| "epoch": 3.6912000000000003, | |
| "grad_norm": 0.5684605679181882, | |
| "learning_rate": 7.83657588951187e-06, | |
| "loss": 0.4922, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22889772057533264, | |
| "step": 1155, | |
| "valid_targets_mean": 2417.2, | |
| "valid_targets_min": 1240 | |
| }, | |
| { | |
| "epoch": 3.7072000000000003, | |
| "grad_norm": 0.48549138988607504, | |
| "learning_rate": 7.66021896951529e-06, | |
| "loss": 0.4582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22643694281578064, | |
| "step": 1160, | |
| "valid_targets_mean": 2842.7, | |
| "valid_targets_min": 1629 | |
| }, | |
| { | |
| "epoch": 3.7232, | |
| "grad_norm": 0.48999301596541023, | |
| "learning_rate": 7.485397859172841e-06, | |
| "loss": 0.4819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24150823056697845, | |
| "step": 1165, | |
| "valid_targets_mean": 2834.5, | |
| "valid_targets_min": 1642 | |
| }, | |
| { | |
| "epoch": 3.7392, | |
| "grad_norm": 0.5234114662032995, | |
| "learning_rate": 7.312134316727093e-06, | |
| "loss": 0.4681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23145684599876404, | |
| "step": 1170, | |
| "valid_targets_mean": 2914.3, | |
| "valid_targets_min": 1406 | |
| }, | |
| { | |
| "epoch": 3.7552, | |
| "grad_norm": 0.48976591667376895, | |
| "learning_rate": 7.140449906565656e-06, | |
| "loss": 0.4681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2297079712152481, | |
| "step": 1175, | |
| "valid_targets_mean": 3307.9, | |
| "valid_targets_min": 1882 | |
| }, | |
| { | |
| "epoch": 3.7712, | |
| "grad_norm": 0.4975298653538736, | |
| "learning_rate": 6.970365996537285e-06, | |
| "loss": 0.4611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22569899260997772, | |
| "step": 1180, | |
| "valid_targets_mean": 2788.9, | |
| "valid_targets_min": 752 | |
| }, | |
| { | |
| "epoch": 3.7872, | |
| "grad_norm": 0.48902216227735895, | |
| "learning_rate": 6.801903755292403e-06, | |
| "loss": 0.4712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.241786390542984, | |
| "step": 1185, | |
| "valid_targets_mean": 3153.2, | |
| "valid_targets_min": 2123 | |
| }, | |
| { | |
| "epoch": 3.8032, | |
| "grad_norm": 0.5098309755198677, | |
| "learning_rate": 6.635084149648481e-06, | |
| "loss": 0.4777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2331969290971756, | |
| "step": 1190, | |
| "valid_targets_mean": 3060.3, | |
| "valid_targets_min": 1051 | |
| }, | |
| { | |
| "epoch": 3.8192, | |
| "grad_norm": 0.5079088842619889, | |
| "learning_rate": 6.469927941980483e-06, | |
| "loss": 0.4689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1932951956987381, | |
| "step": 1195, | |
| "valid_targets_mean": 2451.8, | |
| "valid_targets_min": 1247 | |
| }, | |
| { | |
| "epoch": 3.8352, | |
| "grad_norm": 0.51113694581853, | |
| "learning_rate": 6.30645568763681e-06, | |
| "loss": 0.463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24860982596874237, | |
| "step": 1200, | |
| "valid_targets_mean": 3285.6, | |
| "valid_targets_min": 1472 | |
| }, | |
| { | |
| "epoch": 3.8512, | |
| "grad_norm": 0.5202062021434569, | |
| "learning_rate": 6.144687732380963e-06, | |
| "loss": 0.4662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23511703312397003, | |
| "step": 1205, | |
| "valid_targets_mean": 2735.3, | |
| "valid_targets_min": 1513 | |
| }, | |
| { | |
| "epoch": 3.8672, | |
| "grad_norm": 0.46415649801108116, | |
| "learning_rate": 5.9846442098592895e-06, | |
| "loss": 0.4604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24313569068908691, | |
| "step": 1210, | |
| "valid_targets_mean": 2886.9, | |
| "valid_targets_min": 726 | |
| }, | |
| { | |
| "epoch": 3.8832, | |
| "grad_norm": 0.48843310938787426, | |
| "learning_rate": 5.826345039095178e-06, | |
| "loss": 0.4821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2138245403766632, | |
| "step": 1215, | |
| "valid_targets_mean": 2952.6, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 3.8992, | |
| "grad_norm": 0.45187325642880627, | |
| "learning_rate": 5.669809922009937e-06, | |
| "loss": 0.4893, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21185560524463654, | |
| "step": 1220, | |
| "valid_targets_mean": 3259.9, | |
| "valid_targets_min": 1767 | |
| }, | |
| { | |
| "epoch": 3.9152, | |
| "grad_norm": 0.4512362910904971, | |
| "learning_rate": 5.515058340970665e-06, | |
| "loss": 0.464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23606804013252258, | |
| "step": 1225, | |
| "valid_targets_mean": 3170.3, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 3.9312, | |
| "grad_norm": 0.563692302304579, | |
| "learning_rate": 5.362109556365496e-06, | |
| "loss": 0.4762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23333798348903656, | |
| "step": 1230, | |
| "valid_targets_mean": 3311.6, | |
| "valid_targets_min": 1618 | |
| }, | |
| { | |
| "epoch": 3.9472, | |
| "grad_norm": 0.4768555432720377, | |
| "learning_rate": 5.2109826042064445e-06, | |
| "loss": 0.4876, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22933447360992432, | |
| "step": 1235, | |
| "valid_targets_mean": 3085.6, | |
| "valid_targets_min": 1800 | |
| }, | |
| { | |
| "epoch": 3.9632, | |
| "grad_norm": 0.5103664581840371, | |
| "learning_rate": 5.0616962937601945e-06, | |
| "loss": 0.4806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23675984144210815, | |
| "step": 1240, | |
| "valid_targets_mean": 3059.2, | |
| "valid_targets_min": 1210 | |
| }, | |
| { | |
| "epoch": 3.9792, | |
| "grad_norm": 0.46382472726277013, | |
| "learning_rate": 4.914269205207076e-06, | |
| "loss": 0.4661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2383194863796234, | |
| "step": 1245, | |
| "valid_targets_mean": 3139.2, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 3.9952, | |
| "grad_norm": 0.4268050053278703, | |
| "learning_rate": 4.76871968732858e-06, | |
| "loss": 0.4592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23828884959220886, | |
| "step": 1250, | |
| "valid_targets_mean": 3040.5, | |
| "valid_targets_min": 1095 | |
| }, | |
| { | |
| "epoch": 4.0096, | |
| "grad_norm": 0.49732816108587136, | |
| "learning_rate": 4.625065855223689e-06, | |
| "loss": 0.4567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2624007761478424, | |
| "step": 1255, | |
| "valid_targets_mean": 2886.8, | |
| "valid_targets_min": 750 | |
| }, | |
| { | |
| "epoch": 4.0256, | |
| "grad_norm": 0.4831609813667501, | |
| "learning_rate": 4.483325588054259e-06, | |
| "loss": 0.4591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27929121255874634, | |
| "step": 1260, | |
| "valid_targets_mean": 3247.8, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 4.0416, | |
| "grad_norm": 0.4399957174180194, | |
| "learning_rate": 4.343516526819755e-06, | |
| "loss": 0.455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21088378131389618, | |
| "step": 1265, | |
| "valid_targets_mean": 3649.4, | |
| "valid_targets_min": 1935 | |
| }, | |
| { | |
| "epoch": 4.0576, | |
| "grad_norm": 0.4922430221954871, | |
| "learning_rate": 4.205656072161681e-06, | |
| "loss": 0.4581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25056588649749756, | |
| "step": 1270, | |
| "valid_targets_mean": 3408.4, | |
| "valid_targets_min": 1894 | |
| }, | |
| { | |
| "epoch": 4.0736, | |
| "grad_norm": 0.46350490712051406, | |
| "learning_rate": 4.069761382197901e-06, | |
| "loss": 0.4558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2043137550354004, | |
| "step": 1275, | |
| "valid_targets_mean": 2936.8, | |
| "valid_targets_min": 1347 | |
| }, | |
| { | |
| "epoch": 4.0896, | |
| "grad_norm": 0.4994153609615677, | |
| "learning_rate": 3.935849370387104e-06, | |
| "loss": 0.446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22454729676246643, | |
| "step": 1280, | |
| "valid_targets_mean": 2772.5, | |
| "valid_targets_min": 1484 | |
| }, | |
| { | |
| "epoch": 4.1056, | |
| "grad_norm": 0.5499827117625989, | |
| "learning_rate": 3.803936703423783e-06, | |
| "loss": 0.4846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24745602905750275, | |
| "step": 1285, | |
| "valid_targets_mean": 3050.7, | |
| "valid_targets_min": 1528 | |
| }, | |
| { | |
| "epoch": 4.1216, | |
| "grad_norm": 0.4406929832672752, | |
| "learning_rate": 3.6740397991638864e-06, | |
| "loss": 0.4527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22310206294059753, | |
| "step": 1290, | |
| "valid_targets_mean": 3021.6, | |
| "valid_targets_min": 1125 | |
| }, | |
| { | |
| "epoch": 4.1376, | |
| "grad_norm": 0.44705321465815584, | |
| "learning_rate": 3.5461748245814633e-06, | |
| "loss": 0.4589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22481977939605713, | |
| "step": 1295, | |
| "valid_targets_mean": 3618.6, | |
| "valid_targets_min": 1692 | |
| }, | |
| { | |
| "epoch": 4.1536, | |
| "grad_norm": 0.49516970608135413, | |
| "learning_rate": 3.420357693756502e-06, | |
| "loss": 0.4585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26202359795570374, | |
| "step": 1300, | |
| "valid_targets_mean": 3241.0, | |
| "valid_targets_min": 1015 | |
| }, | |
| { | |
| "epoch": 4.1696, | |
| "grad_norm": 0.5360703828757977, | |
| "learning_rate": 3.2966040658942666e-06, | |
| "loss": 0.4664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.266828715801239, | |
| "step": 1305, | |
| "valid_targets_mean": 2974.2, | |
| "valid_targets_min": 828 | |
| }, | |
| { | |
| "epoch": 4.1856, | |
| "grad_norm": 0.4950544833096414, | |
| "learning_rate": 3.174929343376374e-06, | |
| "loss": 0.4451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22401739656925201, | |
| "step": 1310, | |
| "valid_targets_mean": 2744.4, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 4.2016, | |
| "grad_norm": 0.4311500601540355, | |
| "learning_rate": 3.055348669843794e-06, | |
| "loss": 0.4495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2312304973602295, | |
| "step": 1315, | |
| "valid_targets_mean": 3403.9, | |
| "valid_targets_min": 1499 | |
| }, | |
| { | |
| "epoch": 4.2176, | |
| "grad_norm": 0.5154587342573326, | |
| "learning_rate": 2.937876928312062e-06, | |
| "loss": 0.4868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3122970461845398, | |
| "step": 1320, | |
| "valid_targets_mean": 3197.4, | |
| "valid_targets_min": 1823 | |
| }, | |
| { | |
| "epoch": 4.2336, | |
| "grad_norm": 0.4559328431144319, | |
| "learning_rate": 2.8225287393189547e-06, | |
| "loss": 0.4747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20903021097183228, | |
| "step": 1325, | |
| "valid_targets_mean": 2931.4, | |
| "valid_targets_min": 1677 | |
| }, | |
| { | |
| "epoch": 4.2496, | |
| "grad_norm": 0.4806364348701646, | |
| "learning_rate": 2.709318459104815e-06, | |
| "loss": 0.4605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23968324065208435, | |
| "step": 1330, | |
| "valid_targets_mean": 2946.6, | |
| "valid_targets_min": 1566 | |
| }, | |
| { | |
| "epoch": 4.2656, | |
| "grad_norm": 0.45289745050543984, | |
| "learning_rate": 2.5982601778257733e-06, | |
| "loss": 0.4515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22231417894363403, | |
| "step": 1335, | |
| "valid_targets_mean": 3004.3, | |
| "valid_targets_min": 1691 | |
| }, | |
| { | |
| "epoch": 4.2816, | |
| "grad_norm": 0.47798104194681423, | |
| "learning_rate": 2.4893677178000797e-06, | |
| "loss": 0.4687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22800111770629883, | |
| "step": 1340, | |
| "valid_targets_mean": 2962.9, | |
| "valid_targets_min": 1148 | |
| }, | |
| { | |
| "epoch": 4.2976, | |
| "grad_norm": 0.4487440581613835, | |
| "learning_rate": 2.3826546317877795e-06, | |
| "loss": 0.467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2532956600189209, | |
| "step": 1345, | |
| "valid_targets_mean": 3242.4, | |
| "valid_targets_min": 1630 | |
| }, | |
| { | |
| "epoch": 4.3136, | |
| "grad_norm": 0.49067799892929675, | |
| "learning_rate": 2.278134201303952e-06, | |
| "loss": 0.4539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19229218363761902, | |
| "step": 1350, | |
| "valid_targets_mean": 2673.9, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 4.3296, | |
| "grad_norm": 0.4932440061265886, | |
| "learning_rate": 2.1758194349656624e-06, | |
| "loss": 0.4551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24860474467277527, | |
| "step": 1355, | |
| "valid_targets_mean": 3220.6, | |
| "valid_targets_min": 1626 | |
| }, | |
| { | |
| "epoch": 4.3456, | |
| "grad_norm": 0.4731361095577356, | |
| "learning_rate": 2.075723066872939e-06, | |
| "loss": 0.4662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21686381101608276, | |
| "step": 1360, | |
| "valid_targets_mean": 3198.9, | |
| "valid_targets_min": 1827 | |
| }, | |
| { | |
| "epoch": 4.3616, | |
| "grad_norm": 0.4276096023507029, | |
| "learning_rate": 1.977857555023854e-06, | |
| "loss": 0.4351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25182968378067017, | |
| "step": 1365, | |
| "valid_targets_mean": 3999.2, | |
| "valid_targets_min": 1878 | |
| }, | |
| { | |
| "epoch": 4.3776, | |
| "grad_norm": 0.48625622975368676, | |
| "learning_rate": 1.8822350797640543e-06, | |
| "loss": 0.443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24654650688171387, | |
| "step": 1370, | |
| "valid_targets_mean": 2706.6, | |
| "valid_targets_min": 1803 | |
| }, | |
| { | |
| "epoch": 4.3936, | |
| "grad_norm": 0.45154196191194956, | |
| "learning_rate": 1.788867542270729e-06, | |
| "loss": 0.4621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23573841154575348, | |
| "step": 1375, | |
| "valid_targets_mean": 2747.8, | |
| "valid_targets_min": 1506 | |
| }, | |
| { | |
| "epoch": 4.4096, | |
| "grad_norm": 0.47622906189431036, | |
| "learning_rate": 1.6977665630714345e-06, | |
| "loss": 0.4714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2745051980018616, | |
| "step": 1380, | |
| "valid_targets_mean": 3200.2, | |
| "valid_targets_min": 1554 | |
| }, | |
| { | |
| "epoch": 4.4256, | |
| "grad_norm": 0.4747768382814133, | |
| "learning_rate": 1.6089434805977799e-06, | |
| "loss": 0.4421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20941603183746338, | |
| "step": 1385, | |
| "valid_targets_mean": 2683.9, | |
| "valid_targets_min": 1099 | |
| }, | |
| { | |
| "epoch": 4.4416, | |
| "grad_norm": 0.4898721125591949, | |
| "learning_rate": 1.5224093497742654e-06, | |
| "loss": 0.4746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2430362105369568, | |
| "step": 1390, | |
| "valid_targets_mean": 3143.4, | |
| "valid_targets_min": 1850 | |
| }, | |
| { | |
| "epoch": 4.4576, | |
| "grad_norm": 0.45924092288013746, | |
| "learning_rate": 1.4381749406423695e-06, | |
| "loss": 0.4566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22313876450061798, | |
| "step": 1395, | |
| "valid_targets_mean": 2977.5, | |
| "valid_targets_min": 1612 | |
| }, | |
| { | |
| "epoch": 4.4736, | |
| "grad_norm": 0.45355426629210616, | |
| "learning_rate": 1.3562507370201062e-06, | |
| "loss": 0.4624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21281218528747559, | |
| "step": 1400, | |
| "valid_targets_mean": 2939.4, | |
| "valid_targets_min": 1601 | |
| }, | |
| { | |
| "epoch": 4.4896, | |
| "grad_norm": 0.5066555022569265, | |
| "learning_rate": 1.2766469351972345e-06, | |
| "loss": 0.4741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21767839789390564, | |
| "step": 1405, | |
| "valid_targets_mean": 2710.8, | |
| "valid_targets_min": 1358 | |
| }, | |
| { | |
| "epoch": 4.5056, | |
| "grad_norm": 0.43358289004641076, | |
| "learning_rate": 1.1993734426661985e-06, | |
| "loss": 0.4472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22621162235736847, | |
| "step": 1410, | |
| "valid_targets_mean": 3720.8, | |
| "valid_targets_min": 1983 | |
| }, | |
| { | |
| "epoch": 4.5216, | |
| "grad_norm": 0.5044761882250215, | |
| "learning_rate": 1.1244398768890496e-06, | |
| "loss": 0.4828, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23491895198822021, | |
| "step": 1415, | |
| "valid_targets_mean": 2813.6, | |
| "valid_targets_min": 1320 | |
| }, | |
| { | |
| "epoch": 4.5376, | |
| "grad_norm": 0.5330550467460243, | |
| "learning_rate": 1.0518555641004613e-06, | |
| "loss": 0.46, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2191581130027771, | |
| "step": 1420, | |
| "valid_targets_mean": 2327.1, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 4.5536, | |
| "grad_norm": 0.7320905137758208, | |
| "learning_rate": 9.816295381469954e-07, | |
| "loss": 0.4597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2201734483242035, | |
| "step": 1425, | |
| "valid_targets_mean": 2678.6, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 4.5696, | |
| "grad_norm": 0.4551873603504526, | |
| "learning_rate": 9.137705393627239e-07, | |
| "loss": 0.445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19194208085536957, | |
| "step": 1430, | |
| "valid_targets_mean": 3091.6, | |
| "valid_targets_min": 713 | |
| }, | |
| { | |
| "epoch": 4.5856, | |
| "grad_norm": 0.4698529743121287, | |
| "learning_rate": 8.482870134814214e-07, | |
| "loss": 0.4441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2153075933456421, | |
| "step": 1435, | |
| "valid_targets_mean": 2814.6, | |
| "valid_targets_min": 1242 | |
| }, | |
| { | |
| "epoch": 4.6016, | |
| "grad_norm": 0.46872541762060055, | |
| "learning_rate": 7.851871105854125e-07, | |
| "loss": 0.4715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24948900938034058, | |
| "step": 1440, | |
| "valid_targets_mean": 2967.8, | |
| "valid_targets_min": 1511 | |
| }, | |
| { | |
| "epoch": 4.6176, | |
| "grad_norm": 0.4489060249996938, | |
| "learning_rate": 7.244786840912033e-07, | |
| "loss": 0.4428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1973823457956314, | |
| "step": 1445, | |
| "valid_targets_mean": 2968.2, | |
| "valid_targets_min": 1703 | |
| }, | |
| { | |
| "epoch": 4.6336, | |
| "grad_norm": 0.45728313875611964, | |
| "learning_rate": 6.661692897720517e-07, | |
| "loss": 0.4658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24835816025733948, | |
| "step": 1450, | |
| "valid_targets_mean": 3327.2, | |
| "valid_targets_min": 1195 | |
| }, | |
| { | |
| "epoch": 4.6495999999999995, | |
| "grad_norm": 0.4747389713592, | |
| "learning_rate": 6.10266184817565e-07, | |
| "loss": 0.4831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24206383526325226, | |
| "step": 1455, | |
| "valid_targets_mean": 3087.4, | |
| "valid_targets_min": 1463 | |
| }, | |
| { | |
| "epoch": 4.6655999999999995, | |
| "grad_norm": 0.4508085344360603, | |
| "learning_rate": 5.567763269304927e-07, | |
| "loss": 0.4598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2053154706954956, | |
| "step": 1460, | |
| "valid_targets_mean": 2956.6, | |
| "valid_targets_min": 1527 | |
| }, | |
| { | |
| "epoch": 4.6815999999999995, | |
| "grad_norm": 0.530546828942126, | |
| "learning_rate": 5.057063734607392e-07, | |
| "loss": 0.4725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2210843563079834, | |
| "step": 1465, | |
| "valid_targets_mean": 2872.1, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 4.6975999999999996, | |
| "grad_norm": 0.44783159799278005, | |
| "learning_rate": 4.570626805768119e-07, | |
| "loss": 0.4597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20867741107940674, | |
| "step": 1470, | |
| "valid_targets_mean": 2984.9, | |
| "valid_targets_min": 1101 | |
| }, | |
| { | |
| "epoch": 4.7136, | |
| "grad_norm": 0.4833208583687667, | |
| "learning_rate": 4.1085130247472625e-07, | |
| "loss": 0.4601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21128685772418976, | |
| "step": 1475, | |
| "valid_targets_mean": 2744.1, | |
| "valid_targets_min": 1689 | |
| }, | |
| { | |
| "epoch": 4.7296, | |
| "grad_norm": 0.4382426941125842, | |
| "learning_rate": 3.670779906244981e-07, | |
| "loss": 0.4423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22669294476509094, | |
| "step": 1480, | |
| "valid_targets_mean": 3158.8, | |
| "valid_targets_min": 1065 | |
| }, | |
| { | |
| "epoch": 4.7456, | |
| "grad_norm": 0.46954837995443893, | |
| "learning_rate": 3.2574819305432713e-07, | |
| "loss": 0.4629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23599302768707275, | |
| "step": 1485, | |
| "valid_targets_mean": 3009.8, | |
| "valid_targets_min": 1451 | |
| }, | |
| { | |
| "epoch": 4.7616, | |
| "grad_norm": 0.4705667047472302, | |
| "learning_rate": 2.8686705367250824e-07, | |
| "loss": 0.4486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19904813170433044, | |
| "step": 1490, | |
| "valid_targets_mean": 2777.3, | |
| "valid_targets_min": 1237 | |
| }, | |
| { | |
| "epoch": 4.7776, | |
| "grad_norm": 0.4405020382668668, | |
| "learning_rate": 2.504394116272502e-07, | |
| "loss": 0.4318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21686434745788574, | |
| "step": 1495, | |
| "valid_targets_mean": 3203.4, | |
| "valid_targets_min": 1366 | |
| }, | |
| { | |
| "epoch": 4.7936, | |
| "grad_norm": 0.47490195586595313, | |
| "learning_rate": 2.1646980070437973e-07, | |
| "loss": 0.4653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.232308030128479, | |
| "step": 1500, | |
| "valid_targets_mean": 2665.0, | |
| "valid_targets_min": 1263 | |
| }, | |
| { | |
| "epoch": 4.8096, | |
| "grad_norm": 0.45026205218493226, | |
| "learning_rate": 1.8496244876306858e-07, | |
| "loss": 0.4433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21020200848579407, | |
| "step": 1505, | |
| "valid_targets_mean": 3043.4, | |
| "valid_targets_min": 1670 | |
| }, | |
| { | |
| "epoch": 4.8256, | |
| "grad_norm": 0.49102319598317373, | |
| "learning_rate": 1.559212772096319e-07, | |
| "loss": 0.4596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26330631971359253, | |
| "step": 1510, | |
| "valid_targets_mean": 3498.3, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 4.8416, | |
| "grad_norm": 0.4885125920804189, | |
| "learning_rate": 1.2934990050947228e-07, | |
| "loss": 0.4785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2583045959472656, | |
| "step": 1515, | |
| "valid_targets_mean": 3124.8, | |
| "valid_targets_min": 2100 | |
| }, | |
| { | |
| "epoch": 4.8576, | |
| "grad_norm": 0.4276707026867548, | |
| "learning_rate": 1.0525162573723269e-07, | |
| "loss": 0.4768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22992771863937378, | |
| "step": 1520, | |
| "valid_targets_mean": 3659.1, | |
| "valid_targets_min": 1498 | |
| }, | |
| { | |
| "epoch": 4.8736, | |
| "grad_norm": 0.4795330410563074, | |
| "learning_rate": 8.362945216517704e-08, | |
| "loss": 0.4758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2545624375343323, | |
| "step": 1525, | |
| "valid_targets_mean": 3432.0, | |
| "valid_targets_min": 1324 | |
| }, | |
| { | |
| "epoch": 4.8896, | |
| "grad_norm": 0.5315564601564388, | |
| "learning_rate": 6.448607088991532e-08, | |
| "loss": 0.4684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2378055900335312, | |
| "step": 1530, | |
| "valid_targets_mean": 2588.7, | |
| "valid_targets_min": 1200 | |
| }, | |
| { | |
| "epoch": 4.9056, | |
| "grad_norm": 0.5164470981252668, | |
| "learning_rate": 4.782386449746934e-08, | |
| "loss": 0.4545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22151297330856323, | |
| "step": 1535, | |
| "valid_targets_mean": 2466.6, | |
| "valid_targets_min": 1493 | |
| }, | |
| { | |
| "epoch": 4.9216, | |
| "grad_norm": 0.4620064992407382, | |
| "learning_rate": 3.3644906766734374e-08, | |
| "loss": 0.4573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24147440493106842, | |
| "step": 1540, | |
| "valid_targets_mean": 3112.8, | |
| "valid_targets_min": 1163 | |
| }, | |
| { | |
| "epoch": 4.9376, | |
| "grad_norm": 0.4824052579676531, | |
| "learning_rate": 2.1950962411367848e-08, | |
| "loss": 0.4525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24404051899909973, | |
| "step": 1545, | |
| "valid_targets_mean": 3064.6, | |
| "valid_targets_min": 2201 | |
| }, | |
| { | |
| "epoch": 4.9536, | |
| "grad_norm": 0.4604378529000815, | |
| "learning_rate": 1.2743486860165022e-08, | |
| "loss": 0.4586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24580053985118866, | |
| "step": 1550, | |
| "valid_targets_mean": 3306.9, | |
| "valid_targets_min": 1074 | |
| }, | |
| { | |
| "epoch": 4.9696, | |
| "grad_norm": 0.4753199018242153, | |
| "learning_rate": 6.023626075915001e-09, | |
| "loss": 0.448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20023775100708008, | |
| "step": 1555, | |
| "valid_targets_mean": 2623.0, | |
| "valid_targets_min": 1790 | |
| }, | |
| { | |
| "epoch": 4.9856, | |
| "grad_norm": 0.4994766980539709, | |
| "learning_rate": 1.7922164127659457e-09, | |
| "loss": 0.4676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23927080631256104, | |
| "step": 1560, | |
| "valid_targets_mean": 3136.8, | |
| "valid_targets_min": 1188 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.6369392275743723, | |
| "learning_rate": 4.978451213499824e-11, | |
| "loss": 0.4567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.399383544921875, | |
| "step": 1565, | |
| "valid_targets_mean": 3486.9, | |
| "valid_targets_min": 1658 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.399383544921875, | |
| "step": 1565, | |
| "total_flos": 6.872336138627973e+17, | |
| "train_loss": 0.5079075557355301, | |
| "train_runtime": 11661.1311, | |
| "train_samples_per_second": 4.283, | |
| "train_steps_per_second": 0.134, | |
| "valid_targets_mean": 3486.9, | |
| "valid_targets_min": 1658 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1565, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.872336138627973e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
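
The dump above appears to be a Hugging Face Transformers `Trainer` state file (`log_history` plus run-level summary fields such as `train_runtime` and `total_flos`) covering 5 epochs over 1565 steps. As a minimal sketch of how one might inspect it, the snippet below pulls the per-step loss series out of `log_history`; the filename `trainer_state.json` is an assumption about where this JSON is saved, not something stated in the log itself.

```python
import json

# Load the trainer state dump (filename is an assumption; point this at the saved JSON).
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step training logs; the final entry carries run-level summary
# fields (train_loss, train_runtime, total_flos) and has no per-step "loss" key.
step_loss = [(entry["step"], entry["loss"]) for entry in state["log_history"] if "loss" in entry]

# Print the last few logged steps as a quick sanity check.
for step, loss in step_loss[-5:]:
    print(f"step {step:>5}: loss {loss:.4f}")
```

The same `step_loss` list can be handed to any plotting tool to visualize the loss curve; only the standard library is needed to read the file.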