Commit
·
bf395d2
1
Parent(s):
e3f67a1
Best checkpoint (step 4176)
Browse files
- pytorch_model.bin +1 -1
- trainer_state.json +3 -106
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 498673009
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9a8fd5458b8d1577edb8a797dafb789dccb27c5daa17ed565b29753696bc342
|
| 3 |
size 498673009
|
trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 3.0,
|
| 5 |
-
"global_step": 4638,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -1146,114 +1146,11 @@
|
|
| 1146 |
"eval_samples_per_second": 1517.169,
|
| 1147 |
"eval_steps_per_second": 11.869,
|
| 1148 |
"step": 4176
|
| 1149 |
-
},
|
| 1150 |
-
{
|
| 1151 |
-
"epoch": 2.71,
|
| 1152 |
-
"learning_rate": 5.498322951605176e-06,
|
| 1153 |
-
"loss": 0.5301,
|
| 1154 |
-
"step": 4183
|
| 1155 |
-
},
|
| 1156 |
-
{
|
| 1157 |
-
"epoch": 2.74,
|
| 1158 |
-
"learning_rate": 4.935313847628174e-06,
|
| 1159 |
-
"loss": 0.5402,
|
| 1160 |
-
"step": 4230
|
| 1161 |
-
},
|
| 1162 |
-
{
|
| 1163 |
-
"epoch": 2.77,
|
| 1164 |
-
"learning_rate": 4.372304743651174e-06,
|
| 1165 |
-
"loss": 0.5167,
|
| 1166 |
-
"step": 4277
|
| 1167 |
-
},
|
| 1168 |
-
{
|
| 1169 |
-
"epoch": 2.8,
|
| 1170 |
-
"learning_rate": 3.8092956396741735e-06,
|
| 1171 |
-
"loss": 0.5004,
|
| 1172 |
-
"step": 4324
|
| 1173 |
-
},
|
| 1174 |
-
{
|
| 1175 |
-
"epoch": 2.83,
|
| 1176 |
-
"learning_rate": 3.246286535697173e-06,
|
| 1177 |
-
"loss": 0.5148,
|
| 1178 |
-
"step": 4371
|
| 1179 |
-
},
|
| 1180 |
-
{
|
| 1181 |
-
"epoch": 2.85,
|
| 1182 |
-
"eval_accuracy": 0.7637243375237406,
|
| 1183 |
-
"eval_b_acc": 0.6382943967754277,
|
| 1184 |
-
"eval_f1": 0.7612453985473553,
|
| 1185 |
-
"eval_f1_anger": 0.6462075848303392,
|
| 1186 |
-
"eval_f1_disgust": 0.4666666666666667,
|
| 1187 |
-
"eval_f1_fear": 0.6678592725104352,
|
| 1188 |
-
"eval_f1_joy": 0.7563368765331154,
|
| 1189 |
-
"eval_f1_neutral": 0.8485270885605427,
|
| 1190 |
-
"eval_f1_sadness": 0.6918630502445532,
|
| 1191 |
-
"eval_f1_surprise": 0.4760213143872114,
|
| 1192 |
-
"eval_loss": 0.6637689471244812,
|
| 1193 |
-
"eval_prec": 0.7597715932015663,
|
| 1194 |
-
"eval_prec_anger": 0.6741280583029672,
|
| 1195 |
-
"eval_prec_disgust": 0.5,
|
| 1196 |
-
"eval_prec_fear": 0.681265206812652,
|
| 1197 |
-
"eval_prec_joy": 0.7394084732214229,
|
| 1198 |
-
"eval_prec_neutral": 0.8400183992640294,
|
| 1199 |
-
"eval_prec_sadness": 0.6921708185053381,
|
| 1200 |
-
"eval_prec_surprise": 0.5296442687747036,
|
| 1201 |
-
"eval_recall": 0.7637243375237406,
|
| 1202 |
-
"eval_recall_anger": 0.6205079060852899,
|
| 1203 |
-
"eval_recall_disgust": 0.4375,
|
| 1204 |
-
"eval_recall_fear": 0.6549707602339181,
|
| 1205 |
-
"eval_recall_joy": 0.7740585774058577,
|
| 1206 |
-
"eval_recall_neutral": 0.857209913631243,
|
| 1207 |
-
"eval_recall_sadness": 0.6915555555555556,
|
| 1208 |
-
"eval_recall_surprise": 0.432258064516129,
|
| 1209 |
-
"eval_runtime": 14.5644,
|
| 1210 |
-
"eval_samples_per_second": 1518.363,
|
| 1211 |
-
"eval_steps_per_second": 11.878,
|
| 1212 |
-
"step": 4408
|
| 1213 |
-
},
|
| 1214 |
-
{
|
| 1215 |
-
"epoch": 2.86,
|
| 1216 |
-
"learning_rate": 2.6832774317201726e-06,
|
| 1217 |
-
"loss": 0.5231,
|
| 1218 |
-
"step": 4418
|
| 1219 |
-
},
|
| 1220 |
-
{
|
| 1221 |
-
"epoch": 2.89,
|
| 1222 |
-
"learning_rate": 2.120268327743172e-06,
|
| 1223 |
-
"loss": 0.5223,
|
| 1224 |
-
"step": 4465
|
| 1225 |
-
},
|
| 1226 |
-
{
|
| 1227 |
-
"epoch": 2.92,
|
| 1228 |
-
"learning_rate": 1.5572592237661715e-06,
|
| 1229 |
-
"loss": 0.525,
|
| 1230 |
-
"step": 4512
|
| 1231 |
-
},
|
| 1232 |
-
{
|
| 1233 |
-
"epoch": 2.95,
|
| 1234 |
-
"learning_rate": 9.942501197891712e-07,
|
| 1235 |
-
"loss": 0.5219,
|
| 1236 |
-
"step": 4559
|
| 1237 |
-
},
|
| 1238 |
-
{
|
| 1239 |
-
"epoch": 2.98,
|
| 1240 |
-
"learning_rate": 4.312410158121706e-07,
|
| 1241 |
-
"loss": 0.516,
|
| 1242 |
-
"step": 4606
|
| 1243 |
-
},
|
| 1244 |
-
{
|
| 1245 |
-
"epoch": 3.0,
|
| 1246 |
-
"step": 4638,
|
| 1247 |
-
"total_flos": 3.903427734912e+16,
|
| 1248 |
-
"train_loss": 0.6570020180521673,
|
| 1249 |
-
"train_runtime": 1377.6596,
|
| 1250 |
-
"train_samples_per_second": 430.73,
|
| 1251 |
-
"train_steps_per_second": 3.367
|
| 1252 |
}
|
| 1253 |
],
|
| 1254 |
"max_steps": 4638,
|
| 1255 |
"num_train_epochs": 3,
|
| 1256 |
-
"total_flos": 3.903427734912e+16,
|
| 1257 |
"trial_name": null,
|
| 1258 |
"trial_params": null
|
| 1259 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.701164294954722,
|
| 5 |
+
"global_step": 4176,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 1146 |
"eval_samples_per_second": 1517.169,
|
| 1147 |
"eval_steps_per_second": 11.869,
|
| 1148 |
"step": 4176
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1149 |
}
|
| 1150 |
],
|
| 1151 |
"max_steps": 4638,
|
| 1152 |
"num_train_epochs": 3,
|
| 1153 |
+
"total_flos": 3.51500575835136e+16,
|
| 1154 |
"trial_name": null,
|
| 1155 |
"trial_params": null
|
| 1156 |
}
|