Docty committed on
Commit
7b72b17
·
verified ·
1 Parent(s): fc6f024

End of training

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ image_1.png filter=lfs diff=lfs merge=lfs -text
37
+ image_2.png filter=lfs diff=lfs merge=lfs -text
38
+ image_3.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,62 +1,42 @@
1
  ---
2
- library_name: transformers
3
- license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
 
 
 
5
  tags:
6
- - generated_from_trainer
7
- metrics:
8
- - accuracy
9
- model-index:
10
- - name: solacies
11
- results: []
12
  ---
13
 
14
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
- # solacies
18
 
19
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
20
- It achieves the following results on the evaluation set:
21
- - Loss: 0.3814
22
- - Accuracy: 0.9206
23
 
24
- ## Model description
25
 
26
- More information needed
27
 
28
- ## Intended uses & limitations
 
 
 
29
 
30
- More information needed
31
 
32
- ## Training and evaluation data
33
 
34
- More information needed
35
 
36
- ## Training procedure
37
-
38
- ### Training hyperparameters
39
 
40
- The following hyperparameters were used during training:
41
- - learning_rate: 2e-05
42
- - train_batch_size: 8
43
- - eval_batch_size: 8
44
- - seed: 1337
45
- - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
- - lr_scheduler_type: linear
47
- - num_epochs: 2.0
48
 
49
- ### Training results
 
 
50
 
51
- | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
- |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
- | 0.4536 | 1.0 | 152 | 0.5105 | 0.9088 |
54
- | 0.3086 | 2.0 | 304 | 0.3814 | 0.9206 |
55
 
 
56
 
57
- ### Framework versions
58
 
59
- - Transformers 4.56.1
60
- - Pytorch 2.8.0+cu126
61
- - Datasets 4.0.0
62
- - Tokenizers 0.22.0
 
1
  ---
 
 
2
  base_model: google/vit-base-patch16-224-in21k
3
+ library_name: transformers
4
+ license: creativeml-openrail-m
5
+ inference: true
6
  tags:
7
+ - image-classification
 
 
 
 
 
8
  ---
9
 
10
+ <!-- This model card has been generated automatically according to the information the training script had access to. You
11
  should probably proofread and complete it, then remove this comment. -->
12
 
 
13
 
14
+ # Image Classification
 
 
 
15
 
16
+ This model is a fine-tuned version of google/vit-base-patch16-224-in21k on the Docty/solaices dataset.
17
 
18
+ Some example images are shown below.
19
 
20
+ ![img_0](./image_0.png)
21
+ ![img_1](./image_1.png)
22
+ ![img_2](./image_2.png)
23
+ ![img_3](./image_3.png)
24
 
 
25
 
 
26
 
 
27
 
28
+ ## Intended uses & limitations
 
 
29
 
30
+ #### How to use
 
 
 
 
 
 
 
31
 
32
+ ```python
33
+ # TODO: add an example code snippet for running this image-classification model
34
+ ```
35
 
36
+ #### Limitations and bias
 
 
 
37
 
38
+ [TODO: provide examples of latent issues and potential remediations]
39
 
40
+ ## Training details
41
 
42
+ [TODO: describe the data used to train the model]
 
 
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "eval_accuracy": 0.9205882352941176,
4
+ "eval_loss": 0.3814464807510376,
5
+ "eval_runtime": 5.3041,
6
+ "eval_samples_per_second": 64.101,
7
+ "eval_steps_per_second": 8.107,
8
+ "total_flos": 1.8830891020935168e+17,
9
+ "train_loss": 0.5599808394908905,
10
+ "train_runtime": 139.819,
11
+ "train_samples_per_second": 17.38,
12
+ "train_steps_per_second": 2.174
13
+ }
checkpoint-152/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ViTForImageClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "dtype": "float32",
7
+ "encoder_stride": 16,
8
+ "finetuning_task": "image-classification",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.0,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Alluvial soil",
14
+ "1": "Black Soil",
15
+ "2": "Red soil",
16
+ "3": "Clay soil"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "Alluvial soil": "0",
23
+ "Black Soil": "1",
24
+ "Clay soil": "3",
25
+ "Red soil": "2"
26
+ },
27
+ "layer_norm_eps": 1e-12,
28
+ "model_type": "vit",
29
+ "num_attention_heads": 12,
30
+ "num_channels": 3,
31
+ "num_hidden_layers": 12,
32
+ "patch_size": 16,
33
+ "pooler_act": "tanh",
34
+ "pooler_output_size": 768,
35
+ "problem_type": "single_label_classification",
36
+ "qkv_bias": true,
37
+ "transformers_version": "4.56.1"
38
+ }
checkpoint-152/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fed0a142d166d027467e8671d3c03c37165aa06617e6b3f2674a5c24f4a5c7ce
3
+ size 343230128
checkpoint-152/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1da28a21bc9defc2f7a731cfa38c0307362baee426ea92d96dbb66be3bda61a2
3
+ size 686584395
checkpoint-152/preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
checkpoint-152/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc2b20d61bc2b174b25de4c323a622844840f3dc20b720a015dfe0574768058
3
+ size 14709
checkpoint-152/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b867aeee168120a50840d69528cf3ce12dc5f30434bacbe8ab595475fdd2a6f1
3
+ size 1465
checkpoint-152/trainer_state.json ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 152,
3
+ "best_metric": 0.5104668140411377,
4
+ "best_model_checkpoint": "./solacies/checkpoint-152",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 152,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06578947368421052,
14
+ "grad_norm": 2.7918763160705566,
15
+ "learning_rate": 1.9407894736842107e-05,
16
+ "loss": 1.3351,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.13157894736842105,
21
+ "grad_norm": 2.623225212097168,
22
+ "learning_rate": 1.8750000000000002e-05,
23
+ "loss": 1.1694,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.19736842105263158,
28
+ "grad_norm": 2.48968505859375,
29
+ "learning_rate": 1.8092105263157896e-05,
30
+ "loss": 1.0772,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.2631578947368421,
35
+ "grad_norm": 2.2622175216674805,
36
+ "learning_rate": 1.743421052631579e-05,
37
+ "loss": 1.0927,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.32894736842105265,
42
+ "grad_norm": 2.3349521160125732,
43
+ "learning_rate": 1.6776315789473686e-05,
44
+ "loss": 0.9766,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.39473684210526316,
49
+ "grad_norm": 2.7016446590423584,
50
+ "learning_rate": 1.611842105263158e-05,
51
+ "loss": 0.8842,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.4605263157894737,
56
+ "grad_norm": 1.8381617069244385,
57
+ "learning_rate": 1.5460526315789475e-05,
58
+ "loss": 0.7284,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.5263157894736842,
63
+ "grad_norm": 2.1242270469665527,
64
+ "learning_rate": 1.4802631578947371e-05,
65
+ "loss": 0.6287,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.5921052631578947,
70
+ "grad_norm": 1.3842352628707886,
71
+ "learning_rate": 1.4144736842105264e-05,
72
+ "loss": 0.6329,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.6578947368421053,
77
+ "grad_norm": 2.2132720947265625,
78
+ "learning_rate": 1.3486842105263159e-05,
79
+ "loss": 0.607,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.7236842105263158,
84
+ "grad_norm": 2.2834842205047607,
85
+ "learning_rate": 1.2828947368421055e-05,
86
+ "loss": 0.5891,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.7894736842105263,
91
+ "grad_norm": 2.5198376178741455,
92
+ "learning_rate": 1.2171052631578948e-05,
93
+ "loss": 0.5515,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.8552631578947368,
98
+ "grad_norm": 1.5494874715805054,
99
+ "learning_rate": 1.1513157894736844e-05,
100
+ "loss": 0.4724,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.9210526315789473,
105
+ "grad_norm": 2.719534158706665,
106
+ "learning_rate": 1.0855263157894737e-05,
107
+ "loss": 0.4908,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.9868421052631579,
112
+ "grad_norm": 1.478468418121338,
113
+ "learning_rate": 1.0197368421052632e-05,
114
+ "loss": 0.4536,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 1.0,
119
+ "eval_accuracy": 0.9088235294117647,
120
+ "eval_loss": 0.5104668140411377,
121
+ "eval_runtime": 5.2696,
122
+ "eval_samples_per_second": 64.521,
123
+ "eval_steps_per_second": 8.16,
124
+ "step": 152
125
+ }
126
+ ],
127
+ "logging_steps": 10,
128
+ "max_steps": 304,
129
+ "num_input_tokens_seen": 0,
130
+ "num_train_epochs": 2,
131
+ "save_steps": 500,
132
+ "stateful_callbacks": {
133
+ "TrainerControl": {
134
+ "args": {
135
+ "should_epoch_stop": false,
136
+ "should_evaluate": false,
137
+ "should_log": false,
138
+ "should_save": true,
139
+ "should_training_stop": false
140
+ },
141
+ "attributes": {}
142
+ }
143
+ },
144
+ "total_flos": 9.415445510467584e+16,
145
+ "train_batch_size": 8,
146
+ "trial_name": null,
147
+ "trial_params": null
148
+ }
checkpoint-152/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2749b476100f1d5f9cc9322da668b6a0c25d1a2cdcaa80ee3927191cd1610b37
3
+ size 5777
checkpoint-304/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ViTForImageClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "dtype": "float32",
7
+ "encoder_stride": 16,
8
+ "finetuning_task": "image-classification",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.0,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Alluvial soil",
14
+ "1": "Black Soil",
15
+ "2": "Red soil",
16
+ "3": "Clay soil"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "Alluvial soil": "0",
23
+ "Black Soil": "1",
24
+ "Clay soil": "3",
25
+ "Red soil": "2"
26
+ },
27
+ "layer_norm_eps": 1e-12,
28
+ "model_type": "vit",
29
+ "num_attention_heads": 12,
30
+ "num_channels": 3,
31
+ "num_hidden_layers": 12,
32
+ "patch_size": 16,
33
+ "pooler_act": "tanh",
34
+ "pooler_output_size": 768,
35
+ "problem_type": "single_label_classification",
36
+ "qkv_bias": true,
37
+ "transformers_version": "4.56.1"
38
+ }
checkpoint-304/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57ff922694774c0f68f8d40259991a84e71cd78203889010c45e1468706fa0e9
3
+ size 343230128
checkpoint-304/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b850649db3268f1d1a6a03463368188d711aa2671380c54245b0193c3f9f08e2
3
+ size 686584395
checkpoint-304/preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
checkpoint-304/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:317c10508a950a8da06c9939621c8916c6f59605875f57016e54cbd9ad9254ed
3
+ size 14709
checkpoint-304/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a58698d9d05da327f9f6cab12659d64d0ed60537344651eb8bfb67f56df737c2
3
+ size 1465
checkpoint-304/trainer_state.json ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 304,
3
+ "best_metric": 0.3814464807510376,
4
+ "best_model_checkpoint": "./solacies/checkpoint-304",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 304,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06578947368421052,
14
+ "grad_norm": 2.7918763160705566,
15
+ "learning_rate": 1.9407894736842107e-05,
16
+ "loss": 1.3351,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.13157894736842105,
21
+ "grad_norm": 2.623225212097168,
22
+ "learning_rate": 1.8750000000000002e-05,
23
+ "loss": 1.1694,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.19736842105263158,
28
+ "grad_norm": 2.48968505859375,
29
+ "learning_rate": 1.8092105263157896e-05,
30
+ "loss": 1.0772,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.2631578947368421,
35
+ "grad_norm": 2.2622175216674805,
36
+ "learning_rate": 1.743421052631579e-05,
37
+ "loss": 1.0927,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.32894736842105265,
42
+ "grad_norm": 2.3349521160125732,
43
+ "learning_rate": 1.6776315789473686e-05,
44
+ "loss": 0.9766,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.39473684210526316,
49
+ "grad_norm": 2.7016446590423584,
50
+ "learning_rate": 1.611842105263158e-05,
51
+ "loss": 0.8842,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.4605263157894737,
56
+ "grad_norm": 1.8381617069244385,
57
+ "learning_rate": 1.5460526315789475e-05,
58
+ "loss": 0.7284,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.5263157894736842,
63
+ "grad_norm": 2.1242270469665527,
64
+ "learning_rate": 1.4802631578947371e-05,
65
+ "loss": 0.6287,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.5921052631578947,
70
+ "grad_norm": 1.3842352628707886,
71
+ "learning_rate": 1.4144736842105264e-05,
72
+ "loss": 0.6329,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.6578947368421053,
77
+ "grad_norm": 2.2132720947265625,
78
+ "learning_rate": 1.3486842105263159e-05,
79
+ "loss": 0.607,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.7236842105263158,
84
+ "grad_norm": 2.2834842205047607,
85
+ "learning_rate": 1.2828947368421055e-05,
86
+ "loss": 0.5891,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.7894736842105263,
91
+ "grad_norm": 2.5198376178741455,
92
+ "learning_rate": 1.2171052631578948e-05,
93
+ "loss": 0.5515,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.8552631578947368,
98
+ "grad_norm": 1.5494874715805054,
99
+ "learning_rate": 1.1513157894736844e-05,
100
+ "loss": 0.4724,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.9210526315789473,
105
+ "grad_norm": 2.719534158706665,
106
+ "learning_rate": 1.0855263157894737e-05,
107
+ "loss": 0.4908,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.9868421052631579,
112
+ "grad_norm": 1.478468418121338,
113
+ "learning_rate": 1.0197368421052632e-05,
114
+ "loss": 0.4536,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 1.0,
119
+ "eval_accuracy": 0.9088235294117647,
120
+ "eval_loss": 0.5104668140411377,
121
+ "eval_runtime": 5.2696,
122
+ "eval_samples_per_second": 64.521,
123
+ "eval_steps_per_second": 8.16,
124
+ "step": 152
125
+ },
126
+ {
127
+ "epoch": 1.0526315789473684,
128
+ "grad_norm": 1.4376304149627686,
129
+ "learning_rate": 9.539473684210528e-06,
130
+ "loss": 0.378,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 1.118421052631579,
135
+ "grad_norm": 1.3609135150909424,
136
+ "learning_rate": 8.881578947368423e-06,
137
+ "loss": 0.3742,
138
+ "step": 170
139
+ },
140
+ {
141
+ "epoch": 1.1842105263157894,
142
+ "grad_norm": 2.889965057373047,
143
+ "learning_rate": 8.223684210526316e-06,
144
+ "loss": 0.3694,
145
+ "step": 180
146
+ },
147
+ {
148
+ "epoch": 1.25,
149
+ "grad_norm": 5.698398113250732,
150
+ "learning_rate": 7.565789473684211e-06,
151
+ "loss": 0.4044,
152
+ "step": 190
153
+ },
154
+ {
155
+ "epoch": 1.3157894736842106,
156
+ "grad_norm": 1.3650037050247192,
157
+ "learning_rate": 6.907894736842106e-06,
158
+ "loss": 0.3697,
159
+ "step": 200
160
+ },
161
+ {
162
+ "epoch": 1.381578947368421,
163
+ "grad_norm": 2.522857904434204,
164
+ "learning_rate": 6.25e-06,
165
+ "loss": 0.4656,
166
+ "step": 210
167
+ },
168
+ {
169
+ "epoch": 1.4473684210526316,
170
+ "grad_norm": 1.6762239933013916,
171
+ "learning_rate": 5.592105263157896e-06,
172
+ "loss": 0.3532,
173
+ "step": 220
174
+ },
175
+ {
176
+ "epoch": 1.513157894736842,
177
+ "grad_norm": 1.3175244331359863,
178
+ "learning_rate": 4.9342105263157895e-06,
179
+ "loss": 0.3821,
180
+ "step": 230
181
+ },
182
+ {
183
+ "epoch": 1.5789473684210527,
184
+ "grad_norm": 1.7241592407226562,
185
+ "learning_rate": 4.276315789473684e-06,
186
+ "loss": 0.3258,
187
+ "step": 240
188
+ },
189
+ {
190
+ "epoch": 1.6447368421052633,
191
+ "grad_norm": 1.2837048768997192,
192
+ "learning_rate": 3.618421052631579e-06,
193
+ "loss": 0.3147,
194
+ "step": 250
195
+ },
196
+ {
197
+ "epoch": 1.7105263157894737,
198
+ "grad_norm": 2.3983030319213867,
199
+ "learning_rate": 2.960526315789474e-06,
200
+ "loss": 0.3278,
201
+ "step": 260
202
+ },
203
+ {
204
+ "epoch": 1.776315789473684,
205
+ "grad_norm": 1.1498711109161377,
206
+ "learning_rate": 2.3026315789473684e-06,
207
+ "loss": 0.3126,
208
+ "step": 270
209
+ },
210
+ {
211
+ "epoch": 1.8421052631578947,
212
+ "grad_norm": 2.200284004211426,
213
+ "learning_rate": 1.6447368421052635e-06,
214
+ "loss": 0.2814,
215
+ "step": 280
216
+ },
217
+ {
218
+ "epoch": 1.9078947368421053,
219
+ "grad_norm": 1.2347966432571411,
220
+ "learning_rate": 9.86842105263158e-07,
221
+ "loss": 0.2528,
222
+ "step": 290
223
+ },
224
+ {
225
+ "epoch": 1.973684210526316,
226
+ "grad_norm": 1.8223544359207153,
227
+ "learning_rate": 3.2894736842105264e-07,
228
+ "loss": 0.3086,
229
+ "step": 300
230
+ },
231
+ {
232
+ "epoch": 2.0,
233
+ "eval_accuracy": 0.9205882352941176,
234
+ "eval_loss": 0.3814464807510376,
235
+ "eval_runtime": 5.5076,
236
+ "eval_samples_per_second": 61.732,
237
+ "eval_steps_per_second": 7.807,
238
+ "step": 304
239
+ }
240
+ ],
241
+ "logging_steps": 10,
242
+ "max_steps": 304,
243
+ "num_input_tokens_seen": 0,
244
+ "num_train_epochs": 2,
245
+ "save_steps": 500,
246
+ "stateful_callbacks": {
247
+ "TrainerControl": {
248
+ "args": {
249
+ "should_epoch_stop": false,
250
+ "should_evaluate": false,
251
+ "should_log": false,
252
+ "should_save": true,
253
+ "should_training_stop": true
254
+ },
255
+ "attributes": {}
256
+ }
257
+ },
258
+ "total_flos": 1.8830891020935168e+17,
259
+ "train_batch_size": 8,
260
+ "trial_name": null,
261
+ "trial_params": null
262
+ }
checkpoint-304/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2749b476100f1d5f9cc9322da668b6a0c25d1a2cdcaa80ee3927191cd1610b37
3
+ size 5777
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "eval_accuracy": 0.9205882352941176,
4
+ "eval_loss": 0.3814464807510376,
5
+ "eval_runtime": 5.3041,
6
+ "eval_samples_per_second": 64.101,
7
+ "eval_steps_per_second": 8.107
8
+ }
image_0.png ADDED
image_1.png ADDED

Git LFS Details

  • SHA256: 3a14dfd77f0519dc31a815cfbb66eebf7f1ed5a9d259a9db859dc1992a292a8d
  • Pointer size: 131 Bytes
  • Size of remote file: 936 kB
image_2.png ADDED

Git LFS Details

  • SHA256: 19e2c87bba8b6e461f16f28c6a2dad0a5363c81c439eed462b811b7fbdc3d4c8
  • Pointer size: 131 Bytes
  • Size of remote file: 102 kB
image_3.png ADDED

Git LFS Details

  • SHA256: 8a85f6d034f65ba78a2cf08b23b5953cd751803862b5c4915a9682295a7d2407
  • Pointer size: 131 Bytes
  • Size of remote file: 114 kB
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "total_flos": 1.8830891020935168e+17,
4
+ "train_loss": 0.5599808394908905,
5
+ "train_runtime": 139.819,
6
+ "train_samples_per_second": 17.38,
7
+ "train_steps_per_second": 2.174
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 304,
3
+ "best_metric": 0.3814464807510376,
4
+ "best_model_checkpoint": "./solacies/checkpoint-304",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 304,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06578947368421052,
14
+ "grad_norm": 2.7918763160705566,
15
+ "learning_rate": 1.9407894736842107e-05,
16
+ "loss": 1.3351,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.13157894736842105,
21
+ "grad_norm": 2.623225212097168,
22
+ "learning_rate": 1.8750000000000002e-05,
23
+ "loss": 1.1694,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.19736842105263158,
28
+ "grad_norm": 2.48968505859375,
29
+ "learning_rate": 1.8092105263157896e-05,
30
+ "loss": 1.0772,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.2631578947368421,
35
+ "grad_norm": 2.2622175216674805,
36
+ "learning_rate": 1.743421052631579e-05,
37
+ "loss": 1.0927,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.32894736842105265,
42
+ "grad_norm": 2.3349521160125732,
43
+ "learning_rate": 1.6776315789473686e-05,
44
+ "loss": 0.9766,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.39473684210526316,
49
+ "grad_norm": 2.7016446590423584,
50
+ "learning_rate": 1.611842105263158e-05,
51
+ "loss": 0.8842,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.4605263157894737,
56
+ "grad_norm": 1.8381617069244385,
57
+ "learning_rate": 1.5460526315789475e-05,
58
+ "loss": 0.7284,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.5263157894736842,
63
+ "grad_norm": 2.1242270469665527,
64
+ "learning_rate": 1.4802631578947371e-05,
65
+ "loss": 0.6287,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.5921052631578947,
70
+ "grad_norm": 1.3842352628707886,
71
+ "learning_rate": 1.4144736842105264e-05,
72
+ "loss": 0.6329,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.6578947368421053,
77
+ "grad_norm": 2.2132720947265625,
78
+ "learning_rate": 1.3486842105263159e-05,
79
+ "loss": 0.607,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.7236842105263158,
84
+ "grad_norm": 2.2834842205047607,
85
+ "learning_rate": 1.2828947368421055e-05,
86
+ "loss": 0.5891,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.7894736842105263,
91
+ "grad_norm": 2.5198376178741455,
92
+ "learning_rate": 1.2171052631578948e-05,
93
+ "loss": 0.5515,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.8552631578947368,
98
+ "grad_norm": 1.5494874715805054,
99
+ "learning_rate": 1.1513157894736844e-05,
100
+ "loss": 0.4724,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.9210526315789473,
105
+ "grad_norm": 2.719534158706665,
106
+ "learning_rate": 1.0855263157894737e-05,
107
+ "loss": 0.4908,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.9868421052631579,
112
+ "grad_norm": 1.478468418121338,
113
+ "learning_rate": 1.0197368421052632e-05,
114
+ "loss": 0.4536,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 1.0,
119
+ "eval_accuracy": 0.9088235294117647,
120
+ "eval_loss": 0.5104668140411377,
121
+ "eval_runtime": 5.2696,
122
+ "eval_samples_per_second": 64.521,
123
+ "eval_steps_per_second": 8.16,
124
+ "step": 152
125
+ },
126
+ {
127
+ "epoch": 1.0526315789473684,
128
+ "grad_norm": 1.4376304149627686,
129
+ "learning_rate": 9.539473684210528e-06,
130
+ "loss": 0.378,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 1.118421052631579,
135
+ "grad_norm": 1.3609135150909424,
136
+ "learning_rate": 8.881578947368423e-06,
137
+ "loss": 0.3742,
138
+ "step": 170
139
+ },
140
+ {
141
+ "epoch": 1.1842105263157894,
142
+ "grad_norm": 2.889965057373047,
143
+ "learning_rate": 8.223684210526316e-06,
144
+ "loss": 0.3694,
145
+ "step": 180
146
+ },
147
+ {
148
+ "epoch": 1.25,
149
+ "grad_norm": 5.698398113250732,
150
+ "learning_rate": 7.565789473684211e-06,
151
+ "loss": 0.4044,
152
+ "step": 190
153
+ },
154
+ {
155
+ "epoch": 1.3157894736842106,
156
+ "grad_norm": 1.3650037050247192,
157
+ "learning_rate": 6.907894736842106e-06,
158
+ "loss": 0.3697,
159
+ "step": 200
160
+ },
161
+ {
162
+ "epoch": 1.381578947368421,
163
+ "grad_norm": 2.522857904434204,
164
+ "learning_rate": 6.25e-06,
165
+ "loss": 0.4656,
166
+ "step": 210
167
+ },
168
+ {
169
+ "epoch": 1.4473684210526316,
170
+ "grad_norm": 1.6762239933013916,
171
+ "learning_rate": 5.592105263157896e-06,
172
+ "loss": 0.3532,
173
+ "step": 220
174
+ },
175
+ {
176
+ "epoch": 1.513157894736842,
177
+ "grad_norm": 1.3175244331359863,
178
+ "learning_rate": 4.9342105263157895e-06,
179
+ "loss": 0.3821,
180
+ "step": 230
181
+ },
182
+ {
183
+ "epoch": 1.5789473684210527,
184
+ "grad_norm": 1.7241592407226562,
185
+ "learning_rate": 4.276315789473684e-06,
186
+ "loss": 0.3258,
187
+ "step": 240
188
+ },
189
+ {
190
+ "epoch": 1.6447368421052633,
191
+ "grad_norm": 1.2837048768997192,
192
+ "learning_rate": 3.618421052631579e-06,
193
+ "loss": 0.3147,
194
+ "step": 250
195
+ },
196
+ {
197
+ "epoch": 1.7105263157894737,
198
+ "grad_norm": 2.3983030319213867,
199
+ "learning_rate": 2.960526315789474e-06,
200
+ "loss": 0.3278,
201
+ "step": 260
202
+ },
203
+ {
204
+ "epoch": 1.776315789473684,
205
+ "grad_norm": 1.1498711109161377,
206
+ "learning_rate": 2.3026315789473684e-06,
207
+ "loss": 0.3126,
208
+ "step": 270
209
+ },
210
+ {
211
+ "epoch": 1.8421052631578947,
212
+ "grad_norm": 2.200284004211426,
213
+ "learning_rate": 1.6447368421052635e-06,
214
+ "loss": 0.2814,
215
+ "step": 280
216
+ },
217
+ {
218
+ "epoch": 1.9078947368421053,
219
+ "grad_norm": 1.2347966432571411,
220
+ "learning_rate": 9.86842105263158e-07,
221
+ "loss": 0.2528,
222
+ "step": 290
223
+ },
224
+ {
225
+ "epoch": 1.973684210526316,
226
+ "grad_norm": 1.8223544359207153,
227
+ "learning_rate": 3.2894736842105264e-07,
228
+ "loss": 0.3086,
229
+ "step": 300
230
+ },
231
+ {
232
+ "epoch": 2.0,
233
+ "eval_accuracy": 0.9205882352941176,
234
+ "eval_loss": 0.3814464807510376,
235
+ "eval_runtime": 5.5076,
236
+ "eval_samples_per_second": 61.732,
237
+ "eval_steps_per_second": 7.807,
238
+ "step": 304
239
+ },
240
+ {
241
+ "epoch": 2.0,
242
+ "step": 304,
243
+ "total_flos": 1.8830891020935168e+17,
244
+ "train_loss": 0.5599808394908905,
245
+ "train_runtime": 139.819,
246
+ "train_samples_per_second": 17.38,
247
+ "train_steps_per_second": 2.174
248
+ }
249
+ ],
250
+ "logging_steps": 10,
251
+ "max_steps": 304,
252
+ "num_input_tokens_seen": 0,
253
+ "num_train_epochs": 2,
254
+ "save_steps": 500,
255
+ "stateful_callbacks": {
256
+ "TrainerControl": {
257
+ "args": {
258
+ "should_epoch_stop": false,
259
+ "should_evaluate": false,
260
+ "should_log": false,
261
+ "should_save": true,
262
+ "should_training_stop": true
263
+ },
264
+ "attributes": {}
265
+ }
266
+ },
267
+ "total_flos": 1.8830891020935168e+17,
268
+ "train_batch_size": 8,
269
+ "trial_name": null,
270
+ "trial_params": null
271
+ }