Update README.md
Browse files
README.md
CHANGED
|
@@ -83,4 +83,52 @@ Dataset was formatted in ShareGpt format for the purposes of using with Axolotl,
|
|
| 83 |
- num_epochs: 3
|
| 84 |
- optimizer: adamw_bnb_8bit
|
| 85 |
- lr_scheduler: cosine
|
| 86 |
-
- learning_rate: 0.00025
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
- num_epochs: 3
|
| 84 |
- optimizer: adamw_bnb_8bit
|
| 85 |
- lr_scheduler: cosine
|
| 86 |
+
- learning_rate: 0.00025
|
| 87 |
+
|
| 88 |
+
#### Evaluation
|
| 89 |
+
|
| 90 |
+
| Groups |Version| Filter |n-shot| Metric | Value | |Stderr|
|
| 91 |
+
|--------------------|-------|----------------|-----:|-----------|------:|---|-----:|
|
| 92 |
+
|Open LLM Leaderboard|N/A |none | 5|rouge2_acc | 0.1920|± |0.0176|
|
| 93 |
+
| | |none | 5|bleu_max |15.2292|± |0.6714|
|
| 94 |
+
| | |flexible-extract| 5|exact_match| 0.0220|± |0.0066|
|
| 95 |
+
| - truthfulqa_mc1 | 2|none | 0|acc | 0.2440|± |0.0192|
|
| 96 |
+
| - truthfulqa_mc2 | 2|none | 0|acc | 0.4430|± |0.0195|
|
| 97 |
+
| - winogrande | 1|none | 5|acc | 0.5120|± |0.0224|
|
| 98 |
+
| - arc_challenge | 1|none | 25|acc | 0.1760|± |0.0170|
|
| 99 |
+
| | |none | 25|acc_norm | 0.2320|± |0.0189|
|
| 100 |
+
| - gsm8k | 3|strict-match | 5|exact_match| 0.0060|± |0.0035|
|
| 101 |
+
| | |flexible-extract| 5|exact_match| 0.0220|± |0.0066|
|
| 102 |
+
| - hellaswag | 1|none | 10|acc | 0.3520|± |0.0214|
|
| 103 |
+
| | |none | 10|acc_norm | 0.4040|± |0.0220|
|
| 104 |
+
| | |none | 5|rouge2_diff|-3.3178|± |0.9477|
|
| 105 |
+
| | |none | 5|rougeL_acc | 0.3860|± |0.0218|
|
| 106 |
+
| | |none | 5|acc_norm | 0.3180|± |0.0145|
|
| 107 |
+
| | |none | 5|rouge1_diff|-1.5564|± |1.0223|
|
| 108 |
+
| | |none | 5|bleu_diff |-0.6500|± |0.6421|
|
| 109 |
+
| | |none | 5|rouge2_max |16.4873|± |1.0172|
|
| 110 |
+
| | |none | 5|rougeL_diff|-0.7765|± |1.0034|
|
| 111 |
+
| | |strict-match | 5|exact_match| 0.0060|± |0.0035|
|
| 112 |
+
| | |none | 5|bleu_acc | 0.4360|± |0.0222|
|
| 113 |
+
| | |none | 5|rougeL_max |33.8798|± |0.9367|
|
| 114 |
+
| | |none | 5|rouge1_max |36.3550|± |0.9462|
|
| 115 |
+
| | |none | 5|rouge1_acc | 0.3700|± |0.0216|
|
| 116 |
+
| | |none | 5|acc | 0.2664|± |0.0036|
|
| 117 |
+
| - mmlu |N/A |none | 0|acc | 0.2533|± |0.0039|
|
| 118 |
+
| - humanities |N/A |none | 5|acc | 0.2408|± |0.0075|
|
| 119 |
+
| - other |N/A |none | 5|acc | 0.2443|± |0.0080|
|
| 120 |
+
| - social_sciences |N/A |none | 5|acc | 0.2538|± |0.0081|
|
| 121 |
+
| - stem |N/A |none | 5|acc | 0.2740|± |0.0079|
|
| 122 |
+
| - truthfulqa |N/A |none | 0|rouge2_acc | 0.1920|± |0.0176|
|
| 123 |
+
| | |none | 0|rougeL_diff|-0.7765|± |1.0034|
|
| 124 |
+
| | |none | 0|bleu_max |15.2292|± |0.6714|
|
| 125 |
+
| | |none | 0|rouge2_diff|-3.3178|± |0.9477|
|
| 126 |
+
| | |none | 0|rougeL_acc | 0.3860|± |0.0218|
|
| 127 |
+
| | |none | 0|bleu_diff |-0.6500|± |0.6421|
|
| 128 |
+
| | |none | 0|rouge2_max |16.4873|± |1.0172|
|
| 129 |
+
| | |none | 0|rouge1_diff|-1.5564|± |1.0223|
|
| 130 |
+
| | |none | 0|acc | 0.3435|± |0.0137|
|
| 131 |
+
| | |none | 0|bleu_acc | 0.4360|± |0.0222|
|
| 132 |
+
| | |none | 0|rougeL_max |33.8798|± |0.9367|
|
| 133 |
+
| | |none | 0|rouge1_max |36.3550|± |0.9462|
|
| 134 |
+
| | |none | 0|rouge1_acc | 0.3700|± |0.0216|
|