Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator
#17
by
autoevaluator
HF Staff
- opened
README.md
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
---
|
| 2 |
language: en
|
| 3 |
-
|
| 4 |
tags:
|
| 5 |
- opt
|
| 6 |
- text-generation
|
| 7 |
-
|
| 8 |
commercial: false
|
| 9 |
model-index:
|
| 10 |
- name: inverse-scaling/opt-13b_eval
|
|
@@ -18,14 +18,16 @@ model-index:
|
|
| 18 |
config: inverse-scaling--NeQA
|
| 19 |
split: train
|
| 20 |
metrics:
|
| 21 |
-
-
|
| 22 |
-
type: accuracy
|
| 23 |
value: 0.49666666666666665
|
|
|
|
| 24 |
verified: true
|
| 25 |
-
|
| 26 |
-
|
| 27 |
value: 0.7090707456072172
|
|
|
|
| 28 |
verified: true
|
|
|
|
| 29 |
- task:
|
| 30 |
type: zero-shot-classification
|
| 31 |
name: Zero-Shot Text Classification
|
|
@@ -35,14 +37,16 @@ model-index:
|
|
| 35 |
config: inverse-scaling--quote-repetition
|
| 36 |
split: train
|
| 37 |
metrics:
|
| 38 |
-
-
|
| 39 |
-
type: accuracy
|
| 40 |
value: 0.8
|
|
|
|
| 41 |
verified: true
|
| 42 |
-
|
| 43 |
-
|
| 44 |
value: 0.4678814027383723
|
|
|
|
| 45 |
verified: true
|
|
|
|
| 46 |
- task:
|
| 47 |
type: zero-shot-classification
|
| 48 |
name: Zero-Shot Text Classification
|
|
@@ -52,14 +56,16 @@ model-index:
|
|
| 52 |
config: inverse-scaling--redefine-math
|
| 53 |
split: train
|
| 54 |
metrics:
|
| 55 |
-
-
|
| 56 |
-
type: accuracy
|
| 57 |
value: 0.5933333333333334
|
|
|
|
| 58 |
verified: true
|
| 59 |
-
|
| 60 |
-
|
| 61 |
value: 0.7308767640383708
|
|
|
|
| 62 |
verified: true
|
|
|
|
| 63 |
- task:
|
| 64 |
type: zero-shot-classification
|
| 65 |
name: Zero-Shot Text Classification
|
|
@@ -69,14 +75,16 @@ model-index:
|
|
| 69 |
config: inverse-scaling--hindsight-neglect-10shot
|
| 70 |
split: train
|
| 71 |
metrics:
|
| 72 |
-
-
|
| 73 |
-
type: accuracy
|
| 74 |
value: 0.2698412698412698
|
|
|
|
| 75 |
verified: true
|
| 76 |
-
|
| 77 |
-
|
| 78 |
value: 0.7708483344978756
|
|
|
|
| 79 |
verified: true
|
|
|
|
| 80 |
- task:
|
| 81 |
type: zero-shot-classification
|
| 82 |
name: Zero-Shot Text Classification
|
|
@@ -86,14 +94,16 @@ model-index:
|
|
| 86 |
config: mathemakitten--winobias_antistereotype_test_cot_v1
|
| 87 |
split: test
|
| 88 |
metrics:
|
| 89 |
-
-
|
| 90 |
-
type: accuracy
|
| 91 |
value: 0.3422330097087379
|
|
|
|
| 92 |
verified: true
|
| 93 |
-
|
| 94 |
-
|
| 95 |
value: 1.4404955777914985
|
|
|
|
| 96 |
verified: true
|
|
|
|
| 97 |
- task:
|
| 98 |
type: zero-shot-classification
|
| 99 |
name: Zero-Shot Text Classification
|
|
@@ -103,14 +113,16 @@ model-index:
|
|
| 103 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
| 104 |
split: test
|
| 105 |
metrics:
|
| 106 |
-
-
|
| 107 |
-
type: accuracy
|
| 108 |
value: 0.30339805825242716
|
|
|
|
| 109 |
verified: true
|
| 110 |
-
|
| 111 |
-
|
| 112 |
value: 1.539870785999474
|
|
|
|
| 113 |
verified: true
|
|
|
|
| 114 |
- task:
|
| 115 |
type: zero-shot-classification
|
| 116 |
name: Zero-Shot Text Classification
|
|
@@ -120,14 +132,16 @@ model-index:
|
|
| 120 |
config: mathemakitten--winobias_antistereotype_test_v5
|
| 121 |
split: test
|
| 122 |
metrics:
|
| 123 |
-
-
|
| 124 |
-
type: accuracy
|
| 125 |
value: 0.3640776699029126
|
|
|
|
| 126 |
verified: true
|
| 127 |
-
|
| 128 |
-
|
| 129 |
value: 1.4798047741848304
|
|
|
|
| 130 |
verified: true
|
|
|
|
| 131 |
---
|
| 132 |
|
| 133 |
# OPT : Open Pre-trained Transformer Language Models
|
|
|
|
| 1 |
---
|
| 2 |
language: en
|
| 3 |
+
license: other
|
| 4 |
tags:
|
| 5 |
- opt
|
| 6 |
- text-generation
|
| 7 |
+
inference: false
|
| 8 |
commercial: false
|
| 9 |
model-index:
|
| 10 |
- name: inverse-scaling/opt-13b_eval
|
|
|
|
| 18 |
config: inverse-scaling--NeQA
|
| 19 |
split: train
|
| 20 |
metrics:
|
| 21 |
+
- type: accuracy
|
|
|
|
| 22 |
value: 0.49666666666666665
|
| 23 |
+
name: Accuracy
|
| 24 |
verified: true
|
| 25 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWQwNzZlNTM4ZWVjODNkOTIzNjg1NTNkNjE0MGJlMjU4ZWI3NTQzYjg4YTY3MDU2MGViYTYyYjZlZDc0NzQzNCIsInZlcnNpb24iOjF9.qNBGm2Mc3OKjadswivJnO1Lul0NeAjGJe-2FfO57phNPMdgp-rDkTl0YMqC1Rljp8BjT4egJ8IdEQgynUE_hDg
|
| 26 |
+
- type: loss
|
| 27 |
value: 0.7090707456072172
|
| 28 |
+
name: Loss
|
| 29 |
verified: true
|
| 30 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMmY2NTAxOTQ3MmUwZjgxZGM0NDU1YmQzNmRmMTk3MTZhM2IxM2EwYmYxNzJjODM4MWMxNWQwOTczZWRiMGU1NyIsInZlcnNpb24iOjF9.rni9n_PdKnee5J_sMwlS0W7QWfhqlAXX6S4dUAakGQFW10zLDBb2pPfkKdSYz956yyTMrKBX0ZYT2uQGWxurAg
|
| 31 |
- task:
|
| 32 |
type: zero-shot-classification
|
| 33 |
name: Zero-Shot Text Classification
|
|
|
|
| 37 |
config: inverse-scaling--quote-repetition
|
| 38 |
split: train
|
| 39 |
metrics:
|
| 40 |
+
- type: accuracy
|
|
|
|
| 41 |
value: 0.8
|
| 42 |
+
name: Accuracy
|
| 43 |
verified: true
|
| 44 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZjFjZjM5NWFjN2Y5ODFiYzRjOGE3MDQ1YmFmYjlkYWRlNTdlMjlhMTY2ZmZmNGQwOWQyNmEzZDk2ZTkwZjQyMCIsInZlcnNpb24iOjF9.Fn-zemt_ghgMvekGYouH-ldScOskoGtbBJ6Mpz8vE27Eca_bOYV6DdQq4Mhd3q9eVqAVg_ybsUFAx215Pjs1Cg
|
| 45 |
+
- type: loss
|
| 46 |
value: 0.4678814027383723
|
| 47 |
+
name: Loss
|
| 48 |
verified: true
|
| 49 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNWVlZjVlZjE5OGIwYTg0ZjJkZjI0NTA2MzUyNDgyY2EyODIzYzk5Zjg1OTMwMTcyODNlZjM2MWE3YWI0MDlhMCIsInZlcnNpb24iOjF9.kFNX4JZsFTeIaxw8kuuc7l5e4J6KWygm6U4RsKwEr8qZumKuJ0IDVPlNzIh0lh2z7OjbGCHsq1bRbPeJQb_bAg
|
| 50 |
- task:
|
| 51 |
type: zero-shot-classification
|
| 52 |
name: Zero-Shot Text Classification
|
|
|
|
| 56 |
config: inverse-scaling--redefine-math
|
| 57 |
split: train
|
| 58 |
metrics:
|
| 59 |
+
- type: accuracy
|
|
|
|
| 60 |
value: 0.5933333333333334
|
| 61 |
+
name: Accuracy
|
| 62 |
verified: true
|
| 63 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWVhN2NiMmM4ODAzYzliMTgwZGI0MTdjOGZiM2QwOWVkNGFiMTUwZTA1OGE5MjQwODBjNzFlNjYyMGViNjU0YSIsInZlcnNpb24iOjF9.nQ_UAPkYBSJNpyCP3Pc9ZG3Ns905vy-41HDVdxZrvrs3s5yhiDIH1Gu6bvAzTeiupPVLCu_Rpfp63e4h1sBDBg
|
| 64 |
+
- type: loss
|
| 65 |
value: 0.7308767640383708
|
| 66 |
+
name: Loss
|
| 67 |
verified: true
|
| 68 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzVlYTBjMmI4OTBlN2I2M2IxYmM4NGNhYzkxMzA1MWExOWYxZWFkMzlhZDRlYzk3MzkzOTBiOGU4YTJhNGExMyIsInZlcnNpb24iOjF9.xNkna8ygLtmV3ezRbOeYfushHT-p2Kbja3kKkGhUcfAPjKgUVe-mu9dyxez6G-fUWZHHaXuCZuZMvWqP27MGDA
|
| 69 |
- task:
|
| 70 |
type: zero-shot-classification
|
| 71 |
name: Zero-Shot Text Classification
|
|
|
|
| 75 |
config: inverse-scaling--hindsight-neglect-10shot
|
| 76 |
split: train
|
| 77 |
metrics:
|
| 78 |
+
- type: accuracy
|
|
|
|
| 79 |
value: 0.2698412698412698
|
| 80 |
+
name: Accuracy
|
| 81 |
verified: true
|
| 82 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDc1MzczMmE5ZjRhNWY5YWQzYzc5NTA1OGQ2OTAyYTQzMjFhMWJjYTU2NDYxYThmNzgzMzVmMDNhZmY4ODMxYyIsInZlcnNpb24iOjF9.KtTrigpdC3RydTC0L6ueo-D8lBhsYFTt5ncvlFoDksMDKEo-OiqZj2vkPuErII9Rzr-3H-MqDVyO2UN-VDH7AA
|
| 83 |
+
- type: loss
|
| 84 |
value: 0.7708483344978756
|
| 85 |
+
name: Loss
|
| 86 |
verified: true
|
| 87 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNDM2NGMzZTBiMjBkNTAxMGI0MWU5YjQ4NmI4OTU5ZmNiMGE4ZTc1MTczOGRmZTVhMmI5MWNkOGZkMWVhZjQxYSIsInZlcnNpb24iOjF9.CKR5kHqjy07_Rkv2VngLM5cl3KRWQ7rHayctMbzmUzDJq39fJq-jkERNW_JZGIZnMQ4GSINGpnrgP_PE73QzBw
|
| 88 |
- task:
|
| 89 |
type: zero-shot-classification
|
| 90 |
name: Zero-Shot Text Classification
|
|
|
|
| 94 |
config: mathemakitten--winobias_antistereotype_test_cot_v1
|
| 95 |
split: test
|
| 96 |
metrics:
|
| 97 |
+
- type: accuracy
|
|
|
|
| 98 |
value: 0.3422330097087379
|
| 99 |
+
name: Accuracy
|
| 100 |
verified: true
|
| 101 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNjAwOGI4YTRlNmUwMTNlNTEyNjQ1YWNjOTcxOGM1N2M4YjY3ZDczMzBhYTM1Y2ZhMWNhM2U3NjQwNDc5Zjk2MiIsInZlcnNpb24iOjF9.ig0ColofjUx0XbMxwbc1n0D5ZX_Pd5csQKXt0GtcrMsgGUU1pz26ArpxcNFThaQT33-PwTLSjf7_W_wMnwDsCw
|
| 102 |
+
- type: loss
|
| 103 |
value: 1.4404955777914985
|
| 104 |
+
name: Loss
|
| 105 |
verified: true
|
| 106 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2JhNGZkOTFiMjFlNGU0MGIxOGI0NTFmN2Q2ODE0ZDEwZjY2NzhmOGU4ZDY3ZDM4Y2ExNGY2MDY4ZDk5ZmFlZSIsInZlcnNpb24iOjF9.9jjeZD1rWaxyIUQO2uyJv2Yf3pNCC6fLnKWJGKSYf2nyWgThKS2JgR0jI4oFG7GtsON03tjeGvmkTdC_Fv7kCQ
|
| 107 |
- task:
|
| 108 |
type: zero-shot-classification
|
| 109 |
name: Zero-Shot Text Classification
|
|
|
|
| 113 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
| 114 |
split: test
|
| 115 |
metrics:
|
| 116 |
+
- type: accuracy
|
|
|
|
| 117 |
value: 0.30339805825242716
|
| 118 |
+
name: Accuracy
|
| 119 |
verified: true
|
| 120 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTE5ZWFlMzQ5ZWRjNDlkZGFiOThiMzRkMTQ3ZDRkYjkzYzY0OTc3NWI1MzBmZDUwZmMzYTBjZDZlOTc0ODdjNyIsInZlcnNpb24iOjF9.hvwwChF87sW6hJ-Jg_pVPagKNACcVTx8-S-_FFbWW97PHZbhtwLgef_tTCGMF2t4HdPssTr1EEgQ3DOh0RfYDg
|
| 121 |
+
- type: loss
|
| 122 |
value: 1.539870785999474
|
| 123 |
+
name: Loss
|
| 124 |
verified: true
|
| 125 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMWYwOTgxYmVhMjY3Mzg0NzA5NWY4MmQ4ZjhlYjA0M2YyZDE5MTczZDRhN2FjMjc2MGMwMjU0MDk1YTQ5MzRkZCIsInZlcnNpb24iOjF9.fmdxhv2Ern7ZnCWW19cDTAB3-NaXmYF8xkEw40W2ssxGq50WymezMuqo2ssYGmFZJiiZNPx15OjRQza6V-DDAA
|
| 126 |
- task:
|
| 127 |
type: zero-shot-classification
|
| 128 |
name: Zero-Shot Text Classification
|
|
|
|
| 132 |
config: mathemakitten--winobias_antistereotype_test_v5
|
| 133 |
split: test
|
| 134 |
metrics:
|
| 135 |
+
- type: accuracy
|
|
|
|
| 136 |
value: 0.3640776699029126
|
| 137 |
+
name: Accuracy
|
| 138 |
verified: true
|
| 139 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDA2NTZjYmM4MmYyNmM2YjA0YTA4NTVlODFlYjBhYTZlOWFmMGU5YzhlM2RkNWFhZTg1NGM4YjI4YzBmY2IxOSIsInZlcnNpb24iOjF9.6yqaB2Owq36GDA3kHfbkWyuxhmj8LhO8kEGYm7vZ6g3qfM6OkkkXFhX-D4bse-W3WILLRb4TE3xAad2EIkSLAA
|
| 140 |
+
- type: loss
|
| 141 |
value: 1.4798047741848304
|
| 142 |
+
name: Loss
|
| 143 |
verified: true
|
| 144 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMTMxMzVmYThkNmU4ODhmNDgwZWM5ZjM2ZjFkODBjYTY1OGFiNDIwZTM4NDlmMTA4N2Q5ZTk4MThhMzVhN2RjNCIsInZlcnNpb24iOjF9.4i_6ZOjSLyMoPl3BlNMQJ3a1uRYcVpdyaEucECvzJ9786tUQ-RZ-6guKy2-hiZI3DKa1gsks9nPFfeRhLJyiBA
|
| 145 |
---
|
| 146 |
|
| 147 |
# OPT : Open Pre-trained Transformer Language Models
|