Add `verifyToken` field to verify that evaluation results are produced by Hugging Face's automatic model evaluator

Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋! We've added a new `verifyToken` field to each of your evaluation results so that they can be verified as having been produced by the model evaluator. Accept this PR to ensure that your results remain listed as **verified** on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).

Files changed (1)

README.md +44 -30

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
 language: en
-inference: false
 tags:
 - opt
 - text-generation
-license: other
 commercial: false
 model-index:
 - name: inverse-scaling/opt-13b_eval
@@ -18,14 +18,16 @@ model-index:
       config: inverse-scaling--NeQA
       split: train
     metrics:
-    - name: Accuracy
-      type: accuracy
       value: 0.49666666666666665
       verified: true
-    - name: Loss
-      type: loss
       value: 0.7090707456072172
       verified: true
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
@@ -35,14 +37,16 @@ model-index:
       config: inverse-scaling--quote-repetition
       split: train
     metrics:
-    - name: Accuracy
-      type: accuracy
       value: 0.8
       verified: true
-    - name: Loss
-      type: loss
       value: 0.4678814027383723
       verified: true
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
@@ -52,14 +56,16 @@ model-index:
       config: inverse-scaling--redefine-math
       split: train
     metrics:
-    - name: Accuracy
-      type: accuracy
       value: 0.5933333333333334
       verified: true
-    - name: Loss
-      type: loss
       value: 0.7308767640383708
       verified: true
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
@@ -69,14 +75,16 @@ model-index:
       config: inverse-scaling--hindsight-neglect-10shot
       split: train
     metrics:
-    - name: Accuracy
-      type: accuracy
       value: 0.2698412698412698
       verified: true
-    - name: Loss
-      type: loss
       value: 0.7708483344978756
       verified: true
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
@@ -86,14 +94,16 @@ model-index:
       config: mathemakitten--winobias_antistereotype_test_cot_v1
       split: test
     metrics:
-    - name: Accuracy
-      type: accuracy
       value: 0.3422330097087379
       verified: true
-    - name: Loss
-      type: loss
       value: 1.4404955777914985
       verified: true
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
@@ -103,14 +113,16 @@ model-index:
       config: mathemakitten--winobias_antistereotype_test_cot_v3
       split: test
     metrics:
-    - name: Accuracy
-      type: accuracy
       value: 0.30339805825242716
       verified: true
-    - name: Loss
-      type: loss
       value: 1.539870785999474
       verified: true
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
@@ -120,14 +132,16 @@ model-index:
       config: mathemakitten--winobias_antistereotype_test_v5
       split: test
     metrics:
-    - name: Accuracy
-      type: accuracy
       value: 0.3640776699029126
       verified: true
-    - name: Loss
-      type: loss
       value: 1.4798047741848304
       verified: true
 ---
 # OPT : Open Pre-trained Transformer Language Models

 ---
 language: en
+license: other
 tags:
 - opt
 - text-generation
+inference: false
 commercial: false
 model-index:
 - name: inverse-scaling/opt-13b_eval
       config: inverse-scaling--NeQA
       split: train
     metrics:
+    - type: accuracy
       value: 0.49666666666666665
+      name: Accuracy
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWQwNzZlNTM4ZWVjODNkOTIzNjg1NTNkNjE0MGJlMjU4ZWI3NTQzYjg4YTY3MDU2MGViYTYyYjZlZDc0NzQzNCIsInZlcnNpb24iOjF9.qNBGm2Mc3OKjadswivJnO1Lul0NeAjGJe-2FfO57phNPMdgp-rDkTl0YMqC1Rljp8BjT4egJ8IdEQgynUE_hDg
+    - type: loss
       value: 0.7090707456072172
+      name: Loss
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMmY2NTAxOTQ3MmUwZjgxZGM0NDU1YmQzNmRmMTk3MTZhM2IxM2EwYmYxNzJjODM4MWMxNWQwOTczZWRiMGU1NyIsInZlcnNpb24iOjF9.rni9n_PdKnee5J_sMwlS0W7QWfhqlAXX6S4dUAakGQFW10zLDBb2pPfkKdSYz956yyTMrKBX0ZYT2uQGWxurAg
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
       config: inverse-scaling--quote-repetition
       split: train
     metrics:
+    - type: accuracy
       value: 0.8
+      name: Accuracy
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZjFjZjM5NWFjN2Y5ODFiYzRjOGE3MDQ1YmFmYjlkYWRlNTdlMjlhMTY2ZmZmNGQwOWQyNmEzZDk2ZTkwZjQyMCIsInZlcnNpb24iOjF9.Fn-zemt_ghgMvekGYouH-ldScOskoGtbBJ6Mpz8vE27Eca_bOYV6DdQq4Mhd3q9eVqAVg_ybsUFAx215Pjs1Cg
+    - type: loss
       value: 0.4678814027383723
+      name: Loss
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNWVlZjVlZjE5OGIwYTg0ZjJkZjI0NTA2MzUyNDgyY2EyODIzYzk5Zjg1OTMwMTcyODNlZjM2MWE3YWI0MDlhMCIsInZlcnNpb24iOjF9.kFNX4JZsFTeIaxw8kuuc7l5e4J6KWygm6U4RsKwEr8qZumKuJ0IDVPlNzIh0lh2z7OjbGCHsq1bRbPeJQb_bAg
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
       config: inverse-scaling--redefine-math
       split: train
     metrics:
+    - type: accuracy
       value: 0.5933333333333334
+      name: Accuracy
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWVhN2NiMmM4ODAzYzliMTgwZGI0MTdjOGZiM2QwOWVkNGFiMTUwZTA1OGE5MjQwODBjNzFlNjYyMGViNjU0YSIsInZlcnNpb24iOjF9.nQ_UAPkYBSJNpyCP3Pc9ZG3Ns905vy-41HDVdxZrvrs3s5yhiDIH1Gu6bvAzTeiupPVLCu_Rpfp63e4h1sBDBg
+    - type: loss
       value: 0.7308767640383708
+      name: Loss
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzVlYTBjMmI4OTBlN2I2M2IxYmM4NGNhYzkxMzA1MWExOWYxZWFkMzlhZDRlYzk3MzkzOTBiOGU4YTJhNGExMyIsInZlcnNpb24iOjF9.xNkna8ygLtmV3ezRbOeYfushHT-p2Kbja3kKkGhUcfAPjKgUVe-mu9dyxez6G-fUWZHHaXuCZuZMvWqP27MGDA
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
       config: inverse-scaling--hindsight-neglect-10shot
       split: train
     metrics:
+    - type: accuracy
       value: 0.2698412698412698
+      name: Accuracy
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDc1MzczMmE5ZjRhNWY5YWQzYzc5NTA1OGQ2OTAyYTQzMjFhMWJjYTU2NDYxYThmNzgzMzVmMDNhZmY4ODMxYyIsInZlcnNpb24iOjF9.KtTrigpdC3RydTC0L6ueo-D8lBhsYFTt5ncvlFoDksMDKEo-OiqZj2vkPuErII9Rzr-3H-MqDVyO2UN-VDH7AA
+    - type: loss
       value: 0.7708483344978756
+      name: Loss
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNDM2NGMzZTBiMjBkNTAxMGI0MWU5YjQ4NmI4OTU5ZmNiMGE4ZTc1MTczOGRmZTVhMmI5MWNkOGZkMWVhZjQxYSIsInZlcnNpb24iOjF9.CKR5kHqjy07_Rkv2VngLM5cl3KRWQ7rHayctMbzmUzDJq39fJq-jkERNW_JZGIZnMQ4GSINGpnrgP_PE73QzBw
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
       config: mathemakitten--winobias_antistereotype_test_cot_v1
       split: test
     metrics:
+    - type: accuracy
       value: 0.3422330097087379
+      name: Accuracy
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNjAwOGI4YTRlNmUwMTNlNTEyNjQ1YWNjOTcxOGM1N2M4YjY3ZDczMzBhYTM1Y2ZhMWNhM2U3NjQwNDc5Zjk2MiIsInZlcnNpb24iOjF9.ig0ColofjUx0XbMxwbc1n0D5ZX_Pd5csQKXt0GtcrMsgGUU1pz26ArpxcNFThaQT33-PwTLSjf7_W_wMnwDsCw
+    - type: loss
       value: 1.4404955777914985
+      name: Loss
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2JhNGZkOTFiMjFlNGU0MGIxOGI0NTFmN2Q2ODE0ZDEwZjY2NzhmOGU4ZDY3ZDM4Y2ExNGY2MDY4ZDk5ZmFlZSIsInZlcnNpb24iOjF9.9jjeZD1rWaxyIUQO2uyJv2Yf3pNCC6fLnKWJGKSYf2nyWgThKS2JgR0jI4oFG7GtsON03tjeGvmkTdC_Fv7kCQ
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
       config: mathemakitten--winobias_antistereotype_test_cot_v3
       split: test
     metrics:
+    - type: accuracy
       value: 0.30339805825242716
+      name: Accuracy
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTE5ZWFlMzQ5ZWRjNDlkZGFiOThiMzRkMTQ3ZDRkYjkzYzY0OTc3NWI1MzBmZDUwZmMzYTBjZDZlOTc0ODdjNyIsInZlcnNpb24iOjF9.hvwwChF87sW6hJ-Jg_pVPagKNACcVTx8-S-_FFbWW97PHZbhtwLgef_tTCGMF2t4HdPssTr1EEgQ3DOh0RfYDg
+    - type: loss
       value: 1.539870785999474
+      name: Loss
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMWYwOTgxYmVhMjY3Mzg0NzA5NWY4MmQ4ZjhlYjA0M2YyZDE5MTczZDRhN2FjMjc2MGMwMjU0MDk1YTQ5MzRkZCIsInZlcnNpb24iOjF9.fmdxhv2Ern7ZnCWW19cDTAB3-NaXmYF8xkEw40W2ssxGq50WymezMuqo2ssYGmFZJiiZNPx15OjRQza6V-DDAA
   - task:
       type: zero-shot-classification
       name: Zero-Shot Text Classification
       config: mathemakitten--winobias_antistereotype_test_v5
       split: test
     metrics:
+    - type: accuracy
       value: 0.3640776699029126
+      name: Accuracy
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDA2NTZjYmM4MmYyNmM2YjA0YTA4NTVlODFlYjBhYTZlOWFmMGU5YzhlM2RkNWFhZTg1NGM4YjI4YzBmY2IxOSIsInZlcnNpb24iOjF9.6yqaB2Owq36GDA3kHfbkWyuxhmj8LhO8kEGYm7vZ6g3qfM6OkkkXFhX-D4bse-W3WILLRb4TE3xAad2EIkSLAA
+    - type: loss
       value: 1.4798047741848304
+      name: Loss
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMTMxMzVmYThkNmU4ODhmNDgwZWM5ZjM2ZjFkODBjYTY1OGFiNDIwZTM4NDlmMTA4N2Q5ZTk4MThhMzVhN2RjNCIsInZlcnNpb24iOjF9.4i_6ZOjSLyMoPl3BlNMQJ3a1uRYcVpdyaEucECvzJ9786tUQ-RZ-6guKy2-hiZI3DKa1gsks9nPFfeRhLJyiBA
 ---
 # OPT : Open Pre-trained Transformer Language Models