Commit 
							
							·
						
						fdc3b93
	
1
								Parent(s):
							
							f36b058
								
Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator
Browse files
Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋! We've added a new `verifyToken` field to your evaluation results to verify that they are produced by the model evaluator. Accept this PR to ensure that your results remain listed as **verified** on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
    	
        README.md
    CHANGED
    
    | @@ -1,10 +1,10 @@ | |
| 1 | 
             
            ---
         | 
| 2 | 
             
            language: en
         | 
| 3 | 
            -
             | 
| 4 | 
             
            tags:
         | 
| 5 | 
             
            - opt
         | 
| 6 | 
             
            - text-generation
         | 
| 7 | 
            -
             | 
| 8 | 
             
            commercial: false
         | 
| 9 | 
             
            model-index:
         | 
| 10 | 
             
            - name: inverse-scaling/opt-13b_eval
         | 
| @@ -18,14 +18,16 @@ model-index: | |
| 18 | 
             
                  config: inverse-scaling--NeQA
         | 
| 19 | 
             
                  split: train
         | 
| 20 | 
             
                metrics:
         | 
| 21 | 
            -
                -  | 
| 22 | 
            -
                  type: accuracy
         | 
| 23 | 
             
                  value: 0.49666666666666665
         | 
|  | |
| 24 | 
             
                  verified: true
         | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
             
                  value: 0.7090707456072172
         | 
|  | |
| 28 | 
             
                  verified: true
         | 
|  | |
| 29 | 
             
              - task:
         | 
| 30 | 
             
                  type: zero-shot-classification
         | 
| 31 | 
             
                  name: Zero-Shot Text Classification
         | 
| @@ -35,14 +37,16 @@ model-index: | |
| 35 | 
             
                  config: inverse-scaling--quote-repetition
         | 
| 36 | 
             
                  split: train
         | 
| 37 | 
             
                metrics:
         | 
| 38 | 
            -
                -  | 
| 39 | 
            -
                  type: accuracy
         | 
| 40 | 
             
                  value: 0.8
         | 
|  | |
| 41 | 
             
                  verified: true
         | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
             
                  value: 0.4678814027383723
         | 
|  | |
| 45 | 
             
                  verified: true
         | 
|  | |
| 46 | 
             
              - task:
         | 
| 47 | 
             
                  type: zero-shot-classification
         | 
| 48 | 
             
                  name: Zero-Shot Text Classification
         | 
| @@ -52,14 +56,16 @@ model-index: | |
| 52 | 
             
                  config: inverse-scaling--redefine-math
         | 
| 53 | 
             
                  split: train
         | 
| 54 | 
             
                metrics:
         | 
| 55 | 
            -
                -  | 
| 56 | 
            -
                  type: accuracy
         | 
| 57 | 
             
                  value: 0.5933333333333334
         | 
|  | |
| 58 | 
             
                  verified: true
         | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
             
                  value: 0.7308767640383708
         | 
|  | |
| 62 | 
             
                  verified: true
         | 
|  | |
| 63 | 
             
              - task:
         | 
| 64 | 
             
                  type: zero-shot-classification
         | 
| 65 | 
             
                  name: Zero-Shot Text Classification
         | 
| @@ -69,14 +75,16 @@ model-index: | |
| 69 | 
             
                  config: inverse-scaling--hindsight-neglect-10shot
         | 
| 70 | 
             
                  split: train
         | 
| 71 | 
             
                metrics:
         | 
| 72 | 
            -
                -  | 
| 73 | 
            -
                  type: accuracy
         | 
| 74 | 
             
                  value: 0.2698412698412698
         | 
|  | |
| 75 | 
             
                  verified: true
         | 
| 76 | 
            -
             | 
| 77 | 
            -
             | 
| 78 | 
             
                  value: 0.7708483344978756
         | 
|  | |
| 79 | 
             
                  verified: true
         | 
|  | |
| 80 | 
             
              - task:
         | 
| 81 | 
             
                  type: zero-shot-classification
         | 
| 82 | 
             
                  name: Zero-Shot Text Classification
         | 
| @@ -86,14 +94,16 @@ model-index: | |
| 86 | 
             
                  config: mathemakitten--winobias_antistereotype_test_cot_v1
         | 
| 87 | 
             
                  split: test
         | 
| 88 | 
             
                metrics:
         | 
| 89 | 
            -
                -  | 
| 90 | 
            -
                  type: accuracy
         | 
| 91 | 
             
                  value: 0.3422330097087379
         | 
|  | |
| 92 | 
             
                  verified: true
         | 
| 93 | 
            -
             | 
| 94 | 
            -
             | 
| 95 | 
             
                  value: 1.4404955777914985
         | 
|  | |
| 96 | 
             
                  verified: true
         | 
|  | |
| 97 | 
             
              - task:
         | 
| 98 | 
             
                  type: zero-shot-classification
         | 
| 99 | 
             
                  name: Zero-Shot Text Classification
         | 
| @@ -103,14 +113,16 @@ model-index: | |
| 103 | 
             
                  config: mathemakitten--winobias_antistereotype_test_cot_v3
         | 
| 104 | 
             
                  split: test
         | 
| 105 | 
             
                metrics:
         | 
| 106 | 
            -
                -  | 
| 107 | 
            -
                  type: accuracy
         | 
| 108 | 
             
                  value: 0.30339805825242716
         | 
|  | |
| 109 | 
             
                  verified: true
         | 
| 110 | 
            -
             | 
| 111 | 
            -
             | 
| 112 | 
             
                  value: 1.539870785999474
         | 
|  | |
| 113 | 
             
                  verified: true
         | 
|  | |
| 114 | 
             
              - task:
         | 
| 115 | 
             
                  type: zero-shot-classification
         | 
| 116 | 
             
                  name: Zero-Shot Text Classification
         | 
| @@ -120,14 +132,16 @@ model-index: | |
| 120 | 
             
                  config: mathemakitten--winobias_antistereotype_test_v5
         | 
| 121 | 
             
                  split: test
         | 
| 122 | 
             
                metrics:
         | 
| 123 | 
            -
                -  | 
| 124 | 
            -
                  type: accuracy
         | 
| 125 | 
             
                  value: 0.3640776699029126
         | 
|  | |
| 126 | 
             
                  verified: true
         | 
| 127 | 
            -
             | 
| 128 | 
            -
             | 
| 129 | 
             
                  value: 1.4798047741848304
         | 
|  | |
| 130 | 
             
                  verified: true
         | 
|  | |
| 131 | 
             
            ---
         | 
| 132 |  | 
| 133 | 
             
            # OPT : Open Pre-trained Transformer Language Models
         | 
|  | |
| 1 | 
             
            ---
         | 
| 2 | 
             
            language: en
         | 
| 3 | 
            +
            license: other
         | 
| 4 | 
             
            tags:
         | 
| 5 | 
             
            - opt
         | 
| 6 | 
             
            - text-generation
         | 
| 7 | 
            +
            inference: false
         | 
| 8 | 
             
            commercial: false
         | 
| 9 | 
             
            model-index:
         | 
| 10 | 
             
            - name: inverse-scaling/opt-13b_eval
         | 
|  | |
| 18 | 
             
                  config: inverse-scaling--NeQA
         | 
| 19 | 
             
                  split: train
         | 
| 20 | 
             
                metrics:
         | 
| 21 | 
            +
                - type: accuracy
         | 
|  | |
| 22 | 
             
                  value: 0.49666666666666665
         | 
| 23 | 
            +
                  name: Accuracy
         | 
| 24 | 
             
                  verified: true
         | 
| 25 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWQwNzZlNTM4ZWVjODNkOTIzNjg1NTNkNjE0MGJlMjU4ZWI3NTQzYjg4YTY3MDU2MGViYTYyYjZlZDc0NzQzNCIsInZlcnNpb24iOjF9.qNBGm2Mc3OKjadswivJnO1Lul0NeAjGJe-2FfO57phNPMdgp-rDkTl0YMqC1Rljp8BjT4egJ8IdEQgynUE_hDg
         | 
| 26 | 
            +
                - type: loss
         | 
| 27 | 
             
                  value: 0.7090707456072172
         | 
| 28 | 
            +
                  name: Loss
         | 
| 29 | 
             
                  verified: true
         | 
| 30 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMmY2NTAxOTQ3MmUwZjgxZGM0NDU1YmQzNmRmMTk3MTZhM2IxM2EwYmYxNzJjODM4MWMxNWQwOTczZWRiMGU1NyIsInZlcnNpb24iOjF9.rni9n_PdKnee5J_sMwlS0W7QWfhqlAXX6S4dUAakGQFW10zLDBb2pPfkKdSYz956yyTMrKBX0ZYT2uQGWxurAg
         | 
| 31 | 
             
              - task:
         | 
| 32 | 
             
                  type: zero-shot-classification
         | 
| 33 | 
             
                  name: Zero-Shot Text Classification
         | 
|  | |
| 37 | 
             
                  config: inverse-scaling--quote-repetition
         | 
| 38 | 
             
                  split: train
         | 
| 39 | 
             
                metrics:
         | 
| 40 | 
            +
                - type: accuracy
         | 
|  | |
| 41 | 
             
                  value: 0.8
         | 
| 42 | 
            +
                  name: Accuracy
         | 
| 43 | 
             
                  verified: true
         | 
| 44 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZjFjZjM5NWFjN2Y5ODFiYzRjOGE3MDQ1YmFmYjlkYWRlNTdlMjlhMTY2ZmZmNGQwOWQyNmEzZDk2ZTkwZjQyMCIsInZlcnNpb24iOjF9.Fn-zemt_ghgMvekGYouH-ldScOskoGtbBJ6Mpz8vE27Eca_bOYV6DdQq4Mhd3q9eVqAVg_ybsUFAx215Pjs1Cg
         | 
| 45 | 
            +
                - type: loss
         | 
| 46 | 
             
                  value: 0.4678814027383723
         | 
| 47 | 
            +
                  name: Loss
         | 
| 48 | 
             
                  verified: true
         | 
| 49 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNWVlZjVlZjE5OGIwYTg0ZjJkZjI0NTA2MzUyNDgyY2EyODIzYzk5Zjg1OTMwMTcyODNlZjM2MWE3YWI0MDlhMCIsInZlcnNpb24iOjF9.kFNX4JZsFTeIaxw8kuuc7l5e4J6KWygm6U4RsKwEr8qZumKuJ0IDVPlNzIh0lh2z7OjbGCHsq1bRbPeJQb_bAg
         | 
| 50 | 
             
              - task:
         | 
| 51 | 
             
                  type: zero-shot-classification
         | 
| 52 | 
             
                  name: Zero-Shot Text Classification
         | 
|  | |
| 56 | 
             
                  config: inverse-scaling--redefine-math
         | 
| 57 | 
             
                  split: train
         | 
| 58 | 
             
                metrics:
         | 
| 59 | 
            +
                - type: accuracy
         | 
|  | |
| 60 | 
             
                  value: 0.5933333333333334
         | 
| 61 | 
            +
                  name: Accuracy
         | 
| 62 | 
             
                  verified: true
         | 
| 63 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWVhN2NiMmM4ODAzYzliMTgwZGI0MTdjOGZiM2QwOWVkNGFiMTUwZTA1OGE5MjQwODBjNzFlNjYyMGViNjU0YSIsInZlcnNpb24iOjF9.nQ_UAPkYBSJNpyCP3Pc9ZG3Ns905vy-41HDVdxZrvrs3s5yhiDIH1Gu6bvAzTeiupPVLCu_Rpfp63e4h1sBDBg
         | 
| 64 | 
            +
                - type: loss
         | 
| 65 | 
             
                  value: 0.7308767640383708
         | 
| 66 | 
            +
                  name: Loss
         | 
| 67 | 
             
                  verified: true
         | 
| 68 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzVlYTBjMmI4OTBlN2I2M2IxYmM4NGNhYzkxMzA1MWExOWYxZWFkMzlhZDRlYzk3MzkzOTBiOGU4YTJhNGExMyIsInZlcnNpb24iOjF9.xNkna8ygLtmV3ezRbOeYfushHT-p2Kbja3kKkGhUcfAPjKgUVe-mu9dyxez6G-fUWZHHaXuCZuZMvWqP27MGDA
         | 
| 69 | 
             
              - task:
         | 
| 70 | 
             
                  type: zero-shot-classification
         | 
| 71 | 
             
                  name: Zero-Shot Text Classification
         | 
|  | |
| 75 | 
             
                  config: inverse-scaling--hindsight-neglect-10shot
         | 
| 76 | 
             
                  split: train
         | 
| 77 | 
             
                metrics:
         | 
| 78 | 
            +
                - type: accuracy
         | 
|  | |
| 79 | 
             
                  value: 0.2698412698412698
         | 
| 80 | 
            +
                  name: Accuracy
         | 
| 81 | 
             
                  verified: true
         | 
| 82 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDc1MzczMmE5ZjRhNWY5YWQzYzc5NTA1OGQ2OTAyYTQzMjFhMWJjYTU2NDYxYThmNzgzMzVmMDNhZmY4ODMxYyIsInZlcnNpb24iOjF9.KtTrigpdC3RydTC0L6ueo-D8lBhsYFTt5ncvlFoDksMDKEo-OiqZj2vkPuErII9Rzr-3H-MqDVyO2UN-VDH7AA
         | 
| 83 | 
            +
                - type: loss
         | 
| 84 | 
             
                  value: 0.7708483344978756
         | 
| 85 | 
            +
                  name: Loss
         | 
| 86 | 
             
                  verified: true
         | 
| 87 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNDM2NGMzZTBiMjBkNTAxMGI0MWU5YjQ4NmI4OTU5ZmNiMGE4ZTc1MTczOGRmZTVhMmI5MWNkOGZkMWVhZjQxYSIsInZlcnNpb24iOjF9.CKR5kHqjy07_Rkv2VngLM5cl3KRWQ7rHayctMbzmUzDJq39fJq-jkERNW_JZGIZnMQ4GSINGpnrgP_PE73QzBw
         | 
| 88 | 
             
              - task:
         | 
| 89 | 
             
                  type: zero-shot-classification
         | 
| 90 | 
             
                  name: Zero-Shot Text Classification
         | 
|  | |
| 94 | 
             
                  config: mathemakitten--winobias_antistereotype_test_cot_v1
         | 
| 95 | 
             
                  split: test
         | 
| 96 | 
             
                metrics:
         | 
| 97 | 
            +
                - type: accuracy
         | 
|  | |
| 98 | 
             
                  value: 0.3422330097087379
         | 
| 99 | 
            +
                  name: Accuracy
         | 
| 100 | 
             
                  verified: true
         | 
| 101 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNjAwOGI4YTRlNmUwMTNlNTEyNjQ1YWNjOTcxOGM1N2M4YjY3ZDczMzBhYTM1Y2ZhMWNhM2U3NjQwNDc5Zjk2MiIsInZlcnNpb24iOjF9.ig0ColofjUx0XbMxwbc1n0D5ZX_Pd5csQKXt0GtcrMsgGUU1pz26ArpxcNFThaQT33-PwTLSjf7_W_wMnwDsCw
         | 
| 102 | 
            +
                - type: loss
         | 
| 103 | 
             
                  value: 1.4404955777914985
         | 
| 104 | 
            +
                  name: Loss
         | 
| 105 | 
             
                  verified: true
         | 
| 106 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2JhNGZkOTFiMjFlNGU0MGIxOGI0NTFmN2Q2ODE0ZDEwZjY2NzhmOGU4ZDY3ZDM4Y2ExNGY2MDY4ZDk5ZmFlZSIsInZlcnNpb24iOjF9.9jjeZD1rWaxyIUQO2uyJv2Yf3pNCC6fLnKWJGKSYf2nyWgThKS2JgR0jI4oFG7GtsON03tjeGvmkTdC_Fv7kCQ
         | 
| 107 | 
             
              - task:
         | 
| 108 | 
             
                  type: zero-shot-classification
         | 
| 109 | 
             
                  name: Zero-Shot Text Classification
         | 
|  | |
| 113 | 
             
                  config: mathemakitten--winobias_antistereotype_test_cot_v3
         | 
| 114 | 
             
                  split: test
         | 
| 115 | 
             
                metrics:
         | 
| 116 | 
            +
                - type: accuracy
         | 
|  | |
| 117 | 
             
                  value: 0.30339805825242716
         | 
| 118 | 
            +
                  name: Accuracy
         | 
| 119 | 
             
                  verified: true
         | 
| 120 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTE5ZWFlMzQ5ZWRjNDlkZGFiOThiMzRkMTQ3ZDRkYjkzYzY0OTc3NWI1MzBmZDUwZmMzYTBjZDZlOTc0ODdjNyIsInZlcnNpb24iOjF9.hvwwChF87sW6hJ-Jg_pVPagKNACcVTx8-S-_FFbWW97PHZbhtwLgef_tTCGMF2t4HdPssTr1EEgQ3DOh0RfYDg
         | 
| 121 | 
            +
                - type: loss
         | 
| 122 | 
             
                  value: 1.539870785999474
         | 
| 123 | 
            +
                  name: Loss
         | 
| 124 | 
             
                  verified: true
         | 
| 125 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMWYwOTgxYmVhMjY3Mzg0NzA5NWY4MmQ4ZjhlYjA0M2YyZDE5MTczZDRhN2FjMjc2MGMwMjU0MDk1YTQ5MzRkZCIsInZlcnNpb24iOjF9.fmdxhv2Ern7ZnCWW19cDTAB3-NaXmYF8xkEw40W2ssxGq50WymezMuqo2ssYGmFZJiiZNPx15OjRQza6V-DDAA
         | 
| 126 | 
             
              - task:
         | 
| 127 | 
             
                  type: zero-shot-classification
         | 
| 128 | 
             
                  name: Zero-Shot Text Classification
         | 
|  | |
| 132 | 
             
                  config: mathemakitten--winobias_antistereotype_test_v5
         | 
| 133 | 
             
                  split: test
         | 
| 134 | 
             
                metrics:
         | 
| 135 | 
            +
                - type: accuracy
         | 
|  | |
| 136 | 
             
                  value: 0.3640776699029126
         | 
| 137 | 
            +
                  name: Accuracy
         | 
| 138 | 
             
                  verified: true
         | 
| 139 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDA2NTZjYmM4MmYyNmM2YjA0YTA4NTVlODFlYjBhYTZlOWFmMGU5YzhlM2RkNWFhZTg1NGM4YjI4YzBmY2IxOSIsInZlcnNpb24iOjF9.6yqaB2Owq36GDA3kHfbkWyuxhmj8LhO8kEGYm7vZ6g3qfM6OkkkXFhX-D4bse-W3WILLRb4TE3xAad2EIkSLAA
         | 
| 140 | 
            +
                - type: loss
         | 
| 141 | 
             
                  value: 1.4798047741848304
         | 
| 142 | 
            +
                  name: Loss
         | 
| 143 | 
             
                  verified: true
         | 
| 144 | 
            +
                  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMTMxMzVmYThkNmU4ODhmNDgwZWM5ZjM2ZjFkODBjYTY1OGFiNDIwZTM4NDlmMTA4N2Q5ZTk4MThhMzVhN2RjNCIsInZlcnNpb24iOjF9.4i_6ZOjSLyMoPl3BlNMQJ3a1uRYcVpdyaEucECvzJ9786tUQ-RZ-6guKy2-hiZI3DKa1gsks9nPFfeRhLJyiBA
         | 
| 145 | 
             
            ---
         | 
| 146 |  | 
| 147 | 
             
            # OPT : Open Pre-trained Transformer Language Models
         | 

