Commit
·
e02c7b7
1
Parent(s):
718da24
Update README.md
Browse files
README.md
CHANGED
|
@@ -51,7 +51,7 @@ model-index:
|
|
| 51 |
verified: false
|
| 52 |
- name: pass@100
|
| 53 |
type: pass@100
|
| 54 |
-
value: 0.
|
| 55 |
- task:
|
| 56 |
type: text-generation
|
| 57 |
dataset:
|
|
@@ -68,7 +68,7 @@ model-index:
|
|
| 68 |
verified: false
|
| 69 |
- name: pass@100
|
| 70 |
type: pass@100
|
| 71 |
-
value: 0.
|
| 72 |
- task:
|
| 73 |
type: text-generation
|
| 74 |
dataset:
|
|
@@ -85,7 +85,7 @@ model-index:
|
|
| 85 |
verified: false
|
| 86 |
- name: pass@100
|
| 87 |
type: pass@100
|
| 88 |
-
value: 0.
|
| 89 |
verified: false
|
| 90 |
- task:
|
| 91 |
type: text-generation
|
|
@@ -127,17 +127,9 @@ model-index:
|
|
| 127 |
type: loubnabnl/humaneval_infilling
|
| 128 |
name: HumanEval FIM (Python)
|
| 129 |
metrics:
|
| 130 |
-
- name:
|
| 131 |
-
type:
|
| 132 |
-
value: 0.
|
| 133 |
-
verified: false
|
| 134 |
-
- name: pass@10
|
| 135 |
-
type: pass@10
|
| 136 |
-
value: 0.0
|
| 137 |
-
verified: false
|
| 138 |
-
- name: pass@100
|
| 139 |
-
type: pass@100
|
| 140 |
-
value: 0.0
|
| 141 |
verified: false
|
| 142 |
- task:
|
| 143 |
type: text-generation
|
|
@@ -145,34 +137,20 @@ model-index:
|
|
| 145 |
type: nuprl/MultiPL-E
|
| 146 |
name: MultiPL HumanEval FIM (Java)
|
| 147 |
metrics:
|
| 148 |
-
- name:
|
| 149 |
-
type:
|
| 150 |
-
value: 0.
|
| 151 |
verified: false
|
| 152 |
-
- name: pass@10
|
| 153 |
-
type: pass@10
|
| 154 |
-
value: 0.0
|
| 155 |
-
verified: false
|
| 156 |
-
- name: pass@100
|
| 157 |
-
type: pass@100
|
| 158 |
-
value: 0.0
|
| 159 |
- task:
|
| 160 |
type: text-generation
|
| 161 |
dataset:
|
| 162 |
type: nuprl/MultiPL-E
|
| 163 |
name: MultiPL HumanEval FIM (JavaScript)
|
| 164 |
metrics:
|
| 165 |
-
- name:
|
| 166 |
-
type:
|
| 167 |
-
value: 0.
|
| 168 |
verified: false
|
| 169 |
-
- name: pass@10
|
| 170 |
-
type: pass@10
|
| 171 |
-
value: 0.0
|
| 172 |
-
verified: false
|
| 173 |
-
- name: pass@100
|
| 174 |
-
type: pass@100
|
| 175 |
-
value: 0.0
|
| 176 |
- task:
|
| 177 |
type: text-generation
|
| 178 |
dataset:
|
|
@@ -181,7 +159,7 @@ model-index:
|
|
| 181 |
metrics:
|
| 182 |
- name: BLEU
|
| 183 |
type: bleu
|
| 184 |
-
value:
|
| 185 |
verified: false
|
| 186 |
---
|
| 187 |
|
|
|
|
| 51 |
verified: false
|
| 52 |
- name: pass@100
|
| 53 |
type: pass@100
|
| 54 |
+
value: 0.41
|
| 55 |
- task:
|
| 56 |
type: text-generation
|
| 57 |
dataset:
|
|
|
|
| 68 |
verified: false
|
| 69 |
- name: pass@100
|
| 70 |
type: pass@100
|
| 71 |
+
value: 0.47
|
| 72 |
- task:
|
| 73 |
type: text-generation
|
| 74 |
dataset:
|
|
|
|
| 85 |
verified: false
|
| 86 |
- name: pass@100
|
| 87 |
type: pass@100
|
| 88 |
+
value: 0.49
|
| 89 |
verified: false
|
| 90 |
- task:
|
| 91 |
type: text-generation
|
|
|
|
| 127 |
type: loubnabnl/humaneval_infilling
|
| 128 |
name: HumanEval FIM (Python)
|
| 129 |
metrics:
|
| 130 |
+
- name: single_line
|
| 131 |
+
type: exact_match
|
| 132 |
+
value: 0.44
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
verified: false
|
| 134 |
- task:
|
| 135 |
type: text-generation
|
|
|
|
| 137 |
type: nuprl/MultiPL-E
|
| 138 |
name: MultiPL HumanEval FIM (Java)
|
| 139 |
metrics:
|
| 140 |
+
- name: single_line
|
| 141 |
+
type: exact_match
|
| 142 |
+
value: 0.62
|
| 143 |
verified: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
- task:
|
| 145 |
type: text-generation
|
| 146 |
dataset:
|
| 147 |
type: nuprl/MultiPL-E
|
| 148 |
name: MultiPL HumanEval FIM (JavaScript)
|
| 149 |
metrics:
|
| 150 |
+
- name: single_line
|
| 151 |
+
type: exact_match
|
| 152 |
+
value: 0.60
|
| 153 |
verified: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
- task:
|
| 155 |
type: text-generation
|
| 156 |
dataset:
|
|
|
|
| 159 |
metrics:
|
| 160 |
- name: BLEU
|
| 161 |
type: bleu
|
| 162 |
+
value: 18.13
|
| 163 |
verified: false
|
| 164 |
---
|
| 165 |
|