Added bleu prefix to return dict's key
Sometimes when using `evaluate.combine()` it's unclear which sub-metric the keys come from (e.g. https://www.kaggle.com/code/alvations/huggingface-evaluate-for-mt-evaluations); being explicit would help. Also added the individual n-gram precision values to the results so that TensorBoard picks them up properly.
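For context, a minimal sketch of the ambiguity (the metric pairing and example sentences below are made up for illustration, not taken from the PR): when BLEU is combined with another metric, the merged result dict mixes keys from both, and an un-prefixed name like `precisions` does not say which metric it belongs to.

```python
import evaluate

# Hypothetical combination of two MT metrics; both take the same input format.
mt_metrics = evaluate.combine(["bleu", "chrf"])

results = mt_metrics.compute(
    predictions=["the cat sat on the mat"],
    references=[["the cat is on the mat"]],
)

# Without a prefix, keys such as "precisions" or "score" give no hint
# whether they came from BLEU or chrF.
print(results)
```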
bleu.py CHANGED

@@ -123,11 +123,20 @@ class Bleu(evaluate.Metric):
             reference_corpus=references, translation_corpus=predictions, max_order=max_order, smooth=smooth
         )
         (bleu, precisions, bp, ratio, translation_length, reference_length) = score
-        return {
+
+        results = {
             "bleu": bleu,
-            "precisions": precisions,
-            "brevity_penalty": bp,
-            "length_ratio": ratio,
-            "translation_length": translation_length,
-            "reference_length": reference_length,
+            "bleu_precisions": precisions,
+            "bleu_brevity_penalty": bp,
+            "bleu_length_ratio": ratio,
+            "bleu_translation_length": translation_length,
+            "bleu_reference_length": reference_length,
         }
+
+        # Add explicit floats values for precisions,
+        # so that tensorboard scalars automatically picks it up.
+        for n, p in enumerate(precisions, 1):
+            results[f'bleu_{n}gram_precisions'] = p
+
+        return results
+
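A rough usage sketch of the new keys (the example sentences and TensorBoard wiring are assumptions, not part of the PR): once the result keys are prefixed and the per-n-gram precisions are exposed as plain floats, each scalar in the dict can be written to TensorBoard directly.

```python
import evaluate
from torch.utils.tensorboard import SummaryWriter  # assumes PyTorch is installed

bleu = evaluate.load("bleu")  # assumes a metric version that includes this change
results = bleu.compute(
    predictions=["the cat sat on the mat"],
    references=[["the cat is on the mat"]],
)

# With this change, results contains e.g. "bleu", "bleu_brevity_penalty",
# "bleu_1gram_precisions", ..., alongside the list-valued "bleu_precisions".
writer = SummaryWriter()
for key, value in results.items():
    if isinstance(value, (int, float)):  # log only scalar values
        writer.add_scalar(key, value, global_step=0)
writer.close()
```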