Spaces:
Running
Running
Update Space (evaluate main: dfdd0cc0)
Browse files
bleu.py
CHANGED
|
@@ -57,7 +57,7 @@ _KWARGS_DESCRIPTION = """
|
|
| 57 |
Computes BLEU score of translated segments against one or more references.
|
| 58 |
Args:
|
| 59 |
predictions: list of translations to score.
|
| 60 |
-
references: list of lists of references for each translation.
|
| 61 |
tokenizer : approach used for tokenizing `predictions` and `references`.
|
| 62 |
The default tokenizer is `tokenizer_13a`, a minimal tokenization approach that is equivalent to `mteval-v13a`, used by WMT.
|
| 63 |
This can be replaced by any function that takes a string as input and returns a list of tokens as output.
|
|
@@ -91,12 +91,20 @@ class Bleu(evaluate.EvaluationModule):
|
|
| 91 |
description=_DESCRIPTION,
|
| 92 |
citation=_CITATION,
|
| 93 |
inputs_description=_KWARGS_DESCRIPTION,
|
| 94 |
-
features=
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
codebase_urls=["https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py"],
|
| 101 |
reference_urls=[
|
| 102 |
"https://en.wikipedia.org/wiki/BLEU",
|
|
@@ -105,6 +113,10 @@ class Bleu(evaluate.EvaluationModule):
|
|
| 105 |
)
|
| 106 |
|
| 107 |
def _compute(self, predictions, references, tokenizer=Tokenizer13a(), max_order=4, smooth=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
references = [[tokenizer(r) for r in ref] for ref in references]
|
| 109 |
predictions = [tokenizer(p) for p in predictions]
|
| 110 |
score = compute_bleu(
|
|
|
|
| 57 |
Computes BLEU score of translated segments against one or more references.
|
| 58 |
Args:
|
| 59 |
predictions: list of translations to score.
|
| 60 |
+
references: list of lists of references (or a plain list of references) for each translation.
|
| 61 |
tokenizer : approach used for tokenizing `predictions` and `references`.
|
| 62 |
The default tokenizer is `tokenizer_13a`, a minimal tokenization approach that is equivalent to `mteval-v13a`, used by WMT.
|
| 63 |
This can be replaced by any function that takes a string as input and returns a list of tokens as output.
|
|
|
|
| 91 |
description=_DESCRIPTION,
|
| 92 |
citation=_CITATION,
|
| 93 |
inputs_description=_KWARGS_DESCRIPTION,
|
| 94 |
+
features=[
|
| 95 |
+
datasets.Features(
|
| 96 |
+
{
|
| 97 |
+
"predictions": datasets.Value("string", id="sequence"),
|
| 98 |
+
"references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
|
| 99 |
+
}
|
| 100 |
+
),
|
| 101 |
+
datasets.Features(
|
| 102 |
+
{
|
| 103 |
+
"predictions": datasets.Value("string", id="sequence"),
|
| 104 |
+
"references": datasets.Value("string", id="sequence"),
|
| 105 |
+
}
|
| 106 |
+
),
|
| 107 |
+
],
|
| 108 |
codebase_urls=["https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py"],
|
| 109 |
reference_urls=[
|
| 110 |
"https://en.wikipedia.org/wiki/BLEU",
|
|
|
|
| 113 |
)
|
| 114 |
|
| 115 |
def _compute(self, predictions, references, tokenizer=Tokenizer13a(), max_order=4, smooth=False):
|
| 116 |
+
# if only one reference is provided make sure we still use list of lists
|
| 117 |
+
if isinstance(references[0], str):
|
| 118 |
+
references = [[ref] for ref in references]
|
| 119 |
+
|
| 120 |
references = [[tokenizer(r) for r in ref] for ref in references]
|
| 121 |
predictions = [tokenizer(p) for p in predictions]
|
| 122 |
score = compute_bleu(
|