David Pomerenke committed · commit 8f4448c · 1 parent: e223525

Display ASR-WER in app

app.py CHANGED
@@ -60,6 +60,17 @@ METRICS = {
         between predicted and actual text. Higher scores indicate better language understanding.
         """,
     },
+    "asr_wer": {
+        "display_name": "Automatic Speech Recognition (WER)",
+        "field_name": "asr_wer",
+        "label": "WER",
+        "explanation": """
+        **Automatic Speech Recognition Word Error Rate**: Measures the accuracy of speech-to-text transcription.
+        It calculates the minimum number of word edits (insertions, deletions, substitutions) needed to transform the
+        transcription into the reference text, divided by the number of words in the reference.
+        Lower scores indicate better performance, with 0 being perfect transcription.
+        """,
+    },
 }
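The added explanation describes WER as a word-level edit distance normalized by the reference length. A minimal sketch of that computation, assuming plain whitespace tokenization (a hypothetical standalone helper, not the code this Space uses; evaluation pipelines often rely on a library such as jiwer):

```python
def word_error_rate(reference: str, transcription: str) -> float:
    """Minimum word edits (insertions, deletions, substitutions) to turn
    the transcription into the reference, divided by the number of words
    in the reference. 0.0 means a perfect transcription."""
    ref, hyp = reference.split(), transcription.split()
    if not ref:
        raise ValueError("reference must contain at least one word")
    # Levenshtein distance over words via dynamic programming.
    prev = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, start=1):
        curr = [i] + [0] * len(hyp)
        for j, h in enumerate(hyp, start=1):
            cost = 0 if r == h else 1
            curr[j] = min(
                prev[j] + 1,        # insertion: add a missing reference word
                curr[j - 1] + 1,    # deletion: drop an extra transcribed word
                prev[j - 1] + cost, # substitution (or match when words agree)
            )
        prev = curr
    return prev[-1] / len(ref)

print(word_error_rate("the cat sat on the mat", "the cat sat on a mat"))
```

One substituted word against a six-word reference gives an edit distance of 1, so the example prints 1/6 ≈ 0.167.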
@@ -195,6 +206,8 @@ def create_model_comparison_plot(metric):
     for lang in top_languages:
         for score in lang["scores"]:
             # Get the value directly using the field name
+            if metric["field_name"] not in score:
+                continue
             value = score[metric["field_name"]]
             if value is not None:
                 scores_flat.append(
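The two added lines guard against score entries that predate the new asr_wer field: without the membership check, score[metric["field_name"]] would raise a KeyError. A toy illustration of the same pattern, with hypothetical data:

```python
# Hypothetical score entries: only the newer one carries "asr_wer".
scores = [
    {"mt_bleu": 0.31, "cls_acc": 0.72},                   # recorded before ASR existed
    {"mt_bleu": 0.28, "cls_acc": 0.69, "asr_wer": 0.41},
]

field_name = "asr_wer"
scores_flat = []
for score in scores:
    if field_name not in score:  # same guard as in create_model_comparison_plot
        continue
    value = score[field_name]
    if value is not None:
        scores_flat.append(value)

print(scores_flat)  # [0.41] — the pre-ASR entry is skipped, not a crash
```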
@@ -254,15 +267,18 @@ def create_language_stats_df(metric):
             "Overall": round(lang["overall_score"], 3)
             if lang["overall_score"] is not None
             else "N/A",
-            "
+            "Translation": round(lang["mt_bleu"], 3)
             if lang["mt_bleu"] is not None
             else "N/A",
-            "
+            "Classification": round(lang["cls_acc"], 3)
             if lang["cls_acc"] is not None
             else "N/A",
             "MLM": round(lang["mlm_chrf"], 3)
             if lang["mlm_chrf"] is not None
             else "N/A",
+            "ASR": round(lang["asr_wer"], 3)
+            if lang["asr_wer"] is not None
+            else "N/A",
             "Best Model": model_link,
             "CommonVoice Hours": commonvoice_link,
         }
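The renamed Translation/Classification columns and the new ASR column all follow the same round-or-"N/A" conditional. A small mock of that row-building pattern, with hypothetical values:

```python
# Hypothetical language record; ASR has not been evaluated yet.
lang = {"mt_bleu": 0.31, "cls_acc": 0.72, "mlm_chrf": 0.64, "asr_wer": None}

row = {
    "Translation": round(lang["mt_bleu"], 3) if lang["mt_bleu"] is not None else "N/A",
    "Classification": round(lang["cls_acc"], 3) if lang["cls_acc"] is not None else "N/A",
    "MLM": round(lang["mlm_chrf"], 3) if lang["mlm_chrf"] is not None else "N/A",
    "ASR": round(lang["asr_wer"], 3) if lang["asr_wer"] is not None else "N/A",
}
print(row["ASR"])  # N/A — a missing WER renders as "N/A" instead of failing
```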
@@ -299,7 +315,7 @@ def create_scatter_plot(metric):
         scores = [
             score[metric["field_name"]]
             for score in lang["scores"]
-            if score[metric["field_name"]] is not None
+            if metric["field_name"] in score and score[metric["field_name"]] is not None
         ]
         if scores:  # Only include if we have valid scores
             avg_score = sum(scores) / len(scores)