# Source: file-viewer page header captured with the code (not part of the program).
# Author: astroza
# Commit 13a06cd: Update leaderboard configuration and results processing
# for Chilean Spanish ASR evaluation
# File size: 4.41 kB
import gradio as gr
import pandas as pd
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
INTRODUCTION_TEXT,
ABOUT_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
def load_results():
    """Build the leaderboard table from ``results.csv``.

    Reads ``results.csv`` from the current working directory and aggregates
    its rows into one row per model. The code relies on the columns
    ``model_id``, ``dataset``, ``wer``, ``total_audio_length`` and
    ``total_time`` being present.

    Returns:
        A ``pandas.DataFrame`` sorted by ascending average WER with the
        columns ``Model`` (an HTML link), ``Average WER ⬇️``, one
        ``<Dataset> WER ⬇️`` column per dataset (datasets whose name
        contains "datarisas" come first) and ``RTFx ⬆️``. Models whose id
        contains "elevenlabs" get the string ``'N/A'`` as RTFx.

        If the file is missing, completely empty, or has no data rows, an
        empty frame with only the ``Model``, ``Average WER ⬇️`` and
        ``RTFx ⬆️`` columns is returned.
    """
    fallback_columns = ['Model', 'Average WER ⬇️', 'RTFx ⬆️']

    # Only the read itself can raise the handled exceptions, so keep the
    # try body minimal and let real processing errors surface.
    try:
        df = pd.read_csv("results.csv")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return pd.DataFrame(columns=fallback_columns)

    # A header-only CSV has nothing to aggregate.
    if df.empty:
        return pd.DataFrame(columns=fallback_columns)

    # Mean WER per (model, dataset) cell.
    model_stats = df.pivot_table(
        index='model_id',
        columns='dataset',
        values='wer',
        aggfunc='mean',
    ).round(2)

    # Overall average WER is the mean over all rows of a model.
    model_stats['Average WER'] = df.groupby('model_id')['wer'].mean().round(2)

    # RTFx is sum(total_audio_length) / sum(total_time), not a mean of
    # per-row ratios.
    totals = df.groupby('model_id')[['total_audio_length', 'total_time']].sum()
    model_stats['RTFx'] = (
        totals['total_audio_length'] / totals['total_time']
    ).round(2)

    # ElevenLabs is API-based, not a local model, so RTFx is shown as N/A.
    elevenlabs_mask = model_stats.index.str.contains(
        'elevenlabs', case=False, na=False
    )
    if elevenlabs_mask.any():
        # Cast to object first: writing a string into a float64 column
        # relies on silent upcasting, which pandas has deprecated.
        model_stats['RTFx'] = model_stats['RTFx'].astype(object)
        model_stats.loc[elevenlabs_mask, 'RTFx'] = 'N/A'

    # Lower WER is better; then expose model_id as a regular column.
    model_stats = model_stats.sort_values('Average WER').reset_index()

    # Column order: model, average WER, datarisas, other datasets, RTFx.
    dataset_columns = [
        col for col in model_stats.columns
        if col not in ['model_id', 'Average WER', 'RTFx']
    ]
    datarisas_cols = [c for c in dataset_columns if 'datarisas' in c.lower()]
    other_dataset_cols = [
        c for c in dataset_columns if 'datarisas' not in c.lower()
    ]
    model_stats = model_stats[
        ['model_id', 'Average WER']
        + datarisas_cols
        + other_dataset_cols
        + ['RTFx']
    ]

    def create_model_link(model_name):
        """Return an HTML anchor for the model (ElevenLabs or HF Hub)."""
        if 'elevenlabs' in model_name.lower():
            return f'<a href="https://elevenlabs.io/speech-to-text" target="_blank">{model_name}</a>'
        return f'<a href="https://huggingface.co/{model_name}" target="_blank">{model_name}</a>'

    model_stats['model_id'] = model_stats['model_id'].apply(create_model_link)

    # Display names; the arrows show which direction is better.
    column_mapping = {
        'model_id': 'Model',
        'Average WER': 'Average WER ⬇️',
        'RTFx': 'RTFx ⬆️',
    }
    for col in dataset_columns:
        column_mapping[col] = f'{col.replace("_", " ").title()} WER ⬇️'

    return model_stats.rename(columns=column_mapping)
# Build the leaderboard table once at start-up; the UI shows this snapshot.
leaderboard_df = load_results()

demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Chilean Spanish ASR Leaderboard", elem_id="leaderboard-tab", id=0):
            gr.Dataframe(
                value=leaderboard_df,
                interactive=False,
                wrap=True,
                # First column holds the model link (rendered as markdown);
                # every remaining column is declared numeric.
                datatype=["markdown"] + ["number"] * (len(leaderboard_df.columns) - 1),
            )

        with gr.TabItem("📝 About", elem_id="about-tab", id=1):
            gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

# Launched unconditionally at module level, as in the original script.
demo.launch()