Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| from src.about import ( | |
| CITATION_BUTTON_LABEL, | |
| CITATION_BUTTON_TEXT, | |
| INTRODUCTION_TEXT, | |
| ABOUT_TEXT, | |
| TITLE, | |
| ) | |
| from src.display.css_html_js import custom_css | |
# Arrow suffixes for column headers, spelled as escapes so the labels stay
# intact even if this file is re-saved or served with the wrong encoding.
_LOWER_IS_BETTER = "\u2b07\ufe0f"   # down arrow
_HIGHER_IS_BETTER = "\u2b06\ufe0f"  # up arrow


def _empty_leaderboard():
    """Return a zero-row leaderboard that carries only the fixed display columns."""
    return pd.DataFrame(
        columns=[
            'Model',
            f'Average WER {_LOWER_IS_BETTER}',
            f'RTFx {_HIGHER_IS_BETTER}',
        ]
    )


def _create_model_link(model_name):
    """Wrap a model id in an HTML anchor.

    Ids containing "elevenlabs" (case-insensitive) point at the ElevenLabs
    speech-to-text page; every other id points at huggingface.co/<model id>.
    """
    if 'elevenlabs' in model_name.lower():
        return f'<a href="https://elevenlabs.io/speech-to-text" target="_blank">{model_name}</a>'
    return f'<a href="https://huggingface.co/{model_name}" target="_blank">{model_name}</a>'


def load_results():
    """Load ``results.csv`` and build the leaderboard table.

    The CSV is read from the current working directory and must provide the
    columns ``model_id``, ``dataset``, ``wer``, ``total_audio_length`` and
    ``total_time``.

    Returns:
        A ``pandas.DataFrame`` with one row per model, sorted by ascending
        average WER, with these columns in order:

        * ``Model`` - the model id wrapped in an HTML link,
        * ``Average WER`` - mean of all ``wer`` rows of the model,
        * one ``<Dataset> WER`` column per dataset (mean WER on that dataset),
          with any dataset whose name contains "datarisas" listed first,
        * ``RTFx`` - sum(total_audio_length) / sum(total_time), or the string
          ``'N/A'`` for models whose id contains "elevenlabs".

        All metrics are rounded to two decimals. If the file is missing,
        zero-byte, or contains no data rows, an empty frame with only the
        ``Model``, ``Average WER`` and ``RTFx`` columns is returned.
    """
    # Keep the try body down to the one call that can hit a missing/blank file.
    try:
        df = pd.read_csv("results.csv")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return _empty_leaderboard()
    if df.empty:
        # Header-only file: nothing to rank.
        return _empty_leaderboard()

    # Mean WER per (model, dataset) pair, one column per dataset.
    wer_by_dataset = df.pivot_table(
        index='model_id',
        columns='dataset',
        values='wer',
        aggfunc='mean',
    ).round(2)

    # Overall average is taken over the raw rows, not over the per-dataset means.
    wer_by_dataset['Average WER'] = df.groupby('model_id')['wer'].mean().round(2)

    # RTFx is a ratio of totals: sum(total_audio_length) / sum(total_time).
    audio_time_sums = df.groupby('model_id')[['total_audio_length', 'total_time']].sum()
    rtfx_calculated = (
        audio_time_sums['total_audio_length'] / audio_time_sums['total_time']
    ).round(2)

    model_stats = wer_by_dataset.copy()
    model_stats['RTFx'] = rtfx_calculated

    # ElevenLabs is API-based rather than a local model, so its RTFx is shown
    # as N/A. The column is cast to object first: writing a string into a
    # float64 column is an incompatible-dtype setitem that newer pandas rejects.
    elevenlabs_mask = model_stats.index.str.contains('elevenlabs', case=False, na=False)
    if elevenlabs_mask.any():
        model_stats['RTFx'] = model_stats['RTFx'].astype(object)
        model_stats.loc[elevenlabs_mask, 'RTFx'] = 'N/A'

    # Best (lowest) average WER first, then expose model_id as a regular column.
    model_stats = model_stats.sort_values('Average WER').reset_index()

    # Column order: model, average, datarisas dataset(s), other datasets, RTFx.
    dataset_columns = [
        col for col in model_stats.columns
        if col not in ('model_id', 'Average WER', 'RTFx')
    ]
    datarisas_cols = [col for col in dataset_columns if 'datarisas' in col.lower()]
    other_dataset_cols = [col for col in dataset_columns if 'datarisas' not in col.lower()]
    model_stats = model_stats[
        ['model_id', 'Average WER'] + datarisas_cols + other_dataset_cols + ['RTFx']
    ]

    model_stats['model_id'] = model_stats['model_id'].apply(_create_model_link)

    # Display names: title-cased dataset names, with an arrow showing which
    # direction is better for each metric.
    column_mapping = {
        'model_id': 'Model',
        'Average WER': f'Average WER {_LOWER_IS_BETTER}',
        'RTFx': f'RTFx {_HIGHER_IS_BETTER}',
    }
    for col in dataset_columns:
        column_mapping[col] = f'{col.replace("_", " ").title()} WER {_LOWER_IS_BETTER}'
    return model_stats.rename(columns=column_mapping)
# Build the leaderboard table once, when the script starts.
leaderboard_df = load_results()

# The first column carries HTML links and is rendered as markdown; every
# remaining column is declared as a number.
column_datatypes = ["markdown"] + ["number"] * (len(leaderboard_df.columns) - 1)

# NOTE(review): the leading "π" in the tab and accordion labels below looks
# like a mis-encoded emoji; confirm the intended characters before changing.
with gr.Blocks(css=custom_css) as demo:
    # Page header and introduction.
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 0: the read-only leaderboard table.
        with gr.TabItem("π Chilean Spanish ASR Leaderboard", elem_id="leaderboard-tab", id=0):
            gr.Dataframe(
                value=leaderboard_df,
                interactive=False,
                wrap=True,
                datatype=column_datatypes,
            )

        # Tab 1: free-form description of the project.
        with gr.TabItem("π About", elem_id="about-tab", id=1):
            gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")

    # Collapsed citation box with a copy button.
    with gr.Row():
        with gr.Accordion("π Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

# Start the Gradio server.
demo.launch()