import os
import time
import json
import regex as re
import gradio as gr
from gradio.themes.utils.sizes import text_md
from gradio_modal import Modal

from content import (
    HEADER_MARKDOWN,
    LEADERBOARD_TAB_TITLE_MARKDOWN,
    LEADERBOARD_TAB_BELLOW_TABLE_MARKDOWN,
    LEADERBOARD_TAB_BELLOW_SCATTER_PLOT_MARKDOWN,
    LEADERBOARD_TAB_BELLOW_HEATMAP_MARKDOWN,
    SUBMISSION_TAB_TITLE_MARKDOWN,
    MODAL_SUBMIT_MARKDOWN,
    SUBMISSION_DETAILS_MARKDOWN,
    RANKING_AFTER_SUBMISSION_MARKDOWN,
    MORE_DETAILS_MARKDOWN,
    ABOUT_MARKDOWN,
)
from server import LeaderboardServer, xmlAndMarkdownEscape, xmlQuoteAttr, api, requests, check_significance_is_reachable

HF_SPACE_TOKEN = os.environ["HF_SPACE_TOKEN"]
HF_SPACE_ID = os.environ["HF_SPACE_ID"]
# For testing purposes
HF_DISABLE_SUBMIT = bool(int(os.environ.get("HF_DISABLE_SUBMIT", "0")))

from huggingface_hub import dump_environment_info
dump_environment_info()
print()
print("= pip freeze =")
print("==============")
import subprocess
subprocess.run(["python", "-m", "pip", "freeze"], check=True)
print()

leaderboard_server = LeaderboardServer()
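
# Ordered names of the submission-form fields. The order matters:
# `process_submission`, `submit_results` and `erase_pre_submit` pair these keys
# with the Gradio inputs via `zip(SUBMISSION_INPUTS, inputs)`, so it must match
# the order in which the widgets are created in the Submission tab below.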
SUBMISSION_INPUTS = dict.fromkeys((
    "team_name",
    "model_name",
    "model_type",
    "parameters",
    "input_length",
    "precision",
    "description",
    "link_to_model",
    "submission_file",
)).keys()


def on_submit_pressed():
    return gr.update(value='Processing submission…', interactive=False)
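

# Best-effort reachability check for the "Link to model" field. Returns an error
# message string when the URL clearly cannot be fetched, and None when it looks
# fine (a plain connect timeout is tolerated).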
def validate_url(url):
    try:
        response = requests.get(url, timeout=5)
    except requests.exceptions.ConnectTimeout:
        pass
    except requests.exceptions.ConnectionError as e:
        return str(e).rsplit(":", 1)[-1].strip()
    except Exception as e:
        return str(e)
    else:
        if response.status_code != 200:
            return f'Failed to get <{url}>. Status code: {response.status_code}'
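

# Validates the submission form values; raises ValueError with a user-facing
# message when a required field is missing or invalid.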
def validate_submission_inputs(**inputs):
    if any(key for key, value in inputs.items() if key != "description" and value in (None, "")):
        raise ValueError('Please fill in all fields (only the description field is optional)')
    if not os.path.exists(inputs["submission_file"]):
        raise ValueError('File does not exist')
    error_msg = validate_url(inputs["link_to_model"])
    if error_msg:
        raise ValueError(f'Link to the model is not valid: {error_msg}')
    if not inputs["parameters"] > 0:
        raise ValueError('Attribute `Parameters (B)` should be greater than zero')
    if not (inputs["input_length"] > 0 and inputs["input_length"] == int(inputs["input_length"])):
        raise ValueError('Attribute `Input length (# tokens)` should be a positive integer')
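

# Handles the "Pre-submit model" button: normalizes and validates the form inputs,
# asks the LeaderboardServer to prepare the model for submission (pre-submission
# tournament), and returns gr.update() values for the pre-submit widgets
# (or resets the button and hides them if validation fails).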
def process_submission(*inputs):
    try:
        inputs = dict(zip(SUBMISSION_INPUTS, inputs))
        for key in inputs:
            if key in ("team_name", "model_name"):
                inputs[key] = re.sub(r"""\s+""", " ", inputs[key]).strip()
            elif key in ("description", "link_to_model"):
                inputs[key] = inputs[key].strip()

        validate_submission_inputs(**inputs)

        metadata = SUBMISSION_INPUTS - {"submission_file"}
        metadata = {key: inputs[key] for key in metadata}

        gr.Info('Submission valid, going to the tournament…')

        pre_submit = leaderboard_server.prepare_model_for_submission(inputs["submission_file"], metadata)
    except ValueError as err:
        gr.Warning(str(err))
        return (
            gr.update(value='Pre-submit model', visible=True, interactive=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )
    except Exception as err:
        gr.Warning(str(err), duration=None)
        return (
            gr.update(value='Pre-submit model', visible=True, interactive=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )

    gr.Info('You can see the results of your model below.', duration=15)

    if HF_DISABLE_SUBMIT:
        submit_prompt = gr.update(visible=False)
        submission_btn_yes = gr.update(visible=False)
    else:
        submit_prompt = gr.update(visible=True)
        submission_btn_yes = gr.update(interactive=True, visible=True)

    # TODO: checkbox use_corrected_p_value
    pre_submit_leaderboard_table = gr.update(
        value=leaderboard_server.get_leaderboard(pre_submit=pre_submit, category=leaderboard_server.TASKS_CATEGORY_OVERALL_DETAILS),
        visible=True,
    )
    pre_submit_leaderboard_table_csv = gr.update(
        value=leaderboard_server.get_leaderboard_csv(pre_submit=pre_submit, category=leaderboard_server.TASKS_CATEGORY_OVERALL_DETAILS),
        visible=True,
    )
    pre_submit_model_tournament_results_table = gr.update(
        value=leaderboard_server.get_model_tournament_table(pre_submit.submission_id, leaderboard_server.TASKS_CATEGORY_OVERALL_DETAILS, pre_submit=pre_submit),
        visible=True,
    )
    pre_submit_model_tournament_results_table_csv = gr.update(
        value=leaderboard_server.get_model_tournament_table_csv(pre_submit.submission_id, leaderboard_server.TASKS_CATEGORY_OVERALL_DETAILS, pre_submit=pre_submit),
        visible=True,
    )

    return (
        gr.update(visible=False),
        submit_prompt,
        submission_btn_yes,
        gr.update(interactive=True, visible=True),
        gr.update(visible=True),
        pre_submit_leaderboard_table,
        pre_submit_leaderboard_table_csv,
        pre_submit_model_tournament_results_table,
        pre_submit_model_tournament_results_table_csv,
    )
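

# Builds (model title, submission_id) pairs, sorted case-insensitively by title,
# used as choices for the model-selection dropdowns.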
def get_submission_ids_and_titles():
    with leaderboard_server.var_lock.ro:
        submission_ids_and_titles = [
            (
                leaderboard_server.submission_id_to_model_title[submission_id],
                submission_id,
            )
            for submission_id in leaderboard_server.submission_ids
        ]
    submission_ids_and_titles.sort(key=lambda x: x[0].lower())
    return submission_ids_and_titles
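

# Handles the final "Submit model" confirmation: persists the submission, drops it
# from the pre-submit cache, and refreshes every leaderboard view and model dropdown.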
def submit_results(*inputs):
    inputs = dict(zip(SUBMISSION_INPUTS, inputs))
    with open(inputs["submission_file"]) as fp:
        data = json.load(fp)

    gr.Info('Checking if the tournament is complete…')

    leaderboard_server.save_model_submission(inputs["submission_file"], data=data)

    gr.Info('Submission successful!')

    # erase pre-submit
    submission_id = data["submission_metadata"]["submission_id"]
    leaderboard_server.pre_submit.pop(submission_id)

    with leaderboard_server.var_lock.ro:
        leaderboard = gr.update(
            value=leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        leaderboard_csv = gr.update(
            value=leaderboard_server.get_leaderboard_csv(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        leaderboard_scatter_plot = gr.update(
            value=leaderboard_server.get_leaderboard_scatter_plot(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        leaderboard_heatmap = gr.update(
            value=leaderboard_server.get_leaderboard_heatmap(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )

    submission_ids_and_titles = get_submission_ids_and_titles()

    return (
        gr.update(value='Pre-submit model', visible=True, interactive=True),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        leaderboard,
        leaderboard_csv,
        leaderboard_scatter_plot,
        leaderboard_heatmap,
        gr.update(visible=False),
        gr.update(choices=submission_ids_and_titles),
        gr.update(value=leaderboard_server.TASKS_CATEGORY_OVERALL),
        gr.update(choices=submission_ids_and_titles),
    )
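

# Handles the "Reverse process" button: discards the pending pre-submission and
# hides the pre-submit widgets again.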
def erase_pre_submit(*inputs):
    inputs = dict(zip(SUBMISSION_INPUTS, inputs))
    with open(inputs["submission_file"]) as fp:
        data = json.load(fp)

    submission_id = data["submission_metadata"]["submission_id"]
    leaderboard_server.pre_submit.pop(submission_id)

    # One update per output of `submission_btn_no.click`: reset the pre-submit
    # button and hide the confirmation buttons and pre-submit widgets.
    return (
        gr.update(value='Pre-submit model', visible=True, interactive=True),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )


def fetch_model_detail(submission_id):
    metadata = leaderboard_server.get_model_detail(submission_id)
    return (
        gr.update(value=metadata['description'], visible=True),
        gr.update(value=metadata['link_to_model'], visible=True)
    )
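

# `use_corrected_p_value` comes from the FDR checkbox; its boolean value indexes
# leaderboard_server.KINDS_OF_P_VALUE (unchecked → index 0, checked → index 1).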
def fetch_model_tournament_results_table(submission_id, category, use_corrected_p_value):
    kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]
    if submission_id is None or category is None:
        return gr.update(
            visible=False,
        )
    else:
        return gr.update(
            value=leaderboard_server.get_model_tournament_table(submission_id, category, kind_of_p_value=kind_of_p_value),
            visible=True,
        )


def fetch_model_tournament_results_table_csv(submission_id, category, use_corrected_p_value):
    kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]
    if submission_id is None or category is None:
        return gr.update(
            visible=False,
        )
    else:
        return gr.update(
            value=leaderboard_server.get_model_tournament_table_csv(submission_id, category, kind_of_p_value=kind_of_p_value),
            visible=True,
        )


def create_task_abbreviation_legend_table(category):
    task_abbreviation_legend_body = []
    abbreviation2name = leaderboard_server.CATEGORY_TO_TASK_ABBREVIATION_TO_DETAILS[category]
    for abbr, name, url in abbreviation2name.values():
        task_abbreviation_legend_body.append([
            xmlAndMarkdownEscape(abbr),
            xmlAndMarkdownEscape(name),
            f'<a href={xmlQuoteAttr(url)}>{xmlAndMarkdownEscape(url)}</a>',
        ])
    return task_abbreviation_legend_body
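

# Re-renders the leaderboard views when the category dropdown or the FDR checkbox
# changes. The task-abbreviation legend and the per-model tournament widgets are
# shown for every category except the overall one.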
def change_leaderboard_category(category, use_corrected_p_value, selected_submission_id):
    if category == leaderboard_server.TASKS_CATEGORY_OVERALL:
        task_abbreviation_legend = gr.update(
            visible=False,
        )
        tournament_results_title = gr.update(
            visible=False,
        )
        tournament_results_dropdown = gr.update(
            visible=False,
        )
        model_tournament_results_table = gr.update(
            visible=False,
        )
        model_tournament_results_table_csv = gr.update(
            visible=False,
        )
    else:
        task_abbreviation_legend = gr.update(
            value=create_task_abbreviation_legend_table(category),
            visible=True,
        )
        tournament_results_title = gr.update(
            visible=True,
        )
        tournament_results_dropdown = gr.update(
            visible=True,
        )
        model_tournament_results_table = fetch_model_tournament_results_table(selected_submission_id, category, use_corrected_p_value)
        model_tournament_results_table_csv = fetch_model_tournament_results_table_csv(selected_submission_id, category, use_corrected_p_value)

    kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]

    leaderboard = gr.update(
        value=leaderboard_server.get_leaderboard(category=category, kind_of_p_value=kind_of_p_value),
        visible=True,
    )
    leaderboard_csv = gr.update(
        value=leaderboard_server.get_leaderboard_csv(category=category, kind_of_p_value=kind_of_p_value),
        visible=True,
    )
    leaderboard_scatter_plot = gr.update(
        value=leaderboard_server.get_leaderboard_scatter_plot(category=category, kind_of_p_value=kind_of_p_value),
        visible=True,
    )
    leaderboard_heatmap = gr.update(
        value=leaderboard_server.get_leaderboard_heatmap(category=category, kind_of_p_value=kind_of_p_value),
        visible=True,
    )

    return (
        leaderboard,
        leaderboard_csv,
        task_abbreviation_legend,
        leaderboard_scatter_plot,
        leaderboard_heatmap,
        tournament_results_title,
        tournament_results_dropdown,
        model_tournament_results_table,
        model_tournament_results_table_csv,
    )


def show_modal():
    gr.Info('You are going to submit your model.', duration=5)  # the toast also scrolls the page back to the top
    return gr.update(visible=True)


def hide_modal():
    return gr.update(visible=False)


def disable_submit_buttons():
    return (
        gr.update(interactive=False),
        gr.update(interactive=False),
    )


def enable_submit_buttons():
    return (
        gr.update(interactive=True),
        gr.update(interactive=True),
    )
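

# Helpers for the "results dataset integrity" recovery flow that replaces the
# normal UI when the tournament results are detected as corrupted.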
def results_dataset_integrity_check():
    leaderboard_server.results_dataset_integrity_check(solve=True)


def update_results_dataset_integrity_solving_progress():
    progress = leaderboard_server.tournament_results_integrity_solving_progress * 100
    if leaderboard_server.tournament_results_corrupted:
        results_dataset_integrity_solving_progress = gr.update(
            value=f"Tournament results are being prepared, please wait… {progress:.0f}% done"
        )
        results_dataset_integrity_restart_btn = gr.update(
            interactive=False,
            visible=False,
        )
    else:
        results_dataset_integrity_solving_progress = gr.update(
            value="Tournament results are prepared; please restart this space using the button shown below."
        )
        results_dataset_integrity_restart_btn = gr.update(
            interactive=True,
            visible=True,
        )
    return (
        results_dataset_integrity_solving_progress,
        results_dataset_integrity_restart_btn,
    )


def on_tournament_results_corrupted():
    if leaderboard_server.tournament_results_corrupted and not leaderboard_server.tournament_results_integrity_solving:
        results_dataset_integrity_check_btn = gr.update(
            interactive=True,
            visible=True,
        )
        results_dataset_integrity_solving_progress = gr.update(
            visible=False,
        )
    else:
        results_dataset_integrity_check_btn = gr.update(
            interactive=False,
            visible=False,
        )
        results_dataset_integrity_solving_progress = gr.update(
            visible=True,
        )
    return (
        results_dataset_integrity_check_btn,
        results_dataset_integrity_solving_progress,
    )


def restart_space():
    api.restart_space(repo_id=HF_SPACE_ID, token=HF_SPACE_TOKEN)


def check_significance_is_reachable_hook():
    # Due to a frequent exception on Hugging Face Space: requests.exceptions.ConnectionError("HTTPSConnectionPool(host='czechllm.fit.vutbr.cz', port=443): Max retries exceeded with url: /benczechmark-leaderboard/compare_significance/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa3bbf76320>: Failed to establish a new connection: [Errno 101] Network is unreachable'))")
    if check_significance_is_reachable():
        print("Check significance is reachable.")
        return gr.update(active=False)
    else:
        print("Check significance is not reachable.")
        print("Restarting in 10 seconds…")
        time.sleep(10)
        restart_space()
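

# Refreshes all leaderboard views and dropdown choices whenever a client loads the page.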
def on_application_load():
    with leaderboard_server.var_lock.ro:
        leaderboard = gr.update(
            value=leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        leaderboard_csv = gr.update(
            value=leaderboard_server.get_leaderboard_csv(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        leaderboard_scatter_plot = gr.update(
            value=leaderboard_server.get_leaderboard_scatter_plot(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        leaderboard_heatmap = gr.update(
            value=leaderboard_server.get_leaderboard_heatmap(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )

    submission_ids_and_titles = get_submission_ids_and_titles()

    return (
        leaderboard,
        leaderboard_csv,
        leaderboard_scatter_plot,
        leaderboard_heatmap,
        gr.update(choices=submission_ids_and_titles),
        gr.update(value=leaderboard_server.TASKS_CATEGORY_OVERALL),
        gr.update(choices=submission_ids_and_titles),
    )
| custom_css = """ | |
| footer {visibility: hidden} | |
| tr { | |
| background-color: var(--table-even-background-fill); | |
| font-family: "IBM Plex Mono"; | |
| } | |
| tr.row_odd { | |
| background-color: var(--table-odd-background-fill); | |
| } | |
| .leaderboard-table td:first-child p, .leaderboard-table-model-details td:first-child p { | |
| margin: 0px; | |
| } | |
| .leaderboard-table th:nth-child(10), .leaderboard-table td:nth-child(10) { | |
| border-right-width: 2px; | |
| border-right-color: var(--border-color-primary); | |
| } | |
| .leaderboard-table td:nth-child(10) p { | |
| font-weight: bolder; | |
| } | |
| .leaderboard-table tr[model_title]:hover::after, .leaderboard-table-model-details tr[model_title]:hover::after { | |
| content: attr(model_title); | |
| position: fixed; | |
| background-color: rgba(50, 50, 50, 0.9); | |
| color: white; | |
| padding: 5px; | |
| border-radius: 5px; | |
| white-space: nowrap; | |
| z-index: 10; | |
| pointer-events: none; | |
| left: var(--table-rect-left, 0px); | |
| top: var(--row-rect-bottom, 0px); | |
| transform: translateY(0px); | |
| } | |
| .gradio-bokeh { | |
| padding-top: calc(var(--block-label-margin) + 4px + 19.6px + 4px + 2px); | |
| } | |
| .leaderboard-heatmap .gradio-bokeh { | |
| justify-content: normal; | |
| } | |
| """ | |
| custom_js = """ | |
| <script> | |
| function addTitleForEachRowOfLeaderboardTable(){ | |
| const tables = document.querySelectorAll('.leaderboard-table table, .leaderboard-table-model-details table'); | |
| tables.forEach(table => { | |
| const rect = table.getBoundingClientRect(); | |
| table.style.setProperty('--table-rect-left', `${rect.left}px`); | |
| const rows = table.querySelectorAll('tr'); | |
| if (table.scrollLeft > 10) { | |
| rows.forEach(row => { | |
| // Find the first cell in the row that contains a link | |
| const firstCellLink = row.querySelector('td a'); | |
| if (firstCellLink) { | |
| // Get the value of the title attribute from the first link | |
| const titleText = firstCellLink.getAttribute('title'); | |
| // Set the model_title attribute for the row | |
| row.setAttribute('model_title', titleText); | |
| // Set bottom position | |
| const rect = row.getBoundingClientRect(); | |
| row.style.setProperty('--row-rect-bottom', `${rect.bottom}px`); | |
| } | |
| }); | |
| } else { | |
| rows.forEach(row => { | |
| // Find the first cell in the row that contains a link | |
| const firstCellLink = row.querySelector('td a'); | |
| if (firstCellLink) { | |
| // Get the value of the title attribute from the first link | |
| const titleText = firstCellLink.getAttribute('title'); | |
| // Remove the model_title attribute for the row | |
| row.removeAttribute('model_title'); | |
| } | |
| }); | |
| } | |
| }); | |
| } | |
| const intervalId = setInterval(addTitleForEachRowOfLeaderboardTable, 1000); | |
| </script> | |
| """ | |


CHECKBOX_USE_CORRECTED_P_VALUE_INFO = "Switch to False Discovery Rate (FDR) guarantees"
CHECKBOX_USE_CORRECTED_P_VALUE_LABEL = "FDR guarantees"
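

# Builds the Gradio UI. If the tournament results dataset is corrupted, only the
# integrity-solving controls are rendered and the function returns early; otherwise
# the full Leaderboard / Model details / Submission / About tabs are built.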
def gradio_app():
    with gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css, head=custom_js) as main:
        check_significance_is_reachable_timer = gr.Timer(
            value=2 * 60,  # seconds
        )
        check_significance_is_reachable_timer.tick(
            fn=check_significance_is_reachable_hook,
            outputs=check_significance_is_reachable_timer
        )

        gr.Markdown(HEADER_MARKDOWN)

        if leaderboard_server.tournament_results_corrupted:
            gr.Markdown("## Results dataset integrity solving")
            results_dataset_integrity_check_btn = gr.Button(
                value='Check integrity of results dataset',
                interactive=True,
                visible=False,
            )
            results_dataset_integrity_solving_progress = gr.Markdown(
                value=None,
                visible=False,
            )
            results_dataset_integrity_restart_btn = gr.Button(
                value='Restart this space',
                interactive=False,
                visible=False,
            )
            results_dataset_integrity_solving_progress_timer = gr.Timer(
                value=60,  # seconds
            )
            results_dataset_integrity_solving_progress_timer.tick(
                fn=update_results_dataset_integrity_solving_progress,
                outputs=[
                    results_dataset_integrity_solving_progress,
                    results_dataset_integrity_restart_btn,
                ]
            )

            results_dataset_integrity_check_btn.click(
                fn=lambda: (
                    gr.update(interactive=False, visible=False),
                    gr.update(visible=True),
                ),
                outputs=[
                    results_dataset_integrity_check_btn,
                    results_dataset_integrity_solving_progress,
                ]
            ).then(
                fn=results_dataset_integrity_check
            )

            results_dataset_integrity_restart_btn.click(
                fn=lambda: gr.update(interactive=False),
                outputs=results_dataset_integrity_restart_btn
            ).then(
                fn=restart_space
            )

            main.load(
                fn=on_tournament_results_corrupted,
                outputs=[
                    results_dataset_integrity_check_btn,
                    results_dataset_integrity_solving_progress,
                ]
            ).then(
                fn=update_results_dataset_integrity_solving_progress,
                outputs=[
                    results_dataset_integrity_solving_progress,
                    results_dataset_integrity_restart_btn,
                ]
            )

            return main

        with gr.Tabs():
            with leaderboard_server.var_lock.ro:
                submission_ids_and_titles = get_submission_ids_and_titles()

            with gr.TabItem('Leaderboard'):
                with gr.Column():
                    gr.Markdown(LEADERBOARD_TAB_TITLE_MARKDOWN)
                    with gr.Row():
                        leaderboard_category_of_tasks = gr.Dropdown(
                            choices=(
                                [
                                    leaderboard_server.TASKS_CATEGORY_OVERALL,
                                    leaderboard_server.TASKS_CATEGORY_OVERALL_DETAILS,
                                ] + sorted(leaderboard_server.TASKS_CATEGORIES)
                            ),
                            value=leaderboard_server.TASKS_CATEGORY_OVERALL,
                            label="Category of benchmarks",
                            interactive=True,
                        )
                    with gr.Row():
                        leaderboard_use_corrected_p_value = gr.Checkbox(
                            info=CHECKBOX_USE_CORRECTED_P_VALUE_INFO,
                            label=CHECKBOX_USE_CORRECTED_P_VALUE_LABEL,
                            interactive=True,
                        )
                    with gr.Row():
                        leaderboard_table = gr.DataFrame(
                            leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
                            interactive=False,
                            label=None,
                            visible=True,
                            datatype="markdown",
                            elem_classes="leaderboard-table",
                        )
                    with gr.Row():
                        leaderboard_table_csv = gr.DownloadButton(
                            label="Download leaderboard in CSV format",
                            value=leaderboard_server.get_leaderboard_csv(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
                        )
                    gr.Markdown(LEADERBOARD_TAB_BELLOW_TABLE_MARKDOWN)
                    with gr.Row():
                        leaderboard_table_legend = gr.DataFrame(
                            value=None,
                            headers=[
                                "Abbr.",  # "task abbreviation"
                                "Name",
                                "URL",
                            ],
                            column_widths=["150px"],
                            datatype="markdown",
                            label="Descriptions of the tasks",
                            visible=False,
                            interactive=False,
                            elem_classes="leaderboard-table-legend",
                        )
                    with gr.Row():
                        scatter_plot_title = gr.Markdown(
                            value="### Scatter plot",
                            visible=True,
                        )
                    with gr.Row():
                        leaderboard_scatter_plot = gr.HTML(
                            value=leaderboard_server.get_leaderboard_scatter_plot(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
                            label='Scatter plot',
                        )
                    gr.Markdown(LEADERBOARD_TAB_BELLOW_SCATTER_PLOT_MARKDOWN)
                    with gr.Row():
                        heatmap_title = gr.Markdown(
                            value="### Heatmap",
                            visible=True,
                        )
                    with gr.Row():
                        leaderboard_heatmap = gr.HTML(
                            value=leaderboard_server.get_leaderboard_heatmap(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
                            label='Heatmap',
                            elem_classes="leaderboard-heatmap",
                        )
                    gr.Markdown(LEADERBOARD_TAB_BELLOW_HEATMAP_MARKDOWN)
                    with gr.Row():
                        tournament_results_title = gr.Markdown(
                            value="## Tournament results for selected model",
                            visible=False,
                        )
                    with gr.Row():
                        tournament_results_dropdown = gr.Dropdown(
                            value=None,
                            choices=submission_ids_and_titles,
                            label="Select model",
                            visible=False,
                            interactive=True,
                        )
                    with gr.Row():
                        model_tournament_results_table = gr.DataFrame(
                            value=None,
                            datatype="markdown",
                            label="The model won against…",
                            visible=False,
                            interactive=False,
                            elem_classes="leaderboard-table-model-details",
                        )
                    with gr.Row():
                        model_tournament_results_table_csv = gr.DownloadButton(
                            label="Download model tournament results in CSV format",
                            visible=False,
                        )
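
                # The category dropdown and the FDR checkbox share the same handler,
                # so changing either re-renders every leaderboard view.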
                for _leaderboard_form_input in [
                    leaderboard_category_of_tasks,
                    leaderboard_use_corrected_p_value,
                ]:
                    _leaderboard_form_input.change(
                        fn=change_leaderboard_category,
                        inputs=[
                            leaderboard_category_of_tasks,
                            leaderboard_use_corrected_p_value,
                            tournament_results_dropdown,
                        ],
                        outputs=[
                            leaderboard_table,
                            leaderboard_table_csv,
                            leaderboard_table_legend,
                            leaderboard_scatter_plot,
                            leaderboard_heatmap,
                            tournament_results_title,
                            tournament_results_dropdown,
                            model_tournament_results_table,
                            model_tournament_results_table_csv,
                        ],
                    )

                tournament_results_dropdown.change(
                    fn=fetch_model_tournament_results_table,
                    inputs=[
                        tournament_results_dropdown,
                        leaderboard_category_of_tasks,
                        leaderboard_use_corrected_p_value,
                    ],
                    outputs=model_tournament_results_table,
                ).then(
                    fn=fetch_model_tournament_results_table_csv,
                    inputs=[
                        tournament_results_dropdown,
                        leaderboard_category_of_tasks,
                        leaderboard_use_corrected_p_value,
                    ],
                    outputs=model_tournament_results_table_csv,
                )

            with gr.TabItem('Model details'):
                gr.Markdown(MORE_DETAILS_MARKDOWN)
                with gr.Row():
                    model_details_model_dropdown = gr.Dropdown(
                        choices=submission_ids_and_titles,
                        label="Select model",
                        interactive=True,
                    )
                with gr.Row():
                    model_details_description = gr.Text(value='', label='Model description', visible=False, interactive=False)
                    model_details_url = gr.Text(value='', label='Model URL', visible=False, interactive=False)
                with gr.Row():
                    model_details_tournament_results_title = gr.Markdown(
                        value="## Tournament results for selected model",
                        visible=False,
                    )
                with gr.Row():
                    model_details_category_of_tasks = gr.Dropdown(
                        choices=(
                            [
                                leaderboard_server.TASKS_CATEGORY_OVERALL,
                            ] + sorted(leaderboard_server.TASKS_CATEGORIES)
                        ),
                        value=None,
                        label="Category of benchmarks",
                        visible=False,
                        interactive=True,
                    )
                with gr.Row():
                    model_details_use_corrected_p_value = gr.Checkbox(
                        info=CHECKBOX_USE_CORRECTED_P_VALUE_INFO,
                        label=CHECKBOX_USE_CORRECTED_P_VALUE_LABEL,
                        visible=False,
                        interactive=True,
                    )
                with gr.Row():
                    model_details_model_tournament_results_table = gr.DataFrame(
                        value=None,
                        datatype="markdown",
                        label="The model won against…",
                        visible=False,
                        interactive=False,
                        elem_classes="leaderboard-table-model-details",
                    )
                with gr.Row():
                    model_details_model_tournament_results_table_csv = gr.DownloadButton(
                        label="Download model tournament results in CSV format",
                        visible=False,
                    )
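
                # Selecting a model reveals its description, link, and the tournament-results
                # controls; the chained .then() calls below toggle visibility and refresh the tables.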
                model_details_model_dropdown.change(
                    fn=fetch_model_detail,
                    inputs=[model_details_model_dropdown],
                    outputs=[model_details_description, model_details_url],
                ).then(
                    fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
                    inputs=model_details_model_dropdown,
                    outputs=model_details_tournament_results_title
                ).then(
                    fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
                    inputs=model_details_model_dropdown,
                    outputs=model_details_category_of_tasks
                ).then(
                    fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
                    inputs=model_details_model_dropdown,
                    outputs=model_details_use_corrected_p_value
                ).then(
                    fn=fetch_model_tournament_results_table,
                    inputs=[
                        model_details_model_dropdown,
                        model_details_category_of_tasks,
                        model_details_use_corrected_p_value,
                    ],
                    outputs=model_details_model_tournament_results_table
                ).then(
                    fn=fetch_model_tournament_results_table_csv,
                    inputs=[
                        model_details_model_dropdown,
                        model_details_category_of_tasks,
                        model_details_use_corrected_p_value,
                    ],
                    outputs=model_details_model_tournament_results_table_csv
                )

                for _model_details_form_input in [
                    model_details_category_of_tasks,
                    model_details_use_corrected_p_value,
                ]:
                    _model_details_form_input.change(
                        fn=fetch_model_tournament_results_table,
                        inputs=[
                            model_details_model_dropdown,
                            model_details_category_of_tasks,
                            model_details_use_corrected_p_value,
                        ],
                        outputs=model_details_model_tournament_results_table,
                    ).then(
                        fn=fetch_model_tournament_results_table_csv,
                        inputs=[
                            model_details_model_dropdown,
                            model_details_category_of_tasks,
                            model_details_use_corrected_p_value,
                        ],
                        outputs=model_details_model_tournament_results_table_csv,
                    )

            with gr.TabItem('Submission'):
                with gr.Column():
                    gr.Markdown(SUBMISSION_TAB_TITLE_MARKDOWN)
                    submission_inputs = dict.fromkeys(SUBMISSION_INPUTS)
                    with gr.Row():
                        submission_inputs["team_name"] = gr.Textbox(label='Team name', type='text')
                        submission_inputs["model_name"] = gr.Textbox(label='Model name', type='text')
                        submission_inputs["model_type"] = gr.Dropdown(
                            label="Model type",
                            choices=("chat", "pretrained", "ensemble"),
                        )
                        submission_inputs["parameters"] = gr.Number(
                            label='Parameters (B)',
                            value=0.01,
                            step=0.01,
                        )
                    with gr.Row():
                        submission_inputs["input_length"] = gr.Number(
                            label='Input length (# tokens)',
                            value=0,
                            step=1,
                        )
                        submission_inputs["precision"] = gr.Dropdown(
                            label="Precision",
                            choices=("float32", "bfloat32", "float16", "bfloat16", "8bit", "4bit"),
                        )
                    submission_inputs["description"] = gr.Textbox(label='Description', type='text')
                    submission_inputs["link_to_model"] = gr.Textbox(label='Link to model', type='text')
                    submission_inputs["submission_file"] = gr.File(label='Upload your results', type='filepath')
                    pre_submission_btn = gr.Button(value='Pre-submit model', interactive=True)

                    submit_prompt = gr.Markdown(
                        SUBMISSION_DETAILS_MARKDOWN,
                        visible=False
                    )
                    pre_submit_info = gr.Markdown(
                        RANKING_AFTER_SUBMISSION_MARKDOWN,
                        visible=False
                    )
                    pre_submit_leaderboard_table = gr.DataFrame(
                        value=None,
                        datatype="markdown",
                        label="The leaderboard for this pre-submission",
                        visible=False,
                        interactive=False,
                        elem_classes="leaderboard-table",
                    )
                    pre_submit_leaderboard_table_csv = gr.DownloadButton(
                        label="Download the leaderboard in CSV format",
                        visible=False,
                    )
                    pre_submit_model_tournament_results_table = gr.DataFrame(
                        value=None,
                        datatype="markdown",
                        label="The pre-submitted model won against…",
                        visible=False,
                        interactive=False,
                        elem_classes="leaderboard-table-model-details",
                    )
                    pre_submit_model_tournament_results_table_csv = gr.DownloadButton(
                        label="Download the model tournament results in CSV format",
                        visible=False,
                    )

                    # delimiter
                    with gr.Row():
                        pass

                    submission_btn_yes = gr.Button(value='Submit model', interactive=False, visible=False)
                    submission_btn_no = gr.Button(value='Reverse process', interactive=False, visible=False)

                with Modal(visible=False, allow_user_close=False) as modal_submit:
                    gr.Markdown(MODAL_SUBMIT_MARKDOWN)
                    modal_submit_yes = gr.Button("Yes", interactive=True)
                    modal_submit_no = gr.Button("No", interactive=True)
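
                # Event wiring for the pre-submit, confirmation modal, and final submit flow.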
                pre_submission_btn.click(
                    fn=on_submit_pressed,
                    outputs=[pre_submission_btn],
                ).then(
                    fn=lambda *inputs: tuple(
                        gr.update(interactive=False)
                        for _ in inputs
                    ),
                    inputs=list(submission_inputs.values()),
                    outputs=list(submission_inputs.values()),
                ).then(
                    fn=process_submission,
                    inputs=list(submission_inputs.values()),
                    outputs=[
                        pre_submission_btn,
                        submit_prompt,
                        submission_btn_yes,
                        submission_btn_no,
                        pre_submit_info,
                        pre_submit_leaderboard_table,
                        pre_submit_leaderboard_table_csv,
                        pre_submit_model_tournament_results_table,
                        pre_submit_model_tournament_results_table_csv,
                    ],
                    concurrency_limit=None,
                )

                submission_btn_yes.click(
                    fn=show_modal,
                    outputs=[modal_submit]
                )

                modal_submit_yes.click(
                    fn=disable_submit_buttons,
                    outputs=[
                        modal_submit_yes,
                        modal_submit_no,
                    ]
                ).then(
                    fn=submit_results,
                    inputs=list(submission_inputs.values()),
                    outputs=[
                        pre_submission_btn,
                        submission_btn_yes,
                        submission_btn_no,
                        submit_prompt,
                        pre_submit_info,
                        pre_submit_leaderboard_table,
                        pre_submit_leaderboard_table_csv,
                        pre_submit_model_tournament_results_table,
                        pre_submit_model_tournament_results_table_csv,
                        leaderboard_table,
                        leaderboard_table_csv,
                        leaderboard_scatter_plot,
                        leaderboard_heatmap,
                        modal_submit,
                        model_details_model_dropdown,
                        leaderboard_category_of_tasks,
                        tournament_results_dropdown,
                    ],
                ).then(
                    fn=enable_submit_buttons,
                    outputs=[
                        modal_submit_yes,
                        modal_submit_no,
                    ]
                ).then(
                    fn=lambda *inputs: tuple(
                        gr.update(value=None, interactive=True)
                        for _ in inputs
                    ),
                    inputs=list(submission_inputs.values()),
                    outputs=list(submission_inputs.values()),
                )

                modal_submit_no.click(
                    fn=disable_submit_buttons,
                    outputs=[
                        modal_submit_yes,
                        modal_submit_no,
                    ]
                ).then(
                    fn=hide_modal,
                    outputs=[modal_submit]
                ).then(
                    fn=enable_submit_buttons,
                    outputs=[
                        modal_submit_yes,
                        modal_submit_no,
                    ]
                )

                submission_btn_no.click(
                    fn=erase_pre_submit,
                    inputs=list(submission_inputs.values()),
                    outputs=[
                        pre_submission_btn,
                        submission_btn_yes,
                        submission_btn_no,
                        submit_prompt,
                        pre_submit_info,
                        pre_submit_leaderboard_table,
                        pre_submit_leaderboard_table_csv,
                        pre_submit_model_tournament_results_table,
                        pre_submit_model_tournament_results_table_csv,
                    ],
                ).then(
                    fn=lambda *inputs: tuple(
                        gr.update(interactive=True)
                        for _ in inputs
                    ),
                    inputs=list(submission_inputs.values()),
                    outputs=list(submission_inputs.values()),
                )

            with gr.TabItem('About'):
                gr.Markdown(ABOUT_MARKDOWN)

        main.load(
            on_application_load,
            inputs=None,
            outputs=[
                leaderboard_table,
                leaderboard_table_csv,
                leaderboard_scatter_plot,
                leaderboard_heatmap,
                model_details_model_dropdown,
                leaderboard_category_of_tasks,
                tournament_results_dropdown,
            ]
        )

    return main


app = gradio_app()
app.launch()