__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import gradio as gr
import pandas as pd
import re
import numpy as np
from collections import defaultdict
from constants import *
import os
from huggingface_hub import Repository
import json

global data_component, filter_component

TOKEN = os.environ.get("TOKEN")

repo = Repository(local_dir="./download_from_dataset", clone_from="JMMMU/leaderboard_result", repo_type="dataset", use_auth_token=TOKEN)

current_directory = os.getcwd()


def validate_model_size(s):
    pattern = r'^\d+B$|^-$'
    if re.match(pattern, s):
        return s
    else:
        return '-'
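
# Illustrative behaviour of validate_model_size (hypothetical inputs; the
# accepted format is '<number>B' or '-', anything else falls back to '-'):
#   validate_model_size("7B")  -> "7B"
#   validate_model_size("70B") -> "70B"
#   validate_model_size("-")   -> "-"
#   validate_model_size("7b")  -> "-"   # lowercase 'b' does not match the pattern
#   validate_model_size("7")   -> "-"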


def upload_file(files):
    file_paths = [file.name for file in files]
    return file_paths


def get_acc(data, subject_list):
    # Average the accuracy over the given subjects and return it as a
    # percentage rounded to one decimal place.
    acc = 0
    for subject in subject_list:
        acc += data["results"][subject]['jmmmu_acc,none']
    acc = acc / len(subject_list)
    acc = acc * 100
    acc = round(acc, 1)
    return acc


def calculate_score(input_file):
    json_string = input_file.decode('utf-8')
    data = json.loads(json_string)

    overall = data["results"]["jmmmu"]['jmmmu_acc,none'] * 100
    ca = data["results"]["culture_agnostic"]['jmmmu_acc,none'] * 100
    cs = data["results"]["culture_specific"]['jmmmu_acc,none'] * 100
    overall = round(overall, 1)
    ca = round(ca, 1)
    cs = round(cs, 1)

    # Art_Psychology
    art_psychology_subject_list = ["jmmmu_design", "jmmmu_music", "jmmmu_psychology"]
    # Science
    science_subject_list = ["jmmmu_biology", "jmmmu_chemistry", "jmmmu_physics", "jmmmu_math"]
    # Business
    business_subject_list = ["jmmmu_accounting", "jmmmu_economics", "jmmmu_finance", "jmmmu_manage", "jmmmu_marketing"]
    # Medicine
    medicine_subject_list = ["jmmmu_basic_medical_science", "jmmmu_clinical_medicine", "jmmmu_diagnostics_and_laboratory_medicine", "jmmmu_pharmacy", "jmmmu_public_health"]
    # Tech_Eng.
    tech_eng_subject_list = ["jmmmu_agriculture", "jmmmu_architecture_and_engineering", "jmmmu_computer_science", "jmmmu_electronics", "jmmmu_energy_and_power", "jmmmu_materials", "jmmmu_mechanical_engineering"]
    # Culture-specific subjects
    jmmmu_japanese_art_subject_list = ["jmmmu_japanese_art"]
    jmmmu_japanese_heritage_subject_list = ["jmmmu_japanese_heritage"]
    jmmmu_japanese_history_subject_list = ["jmmmu_japanese_history"]
    jmmmu_world_history_subject_list = ["jmmmu_world_history"]

    art_psychology = get_acc(data, art_psychology_subject_list)
    science = get_acc(data, science_subject_list)
    business = get_acc(data, business_subject_list)
    medicine = get_acc(data, medicine_subject_list)
    tech_eng = get_acc(data, tech_eng_subject_list)
    japanese_art = get_acc(data, jmmmu_japanese_art_subject_list)
    japanese_heritage = get_acc(data, jmmmu_japanese_heritage_subject_list)
    japanese_history = get_acc(data, jmmmu_japanese_history_subject_list)
    world_history = get_acc(data, jmmmu_world_history_subject_list)

    result_dict = {
        "overall": overall,
        "cultureSpecific": cs,
        "cultureAgnostic": ca,
        "japaneseArt": japanese_art,
        "japaneseHeritage": japanese_heritage,
        "japaneseHistory": japanese_history,
        "worldHistory": world_history,
        "artPsychology": art_psychology,
        "business": business,
        "science": science,
        "healthMedicine": medicine,
        "techEngineering": tech_eng
    }

    return result_dict
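
# Sketch of the JSON layout that calculate_score reads (accuracy values are
# purely illustrative); each group/subject key used above must appear under
# "results" with a 'jmmmu_acc,none' field:
# {
#     "results": {
#         "jmmmu":            {"jmmmu_acc,none": 0.45},
#         "culture_agnostic": {"jmmmu_acc,none": 0.43},
#         "culture_specific": {"jmmmu_acc,none": 0.47},
#         "jmmmu_design":     {"jmmmu_acc,none": 0.50},
#         ...                 # one entry per subject in the lists above
#     }
# }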


def add_new_eval(
    input_file,
    model_type: str,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_link: str,
    model_size: str,
    # upd_type: str,
    # question_type: str
):
    if input_file is None:
        warning_text = "Error! Empty file!"
        print(warning_text)
        return warning_text
    else:
        model_size = validate_model_size(model_size)
        # if upd_type == 'AAD':
        csv_path = CSV_RESULT_PATH
        # validity_check(input_file)
        csv_data = pd.read_csv(csv_path)
        result_dict = calculate_score(input_file)

        if revision_name_textbox == '':
            col = csv_data.shape[0]
            model_name = model_name_textbox
        else:
            model_name = revision_name_textbox
            model_name_list = csv_data['Model']
            name_list = [name.split(']')[0][1:] for name in model_name_list]
            if revision_name_textbox not in name_list:
                col = csv_data.shape[0]
            else:
                col = name_list.index(revision_name_textbox)

        model_name_wo_link = model_name
        if model_link != '':
            # wrap the display name in a markdown link
            model_name = '[' + model_name + '](' + model_link + ')'

        # add new data
        new_data = [
            model_type,
            model_name,
            model_size,
            result_dict["overall"],
            result_dict["cultureSpecific"],
            result_dict["cultureAgnostic"],
            result_dict["japaneseArt"],
            result_dict["japaneseHeritage"],
            result_dict["japaneseHistory"],
            result_dict["worldHistory"],
            result_dict["artPsychology"],
            result_dict["business"],
            result_dict["science"],
            result_dict["healthMedicine"],
            result_dict["techEngineering"]
        ]

        # If the same data already exists, return an error.
        if new_data in csv_data.values.tolist():
            warning_text = "Error! The same data already exists!"
            print(warning_text)
            return warning_text
        # If the same model name already exists, return an error.
        elif new_data[:5] in csv_data.values.tolist():
            warning_text = "Error! The same data already exists! Please fill revision_name."
            print(warning_text)
            return warning_text

        csv_data.loc[col] = new_data
        csv_data.to_csv(csv_path, index=False)

        absolute_result_path = os.path.abspath(csv_path)
        if not os.path.exists(absolute_result_path):
            raise FileNotFoundError(f"File {absolute_result_path} not found")
        repo.git_pull()
        repo.git_add(absolute_result_path)

        save_path = os.path.join(CSV_QUEUE_DIR, f"{model_name_wo_link}.json")
        with open(save_path, "wb") as f:
            f.write(input_file)
        absolute_queue_path = os.path.abspath(save_path)
        repo.git_add(absolute_queue_path)

        repo.git_commit(f"add {model_name_wo_link} results")
        repo.git_push()

        print(f"Success! Your {model_name_wo_link} has been added!")
        return 0
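
# Minimal sketch of calling add_new_eval directly, bypassing the Gradio UI
# (file path and model details are hypothetical; input_file must be the raw
# bytes of the results JSON, as produced by the File component below). Note
# that this updates the results CSV and pushes to the dataset repo:
#   with open("results/my_model.json", "rb") as f:
#       add_new_eval(f.read(), "LMM", "My-Model-7B", "", "https://huggingface.co/org/my-model", "7B")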


def get_baseline_df():
    repo.git_pull()
    df = pd.read_csv(CSV_RESULT_PATH)
    df = df.sort_values(by="Overall", ascending=False)
    present_columns = MODEL_INFO + checkbox_group.value
    df = df[present_columns]
    return df


def get_all_df():
    repo.git_pull()
    df = pd.read_csv(CSV_RESULT_PATH)
    df = df.sort_values(by="Overall", ascending=False)
    return df


block = gr.Blocks()

with block:
    gr.Markdown(
        LEADERBORAD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # table jmmmu bench
        with gr.TabItem("🏅 JMMMU Benchmark", elem_id="jmmmu-benchmark-tab-table", id=1):
            # selection for column part:
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )  # user can select the evaluation dimension

            with gr.Row():
                # selection for model size part:
                model_size = gr.CheckboxGroup(
                    choices=MODEL_SIZE,
                    value=MODEL_SIZE,
                    label="Model Size",
                    interactive=True,
                )

            baseline_value = get_baseline_df()
            baseline_header = MODEL_INFO + checkbox_group.value
            baseline_datatype = ['markdown'] * 2 + ['number'] * len(checkbox_group.value)

            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            def on_filter_model_size_method_change(selected_model_size, selected_columns):
                updated_data = get_all_df()

                # model_size
                def custom_filter(row, model_size_filters):
                    model_size = row['Model Size']
                    model_size = model_size.upper()
                    if model_size == '-':
                        size_filter = '-' in model_size_filters
                    elif 'B' in model_size:
                        size = float(model_size.replace('B', ''))
                        size_filter = ('>=10B' in model_size_filters and size >= 10) or ('<10B' in model_size_filters and size < 10)
                    else:
                        size_filter = False
                    return size_filter
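                # Illustrative behaviour of custom_filter (assuming the MODEL_SIZE
                # choices include '>=10B', '<10B' and '-'):
                #   'Model Size' == '7B'  -> kept only when '<10B'  is selected
                #   'Model Size' == '34B' -> kept only when '>=10B' is selected
                #   'Model Size' == '-'   -> kept only when '-'     is selected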
                mask = updated_data.apply(custom_filter, axis=1, model_size_filters=selected_model_size)
                updated_data = updated_data[mask]

                # columns:
                selected_columns = [item for item in TASK_INFO if item in selected_columns]
                present_columns = MODEL_INFO + selected_columns

                updated_data = updated_data[present_columns]
                updated_data = updated_data.sort_values(by=selected_columns[0], ascending=False)
                updated_headers = present_columns
                update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]

                filter_component = gr.components.Dataframe(
                    value=updated_data,
                    headers=updated_headers,
                    type="pandas",
                    datatype=update_datatype,
                    interactive=False,
                    visible=True,
                )

                return filter_component

            model_size.change(fn=on_filter_model_size_method_change, inputs=[model_size, checkbox_group], outputs=data_component)
            checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[model_size, checkbox_group], outputs=data_component)

        # table 5
        with gr.TabItem("🚀 Submit here! ", elem_id="jmmmu-benchmark-tab-table", id=5):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    model_type = gr.Dropdown(
                        choices=["LMM", "LLM"],
                        label="Model type",
                        multiselect=False,
                        value="LMM",
                        interactive=True,
                    )
                    model_name_textbox = gr.Textbox(
                        label="Model name", placeholder="LLaMA-7B"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="LLaMA-7B"
                    )
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
                    )
                    model_size = gr.Textbox(
                        label="Model size", placeholder="7B(Input content format must be 'number+B' or '-', default is '-')"
                    )

                with gr.Column():
                    input_file = gr.components.File(label="Click to Upload a JSON File", file_count="single", type='binary')
                    submit_button = gr.Button("Submit Eval")

                    submission_result = gr.Markdown()
                    submit_button.click(
                        add_new_eval,
                        inputs=[
                            input_file,
                            model_type,
                            model_name_textbox,
                            revision_name_textbox,
                            model_link,
                            model_size
                        ],
                    )

    def refresh_data():
        value = get_baseline_df()
        return value

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(
            refresh_data, outputs=[data_component]
        )

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()