Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from huggingface_hub import login | |
| from modules.ui_components import CSS, create_header, create_results_section, create_action_buttons | |
| from configs.dataset_config import (create_dataset_section, create_mmlu_config_section, | |
| load_dataset_preview, update_interface_based_on_dataset, | |
| toggle_preview, update_subject_selection_ui, | |
| update_questions_interface, get_subject_mode_param, | |
| get_subject_names) | |
| from configs.models_config import (create_model_config_section, update_eval_button_state, | |
| get_model_configs) | |
| from run_evaluation import run_mmlu_evaluation | |
| from utils.state_management import (start_evaluation, finish_evaluation, | |
| cancel_evaluation, handle_evaluation_results) | |
# Log in to the Hugging Face Hub if a token is provided via the environment.
hf_token = os.environ.get("HF_READ_WRITE_TOKEN")
if not hf_token:
    # Unset or empty variable: warn on stdout and carry on without logging in.
    print("⚠️ No HF_READ_WRITE_TOKEN found in environment")
else:
    login(hf_token)
# ---------------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------------
# Builds the UI sections and registers every event listener. Components are
# created, and listeners registered, in the same order as before; the shared
# lists below only remove repetition of identical component sequences.
with gr.Blocks(css=CSS) as demo:
    # --- Layout sections (creation order is preserved) ----------------------
    header_components = create_header()
    dataset_components = create_dataset_section()      # Section A
    mmlu_components = create_mmlu_config_section()     # Section B
    model_components = create_model_config_section()   # Section C
    results_components = create_results_section()
    action_components = create_action_buttons()

    # --- State tracking -----------------------------------------------------
    evaluation_state = gr.State({"running": False})
    preview_visibility = gr.State(False)

    # --- Short aliases for components referenced several times --------------
    dataset_dropdown = dataset_components['dropdown']
    preview_toggle = dataset_components['preview_toggle']
    preview_container = dataset_components['preview_container']
    preview_data_state = dataset_components['preview_data_state']
    subject_mode_selector = mmlu_components['subject_selection_mode']
    all_questions_checkbox = mmlu_components['all_questions_checkbox']
    eval_button = action_components['eval_button']
    cancel_button = action_components['cancel_button']
    tmp_results = results_components['tmp_data']

    # The five MMLU settings, in the order the evaluation step receives them.
    mmlu_settings = [
        subject_mode_selector,
        mmlu_components['num_subjects_slider'],
        mmlu_components['specific_subjects'],
        all_questions_checkbox,
        mmlu_components['num_questions_slider'],
    ]

    # The eight per-model settings: model 1 first, then model 2.
    model_settings = [
        model_components[key]
        for key in (
            'model1_dropdown', 'model1_shots', 'model1_regex', 'model1_flash_attn',
            'model2_dropdown', 'model2_shots', 'model2_regex', 'model2_flash_attn',
        )
    ]

    # Output list shared by the "start" and "cancel" handlers (13 entries).
    run_control_outputs = [
        evaluation_state,
        *mmlu_settings,
        model_components['model1_dropdown'],
        model_components['model2_dropdown'],
        eval_button,
        cancel_button,
        results_components['output'],
        results_components['table'],
        results_components['table_container'],
    ]

    # --- Dataset dropdown: load preview data, then update the interface -----
    preview_loaded = dataset_dropdown.change(
        fn=load_dataset_preview,
        inputs=[dataset_dropdown],
        outputs=[
            preview_data_state,
            mmlu_components['specific_subjects'],
            mmlu_components['num_subjects_slider'],
        ],
    )
    preview_loaded.then(
        fn=update_interface_based_on_dataset,
        inputs=[dataset_dropdown, preview_visibility],
        outputs=[
            mmlu_components['container'],
            model_components['container'],
            results_components['container'],
            preview_toggle,
            preview_container,
            preview_visibility,
            # NOTE(review): preview_toggle is listed a second time here, as in
            # the original wiring; presumably the handler returns seven
            # values with two updates for the toggle - confirm against
            # update_interface_based_on_dataset.
            preview_toggle,
        ],
    )

    # --- Preview toggle button ----------------------------------------------
    preview_toggle.click(
        fn=toggle_preview,
        inputs=[dataset_dropdown, preview_visibility, preview_data_state],
        outputs=[
            preview_visibility,
            preview_container,
            dataset_components['preview_output'],
            preview_toggle,
        ],
    )

    # --- Subject selection mode ---------------------------------------------
    subject_mode_selector.change(
        fn=update_subject_selection_ui,
        inputs=[subject_mode_selector],
        outputs=[
            mmlu_components['num_subjects_container'],
            mmlu_components['specific_subjects_container'],
        ],
    )

    # --- "All questions" checkbox -------------------------------------------
    all_questions_checkbox.change(
        fn=update_questions_interface,
        inputs=[all_questions_checkbox],
        outputs=[
            mmlu_components['num_questions_slider'],
            mmlu_components['questions_info_text'],
        ],
    )

    # --- Model settings validation ------------------------------------------
    # A change to any model setting calls update_eval_button_state with all
    # eight settings; its outputs are the error message and the eval button.
    for watched_setting in model_settings:
        watched_setting.change(
            fn=update_eval_button_state,
            inputs=model_settings,
            outputs=[model_components['error_message'], eval_button],
        )

    # --- Evaluation pipeline -------------------------------------------------
    # Four chained steps: start -> run -> handle results -> finish.
    evaluation_started = eval_button.click(
        fn=start_evaluation,
        inputs=[evaluation_state],
        outputs=run_control_outputs,
    )
    evaluation_ran = evaluation_started.then(
        # Adapts the raw component values to run_mmlu_evaluation's arguments.
        fn=lambda subject_mode, subject_count, subjects,
                  use_all_questions, question_count,
                  first_model, first_shots, first_regex, first_flash,
                  second_model, second_shots, second_regex, second_flash:
            run_mmlu_evaluation(
                get_subject_mode_param(subject_mode),
                subject_count,
                get_subject_names(subjects),
                use_all_questions,
                question_count,
                get_model_configs(
                    first_model, first_shots, first_regex, first_flash,
                    second_model, second_shots, second_regex, second_flash,
                ),
            ),
        inputs=mmlu_settings + model_settings,
        outputs=[tmp_results],
    )
    results_handled = evaluation_ran.then(
        fn=handle_evaluation_results,
        # The evaluation output stored in tmp_data is handed to the handler.
        inputs=[tmp_results],
        # NOTE(review): unlike the start/cancel output list, this one does not
        # include specific_subjects or model2_dropdown - confirm that this is
        # intended by handle_evaluation_results.
        outputs=[
            results_components['output'],
            results_components['table'],
            eval_button,
            cancel_button,
            subject_mode_selector,
            mmlu_components['num_subjects_slider'],
            all_questions_checkbox,
            mmlu_components['num_questions_slider'],
            model_components['model1_dropdown'],
            results_components['table_container'],
        ],
    )
    results_handled.then(
        fn=finish_evaluation,
        inputs=[evaluation_state],
        outputs=[evaluation_state],
    )

    # --- Cancel button --------------------------------------------------------
    # NOTE(review): no `cancels=` argument is passed, so whether an in-flight
    # evaluation is actually interrupted depends on cancel_evaluation - confirm.
    cancel_button.click(
        fn=cancel_evaluation,
        inputs=[evaluation_state],
        outputs=run_control_outputs,
    )
# Start the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()