H2H-eval-comparator

Sleeping

App Files Files Community

rohansampath committed on Feb 26

Commit

d093a4d

verified ·

1 Parent(s): 24af1c0

Create configs/dataset_config.py

Browse files

Files changed (1) hide show

configs/dataset_config.py +310 -0

configs/dataset_config.py ADDED Viewed

	@@ -0,0 +1,310 @@

+import gradio as gr
+import pandas as pd
+from dataset_previews import mmlupro_dataset_preview, format_preview_for_display
def create_dataset_section():
    """
    Build Section A of the UI: the dataset picker and its preview area.

    Components are created in this order: the section header, a row holding
    the dataset dropdown and the (initially non-interactive) preview toggle
    button, an initially hidden column holding the preview heading, preview
    table and a divider, and finally a state holder for loaded preview data.

    Returns:
        dict: The created components keyed by 'header', 'dropdown',
            'preview_toggle', 'preview_container', 'preview_output' and
            'preview_data_state'.
    """
    placeholder_choice = "(Select Dataset)"

    section_header = gr.Markdown("## (A) Select Dataset for Evaluation")

    # Dropdown and toggle button share one row.
    with gr.Row():
        dropdown = gr.Dropdown(
            label="Dataset",
            choices=[placeholder_choice, "MMLU-Pro"],
            value=placeholder_choice,
            info="Select a dataset to perform the Head-to-Head Evaluation on. Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)",
        )
        toggle_button = gr.Button(
            "Show Dataset Preview",
            variant="secondary",
            interactive=False,
        )

    # The preview column starts out hidden.
    with gr.Column(visible=False) as preview_column:
        gr.Markdown("## Dataset Preview", elem_id="preview_header")
        preview_table = gr.DataFrame(
            elem_id="preview_table",
            wrap=True,
            interactive=False,
        )
        # Visual divider below the table.
        gr.Markdown("<div class='section-divider'></div>")

    # Holds the loaded preview data; starts empty.
    preview_state = gr.State(None)

    return dict(
        header=section_header,
        dropdown=dropdown,
        preview_toggle=toggle_button,
        preview_container=preview_column,
        preview_output=preview_table,
        preview_data_state=preview_state,
    )
def create_mmlu_config_section():
    """
    Build Section B of the UI: the dataset configuration options.

    Everything lives inside one initially hidden column. The left half holds
    the subject selection controls (a mode radio plus two initially hidden
    sub-columns: a subject-count slider and a subject checkbox group). The
    right half holds the sample controls (an "all questions" checkbox, an
    initially hidden info text, and a questions-per-subject slider).

    Returns:
        dict: The created components keyed by 'container',
            'subject_selection_mode', 'num_subjects_container',
            'num_subjects_slider', 'specific_subjects_container',
            'specific_subjects', 'all_questions_checkbox',
            'questions_info_text' and 'num_questions_slider'.
    """
    mode_choices = [
        "Evaluate All Subjects",
        "Choose Number of Subjects",
        "Specify which Subjects to Evaluate",
    ]

    with gr.Column(visible=False) as config_column:
        gr.Markdown("## (B) Select Dataset Configuration Options")
        with gr.Row():
            # Left half: which subjects to evaluate.
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["config-box"]):
                    gr.Markdown("### Choose Subjects")
                    mode_radio = gr.Radio(
                        label="Subject Selection Mode",
                        choices=mode_choices,
                        value="Evaluate All Subjects",
                    )
                    # Slider for "number of subjects" mode; hidden at first.
                    with gr.Column(visible=False) as subject_count_column:
                        subject_count_slider = gr.Slider(
                            label="Number of Subjects",
                            minimum=1,
                            # Initial upper bound; meant to be replaced once
                            # preview data is available.
                            maximum=14,
                            step=1,
                            value=1,
                            info="Number of subjects to evaluate. They will be loaded in alphabetical order.",
                        )
                    # Checkboxes for "specific subjects" mode; hidden at first.
                    with gr.Column(visible=False) as subject_picker_column:
                        subject_checkboxes = gr.CheckboxGroup(
                            label="Select Specific Subjects",
                            # Starts empty; meant to be filled from preview data.
                            choices=[],
                            info="Select which specific subjects to evaluate",
                        )
            # Right half: how many questions to evaluate.
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["config-box"]):
                    gr.Markdown("### Sample Configuration")
                    all_questions_box = gr.Checkbox(
                        value=False,
                        label="Evaluate All Questions",
                        info="When checked, evaluates all available questions for each subject",
                    )
                    all_questions_note = gr.Markdown(
                        value="**All questions across the selected subjects will be evaluated**",
                        visible=False,
                    )
                    question_count_slider = gr.Slider(
                        label="Questions per Subject",
                        minimum=1,
                        maximum=100,
                        step=1,
                        value=20,
                        interactive=True,
                        info="Choose a subset of questions (1-100) per subject. They will be loaded in order of question_id.",
                    )

    return dict(
        container=config_column,
        subject_selection_mode=mode_radio,
        num_subjects_container=subject_count_column,
        num_subjects_slider=subject_count_slider,
        specific_subjects_container=subject_picker_column,
        specific_subjects=subject_checkboxes,
        all_questions_checkbox=all_questions_box,
        questions_info_text=all_questions_note,
        num_questions_slider=question_count_slider,
    )
+# Utility functions for dataset section
def get_subject_choices_from_preview(preview_data):
    """
    Build the list of subject labels shown to the user from preview data.

    Args:
        preview_data (dict): Preview data; its 'subject_counts' entry maps
            each subject to a count.

    Returns:
        tuple: (labels, subject_count) where labels are strings of the form
            "Subject (n=count)" in sorted subject order. Returns ([], 0) when
            preview_data is falsy or has no 'subject_counts' entry.
    """
    has_counts = bool(preview_data) and 'subject_counts' in preview_data
    if not has_counts:
        return [], 0

    counts_by_subject = preview_data['subject_counts']

    labels = []
    for name in sorted(counts_by_subject.keys()):
        labels.append(f"{name} (n={counts_by_subject[name]})")

    # One label per subject, so the label count is the subject count.
    return labels, len(labels)
def load_dataset_preview(dataset):
    """
    Load preview data for the selected dataset and derive UI updates from it.

    Args:
        dataset (str): Selected dataset name.

    Returns:
        tuple: (preview_data, specific_subjects_update, num_subjects_slider_update).
            For "MMLU-Pro" the preview is regenerated, the checkbox choices are
            set to the formatted subject labels and the slider is reset to 1
            with its maximum set to the number of subjects. For any other
            value, (None, no-op update, no-op update) is returned.
    """
    if dataset != "MMLU-Pro":
        return None, gr.update(), gr.update()

    # Load the preview data
    preview_data = mmlupro_dataset_preview(regenerate_preview=True)

    # Extract subject choices and count
    subject_choices, subject_count = get_subject_choices_from_preview(preview_data)

    # The slider is reset to value=1, so its maximum must never drop below 1.
    # An empty or missing subject list would otherwise produce maximum=0.
    slider_maximum = max(subject_count, 1)

    return (
        preview_data,  # Store the preview data
        gr.update(choices=subject_choices),  # Update checkbox choices
        gr.update(maximum=slider_maximum, value=1),  # Update slider max
    )
def update_interface_based_on_dataset(dataset, current_visibility,
                                     mmlu_config_container, model_config_container,
                                     results_container, preview_toggle,
                                     dataset_preview_container):
    """
    Compute the UI updates that follow a change of the selected dataset.

    Only `dataset` is read here; the remaining parameters are accepted but
    not used by this function.

    Args:
        dataset (str): Selected dataset name.
        current_visibility (bool): Current preview visibility state.
        mmlu_config_container: MMLU config container component.
        model_config_container: Model config container component.
        results_container: Results container component.
        preview_toggle: Preview toggle button.
        dataset_preview_container: Dataset preview container.

    Returns:
        tuple: Seven items, in order: updates for the MMLU config container,
            the model config container, the results container, the preview
            toggle (interactivity), the dataset preview container, then the
            new preview visibility flag (always False) and an update that
            resets the toggle button text.
    """
    # The sections are shown and the toggle enabled only for "MMLU-Pro".
    dataset_selected = dataset == "MMLU-Pro"

    return (
        gr.update(visible=dataset_selected),      # mmlu_config_container
        gr.update(visible=dataset_selected),      # model_config_container
        gr.update(visible=dataset_selected),      # results_container
        gr.update(interactive=dataset_selected),  # preview_toggle
        gr.update(visible=False),                 # dataset_preview_container is always hidden
        False,                                    # preview visibility is reset
        gr.update(value="Show Dataset Preview"),  # button text is reset
    )
def toggle_preview(dataset, preview_visibility, preview_data):
    """
    Flip the dataset preview between shown and hidden.

    Args:
        dataset (str): Selected dataset name.
        preview_visibility (bool): Current preview visibility state.
        preview_data (dict): Preview data.

    Returns:
        tuple: (new_visibility, preview_container_update, preview_output_update,
            button_text_update). The preview output is the formatted preview
            only when the preview becomes visible for "MMLU-Pro"; otherwise
            it is None.
    """
    now_visible = not preview_visibility

    if now_visible:
        label = "Hide Dataset Preview"
    else:
        label = "Show Dataset Preview"

    # Only MMLU-Pro has a preview to format; every other case yields None.
    table = None
    if now_visible and dataset == "MMLU-Pro":
        table = format_preview_for_display(preview_data)

    return now_visible, gr.update(visible=now_visible), table, gr.update(value=label)
def update_subject_selection_ui(mode, num_subjects_container, specific_subjects_container):
    """
    Decide which subject sub-panel is visible for the chosen selection mode.

    Only `mode` is read here; the two container parameters are accepted but
    not used by this function.

    Args:
        mode (str): Selected subject selection mode.
        num_subjects_container: Container for number of subjects slider.
        specific_subjects_container: Container for specific subjects checkboxes.

    Returns:
        tuple: (num_subjects_container_update, specific_subjects_container_update)
    """
    evaluate_all = mode == "Evaluate All Subjects"
    by_number = (not evaluate_all) and mode == "Choose Number of Subjects"
    # Any other mode is treated as "Specify which Subjects to Evaluate".
    by_name = not (evaluate_all or by_number)

    return gr.update(visible=by_number), gr.update(visible=by_name)
def update_questions_interface(checked, num_questions_slider, questions_info_text):
    """
    Swap between the questions slider and the "all questions" info text.

    Only `checked` is read here; the two component parameters are accepted
    but not used by this function.

    Args:
        checked (bool): Whether "Evaluate All Questions" is checked.
        num_questions_slider: Questions per subject slider component.
        questions_info_text: Questions info text component.

    Returns:
        tuple: (num_questions_slider_update, questions_info_text_update)
    """
    evaluate_all = bool(checked)
    # The slider and the info text are mutually exclusive.
    return gr.update(visible=not evaluate_all), gr.update(visible=evaluate_all)
def get_subject_mode_param(mode):
    """
    Translate a subject selection mode label into its parameter string.

    Args:
        mode (str): Subject selection mode.

    Returns:
        str: "all" for "Evaluate All Subjects", "number" for
            "Choose Number of Subjects", and "specific" for anything else.
    """
    known_modes = (
        ("Evaluate All Subjects", "all"),
        ("Choose Number of Subjects", "number"),
    )
    for label, param in known_modes:
        if mode == label:
            return param
    # "Specify which Subjects to Evaluate" and any unrecognised label.
    return "specific"
def get_subject_names(selected_subjects):
    """
    Strip the count suffix from checkbox labels to recover subject names.

    Labels are expected in the form "Subject (n=count)". Only the last
    " (n=" marker is treated as the start of the suffix, so a subject name
    that itself contains parentheses is kept intact. A label without the
    marker is returned unchanged.

    Args:
        selected_subjects (list): Selected subjects with counts. None is
            treated as an empty selection.

    Returns:
        list: Clean subject names without count information.
    """
    count_marker = " (n="
    names = []
    for label in selected_subjects or []:
        # rpartition splits on the LAST marker, leaving earlier " (" alone.
        name, marker, _count = label.rpartition(count_marker)
        names.append(name if marker else label)
    return names