Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -136,9 +136,9 @@ with gr.Blocks(css="""
|
|
| 136 |
h1 {
|
| 137 |
text-align: center;
|
| 138 |
}
|
| 139 |
-
.section-
|
| 140 |
-
|
| 141 |
-
margin
|
| 142 |
}
|
| 143 |
.config-box {
|
| 144 |
border: 1px solid #ddd;
|
|
@@ -158,7 +158,7 @@ with gr.Blocks(css="""
|
|
| 158 |
""")
|
| 159 |
|
| 160 |
# Dataset Selection Section
|
| 161 |
-
gr.Markdown("## (A) Select Dataset for Evaluation"
|
| 162 |
|
| 163 |
with gr.Row():
|
| 164 |
dataset_dropdown = gr.Dropdown(
|
|
@@ -177,20 +177,15 @@ with gr.Blocks(css="""
|
|
| 177 |
wrap=True,
|
| 178 |
elem_id="preview_table"
|
| 179 |
)
|
| 180 |
-
# Add
|
| 181 |
-
gr.Markdown("
|
| 182 |
-
gr.Markdown(" ")
|
| 183 |
|
| 184 |
-
#
|
| 185 |
-
gr.
|
| 186 |
-
gr.Markdown(" ", elem_classes=["section-spacing"])
|
| 187 |
|
| 188 |
# MMLU Config Container - Initially hidden until dataset is selected
|
| 189 |
with gr.Column(visible=False) as mmlu_config_container:
|
| 190 |
-
gr.Markdown("## (B) Select Dataset Configuration Options"
|
| 191 |
-
|
| 192 |
-
# Add more spacing
|
| 193 |
-
gr.Markdown(" ")
|
| 194 |
|
| 195 |
with gr.Row():
|
| 196 |
# Left column for subject selection
|
|
@@ -208,27 +203,18 @@ with gr.Blocks(css="""
|
|
| 208 |
with gr.Column(visible=False) as num_subjects_container:
|
| 209 |
num_subjects_slider = gr.Slider(
|
| 210 |
minimum=1,
|
| 211 |
-
maximum=14,
|
| 212 |
value=14,
|
| 213 |
step=1,
|
| 214 |
label="Number of Subjects",
|
| 215 |
-
info="Number of subjects to evaluate
|
| 216 |
)
|
| 217 |
|
| 218 |
# Subject checkboxes - initially hidden, shown when "Specify which Subjects to Evaluate" is selected
|
| 219 |
with gr.Column(visible=False) as specific_subjects_container:
|
| 220 |
-
#
|
| 221 |
-
# The actual subjects will come from the dataset preview
|
| 222 |
specific_subjects = gr.CheckboxGroup(
|
| 223 |
-
choices=[
|
| 224 |
-
"Biology (n=717)",
|
| 225 |
-
"Chemistry (n=500)",
|
| 226 |
-
"Physics (n=650)",
|
| 227 |
-
"Mathematics (n=800)",
|
| 228 |
-
"Computer Science (n=450)",
|
| 229 |
-
"History (n=300)",
|
| 230 |
-
"Literature (n=250)"
|
| 231 |
-
],
|
| 232 |
label="Select Specific Subjects",
|
| 233 |
info="Select which specific subjects to evaluate"
|
| 234 |
)
|
|
@@ -247,7 +233,7 @@ with gr.Blocks(css="""
|
|
| 247 |
info="Number of examples to use for few-shot learning (0-5)."
|
| 248 |
)
|
| 249 |
|
| 250 |
-
# Add
|
| 251 |
gr.Markdown(" ")
|
| 252 |
|
| 253 |
with gr.Row():
|
|
@@ -296,6 +282,39 @@ with gr.Blocks(css="""
|
|
| 296 |
# Track preview visibility state
|
| 297 |
preview_visibility = gr.State(False)
|
| 298 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
# Function to show/hide configuration based on selected dataset
|
| 300 |
def update_interface_based_on_dataset(dataset, current_visibility):
|
| 301 |
if dataset == "MMLU-Pro":
|
|
@@ -317,24 +336,27 @@ with gr.Blocks(css="""
|
|
| 317 |
gr.update(value="Show Dataset Preview") # Reset button text
|
| 318 |
)
|
| 319 |
|
| 320 |
-
# Connect dataset dropdown to show/hide appropriate configuration
|
| 321 |
dataset_dropdown.change(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
fn=update_interface_based_on_dataset,
|
| 323 |
inputs=[dataset_dropdown, preview_visibility],
|
| 324 |
outputs=[mmlu_config_container, results_container, preview_toggle, dataset_preview_container, preview_visibility, preview_toggle]
|
| 325 |
)
|
| 326 |
|
| 327 |
# Function to toggle dataset preview visibility
|
| 328 |
-
def toggle_preview(dataset, preview_visibility):
|
| 329 |
# Toggle the visibility state
|
| 330 |
is_visible = not preview_visibility
|
| 331 |
|
| 332 |
# Update button text based on new state
|
| 333 |
button_text = "Hide Dataset Preview" if is_visible else "Show Dataset Preview"
|
| 334 |
|
| 335 |
-
#
|
| 336 |
if is_visible and dataset == "MMLU-Pro":
|
| 337 |
-
preview_data = mmlupro_dataset_preview(regenerate_preview=False) # Change regenerate_preview=True if you want to regenerate the preview.
|
| 338 |
formatted_preview = format_preview_for_display(preview_data)
|
| 339 |
return is_visible, gr.update(visible=True), formatted_preview, gr.update(value=button_text)
|
| 340 |
elif is_visible:
|
|
@@ -347,7 +369,7 @@ with gr.Blocks(css="""
|
|
| 347 |
# Connect preview toggle to show/hide dataset information
|
| 348 |
preview_toggle.click(
|
| 349 |
fn=toggle_preview,
|
| 350 |
-
inputs=[dataset_dropdown, preview_visibility],
|
| 351 |
outputs=[preview_visibility, dataset_preview_container, preview_output, preview_toggle]
|
| 352 |
)
|
| 353 |
|
|
|
|
| 136 |
h1 {
|
| 137 |
text-align: center;
|
| 138 |
}
|
| 139 |
+
.section-divider {
|
| 140 |
+
border-top: 1px solid #ddd;
|
| 141 |
+
margin: 12px 0;
|
| 142 |
}
|
| 143 |
.config-box {
|
| 144 |
border: 1px solid #ddd;
|
|
|
|
| 158 |
""")
|
| 159 |
|
| 160 |
# Dataset Selection Section
|
| 161 |
+
gr.Markdown("## (A) Select Dataset for Evaluation")
|
| 162 |
|
| 163 |
with gr.Row():
|
| 164 |
dataset_dropdown = gr.Dropdown(
|
|
|
|
| 177 |
wrap=True,
|
| 178 |
elem_id="preview_table"
|
| 179 |
)
|
| 180 |
+
# Add a divider instead of lots of space
|
| 181 |
+
gr.Markdown("<div class='section-divider'></div>")
|
|
|
|
| 182 |
|
| 183 |
+
# Preview data state to store the loaded preview data
|
| 184 |
+
preview_data_state = gr.State(None)
|
|
|
|
| 185 |
|
| 186 |
# MMLU Config Container - Initially hidden until dataset is selected
|
| 187 |
with gr.Column(visible=False) as mmlu_config_container:
|
| 188 |
+
gr.Markdown("## (B) Select Dataset Configuration Options")
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
with gr.Row():
|
| 191 |
# Left column for subject selection
|
|
|
|
| 203 |
with gr.Column(visible=False) as num_subjects_container:
|
| 204 |
num_subjects_slider = gr.Slider(
|
| 205 |
minimum=1,
|
| 206 |
+
maximum=14, # Will be updated dynamically based on preview data
|
| 207 |
value=14,
|
| 208 |
step=1,
|
| 209 |
label="Number of Subjects",
|
| 210 |
+
info="Number of subjects to evaluate. They will be loaded in alphabetical order."
|
| 211 |
)
|
| 212 |
|
| 213 |
# Subject checkboxes - initially hidden, shown when "Specify which Subjects to Evaluate" is selected
|
| 214 |
with gr.Column(visible=False) as specific_subjects_container:
|
| 215 |
+
# Will be populated dynamically from the preview data
|
|
|
|
| 216 |
specific_subjects = gr.CheckboxGroup(
|
| 217 |
+
choices=[], # Will be populated from preview data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
label="Select Specific Subjects",
|
| 219 |
info="Select which specific subjects to evaluate"
|
| 220 |
)
|
|
|
|
| 233 |
info="Number of examples to use for few-shot learning (0-5)."
|
| 234 |
)
|
| 235 |
|
| 236 |
+
# Add a small space
|
| 237 |
gr.Markdown(" ")
|
| 238 |
|
| 239 |
with gr.Row():
|
|
|
|
| 282 |
# Track preview visibility state
|
| 283 |
preview_visibility = gr.State(False)
|
| 284 |
|
| 285 |
+
# Function to process dataset preview data
|
| 286 |
+
def get_subject_choices_from_preview(preview_data):
    """Build checkbox labels and a subject count from dataset preview data.

    Args:
        preview_data: Mapping that holds a ``'subject_counts'`` entry, itself
            a mapping of subject name -> count. May be ``None`` or empty.

    Returns:
        A ``(labels, count)`` tuple. ``labels`` is a list of strings of the
        form ``"<subject> (n=<count>)"`` ordered alphabetically by subject,
        and ``count`` is the number of subjects. Returns ``([], 0)`` when the
        preview data is missing, has no ``'subject_counts'`` entry, or that
        entry is ``None``/empty.
    """
    if not preview_data:
        return [], 0

    # Look the entry up directly; a preview object that is not a mapping
    # (or lacks the key) simply yields "no subjects" instead of raising.
    try:
        subject_counts = preview_data['subject_counts']
    except (KeyError, TypeError, IndexError):
        return [], 0

    # A present-but-None (or empty) entry is treated like a missing one.
    if not subject_counts:
        return [], 0

    # Sort subjects alphabetically so labels have a stable order.
    subjects = sorted(subject_counts)

    # Format as "Subject (n=count)"
    formatted_subjects = [
        f"{subject} (n={subject_counts[subject]})" for subject in subjects
    ]

    return formatted_subjects, len(subjects)
|
| 300 |
+
|
| 301 |
+
# Function to load preview data and update UI
|
| 302 |
+
def load_dataset_preview(dataset):
    """Load preview data for the selected dataset and refresh dependent widgets.

    Args:
        dataset: Name of the dataset chosen in the dropdown.

    Returns:
        A 3-tuple matching the event outputs, in order:
        the preview data to store in state (``None`` for any dataset other
        than ``"MMLU-Pro"``), an update for the subject checkbox group, and
        an update for the number-of-subjects slider.
    """
    # Only MMLU-Pro has a preview loader; leave the widgets untouched otherwise.
    if dataset != "MMLU-Pro":
        return None, gr.update(), gr.update()

    # Load the preview data (regenerate_preview=True would rebuild it instead).
    preview_data = mmlupro_dataset_preview(regenerate_preview=False)

    # Extract subject choices and count
    subject_choices, subject_count = get_subject_choices_from_preview(preview_data)

    if subject_count < 1:
        # The slider is declared with minimum=1; pushing maximum=0 would put
        # it in an invalid range, so keep its current configuration.
        slider_update = gr.update()
    else:
        # Cap the default value at 14, but never above the available subjects.
        slider_update = gr.update(
            maximum=subject_count,
            value=min(subject_count, 14),
        )

    return (
        preview_data,  # Store the preview data
        gr.update(choices=subject_choices),  # Update checkbox choices
        slider_update,  # Update slider max/value when subjects exist
    )
|
| 317 |
+
|
| 318 |
# Function to show/hide configuration based on selected dataset
|
| 319 |
def update_interface_based_on_dataset(dataset, current_visibility):
|
| 320 |
if dataset == "MMLU-Pro":
|
|
|
|
| 336 |
gr.update(value="Show Dataset Preview") # Reset button text
|
| 337 |
)
|
| 338 |
|
| 339 |
+
# Connect dataset dropdown to show/hide appropriate configuration and load preview data
|
| 340 |
dataset_dropdown.change(
|
| 341 |
+
fn=load_dataset_preview,
|
| 342 |
+
inputs=[dataset_dropdown],
|
| 343 |
+
outputs=[preview_data_state, specific_subjects, num_subjects_slider],
|
| 344 |
+
).then(
|
| 345 |
fn=update_interface_based_on_dataset,
|
| 346 |
inputs=[dataset_dropdown, preview_visibility],
|
| 347 |
outputs=[mmlu_config_container, results_container, preview_toggle, dataset_preview_container, preview_visibility, preview_toggle]
|
| 348 |
)
|
| 349 |
|
| 350 |
# Function to toggle dataset preview visibility
|
| 351 |
+
def toggle_preview(dataset, preview_visibility, preview_data):
|
| 352 |
# Toggle the visibility state
|
| 353 |
is_visible = not preview_visibility
|
| 354 |
|
| 355 |
# Update button text based on new state
|
| 356 |
button_text = "Hide Dataset Preview" if is_visible else "Show Dataset Preview"
|
| 357 |
|
| 358 |
+
# Format and show preview if becoming visible
|
| 359 |
if is_visible and dataset == "MMLU-Pro":
|
|
|
|
| 360 |
formatted_preview = format_preview_for_display(preview_data)
|
| 361 |
return is_visible, gr.update(visible=True), formatted_preview, gr.update(value=button_text)
|
| 362 |
elif is_visible:
|
|
|
|
| 369 |
# Connect preview toggle to show/hide dataset information
|
| 370 |
preview_toggle.click(
|
| 371 |
fn=toggle_preview,
|
| 372 |
+
inputs=[dataset_dropdown, preview_visibility, preview_data_state],
|
| 373 |
outputs=[preview_visibility, dataset_preview_container, preview_output, preview_toggle]
|
| 374 |
)
|
| 375 |
|