rohansampath committed on
Commit
d093a4d
·
verified ·
1 Parent(s): 24af1c0

Create configs/dataset_config.py

Browse files
Files changed (1) hide show
  1. configs/dataset_config.py +310 -0
configs/dataset_config.py ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from dataset_previews import mmlupro_dataset_preview, format_preview_for_display
4
+
5
def create_dataset_section():
    """
    Build Section A of the UI: the dataset picker and its preview area.

    Components are created in this order: section header, a row holding the
    dataset dropdown and the (initially disabled) preview button, a hidden
    column holding the preview heading, table and divider, and finally a
    state object initialised to None.

    NOTE(review): the nesting of the ``with`` blocks was reconstructed from a
    view of the file that had lost its indentation -- confirm that the button
    belongs in the row and the divider in the preview column.

    Returns:
        dict: Created components keyed by 'header', 'dropdown',
        'preview_toggle', 'preview_container', 'preview_output' and
        'preview_data_state'.
    """
    placeholder_choice = "(Select Dataset)"

    section_title = gr.Markdown("## (A) Select Dataset for Evaluation")

    with gr.Row():
        dataset_picker = gr.Dropdown(
            choices=[placeholder_choice, "MMLU-Pro"],
            value=placeholder_choice,
            label="Dataset",
            info="Select a dataset to perform the Head-to-Head Evaluation on. Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)",
        )
        toggle_button = gr.Button(
            "Show Dataset Preview",
            interactive=False,
            variant="secondary",
        )

    # Preview area starts hidden.
    with gr.Column(visible=False) as preview_column:
        gr.Markdown("## Dataset Preview", elem_id="preview_header")
        preview_table = gr.DataFrame(
            interactive=False,
            wrap=True,
            elem_id="preview_table",
        )
        # Visual divider below the table.
        gr.Markdown("<div class='section-divider'></div>")

    # Holds the loaded preview data; starts as None.
    cached_preview = gr.State(None)

    return dict(
        header=section_title,
        dropdown=dataset_picker,
        preview_toggle=toggle_button,
        preview_container=preview_column,
        preview_output=preview_table,
        preview_data_state=cached_preview,
    )
47
+
48
def create_mmlu_config_section():
    """
    Build Section B of the UI: the dataset configuration options.

    Everything is created inside a column that starts hidden. It contains a
    heading and a row with two equally scaled columns: subject selection
    (mode radio, a hidden subject-count slider, a hidden subject checkbox
    group) and sample configuration (an "all questions" checkbox, a hidden
    info text, and a questions-per-subject slider).

    NOTE(review): the nesting of the ``with`` blocks was reconstructed from a
    view of the file that had lost its indentation -- confirm against the
    running layout.

    Returns:
        dict: Created components keyed by 'container',
        'subject_selection_mode', 'num_subjects_container',
        'num_subjects_slider', 'specific_subjects_container',
        'specific_subjects', 'all_questions_checkbox',
        'questions_info_text' and 'num_questions_slider'.
    """
    selection_modes = [
        "Evaluate All Subjects",
        "Choose Number of Subjects",
        "Specify which Subjects to Evaluate",
    ]

    with gr.Column(visible=False) as config_column:
        gr.Markdown("## (B) Select Dataset Configuration Options")

        with gr.Row():
            # Left side: which subjects to evaluate.
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["config-box"]):
                    gr.Markdown("### Choose Subjects")

                    mode_radio = gr.Radio(
                        choices=selection_modes,
                        value=selection_modes[0],
                        label="Subject Selection Mode",
                    )

                    # Subject-count slider, hidden at creation.
                    with gr.Column(visible=False) as count_column:
                        count_slider = gr.Slider(
                            minimum=1,
                            maximum=14,  # placeholder; meant to be updated from preview data
                            value=1,
                            step=1,
                            label="Number of Subjects",
                            info="Number of subjects to evaluate. They will be loaded in alphabetical order.",
                        )

                    # Subject checkboxes, hidden at creation.
                    with gr.Column(visible=False) as picks_column:
                        subject_checkboxes = gr.CheckboxGroup(
                            choices=[],  # starts empty; meant to be filled from preview data
                            label="Select Specific Subjects",
                            info="Select which specific subjects to evaluate",
                        )

            # Right side: how many questions to evaluate.
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["config-box"]):
                    gr.Markdown("### Sample Configuration")

                    every_question_box = gr.Checkbox(
                        label="Evaluate All Questions",
                        value=False,
                        info="When checked, evaluates all available questions for each subject",
                    )

                    every_question_note = gr.Markdown(
                        visible=False,
                        value="**All questions across the selected subjects will be evaluated**",
                    )

                    per_subject_slider = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=20,
                        step=1,
                        label="Questions per Subject",
                        info="Choose a subset of questions (1-100) per subject. They will be loaded in order of question_id.",
                        interactive=True,
                    )

    return dict(
        container=config_column,
        subject_selection_mode=mode_radio,
        num_subjects_container=count_column,
        num_subjects_slider=count_slider,
        specific_subjects_container=picks_column,
        specific_subjects=subject_checkboxes,
        all_questions_checkbox=every_question_box,
        questions_info_text=every_question_note,
        num_questions_slider=per_subject_slider,
    )
126
+
127
+ # Utility functions for dataset section
128
def get_subject_choices_from_preview(preview_data):
    """
    Build display labels for the subjects listed in the preview data.

    Args:
        preview_data (dict): Preview data; its 'subject_counts' entry maps
            each subject name to a count.

    Returns:
        tuple: (labels, subject_count), where labels are
        "<subject> (n=<count>)" strings in sorted subject order. Returns
        ([], 0) when preview_data is falsy or has no 'subject_counts' key.
    """
    has_counts = bool(preview_data) and 'subject_counts' in preview_data
    if not has_counts:
        return [], 0

    counts = preview_data['subject_counts']

    labels = []
    for name in sorted(counts.keys()):
        labels.append("{} (n={})".format(name, counts[name]))

    return labels, len(labels)
151
+
152
def load_dataset_preview(dataset):
    """
    Load preview data for the selected dataset and derive UI updates from it.

    Args:
        dataset (str): Selected dataset name. Only "MMLU-Pro" triggers a load.

    Returns:
        tuple: (preview_data, specific_subjects_update,
        num_subjects_slider_update). For any dataset other than "MMLU-Pro"
        this is (None, gr.update(), gr.update()), i.e. no component changes.
    """
    if dataset != "MMLU-Pro":
        return None, gr.update(), gr.update()

    # The preview is regenerated on every call rather than reused.
    preview_data = mmlupro_dataset_preview(regenerate_preview=True)

    subject_choices, subject_count = get_subject_choices_from_preview(preview_data)

    # The subject slider is created with minimum=1, so never push its maximum
    # below that when the preview yields no subjects.
    slider_maximum = max(subject_count, 1)

    return (
        preview_data,                                # stored in the preview state
        gr.update(choices=subject_choices),          # checkbox choices
        gr.update(maximum=slider_maximum, value=1),  # slider range, reset to 1
    )
176
+
177
def update_interface_based_on_dataset(dataset, current_visibility,
                                      mmlu_config_container, model_config_container,
                                      results_container, preview_toggle,
                                      dataset_preview_container):
    """
    Produce the UI updates that follow a change of the dataset dropdown.

    Only ``dataset`` influences the result; the remaining parameters are
    accepted but not read by this function.

    Args:
        dataset (str): Selected dataset name.
        current_visibility (bool): Current preview visibility state.
        mmlu_config_container: MMLU config container component.
        model_config_container: Model config container component.
        results_container: Results container component.
        preview_toggle: Preview toggle button.
        dataset_preview_container: Dataset preview container.

    Returns:
        tuple: Seven values, in order: updates for the MMLU config, model
        config and results containers (visible only for "MMLU-Pro"), the
        preview button (interactive only for "MMLU-Pro"), the preview
        container (always hidden), the preview visibility flag (always
        False), and the button text (always "Show Dataset Preview").
    """
    dataset_chosen = dataset == "MMLU-Pro"

    config_update = gr.update(visible=dataset_chosen)
    model_update = gr.update(visible=dataset_chosen)
    results_update = gr.update(visible=dataset_chosen)
    toggle_update = gr.update(interactive=dataset_chosen)

    # The preview is collapsed on every dataset change, whatever was chosen.
    preview_update = gr.update(visible=False)
    button_text_update = gr.update(value="Show Dataset Preview")

    return (
        config_update,
        model_update,
        results_update,
        toggle_update,
        preview_update,
        False,
        button_text_update,
    )
216
+
217
def toggle_preview(dataset, preview_visibility, preview_data):
    """
    Flip the dataset preview between shown and hidden.

    Args:
        dataset (str): Selected dataset name.
        preview_visibility (bool): Visibility state before the toggle.
        preview_data (dict): Preview data passed to the formatter.

    Returns:
        tuple: (new_visibility, preview_container_update,
        preview_output_update, button_text_update). The preview output is the
        formatted preview only when the preview becomes visible and the
        dataset is "MMLU-Pro"; otherwise it is None.
    """
    now_visible = not preview_visibility

    if now_visible:
        button_label = "Hide Dataset Preview"
    else:
        button_label = "Show Dataset Preview"

    # Only MMLU-Pro has a formatter; anything else shows an empty table.
    table_value = None
    if now_visible and dataset == "MMLU-Pro":
        table_value = format_preview_for_display(preview_data)

    return (
        now_visible,
        gr.update(visible=now_visible),
        table_value,
        gr.update(value=button_label),
    )
245
+
246
def update_subject_selection_ui(mode, num_subjects_container, specific_subjects_container):
    """
    Show the control that matches the chosen subject selection mode.

    Only ``mode`` influences the result; the two container parameters are
    accepted but not read by this function.

    Args:
        mode (str): Selected subject selection mode.
        num_subjects_container: Container for number of subjects slider.
        specific_subjects_container: Container for specific subjects checkboxes.

    Returns:
        tuple: (num_subjects_container_update,
        specific_subjects_container_update). "Evaluate All Subjects" hides
        both, "Choose Number of Subjects" shows only the slider container,
        and any other mode shows only the checkbox container.
    """
    evaluate_all = mode == "Evaluate All Subjects"
    by_count = (not evaluate_all) and mode == "Choose Number of Subjects"
    by_name = not (evaluate_all or by_count)

    return gr.update(visible=by_count), gr.update(visible=by_name)
264
+
265
def update_questions_interface(checked, num_questions_slider, questions_info_text):
    """
    Swap between the questions slider and the "all questions" info text.

    Only ``checked`` influences the result; the two component parameters are
    accepted but not read by this function.

    Args:
        checked (bool): Whether "Evaluate All Questions" is checked.
        num_questions_slider: Questions per subject slider component.
        questions_info_text: Questions info text component.

    Returns:
        tuple: (num_questions_slider_update, questions_info_text_update).
        When checked, the slider is hidden and the info text shown; otherwise
        the reverse.
    """
    evaluate_everything = bool(checked)

    slider_update = gr.update(visible=not evaluate_everything)
    info_update = gr.update(visible=evaluate_everything)
    return slider_update, info_update
281
+
282
def get_subject_mode_param(mode):
    """
    Translate a subject selection mode label into its parameter string.

    Args:
        mode (str): Subject selection mode.

    Returns:
        str: "all" for "Evaluate All Subjects", "number" for
        "Choose Number of Subjects", and "specific" for anything else.
    """
    known_modes = (
        ("Evaluate All Subjects", "all"),
        ("Choose Number of Subjects", "number"),
    )
    for label, param in known_modes:
        if mode == label:
            return param

    # Every other label is treated as "Specify which Subjects to Evaluate".
    return "specific"
298
+
299
def get_subject_names(selected_subjects):
    """
    Strip the trailing count from checkbox labels to recover subject names.

    Labels have the form "<subject> (n=<count>)". Only the last " (n=" suffix
    is removed, so a subject name that itself contains parentheses is kept
    whole. Labels without that suffix are returned unchanged.

    Args:
        selected_subjects (list): Selected subjects with counts. None or an
            empty list yields an empty result.

    Returns:
        list: Clean subject names without count information.
    """
    if not selected_subjects:
        return []

    # rsplit with maxsplit=1 cuts only at the final " (n=" marker.
    return [label.rsplit(" (n=", 1)[0] for label in selected_subjects]