Spaces:
Sleeping
Sleeping
Update dataset_previews.py
Browse files - dataset_previews.py (+2 -3)
dataset_previews.py
CHANGED
|
@@ -18,22 +18,21 @@ def calculate_dataset_statistics():
|
|
| 18 |
test_df, val_df = load_mmlu_pro()
|
| 19 |
|
| 20 |
test_df = test_df.sort_values(['category', 'question_id'])
|
| 21 |
-
print ("QUESTIONS TOT", len(test_df))
|
| 22 |
|
| 23 |
all_subjects = sorted(test_df['category'].unique())
|
| 24 |
|
| 25 |
# Calculate total questions and questions per subject
|
| 26 |
-
total_questions = 0
|
| 27 |
subject_counts = {}
|
| 28 |
|
| 29 |
# Count options per question
|
| 30 |
options_counts = []
|
| 31 |
|
| 32 |
for subject in all_subjects:
|
|
|
|
| 33 |
test_samples = test_df[test_df['category'] == subject]
|
| 34 |
num_questions = len(test_samples)
|
| 35 |
subject_counts[subject] = num_questions
|
| 36 |
-
total_questions += num_questions
|
| 37 |
print("Test_samples", test_samples[0], "\t Num Questions:", num_questions)
|
| 38 |
|
| 39 |
# Count options for each question
|
|
|
|
| 18 |
test_df, val_df = load_mmlu_pro()
|
| 19 |
|
| 20 |
test_df = test_df.sort_values(['category', 'question_id'])
|
|
|
|
| 21 |
|
| 22 |
all_subjects = sorted(test_df['category'].unique())
|
| 23 |
|
| 24 |
# Calculate total questions and questions per subject
|
| 25 |
+
total_questions = len(test_df)
|
| 26 |
subject_counts = {}
|
| 27 |
|
| 28 |
# Count options per question
|
| 29 |
options_counts = []
|
| 30 |
|
| 31 |
for subject in all_subjects:
|
| 32 |
+
print("Subject", subject)
|
| 33 |
test_samples = test_df[test_df['category'] == subject]
|
| 34 |
num_questions = len(test_samples)
|
| 35 |
subject_counts[subject] = num_questions
|
|
|
|
| 36 |
print("Test_samples", test_samples[0], "\t Num Questions:", num_questions)
|
| 37 |
|
| 38 |
# Count options for each question
|