Upload folder using huggingface_hub
Changed files:
- __pycache__/content.cpython-310.pyc +0 -0
- __pycache__/scorer.cpython-310.pyc +0 -0
- app.py +22 -37
- scorer.py +2 -2
__pycache__/content.cpython-310.pyc ADDED
Binary file (4.97 kB).

__pycache__/scorer.cpython-310.pyc ADDED
Binary file (2.11 kB).
app.py CHANGED

@@ -31,35 +31,27 @@ YEAR_VERSION = "2024"
 
 os.makedirs("scored", exist_ok=True)
 
-
-snapshot_download(
-    repo_id="autogenCTF/CTFAIA",
-    repo_type="dataset",
-    local_dir='./CTFAIA',
-    local_dir_use_symlinks=True,
-    token=TOKEN
-)
-
-def print_files_and_sizes(directory):
-    for root, dirs, files in os.walk(directory):
-        for file in files:
-            file_path = os.path.join(root, file)
-            file_size = os.path.getsize(file_path)
-            print(f"File: {file_path} Size: {file_size} bytes")
-
-def get_all_folders(directory):
-    folders = []
-    for item in os.listdir(directory):
-        item_path = os.path.join(directory, item)
-        if os.path.isdir(item_path):
-            folders.append(str(item))
-    return folders
-
+all_version = ['2024', '20240423']
 
-
+contact_infos = load_dataset(
+    CONTACT_DATASET,
+    token=TOKEN,
+    download_mode="force_redownload",
+    ignore_verifications=True
+)
 
+all_gold_dataset = {}
+all_gold_results = {}
 eval_results = {}
 for dataset_version in all_version:
+    all_gold_dataset[dataset_version] = load_dataset(
+        INTERNAL_DATA_DATASET,
+        dataset_version
+    )
+    all_gold_results[dataset_version] = {
+        split: {row["task_name"]: row for row in all_gold_dataset[dataset_version][split]}
+        for split in ["test", "validation"]
+    }
     eval_results[dataset_version] = load_dataset(
         RESULTS_DATASET, dataset_version,
         token=TOKEN,

@@ -67,9 +59,6 @@ for dataset_version in all_version:
         ignore_verifications=True
     )
 
-contact_infos = load_dataset(CONTACT_DATASET, token=TOKEN, download_mode="force_redownload",
-                             ignore_verifications=True)
-
 
 def get_dataframe_from_results(eval_results, split):
     local_df = eval_results[split]

@@ -124,8 +113,9 @@ def add_new_eval(
     print("Adding new eval")
 
     # Check if the combination model/org already exists and prints a warning message if yes
-    if model.lower() in set(
-            [
+    if model.lower() in set(
+            [m.lower() for m in eval_results[dataset_version][val_or_test]["model"]]) and organisation.lower() in set(
+            [o.lower() for o in eval_results[dataset_version][val_or_test]["organisation"]]):
         return format_warning("This model has been already submitted.")
 
     if path_to_file is None:

@@ -141,10 +131,7 @@ def add_new_eval(
     )
 
     # Gold answers
-    gold_results =
-    print_files_and_sizes('./CTFAIA/' + dataset_version)
-    gold_dataset = load_dataset('./CTFAIA/' + dataset_version)
-    gold_results = {split: {row["task_name"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
+    gold_results = all_gold_results[dataset_version]
 
     # Compute score
     file_path = path_to_file.name

@@ -159,7 +146,6 @@ def add_new_eval(
         except Exception:
             return format_error(f"Line {ix} is incorrectly formatted. Please fix it and resubmit your file.")
 
-        print(task)
         if "final_answer" not in task:
             raise format_error(f"Line {ix} contains no final_answer key. Please fix it and resubmit your file.")
         answer = task["final_answer"]

@@ -186,7 +172,7 @@ def add_new_eval(
         num_questions["all"] += 1
         num_questions[level] += 1
     for task_name, task in gold_results[val_or_test].items():
-        level = task['Level']
+        level = int(task['Level'])
         total_scores["all"] += 10
         total_scores[level] += 10
 

@@ -212,7 +198,6 @@ def add_new_eval(
         "score_level3": scores[3] / total_scores[3] if total_scores[3] else 0,
     }
     eval_results[dataset_version][val_or_test] = eval_results[dataset_version][val_or_test].add_item(eval_entry)
-    print(eval_results)
     eval_results[dataset_version].push_to_hub(RESULTS_DATASET, config_name=dataset_version, token=TOKEN)
 
     contact_info = {
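For readers skimming the app.py change: the snapshot_download and directory-walking helpers are replaced by loading each gold dataset once at startup and indexing it by split and task_name, so add_new_eval only does a dictionary lookup. A minimal sketch of that lookup shape, using hypothetical stand-in rows rather than the real INTERNAL_DATA_DATASET contents:

# Sketch only: stand-in rows with the fields the diff relies on ("task_name", "Level").
gold_dataset = {
    "validation": [{"task_name": "web_01", "Level": "1", "Final answer": "flag{a}"}],
    "test": [{"task_name": "pwn_07", "Level": "3", "Final answer": "flag{b}"}],
}

# Same comprehension as in the new startup block: index each split by task_name.
gold_results = {
    split: {row["task_name"]: row for row in gold_dataset[split]}
    for split in ["test", "validation"]
}

# add_new_eval can then resolve a submitted task directly and cast its level,
# mirroring the int(task['Level']) change later in the diff.
task = gold_results["test"]["pwn_07"]
level = int(task["Level"])  # -> 3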
scorer.py CHANGED

@@ -41,8 +41,8 @@ def question_scorer(
     if user_task["final_answer"] == val["Final answer"]:
         score = val["Total score"]
     else:
-        for i, item in enumerate(val["score"]):
-            if user_task["score_answer"][i] in
+        for i, item in enumerate(val["score"]["question"]):
+            if user_task["score_answer"][i] in val["score"]["answer"][i] and val["score"]["score"][i] > score:
                 score = item["score"]
     return score
 
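One thing to flag in the new scorer loop: item now ranges over val["score"]["question"], yet the body still assigns score = item["score"], which only works if the question entries are themselves dicts; if val["score"] really holds parallel lists, the assignment presumably wants val["score"]["score"][i]. A hedged sketch of the loop under that parallel-list assumption (field names are taken from the diff; the data layout itself is inferred, not confirmed):

# Sketch, not the repository's code: assumes val["score"] holds parallel lists
# under "question", "answer" and "score", aligned with user_task["score_answer"].
def partial_score(user_task: dict, val: dict) -> float:
    if user_task["final_answer"] == val["Final answer"]:
        return val["Total score"]
    score = 0
    for user_answer, accepted, points in zip(
        user_task["score_answer"], val["score"]["answer"], val["score"]["score"]
    ):
        # Keep the highest partial credit whose accepted answer contains the user's answer.
        if user_answer in accepted and points > score:
            score = points
    return score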