Spaces: Running on CPU Upgrade
Alina Lozovskaia committed · 1489ff1 · Parent(s): a03f0fa

debugging the codebase
Files changed:
- app.py +0 -1
- pyproject.toml +2 -2
- requirements.txt +2 -1
- src/leaderboard/filter_models.py +0 -2
- src/submission/check_validity.py +0 -1
- src/tools/plots.py +7 -2
app.py
CHANGED
@@ -141,7 +141,6 @@ def load_and_create_plots():
     plot_df = create_plot_df(create_scores_df(raw_data))
     return plot_df
 
-print(leaderboard_df.columns)
 
 demo = gr.Blocks(css=custom_css)
 with demo:
pyproject.toml
CHANGED
@@ -44,10 +44,10 @@ tqdm = "4.65.0"
 transformers = "4.40.0"
 tokenizers = ">=0.15.0"
 gradio-space-ci = {git = "https://huggingface.co/spaces/Wauplin/gradio-space-ci", rev = "0.2.3"}
-gradio = "4.
+gradio = " 4.20.0"
 isort = "^5.13.2"
 ruff = "^0.3.5"
-gradio-leaderboard = "
+gradio-leaderboard = "0.0.7"
 
 [build-system]
 requires = ["poetry-core"]
requirements.txt
CHANGED
@@ -14,4 +14,5 @@ tqdm==4.65.0
 transformers==4.40.0
 tokenizers>=0.15.0
 gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/[email protected] # CI !!!
-
+gradio==4.20.0
+gradio_leaderboard==0.0.7
src/leaderboard/filter_models.py
CHANGED
@@ -139,8 +139,6 @@ def flag_models(leaderboard_data: list[dict]):
         else:
             # Merges and moes are flagged
             flag_key = "merged"
-
-        print(f"model check: {flag_key}")
 
         # Reverse the logic: Check for non-flagged models instead
         if flag_key in FLAGGED_MODELS:
src/submission/check_validity.py
CHANGED
@@ -170,7 +170,6 @@ def get_model_tags(model_card, model: str):
     is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in ["moe", "mixtral"])
     # Hardcoding because of gating problem
     if "Qwen/Qwen1.5-32B" in model:
-        print("HERE NSHJNKJSNJLAS")
         is_moe_from_model_card = False
     is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
     if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
src/tools/plots.py
CHANGED
@@ -16,8 +16,11 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
     :param results_df: A DataFrame containing result information including metric scores and dates.
     :return: A new DataFrame containing the maximum scores until each date for every metric.
     """
+    print(raw_data[0])
+    print(raw_data[0].date)
     # Step 1: Ensure 'date' is in datetime format and sort the DataFrame by it
     results_df = pd.DataFrame(raw_data)
+    print(results_df.columns)
     # results_df["date"] = pd.to_datetime(results_df["date"], format="mixed", utc=True)
     results_df.sort_values(by="date", inplace=True)
 
@@ -34,7 +37,7 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
         # We ignore models that are flagged/no longer on the hub/not finished
         to_ignore = (
             not row["still_on_hub"]
-            or row["not_flagged"]
+            or not row["not_flagged"]
             or current_model in FLAGGED_MODELS
             or row["status"] != "FINISHED"
         )
@@ -68,7 +71,6 @@ def create_plot_df(scores_df: dict[str : pd.DataFrame]) -> pd.DataFrame:
     """
     # Initialize the list to store DataFrames
    dfs = []
-
     # Iterate over the cols and create a new DataFrame for each column
     for col in BENCHMARK_COLS + [AutoEvalColumn.average.name]:
         d = scores_df[col].reset_index(drop=True)
@@ -77,6 +79,9 @@ def create_plot_df(scores_df: dict[str : pd.DataFrame]) -> pd.DataFrame:
 
     # Concatenate all the created DataFrames
     concat_df = pd.concat(dfs, ignore_index=True)
+    # print("Columns in DataFrame:", concat_df.columns)
+    # if "date" not in concat_df.columns:
+    #     raise ValueError("Date column missing from DataFrame. Cannot proceed with sorting.")
 
     # Sort values by 'date'
     concat_df.sort_values(by="date", inplace=True)
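The substantive change in this commit is the to_ignore fix in create_scores_df: the old term `or row["not_flagged"]` excluded exactly the models that are not flagged, while the corrected `or not row["not_flagged"]` excludes the flagged ones, matching the comment above it. Below is a minimal sketch of the corrected filter, assuming each row carries the boolean still_on_hub / not_flagged fields and a status string as shown in the diff; should_ignore and the contents of FLAGGED_MODELS here are hypothetical illustrations, not code from the repository.

# Hypothetical stand-in; the real flagged-model list lives elsewhere in the repo.
FLAGGED_MODELS = {"org/flagged-model"}

def should_ignore(row: dict, current_model: str) -> bool:
    """Mirror of the corrected to_ignore expression from create_scores_df."""
    return (
        not row["still_on_hub"]          # model no longer available on the Hub
        or not row["not_flagged"]        # fixed: skip flagged models, keep clean ones
        or current_model in FLAGGED_MODELS
        or row["status"] != "FINISHED"   # evaluation not finished yet
    )

# Quick check: only the flagged row is ignored.
rows = [
    {"still_on_hub": True, "not_flagged": True, "status": "FINISHED"},   # clean model
    {"still_on_hub": True, "not_flagged": False, "status": "FINISHED"},  # flagged model
]
print([should_ignore(r, "org/clean-model") for r in rows])  # -> [False, True]

With the previous condition, every non-flagged model would have been dropped from the plot data, which is consistent with this being a "debugging the codebase" commit.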