Maria Castellanos
committed on
Commit
·
764fa75
1
Parent(s):
a095a04
Interim LB v1
Browse files
- app.py +6 -6
- final_lb.py +7 -5
- utils.py +1 -1
app.py
CHANGED
|
@@ -110,7 +110,7 @@ def update_current_dataframe():
|
|
| 110 |
logger.info("Fetching latest dataset for leaderboard...")
|
| 111 |
current_df, current_df_raw = fetch_dataset_df()
|
| 112 |
logger.debug(f"Dataset version updated")
|
| 113 |
-
time.sleep(
|
| 114 |
|
| 115 |
threading.Thread(target=update_current_dataframe, daemon=True).start()
|
| 116 |
|
|
@@ -119,7 +119,7 @@ threading.Thread(target=update_current_dataframe, daemon=True).start()
|
|
| 119 |
|
| 120 |
with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
|
| 121 |
theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
|
| 122 |
-
timer = gr.Timer(
|
| 123 |
data_version = gr.State(value=0)
|
| 124 |
def increment_data_version(current_version):
|
| 125 |
logger.debug("Incrementing data version counter... to " + str(current_version + 1))
|
|
@@ -137,8 +137,8 @@ with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
|
|
| 137 |
Go to the **Leaderboard** to check out how the challenge is going.
|
| 138 |
To participate, head out to the **Submit** tab and upload your results as a `CSV` file.
|
| 139 |
|
| 140 |
-
|
| 141 |
-
The submission cutoff for this leaderboard will be
|
| 142 |
|
| 143 |
"""
|
| 144 |
)
|
|
@@ -307,7 +307,7 @@ with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
|
|
| 307 |
select_columns=LB_AVG,
|
| 308 |
search_columns=["user"],
|
| 309 |
render=True,
|
| 310 |
-
every=
|
| 311 |
)
|
| 312 |
# Set up button to download leaderboard as csv file
|
| 313 |
download_lb = gr.DownloadButton(
|
|
@@ -330,7 +330,7 @@ with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
|
|
| 330 |
select_columns=LB_COLS,
|
| 331 |
search_columns=["user"],
|
| 332 |
render=True,
|
| 333 |
-
every=
|
| 334 |
)
|
| 335 |
# Auto-refresh
|
| 336 |
def refresh_if_changed():
|
|
|
|
| 110 |
logger.info("Fetching latest dataset for leaderboard...")
|
| 111 |
current_df, current_df_raw = fetch_dataset_df()
|
| 112 |
logger.debug(f"Dataset version updated")
|
| 113 |
+
time.sleep(60) # Check for updates every 60 sec
|
| 114 |
|
| 115 |
threading.Thread(target=update_current_dataframe, daemon=True).start()
|
| 116 |
|
|
|
|
| 119 |
|
| 120 |
with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
|
| 121 |
theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
|
| 122 |
+
timer = gr.Timer(30) # Run every 30 seconds
|
| 123 |
data_version = gr.State(value=0)
|
| 124 |
def increment_data_version(current_version):
|
| 125 |
logger.debug("Incrementing data version counter... to " + str(current_version + 1))
|
|
|
|
| 137 |
Go to the **Leaderboard** to check out how the challenge is going.
|
| 138 |
To participate, head out to the **Submit** tab and upload your results as a `CSV` file.
|
| 139 |
|
| 140 |
+
We are releasing an intermediate leaderboard on December 2nd so stay tuned!
|
| 141 |
+
The submission cutoff for this leaderboard will be **Dec 1st at 11:59PM GMT.**
|
| 142 |
|
| 143 |
"""
|
| 144 |
)
|
|
|
|
| 307 |
select_columns=LB_AVG,
|
| 308 |
search_columns=["user"],
|
| 309 |
render=True,
|
| 310 |
+
every=60,
|
| 311 |
)
|
| 312 |
# Set up button to download leaderboard as csv file
|
| 313 |
download_lb = gr.DownloadButton(
|
|
|
|
| 330 |
select_columns=LB_COLS,
|
| 331 |
search_columns=["user"],
|
| 332 |
render=True,
|
| 333 |
+
every=60,
|
| 334 |
)
|
| 335 |
# Auto-refresh
|
| 336 |
def refresh_if_changed():
|
final_lb.py
CHANGED
|
@@ -5,7 +5,7 @@ from utils import (
|
|
| 5 |
map_metric_to_stats,
|
| 6 |
fetch_dataset_df,
|
| 7 |
)
|
| 8 |
-
from about import ENDPOINTS, LB_COLS, LB_AVG, results_repo_test
|
| 9 |
|
| 10 |
from loguru import logger
|
| 11 |
import pandas as pd
|
|
@@ -14,7 +14,7 @@ from pathlib import Path
|
|
| 14 |
|
| 15 |
ALL_EPS = ['Average'] + ENDPOINTS
|
| 16 |
|
| 17 |
-
def build_leaderboard(df_results, df_results_raw):
|
| 18 |
per_ep = {}
|
| 19 |
for ep in ALL_EPS:
|
| 20 |
df = df_results[df_results["Endpoint"] == ep].copy()
|
|
@@ -67,6 +67,8 @@ def build_leaderboard(df_results, df_results_raw):
|
|
| 67 |
per_ep[ep] = avg_leaderboard_clean[avg_cols]
|
| 68 |
|
| 69 |
else:
|
|
|
|
|
|
|
| 70 |
# Delete duplicate entries before sorting (fixing case-sensitive duplicate check)
|
| 71 |
df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
|
| 72 |
df = df.sort_values(by="submission time", ascending=False, kind="stable")
|
|
@@ -97,14 +99,14 @@ def validate_model_details(tag):
|
|
| 97 |
else:
|
| 98 |
return safe_tag
|
| 99 |
|
| 100 |
-
def prepare_lb_csv(save_folder:str):
|
| 101 |
logger.info("Fetching data")
|
| 102 |
df_latest, df_latest_raw = fetch_dataset_df(
|
| 103 |
download_raw=True,
|
| 104 |
test_repo=results_repo_test
|
| 105 |
)
|
| 106 |
logger.info("Building leaderboard")
|
| 107 |
-
per_ep_df = build_leaderboard(df_latest, df_latest_raw)
|
| 108 |
logger.info("Saving leaderboard")
|
| 109 |
for ep in ALL_EPS:
|
| 110 |
df_lb = per_ep_df[ep]
|
|
@@ -113,4 +115,4 @@ def prepare_lb_csv(save_folder:str):
|
|
| 113 |
return
|
| 114 |
|
| 115 |
if __name__ == "__main__":
|
| 116 |
-
prepare_lb_csv("intermediate_lbs")
|
|
|
|
| 5 |
map_metric_to_stats,
|
| 6 |
fetch_dataset_df,
|
| 7 |
)
|
| 8 |
+
from about import ENDPOINTS, LB_COLS, LB_AVG, results_repo_test
|
| 9 |
|
| 10 |
from loguru import logger
|
| 11 |
import pandas as pd
|
|
|
|
| 14 |
|
| 15 |
ALL_EPS = ['Average'] + ENDPOINTS
|
| 16 |
|
| 17 |
+
def build_leaderboard(df_results, df_results_raw, avg_only=True):
|
| 18 |
per_ep = {}
|
| 19 |
for ep in ALL_EPS:
|
| 20 |
df = df_results[df_results["Endpoint"] == ep].copy()
|
|
|
|
| 67 |
per_ep[ep] = avg_leaderboard_clean[avg_cols]
|
| 68 |
|
| 69 |
else:
|
| 70 |
+
if avg_only:
|
| 71 |
+
continue
|
| 72 |
# Delete duplicate entries before sorting (fixing case-sensitive duplicate check)
|
| 73 |
df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
|
| 74 |
df = df.sort_values(by="submission time", ascending=False, kind="stable")
|
|
|
|
| 99 |
else:
|
| 100 |
return safe_tag
|
| 101 |
|
| 102 |
+
def prepare_lb_csv(save_folder:str, avg_only:bool):
|
| 103 |
logger.info("Fetching data")
|
| 104 |
df_latest, df_latest_raw = fetch_dataset_df(
|
| 105 |
download_raw=True,
|
| 106 |
test_repo=results_repo_test
|
| 107 |
)
|
| 108 |
logger.info("Building leaderboard")
|
| 109 |
+
per_ep_df = build_leaderboard(df_latest, df_latest_raw, avg_only)
|
| 110 |
logger.info("Saving leaderboard")
|
| 111 |
for ep in ALL_EPS:
|
| 112 |
df_lb = per_ep_df[ep]
|
|
|
|
| 115 |
return
|
| 116 |
|
| 117 |
if __name__ == "__main__":
|
| 118 |
+
prepare_lb_csv("intermediate_lbs", avg_only=True)
|
utils.py
CHANGED
|
@@ -112,7 +112,7 @@ def fetch_dataset_df(download_raw=False, test_repo=results_repo_validation): # C
|
|
| 112 |
feature_schema = Features(raw_metric_features | other_features_raw)
|
| 113 |
logger.info("Fetching raw bootstrapping dataset from Hugging Face Hub...")
|
| 114 |
# Because the raw file is so long, we have to load it with delay and multiple retries
|
| 115 |
-
max_retries =
|
| 116 |
base_delay = 5
|
| 117 |
for attempt in range(max_retries):
|
| 118 |
try:
|
|
|
|
| 112 |
feature_schema = Features(raw_metric_features | other_features_raw)
|
| 113 |
logger.info("Fetching raw bootstrapping dataset from Hugging Face Hub...")
|
| 114 |
# Because the raw file is so long, we have to load it with delay and multiple retries
|
| 115 |
+
max_retries = 10
|
| 116 |
base_delay = 5
|
| 117 |
for attempt in range(max_retries):
|
| 118 |
try:
|