Maria Castellanos committed on
Commit
764fa75
·
1 Parent(s): a095a04

Interim LB v1

Browse files
Files changed (3) hide show
  1. app.py +6 -6
  2. final_lb.py +7 -5
  3. utils.py +1 -1
app.py CHANGED
@@ -110,7 +110,7 @@ def update_current_dataframe():
110
  logger.info("Fetching latest dataset for leaderboard...")
111
  current_df, current_df_raw = fetch_dataset_df()
112
  logger.debug(f"Dataset version updated")
113
- time.sleep(30) # Check for updates every 30 sec
114
 
115
  threading.Thread(target=update_current_dataframe, daemon=True).start()
116
 
@@ -119,7 +119,7 @@ threading.Thread(target=update_current_dataframe, daemon=True).start()
119
 
120
  with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
121
  theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
122
- timer = gr.Timer(10) # Run every 10 seconds
123
  data_version = gr.State(value=0)
124
  def increment_data_version(current_version):
125
  logger.debug("Incrementing data version counter... to " + str(current_version + 1))
@@ -137,8 +137,8 @@ with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
137
  Go to the **Leaderboard** to check out how the challenge is going.
138
  To participate, head out to the **Submit** tab and upload your results as a `CSV` file.
139
 
140
- **We are releasing an intermediate leaderboard on December 1st so stay tuned!
141
- The submission cutoff for this leaderboard will be November 30th at 6pm EST. **
142
 
143
  """
144
  )
@@ -307,7 +307,7 @@ with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
307
  select_columns=LB_AVG,
308
  search_columns=["user"],
309
  render=True,
310
- every=30,
311
  )
312
  # Set up button to download leaderboard as csv file
313
  download_lb = gr.DownloadButton(
@@ -330,7 +330,7 @@ with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
330
  select_columns=LB_COLS,
331
  search_columns=["user"],
332
  render=True,
333
- every=30,
334
  )
335
  # Auto-refresh
336
  def refresh_if_changed():
 
110
  logger.info("Fetching latest dataset for leaderboard...")
111
  current_df, current_df_raw = fetch_dataset_df()
112
  logger.debug(f"Dataset version updated")
113
+ time.sleep(60) # Check for updates every 60 sec
114
 
115
  threading.Thread(target=update_current_dataframe, daemon=True).start()
116
 
 
119
 
120
  with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
121
  theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
122
+ timer = gr.Timer(30) # Run every 30 seconds
123
  data_version = gr.State(value=0)
124
  def increment_data_version(current_version):
125
  logger.debug("Incrementing data version counter... to " + str(current_version + 1))
 
137
  Go to the **Leaderboard** to check out how the challenge is going.
138
  To participate, head out to the **Submit** tab and upload your results as a `CSV` file.
139
 
140
+ We are releasing an intermediate leaderboard on December 2nd so stay tuned!
141
+ The submission cutoff for this leaderboard will be **Dec 1st at 11:59PM GMT.**
142
 
143
  """
144
  )
 
307
  select_columns=LB_AVG,
308
  search_columns=["user"],
309
  render=True,
310
+ every=60,
311
  )
312
  # Set up button to download leaderboard as csv file
313
  download_lb = gr.DownloadButton(
 
330
  select_columns=LB_COLS,
331
  search_columns=["user"],
332
  render=True,
333
+ every=60,
334
  )
335
  # Auto-refresh
336
  def refresh_if_changed():
final_lb.py CHANGED
@@ -5,7 +5,7 @@ from utils import (
5
  map_metric_to_stats,
6
  fetch_dataset_df,
7
  )
8
- from about import ENDPOINTS, LB_COLS, LB_AVG, results_repo_test, results_repo_validation
9
 
10
  from loguru import logger
11
  import pandas as pd
@@ -14,7 +14,7 @@ from pathlib import Path
14
 
15
  ALL_EPS = ['Average'] + ENDPOINTS
16
 
17
- def build_leaderboard(df_results, df_results_raw):
18
  per_ep = {}
19
  for ep in ALL_EPS:
20
  df = df_results[df_results["Endpoint"] == ep].copy()
@@ -67,6 +67,8 @@ def build_leaderboard(df_results, df_results_raw):
67
  per_ep[ep] = avg_leaderboard_clean[avg_cols]
68
 
69
  else:
 
 
70
  # Delete duplicate entries before sorting (fixing case-sensitive duplicate check)
71
  df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
72
  df = df.sort_values(by="submission time", ascending=False, kind="stable")
@@ -97,14 +99,14 @@ def validate_model_details(tag):
97
  else:
98
  return safe_tag
99
 
100
- def prepare_lb_csv(save_folder:str):
101
  logger.info("Fetching data")
102
  df_latest, df_latest_raw = fetch_dataset_df(
103
  download_raw=True,
104
  test_repo=results_repo_test
105
  )
106
  logger.info("Building leaderboard")
107
- per_ep_df = build_leaderboard(df_latest, df_latest_raw)
108
  logger.info("Saving leaderboard")
109
  for ep in ALL_EPS:
110
  df_lb = per_ep_df[ep]
@@ -113,4 +115,4 @@ def prepare_lb_csv(save_folder:str):
113
  return
114
 
115
  if __name__ == "__main__":
116
- prepare_lb_csv("intermediate_lbs")
 
5
  map_metric_to_stats,
6
  fetch_dataset_df,
7
  )
8
+ from about import ENDPOINTS, LB_COLS, LB_AVG, results_repo_test
9
 
10
  from loguru import logger
11
  import pandas as pd
 
14
 
15
  ALL_EPS = ['Average'] + ENDPOINTS
16
 
17
+ def build_leaderboard(df_results, df_results_raw, avg_only=True):
18
  per_ep = {}
19
  for ep in ALL_EPS:
20
  df = df_results[df_results["Endpoint"] == ep].copy()
 
67
  per_ep[ep] = avg_leaderboard_clean[avg_cols]
68
 
69
  else:
70
+ if avg_only:
71
+ continue
72
  # Delete duplicate entries before sorting (fixing case-sensitive duplicate check)
73
  df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
74
  df = df.sort_values(by="submission time", ascending=False, kind="stable")
 
99
  else:
100
  return safe_tag
101
 
102
+ def prepare_lb_csv(save_folder:str, avg_only:bool):
103
  logger.info("Fetching data")
104
  df_latest, df_latest_raw = fetch_dataset_df(
105
  download_raw=True,
106
  test_repo=results_repo_test
107
  )
108
  logger.info("Building leaderboard")
109
+ per_ep_df = build_leaderboard(df_latest, df_latest_raw, avg_only)
110
  logger.info("Saving leaderboard")
111
  for ep in ALL_EPS:
112
  df_lb = per_ep_df[ep]
 
115
  return
116
 
117
  if __name__ == "__main__":
118
+ prepare_lb_csv("intermediate_lbs", avg_only=True)
utils.py CHANGED
@@ -112,7 +112,7 @@ def fetch_dataset_df(download_raw=False, test_repo=results_repo_validation): # C
112
  feature_schema = Features(raw_metric_features | other_features_raw)
113
  logger.info("Fetching raw bootstrapping dataset from Hugging Face Hub...")
114
  # Because the raw file is so long, we have to load it with delay and multiple retries
115
- max_retries = 3
116
  base_delay = 5
117
  for attempt in range(max_retries):
118
  try:
 
112
  feature_schema = Features(raw_metric_features | other_features_raw)
113
  logger.info("Fetching raw bootstrapping dataset from Hugging Face Hub...")
114
  # Because the raw file is so long, we have to load it with delay and multiple retries
115
+ max_retries = 10
116
  base_delay = 5
117
  for attempt in range(max_retries):
118
  try: