hmacdope committed on
Commit
b9a3c9e
·
1 Parent(s): 26bb373

update leaderboard code

Browse files
Files changed (4) hide show
  1. cld.py +1 -1
  2. final_lb.py +0 -143
  3. intermediate_leaderboard.py +124 -0
  4. utils.py +22 -6
cld.py CHANGED
@@ -6,7 +6,7 @@ from itertools import product
6
 
7
  # Make large CLD alphabet
8
  single_chars = list(ascii_lowercase) + list(ascii_uppercase)
9
- underscore_chars = [''.join(p) for p in product(['_'], single_chars)]
10
  CLD_ALPHABET = single_chars + underscore_chars
11
 
12
  def asserts_non_significance(col: list[bool], i: int, j: int) -> bool:
 
6
 
7
  # Make large CLD alphabet
8
  single_chars = list(ascii_lowercase) + list(ascii_uppercase)
9
+ underscore_chars = [''.join(p) for p in product(['@'], single_chars)]
10
  CLD_ALPHABET = single_chars + underscore_chars
11
 
12
  def asserts_non_significance(col: list[bool], i: int, j: int) -> bool:
final_lb.py DELETED
@@ -1,143 +0,0 @@
1
- ''' Code to generate intermediate and final leadeboard '''
2
- from cld import add_cld_to_leaderboard
3
- from utils import (
4
- check_page_exists,
5
- map_metric_to_stats,
6
- fetch_dataset_df,
7
- )
8
- from about import ENDPOINTS, LB_COLS, results_repo_test
9
-
10
- from loguru import logger
11
- import pandas as pd
12
- import numpy as np
13
- from pathlib import Path
14
-
15
- ALL_EPS = ['Average'] + ENDPOINTS
16
-
17
- def build_leaderboard(df_results, df_results_raw, avg_only=True):
18
- per_ep = {}
19
- for ep in ALL_EPS:
20
- df = df_results[df_results["Endpoint"] == ep].copy()
21
- if df is None:
22
- print(f"[refresh] {ep} returned None; using empty DF")
23
- if df.empty:
24
- per_ep[ep] = pd.DataFrame(columns=LB_COLS) # Empty df
25
- continue
26
-
27
- # Make model details clickable
28
- df['model details'] = df['model_report'].apply(lambda x: validate_model_details(x)).astype(str)
29
-
30
- if ep == "Average":
31
- # MA-RAE is the average of the RAE per endpoint
32
- df = df.rename(columns={"mean_RAE": "mean_MA-RAE",
33
- "std_RAE": "std_MA-RAE"})
34
- # Delete duplicate entries before sorting (fixing case-sensitive duplicate check)
35
- df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
36
- df = df.sort_values(by="submission time", ascending=False, kind="stable")
37
- df = df.drop_duplicates(subset=['hf_username'], keep='first')
38
-
39
- # Sort by MAE-RAE
40
- sorted_df = df.sort_values(by='mean_MA-RAE', ascending=True, kind="stable")
41
- sorted_df = map_metric_to_stats(sorted_df, average=True)
42
-
43
- # Make sure Hugging Face username exists, if not, delete the row
44
- sorted_df['user_real'] = sorted_df['hf_username'].apply(validate_hf_username)
45
- sorted_df_clean = sorted_df[sorted_df['user_real']].reset_index(drop=True)
46
- # Add ranking column
47
- sorted_df_clean['rank'] = np.arange(1, len(sorted_df_clean) + 1)
48
- avg_leaderboard = sorted_df_clean.copy()
49
-
50
- # Clean raw data as well
51
- df_raw = df_results_raw[df_results_raw["Endpoint"] == ep].copy()
52
- df_raw = df_raw.rename(columns={"RAE": "MA-RAE"})
53
-
54
- df_raw['hf_username'] = df_raw['hf_username'].apply(lambda s: s.lower())
55
- df_raw = df_raw.sort_values(by="submission_time", ascending=False, kind="stable")
56
- df_raw = df_raw.drop_duplicates(subset=['hf_username','Sample'], keep='first')
57
-
58
- valid_usernames = sorted_df_clean['hf_username'].unique()
59
- df_raw_clean = df_raw[df_raw['hf_username'].isin(valid_usernames)].reset_index(drop=True)
60
-
61
- # Make sure order of raw dataframe is the same as sorted dataframe
62
- username_order = sorted_df['hf_username'].unique()
63
- df_raw_sorted = df_raw_clean.copy()
64
- df_raw_sorted['hf_username'] = pd.Categorical(
65
- df_raw_sorted['hf_username'],
66
- categories=username_order,
67
- ordered=True
68
- )
69
- df_raw_sorted = df_raw_sorted.sort_values(
70
- by=['hf_username', 'Sample'],
71
- ascending=[True, True]
72
- )
73
- df_raw_sorted['hf_username'] = df_raw_sorted['hf_username'].astype(str)
74
- df_raw_sorted = df_raw_sorted.reset_index(drop=True)
75
-
76
- avg_leaderboard = add_cld_to_leaderboard(
77
- sorted_df_clean,
78
- df_raw_sorted,
79
- "MA-RAE",
80
- )
81
- avg_cols = ["rank",
82
- "user",
83
- "CLD",
84
- "MA-RAE",
85
- "R2",
86
- "Spearman R",
87
- "Kendall's Tau",
88
- "model details"]
89
-
90
- per_ep[ep] = avg_leaderboard[avg_cols]
91
-
92
- else:
93
- if avg_only:
94
- continue
95
- # Delete duplicate entries before sorting (fixing case-sensitive duplicate check)
96
- df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
97
- df = df.sort_values(by="submission time", ascending=False, kind="stable")
98
- df = df.drop_duplicates(subset=['hf_username'], keep='first')
99
- sorted_df = df.sort_values(by="mean_MAE", ascending=True, kind="stable")
100
- sorted_df = map_metric_to_stats(sorted_df)
101
- # Make sure Hugging Face username exists, if not, delete the row
102
- sorted_df['user_real'] = sorted_df['hf_username'].apply(validate_hf_username)
103
- sorted_df_clean = sorted_df[sorted_df['user_real']]
104
- per_ep[ep] = sorted_df_clean[LB_COLS]
105
- logger.info("Finished building leaderboard data.")
106
- return per_ep
107
-
108
- def validate_hf_username(username):
109
- username = str(username).strip()
110
- hf_url = f"https://huggingface.co/{username}"
111
- return check_page_exists(hf_url, delay=1)
112
-
113
- def validate_model_details(tag):
114
- if tag is None:
115
- return "Not submitted"
116
- safe_tag = str(tag).strip()
117
- if not safe_tag.startswith("https://"):
118
- return "Invalid link"
119
- is_real_url = check_page_exists(safe_tag, delay=2)
120
- if not is_real_url:
121
- return "Invalid link"
122
- else:
123
- return safe_tag
124
-
125
- def prepare_lb_csv(save_folder:str, avg_only:bool):
126
- logger.info("Fetching data")
127
- df_latest, df_latest_raw = fetch_dataset_df(
128
- download_raw=True,
129
- test_repo=results_repo_test
130
- )
131
- logger.info("Building leaderboard")
132
- per_ep_df = build_leaderboard(df_latest, df_latest_raw, avg_only)
133
- logger.info("Saving leaderboard")
134
- for ep in ALL_EPS:
135
- if ep != "Average" and avg_only:
136
- continue
137
- df_lb = per_ep_df[ep]
138
- save_path = Path(save_folder) / f"{ep}_leaderboard.csv"
139
- df_lb.to_csv(save_path, index=False)
140
- return
141
-
142
- if __name__ == "__main__":
143
- prepare_lb_csv("intermediate_lbs", avg_only=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
intermediate_leaderboard.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from statsmodels.stats.multicomp import pairwise_tukeyhsd
2
+ from cld import cld
3
+ from utils import (
4
+ check_page_exists,
5
+ map_metric_to_stats,
6
+ fetch_dataset_df,
7
+ )
8
+ from about import ENDPOINTS, LB_COLS, results_repo_test, METRICS
9
+ import pandas as pd
10
+
11
+
12
+
13
def validate_hf_username(username):
    """Return True when a Hugging Face profile page exists for *username*.

    The username is stringified and stripped before being checked against
    ``https://huggingface.co/<username>`` via ``check_page_exists``.
    """
    profile_url = "https://huggingface.co/" + str(username).strip()
    return check_page_exists(profile_url, delay=1)
18
+
19
def validate_model_details(tag):
    """Turn a submitted model-report link into a display value.

    Returns "Not submitted" when *tag* is None, "Invalid link" for anything
    that is not an https URL resolving to a live page (checked with
    ``check_page_exists``), and the cleaned URL string otherwise.
    """
    if tag is None:
        return "Not submitted"
    link = str(tag).strip()
    # Only https links are accepted, and they must resolve to a live page.
    # Short-circuit keeps the network check from running on malformed links.
    if link.startswith("https://") and check_page_exists(link, delay=2):
        return link
    return "Invalid link"
30
+
31
+
32
+
33
def make_intermediate_lb():
    """Build the intermediate leaderboard CSV.

    Fetches the latest raw results, drops rows whose Hugging Face username
    does not resolve to a real profile, keeps only each user's most recent
    'Average'-endpoint submission, runs Tukey's HSD on the per-sample RAE
    values, attaches compact-letter-display (CLD) groupings, and writes the
    summary table to ``leaderboard_cld_results.csv``.
    """
    df_latest, df_latest_raw = fetch_dataset_df(
        download_raw=True,
        test_repo=results_repo_test
    )

    # HF username validation: query each unique username exactly once
    # (validate_hf_username performs a network request per call).
    hf_usernames = df_latest_raw["hf_username"].unique()
    valid_hf_usernames = {username: validate_hf_username(username) for username in hf_usernames}

    # print all users and their validation status
    for username, is_valid in valid_hf_usernames.items():
        print(f"Username: {username}, Valid: {is_valid}")

    df_latest_raw["hf_user_valid"] = df_latest_raw["hf_username"].map(valid_hf_usernames)
    # drop invalid usernames
    df_latest_raw = df_latest_raw[df_latest_raw["hf_user_valid"]].reset_index(drop=True)

    # make sure to only keep the latest submission per user for the 'Average' endpoint
    df_latest_raw["submission_time"] = pd.to_datetime(df_latest_raw["submission_time"])
    df_latest_raw = df_latest_raw.query("Endpoint == 'Average'")
    df_latest_raw['latest_time_per_user'] = df_latest_raw.groupby('user')['submission_time'].transform('max')
    latest_submissions_df = df_latest_raw[df_latest_raw['submission_time'] == df_latest_raw['latest_time_per_user']].copy()
    latest_submissions_df = latest_submissions_df.sort_values(
        ['RAE', 'user', 'Sample'], ascending=True
    ).reset_index(drop=True)

    # Prefix each username with a zero-padded ordinal ("001___user", ...)
    # in order of first appearance (i.e. RAE rank after the sort above) so
    # downstream group ordering is stable and rank-sorted.
    unique_users_ordered = latest_submissions_df['user'].unique()
    user_mapping = {
        user: f"{idx + 1:03d}___{user}"
        for idx, user in enumerate(unique_users_ordered)
    }
    latest_submissions_df['user'] = latest_submissions_df['user'].map(user_mapping)

    # Perform Tukey's HSD test on per-sample RAE, grouped by user.
    tukey = pairwise_tukeyhsd(endog=latest_submissions_df['RAE'], groups=latest_submissions_df['user'], alpha=0.05)
    # Use the public summary() accessor instead of the private
    # _results_table attribute; both expose the same SimpleTable whose
    # first row is the header.
    summary_data = tukey.summary().data
    tukey_df = pd.DataFrame(data=summary_data[1:], columns=summary_data[0])

    # add CLDs
    cld_dict = cld(tukey_df)

    cld_df = pd.DataFrame(cld_dict.items(), columns=["group", "letter"]).sort_values("group")
    cld_df.letter = [",".join(x) for x in cld_df.letter]
    cld_df["user"] = cld_df.group
    # Strip the "NNN___" rank prefix back off for display.
    cld_df["user_fixed"] = cld_df.group.str.split("___").str[1]

    # clean up CLD letters for extended alphabet (i.e with @ symbols)
    cld_df["fixed_letter"] = cld_df["letter"].apply(_clean_up_cld_letters)

    # gather means and stds for each metric for each user
    for metric in METRICS:
        metric_stats = latest_submissions_df.groupby('user')[metric].agg(['mean', 'std']).reset_index()
        metric_stats = metric_stats.rename(columns={'mean': f'{metric}_mean', 'std': f'{metric}_std'})
        metric_stats[f"{metric}_display"] = metric_stats.apply(
            lambda row: f"{row[f'{metric}_mean']:.4f} ± {row[f'{metric}_std']:.4f}", axis=1
        )
        cld_df = cld_df.merge(metric_stats[['user', f'{metric}_mean', f'{metric}_std', f'{metric}_display']], on='user', how='left')

    cld_subset = cld_df[['user_fixed', 'fixed_letter'] + [f'{metric}_display' for metric in METRICS]]
    cld_subset = cld_subset.rename(columns={'user_fixed': 'user', 'fixed_letter': 'CLD'})
    print(cld_subset.head())
    cld_subset.to_csv("leaderboard_cld_results.csv", index=False)


def _clean_up_cld_letters(letters: str) -> str:
    """Re-join comma-split CLD letters from the extended ('@'-prefixed) alphabet.

    NOTE(review): this assumes a two-character extended letter such as "@a"
    arrives split into a lone "@" element plus its base letter — confirm
    against the joining done by ``cld.cld`` and ``CLD_ALPHABET``.
    """
    parts = letters.split(",")
    # rejoin for late in alphabet
    if "@" in parts and len(parts) == 2:
        return "@" + parts[1]
    elif "@" in parts and len(parts) == 4:
        return "@" + parts[2] + "," + "@" + parts[3]
    return ",".join(parts)
122
+
123
+ if __name__ == "__main__":
124
+ make_intermediate_lb()
utils.py CHANGED
@@ -9,8 +9,11 @@ from loguru import logger
9
  import time
10
  import requests
11
 
12
- def check_page_exists(url: str, delay=0.2):
13
- """Checks if a web page exists at the given URL.
 
 
 
14
 
15
  Parameters
16
  ----------
@@ -18,6 +21,10 @@ def check_page_exists(url: str, delay=0.2):
18
  Url of the page
19
  delay : float, optional
20
  Seconds to wait until submitting another request, by default 0.2
 
 
 
 
21
 
22
  Returns
23
  -------
@@ -25,17 +32,26 @@ def check_page_exists(url: str, delay=0.2):
25
  If the page exists
26
  """
27
  safe_url = str(url).strip()
 
28
  # Attempt to fix url
29
  if not safe_url.startswith(('http://', 'https://')):
30
  safe_url = f"https://{safe_url}"
 
31
  try:
32
  response = requests.get(safe_url, timeout=5)
33
 
 
34
  if response.status_code == 429:
35
- print(f"Warning: Rate limit hit on {safe_url}. Waiting for 5 seconds...")
36
- time.sleep(5)
37
- return check_page_exists(safe_url, delay=delay)
38
-
 
 
 
 
 
 
39
  return response.status_code == 200
40
 
41
  except requests.exceptions.RequestException as e:
 
9
  import time
10
  import requests
11
 
12
+ import requests
13
+ import time
14
+
15
+ def check_page_exists(url: str, delay=0.2, max_retries=3, current_retries=0):
16
+ """Checks if a web page exists at the given URL with a retry limit for 429 errors.
17
 
18
  Parameters
19
  ----------
 
21
  Url of the page
22
  delay : float, optional
23
  Seconds to wait until submitting another request, by default 0.2
24
+ max_retries : int, optional
25
+ Maximum number of times to retry on a 429 error, by default 3
26
+ current_retries : int, optional
27
+ Current number of retries performed (internal counter), by default 0
28
 
29
  Returns
30
  -------
 
32
  If the page exists
33
  """
34
  safe_url = str(url).strip()
35
+
36
  # Attempt to fix url
37
  if not safe_url.startswith(('http://', 'https://')):
38
  safe_url = f"https://{safe_url}"
39
+
40
  try:
41
  response = requests.get(safe_url, timeout=5)
42
 
43
+ # Check for Rate Limit Error and retry if under the limit
44
  if response.status_code == 429:
45
+ if current_retries < max_retries:
46
+ print(f"Warning: Rate limit hit on {safe_url}. Attempt {current_retries + 1}/{max_retries}. Waiting for 5 seconds...")
47
+ time.sleep(5)
48
+ # Recurse with an incremented retry counter
49
+ return check_page_exists(safe_url, delay=delay, max_retries=max_retries, current_retries=current_retries + 1)
50
+ else:
51
+ print(f"Error: Max retries ({max_retries}) reached for rate limit on {safe_url}.")
52
+ return False # Give up after max retries
53
+
54
+ # Return True only for a successful status code (200)
55
  return response.status_code == 200
56
 
57
  except requests.exceptions.RequestException as e: