Maria Castellanos committed · Commit 20ed309 · 1 Parent(s): 9638dbd

validations and improvements

Files changed (5)
  1. _static/challenge_logo.png +0 -0
  2. about.py +6 -2
  3. app.py +91 -32
  4. evaluate.py +158 -60
  5. utils.py +15 -0
_static/challenge_logo.png CHANGED
about.py CHANGED
@@ -15,13 +15,17 @@ ENDPOINTS = ["LogD",
 LB_COLS0 = ["endpoint",
             "user",
             "MAE",
+            "RAE",
             "R2",
             "Spearman R",
             "Kendall's Tau",
+            "data coverage (%)",
             "submission_time",
             "model_report"]
-LB_COLS = ["user", "MAE", "R2", "Spearman R", "Kendall's Tau", "submission time", "model details"]
-LB_DTYPES = ['markdown', 'number', 'number', 'number', 'number', 'str', 'markdown']
+LB_COLS = ["user", "MAE", "R2", "Spearman R", "Kendall's Tau", "submission time", "model details",
+           "data coverage (%)"]
+LB_AVG = ["user", "MA-RAE", "R2", "Spearman R", "Kendall's Tau", "submission time", "model details"]  # Delete some columns for overall LB?
+LB_DTYPES = ['markdown', 'number', 'number', 'number', 'number', 'str', 'markdown', 'number']
 
 TOKEN = os.environ.get("HF_TOKEN")
 CACHE_PATH=os.getenv("HF_HOME", ".")
app.py CHANGED
@@ -6,8 +6,7 @@ import pandas as pd
 from evaluate import submit_data, evaluate_data
 from utils import make_tag_clickable, make_user_clickable, fetch_dataset_df
 
-from datetime import datetime
-from about import ENDPOINTS, LB_COLS, LB_DTYPES
+from about import ENDPOINTS, LB_COLS, LB_AVG, LB_DTYPES
 
 
 ALL_EPS = ['Average'] + ENDPOINTS
@@ -22,11 +21,19 @@ def build_leaderboard(df_results):
             per_ep[ep] = pd.DataFrame(columns=LB_COLS)  # Empty df
             continue
 
-        # Make user and model details clickable
-        df['user'] = df['user'].apply(lambda x: make_user_clickable(x)).astype(str)
+        # Make user and model details clickable if it's a Hugging Face user
+        df['user'] = df.apply(
+            lambda row: make_user_clickable(row['user']) if not row['anonymous'] else row['user'],
+            axis=1).astype(str)
         df['model details'] = df['model_report'].apply(lambda x: make_tag_clickable(x)).astype(str)
 
-        per_ep[ep] = df[LB_COLS]
+        if ep == "Average":
+            df["MA-RAE"] = df["RAE"]  # The average of the RAE per endpoint
+            sorted_df = df.sort_values(by='MA-RAE', ascending=True, kind="stable")
+            per_ep[ep] = sorted_df[LB_AVG]
+        else:
+            sorted_df = df.sort_values(by="MAE", ascending=True, kind="stable")
+            per_ep[ep] = sorted_df[LB_COLS]
 
     return per_ep
 
@@ -49,38 +56,36 @@ def gradio_interface():
 
     ### Header
     with gr.Row():
-        with gr.Column(scale=8):  # bigger text area
+        with gr.Column(scale=7):  # bigger text area
            gr.Markdown("""
-           ## Welcome to the OpenADMET + XXX Blind Challenge!
+           ## Welcome to the OpenADMET + ExpansionRx Blind Challenge!
            Your task is to develop and submit predictive models for key ADMET properties on a blinded test set of real-world drug discovery data 🧑‍🔬
 
            Go to the **Leaderboard** to check out how the challenge is going.
           To participate, head over to the **Submit** tab and upload your results as a `CSV` file.
           """
           )
-        with gr.Column(scale=1):  # smaller side column for logo
+        with gr.Column(scale=2):  # smaller side column for logo
            gr.Image(
                value="./_static/challenge_logo.png",
                show_label=False,
                show_download_button=False,
-               width="10vw",  # Take up the width of the column (2/8 = 1/4)
+               width="5vw",  # width of the logo column
            )
 
    # --- Welcome markdown message ---
    welcome_md = """
-   # 💊 OpenADMET + XXX
+   # 💊 OpenADMET + ExpansionRx
    ## Computational Blind Challenge in ADMET
 
    This challenge is a community-driven initiative to benchmark predictive models for ADMET properties in drug discovery,
-   hosted by **OpenADMET** in collaboration with **XXX**.
+   hosted by **OpenADMET** in collaboration with **ExpansionRx**.
 
 
    ## Why are ADMET properties important in drug discovery?
    Small molecules continue to be the bricks and mortar of drug discovery globally, accounting for ~75% of FDA approvals over the last decade.
-   Oral bioavailability, easily tunable properties, modulation of a wide range of mechanisms,
-   and ease of manufacturing make small molecules highly attractive as therapeutic agents, a trend that is not expected to drastically change,
-   despite increased interest in biologics. Indeed, newer small molecule modalities such as degraders, molecular glues, and antibody-drug conjugates
-   (to name a few) make understanding small molecule properties more important than ever.
+   Oral bioavailability, easily tunable properties, modulation of a wide range of mechanisms, and ease of manufacturing make small molecules highly attractive as therapeutic agents.
+   Moreover, emerging small molecule modalities such as degraders, expression modulators, molecular glues, and antibody-drug conjugates (to name a few) have vastly expanded what we thought small molecules were capable of.
 
    It is fairly difficult to predict the lifetime and distribution of small molecules within the body. Additionally,
    interaction with off-targets can cause safety issues and toxicity. Collectively these *Absorption*, *Distribution*, *Metabolism*, *Excretion*, *Toxicology*--or **ADMET**--properties
@@ -90,7 +95,17 @@ def gradio_interface():
    that give rise to these properties through integrated structural biology, high throughput experimentation and integrative computational models.
    Read more about our strategy to transform drug discovery on our [website](https://openadmet.org/community/blogs/whatisopenadmet/).
 
-   For this blind challenge we selected ten (10) crucial endpoints for the community to predict:
+   Critical to our mission is developing open datasets and running community blind challenges to assess the current state of the art in ADMET modeling.
+   Building on the success of the recent [ASAP-Polaris-OpenADMET blind challenge](https://chemrxiv.org/engage/chemrxiv/article-details/68ac00d1728bf9025e22fe45) in computational methods for drug discovery,
+   we bring you a brand-new challenge in collaboration with **ExpansionRx**. During a recent series of drug discovery campaigns for RNA-mediated diseases,
+   ExpansionRx collected a variety of ADMET data for off-targets and properties of interest, which they are generously sharing with the community for this challenge.
+
+   ## 🧪 The Challenge
+
+   Participants will be tasked with solving real-world ADMET prediction problems ExpansionRx faced during lead optimization.
+   Specifically, you will be asked to predict the ADMET properties of late-stage molecules based on earlier-stage data from the same campaigns.
+   For this challenge we selected ten (10) crucial endpoints for the community to predict:
+
    - LogD
    - Kinetic Solubility **KSOL**: uM
    - Mouse Liver Microsomal (**MLM**) *CLint*: mL/min/kg
@@ -102,16 +117,18 @@ def gradio_interface():
    - Rat Liver Microsomal (**RLM**) *CLint*: mL/min/kg
    - Mouse Gastrocnemius Muscle Binding (**MGMB**): % Unbound
 
+   Find more information about these endpoints on our [blog](https://openadmet.org/community/blogs/challenge_announcement2/).
+
    ## ✅ How to Participate
    1. **Register**: Create an account with Hugging Face.
-   2. **Download the Public Dataset**: Clone the XXX dataset [link]
+   2. **Download the Public Dataset**: Clone the ExpansionRx dataset [link]
    3. **Train Your Model**: Use the provided training data for each ADMET property of your choice.
    4. **Submit Predictions**: Follow the instructions in the *Submit* tab to upload your predictions.
-   5. Join the discussion on the [Challenge Discord](link)!
+   5. Join the discussion on the [Challenge Discord](https://discord.gg/MY5cEFHH3D)!
 
    ## 📊 Data:
 
-   The training set will have the following variables:
+   The training set contains the following parameters:
 
    | Column | Unit | data type | Description |
    |:-----------------------------|-----------|-----------|:-------------|
@@ -128,15 +145,28 @@ def gradio_interface():
    | RLM CLint | mL/min/kg | float | Rat Liver Microsomal Stability |
    | MGMB | % Unbound | float | Mouse Gastrocnemius Muscle Binding |
 
-   At test time, we will only provide the Molecule Name and Smiles. Make sure your submission file has the same columns!
+   You can download the training data from the [Hugging Face dataset](https://huggingface.co/datasets/OpenADMET/openadmet-challenge-training-set).
+   The test set will remain blinded until the challenge submission deadline. You will be tasked with predicting the same set of ADMET endpoints for the test set molecules.
 
    ## 📝 Evaluation
-   The challenge will be judged based on the judging criteria outlined here.
-
-   - TBD
+   The challenge will be judged based on the following criteria:
+   - We welcome submissions of any kind, including machine learning and physics-based approaches. You can also employ pre-training approaches as you see fit,
+   as well as incorporate data from external sources into your models and submissions.
+   - In the spirit of open science and open source, we would love to see code showing how you created your submission if possible, in the form of a GitHub repository.
+   If that is not possible due to IP or other constraints, you must at a minimum provide a short written report on your methodology based on the template [here](link to google doc).
+   **Make sure your last submission before the deadline includes a link to a report or to a GitHub repository.**
+   - Each participant can submit as many times as they like, up to a limit of 5 times/day. **Only your latest submission will be considered for the final leaderboard.**
+   - The endpoints will be judged individually by mean absolute error (**MAE**), while the overall leaderboard will be judged by the macro-averaged relative absolute error (**MA-RAE**).
+   - Endpoints that are not already on a log scale (i.e., everything except LogD) will be transformed to log scale to minimize the impact of outliers on evaluation.
+   - We will estimate errors on the metrics using bootstrapping and use the statistical testing workflow outlined in [this paper](https://chemrxiv.org/engage/chemrxiv/article-details/672a91bd7be152b1d01a926b) to determine if model performance is statistically distinct.
 
    📅 **Timeline**:
-   - TBD
+   - **September 12:** Challenge announcement
+   - **September XX:** Sample data release
+   - **October 27:** Challenge starts
+   - **October-November:** Online Q&A sessions and support via the Discord channel
+   - **January 19, 2026:** Submission closes
+   - **January 26, 2026:** Winners announced
 
    ---
 
@@ -166,7 +196,7 @@ def gradio_interface():
        lboard_dict['Average'] = Leaderboard(
            value=build_leaderboard(current_df)['Average'],
            datatype=LB_DTYPES,
-           select_columns=LB_COLS,
+           select_columns=LB_AVG,
            search_columns=["user"],
            render=True,
            every=15,
@@ -207,10 +237,11 @@ def gradio_interface():
        gr.Markdown(
            """
            ## Participant Information
-           To participate, we **only** require a Hugging Face username, which will be displayed on the leaderboard.
-           Other information is optional but helps us track participation.
+           To participate, **we require a Hugging Face username**, which will be used to track multiple submissions.
+           Your username will be displayed on the leaderboard unless you check the *anonymous* box. If you want to remain anonymous, please provide an alias to be used on the leaderboard (we'll keep your username hidden).
+
            If you wish to be included in Challenge discussions, please provide your Discord username and email.
-           If you wish to be included in a future publication with the Challenge results, please provide your name and affiliation.
+           If you wish to be included in a future publication with the Challenge results, please provide your name and affiliation (and check the box below).
 
            We also ask you to provide a link to a report describing your method. While not mandatory at the time of participation,
           you need to submit the link before the challenge deadline in order to be considered for the final leaderboard.
@@ -221,7 +252,17 @@ def gradio_interface():
            username_input = gr.Textbox(
                label="Username",
                placeholder="Enter your Hugging Face username",
-               info="This will be displayed on the leaderboard."
+               # info="This will be displayed on the leaderboard."
+           )
+           user_alias = gr.Textbox(
+               label="Optional Alias",
+               placeholder="Enter an identifying alias for the leaderboard if you wish to remain anonymous",
+               # info="This will be displayed on the leaderboard."
+           )
+           anon_checkbox = gr.Checkbox(
+               label="I want to submit anonymously",
+               info="If checked, your alias will be shown instead of your username on the leaderboard.",
+               value=False,
            )
        with gr.Column():
            # Info to track participant, that will not be displayed publicly
@@ -247,6 +288,10 @@ def gradio_interface():
                label="Model Report",
                placeholder="Link to a report describing your method (optional)",
            )
+           paper_checkbox = gr.Checkbox(
+               label="I want to be included in a future publication detailing the Challenge results",
+               value=False,
+           )
 
    with gr.Row():
        with gr.Column():
@@ -256,9 +301,14 @@ def gradio_interface():
            Upload a single CSV file containing your predictions for all ligands in the test set.
            Only your latest submission will be considered.
 
-           You can download the ligand test set here (lik/to/download/smiles/csv).
+           You can download a CSV template with the ligands in the test set here.
            """
            )
+           download_btn = gr.DownloadButton(
+               label="📥 Download Test Set Template",
+               value="data/test_set-example.csv",
+               variant="secondary",
+           )
        with gr.Column():
            predictions_file = gr.File(label="Single file with ADMET predictions (.csv)",
                                       file_types=[".csv"],
@@ -270,12 +320,21 @@ def gradio_interface():
                outputs=user_state
            )
 
-           submit_btn = gr.Button("Submit Predictions")
+           submit_btn = gr.Button("📤 Submit Predictions")
            message = gr.Textbox(label="Status", lines=1, visible=False)
 
            submit_btn.click(
                submit_data,
-               inputs=[predictions_file, user_state, participant_name, discord_username, email, affiliation, model_tag],
+               inputs=[predictions_file,
+                       user_state,
+                       participant_name,
+                       discord_username,
+                       email,
+                       affiliation,
+                       model_tag,
+                       user_alias,
+                       anon_checkbox,
+                       paper_checkbox],
                outputs=[message, filename],
            ).success(
                fn=lambda m: gr.update(value=m, visible=True),
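
A minimal sketch of assembling a valid submission, based on the checks this commit adds (a "Molecule Name" column plus one column per entry in about.ENDPOINTS, starting from the downloadable template wired to the download button above); the endpoint names shown and my_model are illustrative placeholders, not part of the commit:

import pandas as pd

# Start from the template so "Molecule Name" (and Smiles) match the blinded test set
template = pd.read_csv("data/test_set-example.csv")
preds = template.copy()
for ep in ["LogD", "KSOL", "MLM CLint"]:               # ...one column per challenge endpoint
    preds[ep] = my_model.predict(preds["Smiles"], ep)  # my_model is a stand-in for your own model
preds.to_csv("my_submission.csv", index=False)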
evaluate.py CHANGED
@@ -1,27 +1,84 @@
1
  import gradio as gr
2
  import pandas as pd
3
  from pathlib import Path
4
- from scipy.stats import spearmanr, kendalltau
5
- from sklearn.metrics import mean_absolute_error, r2_score
6
  from typing import Optional
7
  from about import ENDPOINTS, API, submissions_repo, results_repo, test_repo
 
8
  from huggingface_hub import hf_hub_download
9
  import datetime
10
  import io
11
  import json, tempfile
12
- import pydantic
 
 
 
 
 
 
 
13
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- class ParticipantRecord(pydantic.BaseModel):
16
- hf_username: Optional[str] = pydantic.Field(default=None, description="Hugging Face username")
17
- participant_name: Optional[str] = pydantic.Field(default=None, description="Participant's real name")
18
- discord_username: Optional[str] = pydantic.Field(default=None, description="Discord username")
19
- email: Optional[str] = pydantic.Field(default=None, description="Email address")
20
- affiliation: Optional[str] = pydantic.Field(default=None, description="Affiliation")
21
- model_tag: Optional[str] = pydantic.Field(default=None, description="Model tag")
 
 
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- class SubmissionMetadata(pydantic.BaseModel):
 
 
 
 
 
 
25
  submission_time_utc: str
26
  user: str
27
  original_filename: str
@@ -29,12 +86,6 @@ class SubmissionMetadata(pydantic.BaseModel):
29
  participant: ParticipantRecord
30
 
31
 
32
- def _safeify_username(username: str) -> str:
33
- return str(username.strip()).replace("/", "_").replace(" ", "_")
34
-
35
- def _unsafify_username(username: str) -> str:
36
- return str(username.strip()).replace("/", "_").replace(" ", "_")
37
-
38
  def submit_data(predictions_file: str,
39
  user_state,
40
  participant_name: str = "",
@@ -42,13 +93,15 @@ def submit_data(predictions_file: str,
42
  email: str = "",
43
  affiliation: str = "",
44
  model_tag: str = "",
 
 
 
45
  ):
46
 
47
  if user_state is None:
48
  raise gr.Error("Username or alias is required for submission.")
49
 
50
  file_path = Path(predictions_file).resolve()
51
-
52
  if not file_path.exists():
53
  raise gr.Error("Uploaded file object does not have a valid file path.")
54
 
@@ -60,29 +113,13 @@ def submit_data(predictions_file: str,
60
 
61
  if results_df.empty:
62
  return gr.Error("The uploaded file is empty.")
63
- if not set(ENDPOINTS).issubset(set(results_df.columns)):
64
- return gr.Error(f"The uploaded file must contain all endpoint predictions {ENDPOINTS} as columns.")
65
 
66
- # TODO, much more validation logic needed depending on the state of final data
67
-
68
- # Build destination filename in the dataset
69
- ts = datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds") # should keep default time so can be deserialized correctly
70
- safe_user = _safeify_username(user_state)
71
-
72
- destination_csv = f"submissions/{safe_user}_{ts}.csv"
73
- destination_json = destination_csv.replace(".csv", ".json")
74
- # Upload the CSV file
75
- API.upload_file(
76
- path_or_fileobj=str(file_path),
77
- path_in_repo=destination_csv,
78
- repo_id=submissions_repo,
79
- repo_type="dataset",
80
- commit_message=f"Add submission for {safe_user} at {ts}"
81
- )
82
 
83
- # Optional participant record
84
  try:
85
-
86
  participant_record = ParticipantRecord(
87
  hf_username=user_state,
88
  participant_name=participant_name,
@@ -90,11 +127,15 @@ def submit_data(predictions_file: str,
90
  email=email,
91
  affiliation=affiliation,
92
  model_tag=model_tag,
 
 
 
93
  )
94
- except pydantic.ValidationError as e:
95
  return f"❌ Error in participant information: {str(e)}"
96
-
97
-
 
98
  try:
99
  meta = SubmissionMetadata(
100
  submission_time_utc=ts,
@@ -103,11 +144,23 @@ def submit_data(predictions_file: str,
103
  evaluated=False,
104
  participant=participant_record
105
  )
106
- except pydantic.ValidationError as e:
107
- return f"❌ Error in metadata information: {str(e)}"
108
 
109
- meta_bytes = io.BytesIO(json.dumps(meta.model_dump(), indent=2).encode("utf-8"))
 
 
110
 
 
 
 
 
 
 
 
 
 
 
111
  API.upload_file(
112
  path_or_fileobj=meta_bytes,
113
  path_in_repo=destination_json,
@@ -135,7 +188,7 @@ def evaluate_data(filename: str) -> None:
135
  test_path = hf_hub_download(
136
  repo_id=test_repo,
137
  repo_type="dataset",
138
- filename="data/challenge_mock_test_set.csv", #Replace later with "test_dataset.csv" later!!,
139
  )
140
  except Exception as e:
141
  raise gr.Error(f"Failed to download test file: {e}")
@@ -163,13 +216,18 @@ def evaluate_data(filename: str) -> None:
163
  username = meta.participant.hf_username
164
  timestamp = meta.submission_time_utc
165
  report = meta.participant.model_tag
 
 
 
 
166
  except Exception as e:
167
  raise gr.Error(f"Failed to load metadata file: {e}. No results written to results dataset.")
168
 
169
  # Write results to results dataset
170
- results_df['user'] = username
171
  results_df['submission_time'] = timestamp
172
  results_df['model_report'] = report
 
173
  safe_user = _unsafify_username(username)
174
  destination_path = f"results/{safe_user}_{timestamp}_results.csv"
175
  tmp_name = None
@@ -192,29 +250,69 @@ def calculate_metrics(
192
  results_dataframe: pd.DataFrame,
193
  test_dataframe: pd.DataFrame
194
  ):
 
 
 
195
 
196
- def metrics_per_ep(pred, true):
197
- mae = mean_absolute_error(true, pred)
198
- r2 = r2_score(true, pred)
199
- spr, _ = spearmanr(true, pred)
200
- ktau, _ = kendalltau(true, pred)
201
- return mae, r2, spr, ktau
 
 
202
 
203
- df_results = pd.DataFrame(columns=["endpoint", "MAE", "R2", "Spearman R", "Kendall's Tau"])
204
  for i, measurement in enumerate(ENDPOINTS):
205
- df_pred = results_dataframe[['Molecule Name', measurement]].dropna()
206
- df_true = test_dataframe[['Molecule Name', measurement]].dropna()
207
- # Make sure both have the same order
208
- pred = df_pred.sort_values(by='Molecule Name')[measurement]
209
- true = df_true.sort_values(by='Molecule Name')[measurement]
210
- mae, r2, spearman, ktau = metrics_per_ep(pred, true)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  df_results.loc[i, 'endpoint'] = measurement
212
  df_results.loc[i, 'MAE'] = mae
 
213
  df_results.loc[i, 'R2'] = r2
214
  df_results.loc[i, 'Spearman R'] = spearman
215
  df_results.loc[i, "Kendall's Tau"] = ktau
 
216
 
217
- num_cols = ["MAE", "R2", "Spearman R", "Kendall's Tau"]
 
218
  df_results[num_cols] = df_results[num_cols].apply(pd.to_numeric, errors="coerce")
219
  means = df_results[num_cols].mean()
220
  avg_row = {"endpoint": "Average", **means.to_dict()}
 
1
  import gradio as gr
2
  import pandas as pd
3
  from pathlib import Path
 
 
4
  from typing import Optional
5
  from about import ENDPOINTS, API, submissions_repo, results_repo, test_repo
6
+ from utils import metrics_per_ep
7
  from huggingface_hub import hf_hub_download
8
  import datetime
9
  import io
10
  import json, tempfile
11
+ import re
12
+ from pydantic import (
13
+ BaseModel,
14
+ Field,
15
+ model_validator,
16
+ field_validator,
17
+ ValidationError
18
+ )
19
 
20
+ HF_USERNAME_RE = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9-_]{1,38})$")
21
+ def _safeify_username(username: str) -> str:
22
+ return str(username.strip()).replace("/", "_").replace(" ", "_")
23
+
24
+ def _unsafify_username(username: str) -> str:
25
+ return str(username.strip()).replace("/", "_").replace(" ", "_")
26
+
27
+ def _check_required_columns(df: pd.DataFrame, name: str, cols: list[str]):
28
+ missing = [c for c in cols if c not in df.columns]
29
+ if missing:
30
+ raise ValueError(f"{name} is missing required columns: {missing}")
31
 
32
+ class ParticipantRecord(BaseModel):
33
+ hf_username: str = Field(description="Hugging Face username")
34
+ display_name: Optional[str] = Field(description="Name to display on leaderboard")
35
+ participant_name: Optional[str] = Field(default=None, description="Participant's real name")
36
+ discord_username: Optional[str] = Field(default=None, description="Discord username")
37
+ email: Optional[str] = Field(default=None, description="Email address")
38
+ affiliation: Optional[str] = Field(default=None, description="Affiliation")
39
+ model_tag: Optional[str] = Field(default=None, description="Link to model description")
40
+ anonymous: bool = Field(default=False, description="Whether to display username as 'anonymous'")
41
+ consent_publication: bool = Field(default=False, description="Consent to be included in publications")
42
 
43
+ @field_validator("hf_username")
44
+ @classmethod
45
+ def validate_hf_username(cls, v: str) -> str:
46
+ v = v.strip()
47
+ if not HF_USERNAME_RE.match(v):
48
+ raise gr.Error("Invalid Hugging Face username (letters, numbers, -, _; min 2, max ~39).")
49
+ return v
50
+
51
+ @field_validator("display_name")
52
+ @classmethod
53
+ def validate_display_name(cls, v: Optional[str]) -> Optional[str]:
54
+ if v is None:
55
+ return None
56
+ v = v.strip()
57
+ if not v:
58
+ return None
59
+ if len(v) > 20:
60
+ raise ValueError("Display name is too long (max 20 chars).")
61
+ return v
62
+
63
+ @field_validator("model_tag", mode="before")
64
+ @classmethod
65
+ def normalize_url(cls, v):
66
+ if v is None:
67
+ return v
68
+ s = str(v).strip()
69
+ if not s:
70
+ return None
71
+ if "://" not in s:
72
+ s = "https://" + s
73
+ return s
74
 
75
+ @model_validator(mode="after")
76
+ def require_display_name_if_anonymous(self) -> "ParticipantRecord":
77
+ if self.anonymous and not self.display_name:
78
+ raise ValueError("Alias is required when anonymous box is checked.")
79
+ return self
80
+
81
+ class SubmissionMetadata(BaseModel):
82
  submission_time_utc: str
83
  user: str
84
  original_filename: str
 
86
  participant: ParticipantRecord
87
 
88
 
 
 
 
 
 
 
89
  def submit_data(predictions_file: str,
90
  user_state,
91
  participant_name: str = "",
 
93
  email: str = "",
94
  affiliation: str = "",
95
  model_tag: str = "",
96
+ user_display: str = "",
97
+ anon_checkbox: bool = False,
98
+ paper_checkbox: bool = False
99
  ):
100
 
101
  if user_state is None:
102
  raise gr.Error("Username or alias is required for submission.")
103
 
104
  file_path = Path(predictions_file).resolve()
 
105
  if not file_path.exists():
106
  raise gr.Error("Uploaded file object does not have a valid file path.")
107
 
 
113
 
114
  if results_df.empty:
115
  return gr.Error("The uploaded file is empty.")
 
 
116
 
117
+ missing = set(ENDPOINTS) - set(results_df.columns)
118
+ if missing:
119
+ return gr.Error(f"The uploaded file must contain all endpoint predictions {ENDPOINTS} as columns.")
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
+ # Save participant record
122
  try:
 
123
  participant_record = ParticipantRecord(
124
  hf_username=user_state,
125
  participant_name=participant_name,
 
127
  email=email,
128
  affiliation=affiliation,
129
  model_tag=model_tag,
130
+ display_name=user_display,
131
+ anonymous=anon_checkbox,
132
+ consent_publication=paper_checkbox
133
  )
134
+ except ValidationError as e:
135
  return f"❌ Error in participant information: {str(e)}"
136
+
137
+ # Build destination filename in the dataset
138
+ ts = datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds") # should keep default time so can be deserialized correctly
139
  try:
140
  meta = SubmissionMetadata(
141
  submission_time_utc=ts,
 
144
  evaluated=False,
145
  participant=participant_record
146
  )
147
+ except ValidationError as e:
148
+ return f"❌ Error in metadata information: {str(e)}"
149
 
150
+ safe_user = _safeify_username(user_state)
151
+ destination_csv = f"submissions/{safe_user}_{ts}.csv"
152
+ destination_json = destination_csv.replace(".csv", ".json")
153
 
154
+ # Upload the CSV file
155
+ API.upload_file(
156
+ path_or_fileobj=str(file_path),
157
+ path_in_repo=destination_csv,
158
+ repo_id=submissions_repo,
159
+ repo_type="dataset",
160
+ commit_message=f"Add submission for {safe_user} at {ts}"
161
+ )
162
+ # Upload the metadata JSON file
163
+ meta_bytes = io.BytesIO(json.dumps(meta.model_dump(), indent=2).encode("utf-8"))
164
  API.upload_file(
165
  path_or_fileobj=meta_bytes,
166
  path_in_repo=destination_json,
 
188
  test_path = hf_hub_download(
189
  repo_id=test_repo,
190
  repo_type="dataset",
191
+ filename="data/challenge_mock_test_set.csv", #Replace later with "test_dataset.csv",
192
  )
193
  except Exception as e:
194
  raise gr.Error(f"Failed to download test file: {e}")
 
216
  username = meta.participant.hf_username
217
  timestamp = meta.submission_time_utc
218
  report = meta.participant.model_tag
219
+ if meta.participant.anonymous:
220
+ display_name = meta.participant.display_name
221
+ else:
222
+ display_name = username
223
  except Exception as e:
224
  raise gr.Error(f"Failed to load metadata file: {e}. No results written to results dataset.")
225
 
226
  # Write results to results dataset
227
+ results_df['user'] = display_name
228
  results_df['submission_time'] = timestamp
229
  results_df['model_report'] = report
230
+ results_df['anonymous'] = meta.participant.anonymous
231
  safe_user = _unsafify_username(username)
232
  destination_path = f"results/{safe_user}_{timestamp}_results.csv"
233
  tmp_name = None
 
250
  results_dataframe: pd.DataFrame,
251
  test_dataframe: pd.DataFrame
252
  ):
253
+ import numpy as np
254
+
255
+ # Do some checks
256
 
257
+ # 1) Check all columns are present
258
+ _check_required_columns(results_dataframe, "Results file", ["Molecule Name"] + ENDPOINTS)
259
+ _check_required_columns(test_dataframe, "Test file", ["Molecule Name"] + ENDPOINTS)
260
+ # 2) Check all Molecules in the test set are present in the predictions
261
+ merged_df = pd.merge(test_dataframe, results_dataframe, on=['Molecule Name'], how='left', indicator=True)
262
+ if not (merged_df['_merge'] == 'both').all():
263
+ raise gr.Error("The predictions file is missing some molecules present in the test set. Please ensure all molecules are included.")
264
+ # TODO: What to do when a molecule is duplicated in the Predictions file?
265
 
266
+ df_results = pd.DataFrame(columns=["endpoint", "MAE", "RAE", "R2", "Spearman R", "Kendall's Tau"])
267
  for i, measurement in enumerate(ENDPOINTS):
268
+ df_pred = results_dataframe[['Molecule Name', measurement]].copy()
269
+ df_true = test_dataframe[['Molecule Name', measurement]].copy()
270
+ # coerce numeric columns
271
+ df_pred[measurement] = pd.to_numeric(df_pred[measurement], errors="coerce")
272
+ df_true[measurement] = pd.to_numeric(df_true[measurement], errors="coerce")
273
+
274
+ if df_pred[measurement].isnull().all():
275
+ # TODO: Allow missing endpoints or raise an error?
276
+ raise gr.Error(f"All predictions are missing for endpoint {measurement}. Please provide valid predictions.")
277
+
278
+ # Drop NaNs and calculate coverage
279
+ merged = (
280
+ df_pred.rename(columns={measurement: f"{measurement}_pred"})
281
+ .merge(
282
+ df_true.rename(columns={measurement: f"{measurement}_true"}),
283
+ on="Molecule Name",
284
+ how="inner",
285
+ )
286
+ .dropna(subset=[f"{measurement}_pred", f"{measurement}_true"])
287
+ )
288
+ n_total = merged[f"{measurement}_true"].notna().sum() # Valid test set points
289
+ n_pairs = len(merged) # actual pairs with predictions
290
+ coverage = (n_pairs / n_total * 100.0) if n_total else 0.0
291
+ merged = merged.sort_values("Molecule Name", kind="stable")
292
+
293
+ # validate pairs
294
+ if n_pairs < 10:
295
+ mae = rae = r2 = spearman = ktau = np.nan
296
+ else:
297
+ y_pred = merged[f"{measurement}_pred"].to_numpy()
298
+ y_true = merged[f"{measurement}_true"].to_numpy()
299
+ # Force log scale for all endpoints except LogD (for outliers)
300
+ if measurement != "LogD":
301
+ y_pred = np.log10(y_pred)
302
+ y_true = np.log10(y_true)
303
+ mae, rae, r2, spearman, ktau = metrics_per_ep(y_pred, y_true)
304
+
305
+
306
  df_results.loc[i, 'endpoint'] = measurement
307
  df_results.loc[i, 'MAE'] = mae
308
+ df_results.loc[i, 'RAE'] = rae
309
  df_results.loc[i, 'R2'] = r2
310
  df_results.loc[i, 'Spearman R'] = spearman
311
  df_results.loc[i, "Kendall's Tau"] = ktau
312
+ df_results.loc[i, 'data coverage (%)'] = coverage
313
 
314
+ # Average results
315
+ num_cols = ["MAE", "RAE", "R2", "Spearman R", "Kendall's Tau", "data coverage (%)"]
316
  df_results[num_cols] = df_results[num_cols].apply(pd.to_numeric, errors="coerce")
317
  means = df_results[num_cols].mean()
318
  avg_row = {"endpoint": "Average", **means.to_dict()}
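
The evaluation criteria above mention bootstrapped error bars on the metrics, which this commit does not yet implement; a minimal sketch of what that could look like for one endpoint's MAE, assuming the paired y_true/y_pred arrays produced in calculate_metrics:

import numpy as np

def bootstrap_mae_ci(y_true, y_pred, n_boot=1000, alpha=0.05, seed=0):
    # Resample prediction/measurement pairs with replacement and return
    # a (1 - alpha) percentile confidence interval for the MAE.
    rng = np.random.default_rng(seed)
    n = len(y_true)
    maes = np.empty(n_boot)
    for b in range(n_boot):
        idx = rng.integers(0, n, size=n)
        maes[b] = np.mean(np.abs(y_true[idx] - y_pred[idx]))
    return np.quantile(maes, [alpha / 2, 1 - alpha / 2])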
utils.py CHANGED
@@ -1,5 +1,6 @@
 
 import pandas as pd
+import numpy as np
 from datasets import load_dataset
 from about import results_repo
 from about import LB_COLS0
@@ -31,3 +32,17 @@ def fetch_dataset_df():
    )
    latest.rename(columns={"submission_time": "submission time"}, inplace=True)
    return latest
+
+def metrics_per_ep(pred, true):
+    from scipy.stats import spearmanr, kendalltau
+    from sklearn.metrics import mean_absolute_error, r2_score
+    mae = mean_absolute_error(true, pred)
+    rae = mae / np.mean(np.abs(true - np.mean(true)))  # MAE relative to an always-predict-the-mean baseline
+    if np.nanstd(true) == 0:
+        r2 = np.nan
+    else:
+        r2 = r2_score(true, pred)
+    spr, _ = spearmanr(true, pred)
+    ktau, _ = kendalltau(true, pred)
+
+    return mae, rae, r2, spr, ktau
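
The RAE added here divides the model's MAE by the MAE of a constant predictor that always outputs the mean of the true values, so RAE < 1 means the model beats that baseline, and the overall leaderboard's MA-RAE is this ratio averaged over endpoints. A quick sanity check (illustrative, not part of the commit): the mean predictor itself lands at exactly 1.0.

import numpy as np
from utils import metrics_per_ep

true = np.array([1.0, 2.0, 3.0, 4.0])
mean_pred = np.full_like(true, true.mean())  # baseline: always predict the mean
mae, rae, r2, spr, ktau = metrics_per_ep(mean_pred, true)
print(mae, rae)  # 1.0 1.0 -> RAE of the mean predictor is 1 by construction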