|
|
import gradio as gr |
|
|
from gradio_leaderboard import Leaderboard |
|
|
from gradio.themes.utils import sizes |
|
|
import pandas as pd |
|
|
|
|
|
from evaluate import submit_data, evaluate_data |
|
|
from utils import ( |
|
|
make_tag_clickable, |
|
|
make_user_clickable, |
|
|
fetch_dataset_df, |
|
|
map_metric_to_stats, |
|
|
) |
|
|
from datasets import load_dataset |
|
|
import tempfile |
|
|
from loguru import logger |
|
|
from about import ENDPOINTS, LB_COLS, LB_AVG, LB_DTYPES |
|
|
import time |
|
|
import threading |
|
|
|
|
|
|
|
|
|
|
|
# Keys of every leaderboard tab: the aggregate "Average" board comes first,
# followed by one board per challenge endpoint.
ALL_EPS = ["Average", *ENDPOINTS]
|
|
|
|
|
def build_leaderboard(df_results):
    """Split the raw results table into one display-ready frame per endpoint.

    Args:
        df_results: DataFrame of evaluated submissions. The columns read here
            are "Endpoint", "user", "anonymous" and "model_report", plus
            "mean_RAE"/"std_RAE" for the "Average" rows and "mean_MAE" for
            every other endpoint. ``None`` is treated as "no results yet".

    Returns:
        A dict mapping every name in ``ALL_EPS`` to a DataFrame. The
        "Average" frame is restricted to ``LB_AVG`` and sorted by
        "mean_MA-RAE"; all other frames are restricted to ``LB_COLS`` and
        sorted by "mean_MAE" (both ascending, stable). Endpoints without rows
        get an empty frame with the matching columns.
    """
    logger.info("Rebuilding leaderboard data...")

    def empty_board(ep):
        # The empty frame must expose the same columns as the populated one,
        # otherwise the "Average" tab is handed per-endpoint columns.
        return pd.DataFrame(columns=LB_AVG if ep == "Average" else LB_COLS)

    if df_results is None:
        logger.warning("[refresh] results table is None; using empty DFs")
        return {ep: empty_board(ep) for ep in ALL_EPS}

    per_ep = {}
    for ep in ALL_EPS:
        df = df_results[df_results["Endpoint"] == ep].copy()
        if df.empty:
            per_ep[ep] = empty_board(ep)
            continue

        # Anonymous entries keep their plain alias; everyone else gets a link.
        df['user'] = df.apply(
            lambda row: make_user_clickable(row['user']) if not row['anonymous'] else row['user'],
            axis=1).astype(str)
        df['model details'] = df['model_report'].apply(make_tag_clickable).astype(str)

        if ep == "Average":
            # The aggregate board reports the macro-averaged RAE, so the raw
            # columns are renamed before sorting and formatting.
            df = df.rename(columns={"mean_RAE": "mean_MA-RAE",
                                    "std_RAE": "std_MA-RAE"})
            sorted_df = df.sort_values(by='mean_MA-RAE', ascending=True, kind="stable")
            sorted_df = map_metric_to_stats(sorted_df, average=True)
            per_ep[ep] = sorted_df[LB_AVG]
        else:
            sorted_df = df.sort_values(by="mean_MAE", ascending=True, kind="stable")
            sorted_df = map_metric_to_stats(sorted_df)
            per_ep[ep] = sorted_df[LB_COLS]

    logger.info("Finished rebuilding leaderboard data.")
    return per_ep
|
|
|
|
|
|
|
|
# Latest snapshot of the results dataset. It is fetched once at import time so
# the first render has data, then rebound by the background refresher below.
current_df = fetch_dataset_df()

# Module-level counter, initialised to 0; nothing in this file updates it.
data_version_counter = 0


def update_current_dataframe():
    """Poll the results dataset forever and publish it as ``current_df``.

    Intended as the target of a daemon thread: it never returns. Every 30
    seconds it calls ``fetch_dataset_df()`` and rebinds the module-level
    ``current_df``. A failed fetch is logged and the previous snapshot is
    kept, so one transient error does not end the refresh loop.
    """
    global current_df
    while True:
        logger.info("Fetching latest dataset for leaderboard...")
        try:
            current_df = fetch_dataset_df()
        except Exception:
            # Top-level boundary of the worker thread: log with traceback and
            # retry on the next cycle instead of letting the thread die.
            logger.exception("Failed to fetch dataset; keeping previous leaderboard data")
        else:
            logger.debug("Dataset version updated")
        time.sleep(30)


threading.Thread(target=update_current_dataframe, daemon=True).start()
|
|
|
|
|
|
|
|
|
|
|
def gradio_interface():
    """Build the Gradio Blocks app for the OpenADMET + ExpansionRx challenge.

    Layout: a banner row (intro text and logo) followed by three tabs:
    About (static markdown), Leaderboard (one board for "Average" plus one
    per entry of ``ENDPOINTS``) and Submit (participant form and CSV upload).

    Refresh: a ``gr.Timer(30)`` increments a ``gr.State`` counter; every change
    of that counter rebuilds all boards from the module-level ``current_df``.

    Submission: the submit button calls ``submit_data``; on success the status
    box is made visible and ``evaluate_data`` runs on the returned filename.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    # NOTE(review): several labels/headings below still start with a
    # mis-encoded emoji (shown as "π" or "βοΈ"); the intended glyph cannot be
    # recovered from this file - restore them from the original UTF-8 source.
    # NOTE(review): the training-data links are inconsistent
    # ("openadmet-challenge-train-data" vs
    # "openadmet-expansionrx-challenge-train-data") - confirm which one is live.
    # NOTE(review): the CDD Vault contact address reads "[email protected]",
    # which looks like a scrubbed e-mail - confirm the real address.
    with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
                   theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
        timer = gr.Timer(30)
        data_version = gr.State(0)

        def increment_data_version(current_version):
            """Return the next data-version number (used as a refresh trigger)."""
            return current_version + 1

        timer.tick(fn=increment_data_version, inputs=[data_version], outputs=data_version)

        with gr.Row():
            with gr.Column(scale=7):
                gr.Markdown("""
                ## Welcome to the OpenADMET + ExpansionRx Blind Challenge!
                Your task is to develop and submit predictive models for key ADMET properties on a blinded test set of real world drug discovery data 🧑‍🔬

                Go to the **Leaderboard** to check out how the challenge is going.
                To participate, head out to the **Submit** tab and upload your results as a `CSV` file.

                """
                )
            with gr.Column(scale=2):
                gr.Image(
                    value="./_static/challenge_logo.png",
                    show_label=False,
                    show_download_button=False,
                    width="5vw",
                )

        welcome_md = """
        # π OpenADMET + ExpansionRx
        ## Computational Blind Challenge in ADMET

        This challenge is a community-driven initiative to benchmark predictive models for ADMET properties in drug discovery,
        hosted by **OpenADMET** in collaboration with **ExpansionRx**.


        ## Why are ADMET properties important in drug discovery?
        Small molecules continue to be the bricks and mortar of drug discovery globally, accounting for ~75% of FDA approvals over the last decade.
        Oral bioavailability, easily tunable properties, modulation of a wide range of mechanisms, and ease of manufacturing make small molecules highly attractive as therapeutic agents.
        Moreover, emerging small molecule modalities such as degraders, expression modulators, molecular glues, and antibody-drug conjugates (to name a few) have vastly expanded what we thought small molecules were capable of.

        It is fairly difficult to predict the lifetime and distribution of small molecules within the body. Additionally,
        interaction with off-targets can cause safety issues and toxicity. Collectively these *Absorption*, *Distribution*, *Metabolism*, *Excretion*, *Toxicology*--or **ADMET**--properties
        sit in the middle of the assay cascade and can make or break preclinical candidate molecules.

        **OpenADMET** aims to address these challenges through an open science effort to build predictive models of ADMET properties by characterizing the proteins and mechanisms
        that give rise to these properties through integrated structural biology, high throughput experimentation and integrative computational models.
        Read more about our strategy to transform drug discovery on our [website](https://openadmet.ghost.io/what-is-openadmet/).

        Critical to our mission is developing open datasets and running community blind challenges to assess the current state of the art in ADMET modeling.
        Building on the success of the recent [ASAP-Polaris-OpenADMET blind challenge](https://chemrxiv.org/engage/chemrxiv/article-details/68ac00d1728bf9025e22fe45) in computational methods for drug discovery,
        we bring you a brand new challenge in collaboration with **ExpansionRx**. During a recent series of drug discovery campaigns for RNA mediated diseases,
        ExpansionRX collected a variety of ADMET data for off-targets and properties of interest, which they are generously sharing with the community for this challenge.

        ## 🧪 The Challenge

        Participants will be tasked with solving real-world ADMET prediction problems ExpansionRx faced during lead optimization.
        Specifically, you will be asked to predict the ADMET properties of late-stage molecules based on earlier-stage data from the same campaigns.
        For this challenge we selected nine (9) crucial endpoints for the community to predict:

        - LogD
        - Kinetic Solubility **KSOL**: uM
        - Mouse Liver Microsomal (**MLM**) *CLint*: mL/min/kg
        - Human Liver Microsomal (**HLM**) *Clint*: mL/min/kg
        - Caco-2 Efflux Ratio
        - Caco-2 Papp A>B (10^-6 cm/s)
        - Mouse Plasma Protein Binding (**MPPB**): % Unbound
        - Mouse Brain Protein Binding (**MBPB**): % Unbound
        - Mouse Gastrocnemius Muscle Binding (**MGMB**): % Unbound

        Find more information about these endpoints on our [blog](https://openadmet.ghost.io/openadmet-expansionrx-blind-challenge/).

        **UPDATE:** The Challenge is now live! Data available at the following Hugging Face Datasets
        Training: https://huggingface.co/datasets/openadmet/openadmet-expansionrx-challenge-train-data
        Test: https://huggingface.co/datasets/openadmet/openadmet-expansionrx-challenge-test-data-blinded

        You can also watch a [Webinar](https://www.youtube.com/watch?v=9v0Ej_FL6k0) introducing the challenge run with [Collaborative Drug Discovery](https://www.collaborativedrug.com/).
        We also have a [form](https://forms.gle/KiviZ7AaGcuqtrwH8) you can fill out for access to a CDD vault containing the challenge data and access to some other tools.

        ## ✅ How to Participate
        1. **Register**: Create an account with Hugging Face.
        2. **Walk through the tutorials**: We have prepared a [Tutorial](https://github.com/OpenADMET/ExpansionRx-Challenge-Tutorial/blob/main/expansion_tutorial.ipynb) showing how to train a model and submit to the leaderboard.
        3. **Download the Public Dataset**: Download the ExpansionRx [training](https://huggingface.co/datasets/openadmet/openadmet-expansionrx-challenge-train-data) and [blinded test](https://huggingface.co/datasets/openadmet/openadmet-expansionrx-challenge-test-data-blinded) sets from Hugging Face.
        4. **Train Your Model**: Use the provided training data for each ADMET property of your choice.
        5. **Submit Predictions**: Follow the instructions in the *Submit* tab to upload your predictions.
        6. Join the discussion on the [Challenge Discord](https://discord.gg/MY5cEFHH3D)!

        ## π Data:

        The training set contains the following parameters:

        | Column | Unit | Type | Description |
        |:---------------------------- |:----------: |:--------: |:----------------------------------------------|
        | Molecule Name | | str | Identifier for the molecule |
        | Smiles | | str | Text representation of the 2D molecular structure |
        | LogD | | float | LogD |
        | KSol | uM | float | Kinetic Solubility |
        | MLM CLint | mL/min/kg | float | Mouse Liver Microsomal |
        | HLM CLint | mL/min/kg | float | Human Liver Microsomal |
        | Caco-2 Permeability Efflux | | float | Caco-2 Permeability Efflux Ratio |
        | Caco-2 Permeability Papp A>B | 10^-6 cm/s | float | Caco-2 Permeability Papp A>B |
        | MPPB | % Unbound | float | Mouse Plasma Protein Binding |
        | MBPB | % Unbound | float | Mouse Brain Protein Binding |
        | MGMB | % Unbound | float | Mouse Gastrocnemius Muscle Binding |

        You can download the training data from the [Hugging Face dataset](https://huggingface.co/datasets/openadmet/openadmet-challenge-train-data).
        The test set will remain blinded until the challenge submission deadline. You will be tasked with predicting the same set of ADMET endpoints for the test set molecules.

        The training and blinded test set will also be made available on the [CDD Vault](https://www.collaborativedrug.com/). An account to access the CDD Vault can be requested by emailing **[email protected]**.
        Note that by joining the Vault, your account will be visible to other participants, so this option is **not recommended for those wishing to remain anonymous.**

        ## π Evaluation
        The challenge will be judged based on the following criteria:
        - We welcome submissions of any kind, including machine learning and physics-based approaches. You can also employ pre-training approaches as you see fit,
        as well as incorporate data from external sources into your models and submissions.
        - In the spirit of open science and open source we would love to see code showing how you created your submission if possible, in the form of a Github Repository.
        If not possible due to IP or other constraints you must at a minimum provide a short report written methodology based on the template [here](https://docs.google.com/document/d/1bttGiBQcLiSXFngmzUdEqVchzPhj-hcYLtYMszaOqP8/edit?usp=sharing).
        **Make sure your last submission before the deadline includes a link to a report or to a Github repository.**
        - Each participant can submit as many times as they like, up to a limit of once per day. **Only your latest submission will be considered for the final leaderboard.**
        - The endpoints will be judged individually by mean absolute error (**MAE**), while an overall leaderboard will be judged by the macro-averaged relative absolute error (**MA-RAE**).
        - For endpoints that are not already on a log scale (e.g. LogD) they will be transformed to log scale to minimize the impact of outliers on evaluation.
        - We will estimate errors on the metrics using bootstrapping and use the statistical testing workflow outlined in [this paper](https://chemrxiv.org/engage/chemrxiv/article-details/672a91bd7be152b1d01a926b) to determine if model performance is statistically distinct.

        📅 **Timeline**:
        - **September 16:** Challenge announcement
        - **October 14:** Second announcement and sample data release
        - **October 27:** Challenge starts
        - **October-November:** Online Q&A sessions and support via the Discord channel
        - **January 19, 2026:** Submission closes
        - **January 26, 2026:** Winners announced

        ## Acknowledgements
        We gratefully acknowledge Jon Ainsley, Andrew Good, Elyse Bourque, Lakshminarayana Vogeti, Renato Skerlj, Tiansheng Wang, and Mark Ledeboer for generously
        providing the Expansion Therapeutics dataset used in this challenge as an in-kind contribution.

        ---

        """

        # Page-wide CSS: bolds the "Overall" leaderboard tab label and styles
        # the data table rendered inside the About markdown (#welcome-md).
        gr.HTML("""
        <style>
        /* bold only the "Overall" tab label */
        #lb_subtabs [role="tab"][aria-controls="all_tab"] {
            font-weight: 700 !important;
        }
        </style>
        <style>
        #welcome-md table {
            width: 60%;
            border-collapse: collapse;
            font-size: 0.95rem;
            line-height: 1.2;
        }
        #welcome-md th, #welcome-md td {
            padding: 6px 10px;
            border: 1px solid rgba(0,0,0,0.9);
            vertical-align: middle;
        }
        #welcome-md thead th {
            background: var(--panel-background-fill, #f5f5f7);
            font-weight: 1000;
        }
        /* Header shading */
        #welcome-md thead th:nth-child(2),
        #welcome-md thead th:nth-child(3) {
            text-align: center;
        }
        /* Zebra striping */
        #welcome-md tbody tr:nth-child(odd) { background: rgba(0,0,0,0.03); }
        #welcome-md tbody tr:hover { background: rgba(0,0,0,0.06); }
        /* Align columns */
        #welcome-md td:nth-child(2),
        #welcome-md td:nth-child(3) { text-align: center; white-space: nowrap; }
        </style>
        """)

        with gr.Tabs(elem_classes="tab-buttons"):
            lboard_dict = {}

            with gr.TabItem("π About"):
                gr.Markdown(welcome_md, elem_id="welcome-md")

            with gr.TabItem("π Leaderboard", elem_id="lb_subtabs"):
                gr.Markdown("""
                View the leaderboard for each ADMET endpoint by selecting the appropriate tab.

                """)

                # Build the per-endpoint frames once and share them across all
                # tabs, instead of recomputing the whole table for every tab.
                initial_boards = build_leaderboard(current_df)

                with gr.TabItem('OVERALL', elem_id="all_tab"):
                    lboard_dict['Average'] = Leaderboard(
                        value=initial_boards['Average'],
                        datatype=LB_DTYPES,
                        select_columns=LB_AVG,
                        search_columns=["user"],
                        render=True,
                        every=30,
                    )

                for endpoint in ENDPOINTS:
                    with gr.TabItem(endpoint):
                        lboard_dict[endpoint] = Leaderboard(
                            value=initial_boards[endpoint],
                            datatype=LB_DTYPES,
                            select_columns=LB_COLS,
                            search_columns=["user"],
                            render=True,
                            every=30,
                        )

                def refresh_if_changed():
                    """Rebuild every board from the latest ``current_df`` snapshot."""
                    per_ep = build_leaderboard(current_df)
                    # Order must match the ``outputs`` list wired up below.
                    return [per_ep[ep] for ep in ALL_EPS]

                data_version.change(fn=refresh_if_changed, outputs=[lboard_dict[ep] for ep in ALL_EPS])

            with gr.TabItem("βοΈ Submit"):
                gr.Markdown(
                    """
                    # ADMET Endpoints Submission
                    Upload your prediction files here as a csv file.
                    """
                )
                filename = gr.State(value=None)
                eval_state = gr.State(value=None)
                user_state = gr.State(value=None)

                with gr.Row():

                    with gr.Column():
                        gr.Markdown(
                            """
                            ## Participant Information
                            To participate, **we require a Hugging Face username**, which will be used to track multiple submissions.
                            Your username will be displayed on the leaderboard, unless you check the *anonymous* box. If you want to remain anonymous, please provide an alias to be used for the leaderboard (we'll keep the username hidden).

                            If you wish to be included in Challenge discussions, please provide your Discord username and email.
                            If you wish to be included in a future publication with the Challenge results, please provide your name and affiliation (and check the box below).

                            We also ask you to provide a link to a report describing your method. While not mandatory at the time of participation,
                            you need to submit the link before the challenge deadline in order to be considered for the final leaderboard.

                            """
                        )

                        username_input = gr.Textbox(
                            label="Username",
                            placeholder="Enter your Hugging Face username",

                        )
                        user_alias = gr.Textbox(
                            label="Optional Alias",
                            placeholder="Enter an identifying alias for the leaderboard if you wish to remain anonymous",

                        )
                        anon_checkbox = gr.Checkbox(
                            label="I want to submit anonymously",
                            info="If checked, your username will be replaced with the given *alias* on the leaderboard.",
                            value=False,
                        )
                    with gr.Column():

                        participant_name = gr.Textbox(
                            label="Participant Name",
                            placeholder="Enter your name (optional)",
                            info="This will not be displayed on the leaderboard but will be used for tracking participation."
                        )
                        discord_username = gr.Textbox(
                            label="Discord Username",
                            placeholder="Enter your Discord username (optional)",
                            info="Enter the username you will use for the Discord channel (if you are planning to engage in the discussion)."
                        )
                        email = gr.Textbox(
                            label="Email",
                            placeholder="Enter your email (optional)",
                        )
                        affiliation = gr.Textbox(
                            label="Affiliation",
                            placeholder="Enter your school/company affiliation (optional)",
                        )
                        model_tag = gr.Textbox(
                            label="Model Report",
                            placeholder="Link to a report describing your method (optional)",
                        )
                        paper_checkbox = gr.Checkbox(
                            label="I want to be included in a future publication detailing the Challenge results",
                            value=False,
                        )

                with gr.Row():
                    with gr.Column():
                        gr.Markdown(
                            """
                            ## Submission Instructions
                            After training your model with the [ExpansionRx training set](https://huggingface.co/datasets/openadmet/openadmet-challenge-train-data),
                            please upload a single CSV file containing your predictions for all compounds in the test set.
                            Only your latest submission will be considered.

                            Download a CSV file with the compounds in the test set here:

                            **NOTE: Submission can sometimes take a few minutes to process**
                            **Please be patient and wait for the status message to update and your submission to reach the leaderboard.**
                            """
                        )

                        download_btn = gr.DownloadButton(
                            label="📥 Download Test Set Compounds",
                            value="./data/expansion_data_test_blinded.csv",
                            variant="secondary",
                        )
                    with gr.Column():
                        predictions_file = gr.File(label="Single file with ADMET predictions (.csv)",
                                                   file_types=[".csv"],
                                                   file_count="single",)

                # Mirror the username into state; blank or missing input is
                # stored as None rather than as an empty string.
                username_input.change(
                    fn=lambda x: x if x and x.strip() else None,
                    inputs=username_input,
                    outputs=user_state
                )

                submit_btn = gr.Button("📤 Submit Predictions")
                message = gr.Textbox(label="Status", lines=1, visible=False)

                # Chain: store the submission -> reveal the status message ->
                # evaluate the stored file. Each step runs only if the previous
                # one succeeded.
                submit_btn.click(
                    submit_data,
                    inputs=[predictions_file,
                            user_state,
                            participant_name,
                            discord_username,
                            email,
                            affiliation,
                            model_tag,
                            user_alias,
                            anon_checkbox,
                            paper_checkbox],
                    outputs=[message, filename],
                ).success(
                    fn=lambda m: gr.update(value=m, visible=True),
                    inputs=[message],
                    outputs=[message],
                ).success(
                    fn=evaluate_data,
                    inputs=[filename],
                    outputs=[eval_state]
                )
    return demo
|
|
|
|
|
# Script entry point: build the UI, serve it (blocking), and log both ends of
# the app's lifetime.
if __name__ == "__main__":
    logger.info("Starting Gradio app...")
    app = gradio_interface()
    app.launch(ssr_mode=False)
    logger.info("Gradio app closed.")